## Imports

In [1]:
import os, re, glob, json
from typing import List
from datetime import datetime

from scripts.ollama_handler import OllamaMediaAnalysis, OllamaHandler
from scripts.wordcloud_handler import WordCloudHandler
from scripts.document_handler import PdfDocument

from pypdf import PdfReader
import spacy
import dill as pickle
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import TfidfVectorizer

## Parameters

In [2]:
# Set model for ollama
# ollama.list() # to get all models
MODEL = "granite3.1-moe:3b-instruct-q8_0" #"granite3.1-moe" #"granite3.1-dense:8b-instruct-q8_0" #"granite3.1-dense:8b"
#SYSTEM_PROMPT = f"You are a senior researcher, working on a media analysis of articles published in arabic newspapers about ChatGPT and the effect of Artificial Intelligence on society. For your answers only focus on topics that were mentioned in the text without adding any further information. Before answering, thoroughly think about the task, the content provided and build your answer with chain of thought reasoning."
SYSTEM_PROMPT = (
    "You are a senior researcher conducting a media analysis of Arabic newspaper articles "
    "about ChatGPT and the societal effects of Artificial Intelligence. Your role is to focus "
    "exclusively on the topics mentioned in the provided text, without introducing external information. "
    "Before responding, carefully analyze the task, thoroughly evaluate the content of the articles, "
    "and construct your answer using a clear chain of thought reasoning approach."
)

# Settings
PROCESS_DOCUMENTS = True

# Load spacy model
SPACY_MODEL = "en_core_web_lg"
try:
    nlp = spacy.load(SPACY_MODEL)
except OSError:
    spacy.cli.download(SPACY_MODEL)
    nlp = spacy.load(SPACY_MODEL)


# Set folder paths
DOC_FOLDER = "docs"
PDF_FOLDER = os.path.join(DOC_FOLDER, "PDFs") # PDFs
OUTPUT_FOLDER = os.path.join(DOC_FOLDER, "Processed", MODEL)
PROCESSED_DOC_FILENAME = f"{datetime.now().strftime("%y%m%d")}-{MODEL}-processed_documents.pkl"

## Helper functions

In [3]:
def flatten_dict(d):
    """Recursively flatten a dictionary with nested lists."""
    flattened_dict = dict()
    for k, v in d.items():
        flat_list = []
        if isinstance(v, list):
            flat_list.extend(flatten_list(v))
        elif isinstance(v, dict):
            flat_list.extend(flatten_dict(v))
        
        flattened_dict[k] = flat_list
    return flattened_dict

def flatten_list(lst):
    """Recursively flatten a nested list."""
    flat_list = []
    for item in lst:
        if isinstance(item, list):
            flat_list.extend(flatten_list(item))
        else:
            flat_list.append(item)
    return flat_list

In [4]:
class PdfAnalyzer:
    def __init__(self, ollama_handler: OllamaMediaAnalysis, entity_collection = "all", output_folder: str = "", get_highlights: bool = False, questions: list[str] = None, debug=True, speed=False):
        self.ollama_handler = ollama_handler
        self.wordcloud = WordCloudHandler()
        self.nlp = nlp
        self.output_folder = output_folder
        self.entitiy_collection = entity_collection if entity_collection in ["all", "ollama", "spacy"] else "all"
        self.pdf_documents = []
        self.get_highlights = get_highlights
        self.questions = questions
        self.debug = debug
        self.speed = speed
        self.analysis = dict()
        
    def extract_text_from_pdf(self, pdf_path: str) -> str:
        """Iterates over all pages in the document and stores the text in instance."""

        reader = PdfReader(pdf_path)
        text = ""
        num_pages = reader.pages
        
        for page_count, page in enumerate(num_pages):
            text_current_page = page.extract_text()
            print(f"{datetime.now().strftime("%H:%M:%S")}\t Adding page {page_count}/{len(num_pages)} with {len(text_current_page)} characters")
            text += text_current_page
        return text
    
    def extract_entities(self, text: str) -> List[str]:
        """Extracts entities from text using spacy PERSON and ORG labels."""
        
        doc = self.nlp(text)
        entities = [ent.text for ent in doc.ents if ent.label_ in ["PERSON", "ORG"]]
        print(f"{datetime.now().strftime("%H:%M:%S")}\t Found {len(entities)} in text")
        return list(set(entities))
    
    def get_tokens(self, text):
        # Tokenize and remove stop words
        doc = self.nlp(text)
        tokens = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct and not token.is_currency and not token.is_digit and token.is_alpha]
        tokens = [token.title() for token in tokens if token.isupper() or token.capitalize]
        return ' '.join(tokens)
    
    def process_pdf(self, pdf_path: str) -> PdfDocument:
        """Main function that processes a single PDF document with it's subfunctions. Prints status updates."""
        
        content = self.extract_text_from_pdf(pdf_path)
        content = self.clean_input(content, line_breaks=False)

        title = os.path.splitext(os.path.basename(pdf_path))[0]
        title = title.split("_")[0].strip()
        
        # Initialize PdfDocument object
        print(f"{datetime.now().strftime("%H:%M:%S")}\t Create PDF document <{title[:20]}...> with content of length {len(content)}")
        pdf_doc = PdfDocument(pdf_path, content, title)
        
        # Generating tokenized content
        print(f"{datetime.now().strftime("%H:%M:%S")}\t Generating tokenized content")
        pdf_doc.content_tokens = self.get_tokens(content)
        
        # Generating short summary
        short_summary_response = self.ollama_handler.generate_short_summary(content)
        pdf_doc.short_summary = self.clean_input(short_summary_response)
        
        # Generate long summary        
        summary_response = self.ollama_handler.generate_summary(content)
        pdf_doc.summary = self.clean_input(summary_response, soft_clean=True)
        
        if self.debug:
            print(f"Types: summary={pdf_doc.summary}, short={pdf_doc.short_summary}")

        if not self.speed:
            # Get answers to questions
            print(f"{datetime.now().strftime("%H:%M:%S")}\t Finding answer to {len(self.questions)} question{"s" if len(self.questions) > 1 else ""}")
            for question in self.questions:
                question_response = self.ollama_handler.answer_question(content, question)
                question_response = self.clean_input(question_response)
                
                pdf_doc.answers[question] = question_response.get("answer")
                        
            # Get sentiment           
            sentiment_response = self.ollama_handler.analyze_sentiment(content) 
            pdf_doc.sentiment = sentiment_response.get("sentiment_value")
            
            # Get entities
            print(f"{datetime.now().strftime("%H:%M:%S")}\t Extracting entities from text")
            if self.entitiy_collection in ["all", "spacy"]:
                # Get entities with spacy
                entities_response = self.extract_entities(pdf_doc.content_tokens)
                pdf_doc.entities = self.clean_input(entities_response)
                
            # Get highlights
            if self.get_highlights:
                print(f"{datetime.now().strftime("%H:%M:%S")}\t Extracting text-highlights")
                pdf_doc.extract_highlighted_sentences()
            
            # Get topic clusters
            topics_response = self.ollama_handler.extract_topics(content)
            topic_clusters_response = self.ollama_handler.create_topic_clusters(topics_response)
            
            pdf_doc.topic_clusters = topic_clusters_response
        
        # Process wordclouds
        pdf_doc = self.create_wordcloud(pdf_doc, wordcloud_names=["highlights", "content", "summary"])
        
        return pdf_doc
    
    def create_wordcloud(self, pdf_doc, wordcloud_names: list):
        for wordcloud_name in wordcloud_names:
            path = os.path.join(self.output_folder, pdf_doc.filename)

            if wordcloud_name == "highlights":
                sentences = pdf_doc.highlighted_sentences
            else:
                content = pdf_doc.__dict__.get(wordcloud_name)
                sentences = content.split(".")

            new_wordcloud_data = self.wordcloud.process_wordcloud(input=sentences, path=path, wordcloud_name=wordcloud_name)
            pdf_doc.wordcloud_data.update(new_wordcloud_data)
        
        return pdf_doc
        
    
    def process_folder(self, PDF_FOLDER: str) -> List[PdfDocument]:
        """Iterates over all PDF files in the folder and processes them."""
        pdf_documents = []
        for filename in os.listdir(PDF_FOLDER):
            if filename.endswith(".pdf"):
                print(f"{datetime.now().strftime("%H:%M:%S")} Analyzing file from folder: {filename}")
                pdf_path = os.path.join(PDF_FOLDER, filename)
                pdf_doc = self.process_pdf(pdf_path)
                pdf_documents.append(pdf_doc)
                
                print("Temporarily storing documents")
                self.save_documents(self.pdf_documents)
                
        return pdf_documents
    
    def clean_input(self, input, soft_clean=False, line_breaks=True):
        if isinstance(input, str):
            # Apply the cleaning steps for strings
            
            # Replace ’ with '
            input = input.replace("’", "'")
            
            # Remove line breaks if line_breaks is True and not soft_clean
            if line_breaks and not soft_clean:
                input = input.replace("\n", " ")
            
            # Remove non-ascii characters if not soft_clean
            if not soft_clean:
                input = input.encode("ascii", "ignore").decode()
            
            # Remove all special characters except "-" if not soft_clean
            if not soft_clean:
                input = re.sub(r"[^a-zA-Z0-9.,*' -]", " ", input)
            
            # Remove all double spaces
            input = re.sub(r"  +", " ", input)
            
            # Remove leading and trailing whitespaces
            input = input.strip()
            
            return input
        
        elif isinstance(input, list):
            # If input is a list, clean each element recursively
            return [self.clean_input(item, soft_clean, line_breaks) for item in input]
        
        elif isinstance(input, dict):
            # If input is a dictionary, clean each value recursively
            return {key: self.clean_input(value, soft_clean, line_breaks) for key, value in input.items()}
        
        else:
            # Return the input unchanged if it is not a string, list, or dictionary
            return input
    
    def save_documents(self, documents: List[PdfDocument]):
        """Saves the processed documents to a pickle file."""
        
        path = os.path.join(self.output_folder, PROCESSED_DOC_FILENAME)
        
        if not os.path.exists(self.output_folder):
            os.makedirs(self.output_folder)
        
        with open(path, "wb") as f:
            pickle.dump([doc.to_dict() for doc in documents], f)
    
    def load_documents(self, input_path: str, load_latest=False) -> List[PdfDocument]:
        """Loads the processed documents from a pickle file"""
        
        if os.path.exists(input_path):
            # Open the provided file
            with open(input_path, "rb") as f:
                data = pickle.load(f)
                
        # Load latest file for the model
        elif load_latest:
            # Get the list of all pickle files in the OUTPUT_FOLDER
            pkl_files = glob.glob(os.path.join(self.output_folder, "*.pkl"))

            # Find the latest pickle file based on the modification time
            latest_pkl_file = max(pkl_files, key=os.path.getmtime)

            # Load the latest pickle file
            with open(latest_pkl_file, "rb") as f:
                data = pickle.load(f)
            
        self.pdf_documents = [PdfDocument.from_dict(doc_dict) for doc_dict in data]
    
    def export_docx_files(self):
        for doc in self.pdf_documents:
            filename = f"cai_media_analysis_{doc.filename}.docx"
            file_path = os.path.join(self.output_folder, filename)
            doc.save_as_docx(file_path=file_path)
    
    def export_markdown_files(self):
        for doc in self.pdf_documents:
            # Create folders
            filename = f"cai_media_analysis_{doc.filename}.md"
            file_path = os.path.join(self.output_folder, filename)
            
            # Write markdown file
            with open(file_path, "w") as f:
                markdown = doc.get_markdown()
                f.write(markdown)
                
    def __iter__(self, which="all") -> list[PdfDocument]:
        if which == "all":
            return iter(self.pdf_documents)
        else:
            for doc in self.pdf_documents:
                if doc.title == which:
                    return iter(doc)

In [5]:
# Initialize the PdfAnalyzer class
questions =  [
    "How do the media in this article frame the public discussion about ChatGPT? Are there certain **metaphors** that keep cropping up?",
    "Which role does or might the Arabic World play in the development of Artificial Intelligence? Answer with 'Not mentioned' if not applicable.",
    "Which use cases of Artificial Intelligence are helpful for the Arabic world based on this article?",
    "What is the final message of the article that the author wants to convey? Keep your answer short and precise!"]

In [6]:
# Initialize llm as instance of OllamaMediaAnalysis
llm = OllamaMediaAnalysis(model_name=MODEL, system_prompt=SYSTEM_PROMPT, debug=True)

# Initialize analyzer
analyzer = PdfAnalyzer(ollama_handler=llm, entity_collection="spacy", output_folder=OUTPUT_FOLDER, questions=questions, debug=False, speed=False)

In [7]:
if PROCESS_DOCUMENTS:
    # Process the documents in the folder where the PDFs are
    documents = analyzer.process_folder(PDF_FOLDER)

    # Save documents to the output folder
    analyzer.save_documents(documents)

16:50:25 Analyzing file from folder: Will ChatGPT and AI have an impact on Saudi workforce productivity_ _ Arab News.pdf
16:50:25	 Adding page 0/6 with 852 characters
16:50:25	 Adding page 1/6 with 1285 characters
16:50:25	 Adding page 2/6 with 940 characters
16:50:25	 Adding page 3/6 with 1750 characters
16:50:25	 Adding page 4/6 with 1841 characters
16:50:25	 Adding page 5/6 with 387 characters
16:50:25	 Create PDF document <Will ChatGPT and AI ...> with content of length 7007
16:50:25 Initialized PdfDocument: <Will ChatGPT and AI have an impact on Saudi workforce productivity>
16:50:25	 Generating tokenized content
16:50:25	 Generating short summary
Validation Error: 1 validation error for ShortSummary
short_summary
  Value error, The summary must be exactly one sentence. [type=value_error, input_value='{\n"short_summary": "The...arning environment."\n}', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/value_error
16:50:39	 Generating short summa

In [8]:
# Load already analyzed documents
analyzer.load_documents(os.path.join(OUTPUT_FOLDER, PROCESSED_DOC_FILENAME), load_latest=True)

17:04:27 Initialized PdfDocument: <Will ChatGPT and AI have an impact on Saudi workforce productivity>
17:04:27 Initialized PdfDocument: <AI is not smarter than humans>
17:04:27 Initialized PdfDocument: <ChatGPT>
17:04:27 Initialized PdfDocument: <ChatGPT outperforms copywriters in STEP Conference’s outdoor adverts>
17:04:27 Initialized PdfDocument: <Is the Arab world ready for the uncertain age of AI-powered web tools>
17:04:27 Initialized PdfDocument: <‘I am not here to take your job,’ ChatGPT tells Frankly Speaking host>
17:04:27 Initialized PdfDocument: <ChatGPT is the ‘Netscape moment’ for artificial intelligence’>
17:04:27 Initialized PdfDocument: <No need to demonize ChatGPT but AI regulation is a must>


## Get insights on the documents

In [9]:
# Print a summary of every loaded file (Optional: with highlights)
for doc in analyzer:
    print(doc, end="\n"+"- "*50+"\n"*2)

Title: Will ChatGPT and AI have an impact on Saudi workforce productivity
Short Summary: The article explores how ChatGPT and AI technologies are reshaping Saudi workforce productivity via tailored training, online courses, collaboration enhancement, upskilling, reskilling, and knowledge management, yet acknowledges potential job displacement due to automation and underscores the necessity for strategic implementation considering operational challenges.
Summary:
1. **AI technologies like ChatGPT are revolutionizing global workforces, providing an opportunity to boost productivity in Saudi Arabia.**
2. **ChatGPT's popularity has alleviated fears among employees about job replacement by AI, emphasizing the potential for collaboration between humans and machines.**
3. **AI can significantly impact career-related skills through tailored training programs, access to online courses, and fostering team collaboration.**
4. **In the public sector, healthcare, transportation, energy, finance, an

### Export the files as word-docx and markdown files

In [10]:
# Export docx files with wordclouds
analyzer.export_docx_files()

In [11]:
# Write a markdown file for every document
analyzer.export_markdown_files()

## Analysis of all files

### Apply latent dirichlet allocation algorithm
Algorighm selects all topics out of the articles. LLM then adds a title that summarizes the topics into categories. 

Thereby, all different topics can be extracted out of **all** documents.

In [12]:
# Get the content of all documents
all_content_tokens = [doc.content_tokens for doc in analyzer.pdf_documents]


# Create a document-term matrix
vectorizer = CountVectorizer(max_df=0.95, min_df=5, stop_words='english', analyzer="word")
doc_term_matrix = vectorizer.fit_transform(all_content_tokens)

# Apply LDA
lda = LatentDirichletAllocation(n_components=20, learning_method="batch", random_state=42, n_jobs=-1)
lda.fit(doc_term_matrix)

In [31]:
# Function to generate unique topics
from pyexpat import model


def get_unique_topics(model, vectorizer, top_n=10):
    unique_topics = {}
    for idx, topic in enumerate(model.components_):
        # Get the top features for the topic
        top_features = tuple(vectorizer.get_feature_names_out()[i] for i in topic.argsort()[:-top_n - 1:-1])
        
        # Use the tuple as a key to ensure uniqueness
        if top_features not in unique_topics:
            unique_topics[top_features] = idx

    return unique_topics

# Generate a title for each unique topic
def generate_topic_titles(llm, unique_topics):
    titles = {}
    
    for features, idx in unique_topics.items():
        # Create a prompt with the top features
        prompt = (
        "Generate a concise and meaningful title, exactly four words long, that summarizes the following features. "
        "The title should capture the main theme or topic of these features. "
        "Example outputs: 'Language Revolution', 'Shaping Future Technology Trends', 'Global Knowledge Network'. Features: "
        f"{', '.join(features)}"
        )
        
        # Use the LLM to generate a title
        titles[idx] = llm.ollama.generate(model=llm.model, prompt=prompt)["response"]
    return titles


# Get unique topics and their titles
unique_topics = get_unique_topics(lda, vectorizer, top_n=20)
topic_titles = generate_topic_titles(llm, unique_topics)


analyzer.analysis["LDA"] = {topic_titles[idx].replace('"', ""): topics for idx, topics in zip(topic_titles, unique_topics)}

# Print unique topics and their generated titles
for idx, (title, topics) in enumerate(analyzer.analysis["LDA"].items()):

    print(f"Topic {idx} - {title}:")
    print(f"Features: {', '.join(topics)}")

Topic 0 - Global Language Expansion: Google's Role in World Education:
Features: write, language, google, government, help, include, internet, know, large, need, learn, learning, like, likely, look, world, generative, express, explain, ect
Topic 1 - Language Technology's Global Impact: A New Model for News and Data Retrieval:
Features: datum, language, say, model, replace, news, large, technology, information, include, impact, time, internet, task, arab, base, new, answer, openai, likely
Topic 2 - Transforming Saudi Future: Technology's Impact and Opportunities:
Features: technology, impact, say, help, believe, like, time, saudi, task, world, explain, understand, opportunity, replace, likely, need, look, ect, express, rst
Topic 3 - Tech Giants' Collaborative Knowledge Initiatives:
Features: say, google, answer, base, arab, microsoft, way, power, openai, people, program, time, set, university, replace, government, look, help, learn, point
Topic 4 - Transforming Global Communication: AI 

### Apply Term frequency inverse term frequency
This model iterates over each document and returns those words, that do not appear often in other documents. The top n words are then used to create a topic for every article!

- A high TF-IDF score (FROM_LOW_TO_HIGH = False) indicates that a word is both important within a document and rare across all document.
- A low TF-IDF score suggests that a word is either common in the document but rare overall, or vice versa.

By analyzing TF-IDF scores for a set of words, you can identify:
Important keywords in a document
Rare or unique words that distinguish one document from another
Words with varying levels of importance across different documents


In [39]:
# Create a TfidfVectorizer object
tfidf_vectorizer = TfidfVectorizer(stop_words='english', norm="l2", analyzer="word", min_df=3)

# Fit and transform the documents into a TF-IDF matrix
tfidf_matrix = tfidf_vectorizer.fit_transform(all_content_tokens)

# Get the feature names (i.e., words)
feature_names = tfidf_vectorizer.get_feature_names_out()

# Convert the TF-IDF matrix to a DataFrame for better readability
df = pd.DataFrame(tfidf_matrix.toarray(), columns=feature_names)

# Show the TF-IDF values for each term in each document
#print("TF-IDF Matrix:")
#print(df)

# Display the most important words (top N) for each document
TOP_N = 15
FROM_LOW_TO_HIGH = False

analyzer.analysis["TFIDF"] = dict()

for i, row in df.iterrows():
    doc = analyzer.pdf_documents[i]
    print(f"\nTop {TOP_N} terms for Document {i + 1}: <{doc.title}>")
          
    # Generate a title for each unique topic
    top_terms = row.sort_values(ascending=FROM_LOW_TO_HIGH).head(TOP_N*2)
    
    indices = top_terms.index
    values = top_terms.values
    
    # Create a prompt with the top features
    prompt = (
        "Generate a concise and meaningful title, exactly four words long, that summarizes the following features. "
        "The title should capture the main theme or topic of these features. "
        "Example outputs: 'Language Revolution', 'Shaping Future Technology Trends', 'Global Knowledge Network'. Features: "
        f"{'\n'.join([f'{str(indices[i])} - {values[i]}' for i in range(len(indices))])}"
        )
            
    # Use the LLM to generate a title
    title = llm.ollama.generate(model=llm.model, prompt=prompt)["response"]
    
    analyzer.analysis["TFIDF"].update({doc: {"title": title, "terms": top_terms[:TOP_N]}})
    
    print(title, "\n", ", ".join(top_terms[:TOP_N].index))


Top 15 terms for Document 1: <Will ChatGPT and AI have an impact on Saudi workforce productivity>
"AI-Driven Workforce Transformation in Saudi Arabia: Impact, Training, and Innovation Opportunities" 
 ai, organization, impact, chatgpt, worker, percent, say, workforce, require, training, believe, saudi, technology, add, explain

Top 15 terms for Document 2: <AI is not smarter than humans>
"AI-Driven Marketing Assistant: Transforming Business and Human Experience in the Digital Age" 
 technology, ai, marketing, business, help, like, experience, able, life, human, time, assistant, day, di, campaign

Top 15 terms for Document 3: <ChatGPT>
"AI-Driven Linguistic Transformation: OpenAI's Technological Impact on Global Society" 
 ai, chatgpt, human, platform, tool, technology, language, society, development, change, datum, https, openai, use, view

Top 15 terms for Document 4: <ChatGPT outperforms copywriters in STEP Conference’s outdoor adverts>
"AI-Driven Chatbot for Human Job Content Creat

In [40]:
content = json.dumps({doc.title: doc.content for doc in analyzer})

# Iterates over each question, provides answers to LLM and let them summarize
for question in questions:
    content = {doc.title: doc.answers.get(question) for doc in analyzer}
    response = llm.answer_question(text=json.dumps(content), question=question, multiple_articles=True)
    analyzer.analysis[question] = response

17:23:16	 Answering question <How do the media in this artic...>
17:23:25	 Answering question <Which role does or might the A...>
17:23:29	 Answering question <Which use cases of Artificial ...>
17:23:36	 Answering question <What is the final message of t...>


In [101]:
answers_questions = dict()
# Iterates over each question, provides answers to LLM and let them summarize
topic_question_all = (
    "Attached are the topics of every article. "
    "What **perspectives and aspects** are being widely covered? Which aspects are being ignored? "
    "In your answer consider topics such as, but not only, data privacy, costs/affordability, know-how, complexity, accuracy, accessibility, bias (towards age, gender, religion, sexuality), risks, opportunity, perception, limitations."
    "These are the topics of all arcticles: ")

# Example usage:
topic_clusters = {doc.title: [value for value in doc.topic_clusters.values()] for doc in analyzer}
content_topics = flatten_dict(topic_clusters)
response = llm.answer_question(text=json.dumps(content_topics), question=topic_question_all, multiple_articles=True)
analyzer.analysis["topic_question"] = response

17:41:39	 Answering question <Attached are the topics of eve...>


In [102]:
lda = analyzer.analysis.get("LDA")
print(f"Latent Dirichlet Allocation topics\n\t{'\n\t'.join(lda.keys())}")


tfidf = [elem.get("title") for elem in flatten_list(analyzer.analysis.get("TFIDF").values())]
print(f"TDIF for every article\n\t{'\n\t'.join(tfidf)}")


for question in questions + ["topic_question"]:
    answer = analyzer.analysis.get(question)
    print(f"\n{answer.get("question")}\n\t{answer.get("answer")}\nReason: {answer.get("reasoning")}")

Latent Dirichlet Allocation topics
	Global Language Expansion: Google's Role in World Education
	Language Technology's Global Impact: A New Model for News and Data Retrieval
	Transforming Saudi Future: Technology's Impact and Opportunities
	Tech Giants' Collaborative Knowledge Initiatives
	Transforming Global Communication: AI and Data Revolution
	Revolutionizing Communication: AI-Powered Language Tools and Their Global Impact
	Tech-Driven Linguistic Revolution: Saudi's Language Innovation
TDIF for every article
	"AI-Driven Workforce Transformation in Saudi Arabia: Impact, Training, and Innovation Opportunities"
	"AI-Driven Marketing Assistant: Transforming Business and Human Experience in the Digital Age"
	"AI-Driven Linguistic Transformation: OpenAI's Technological Impact on Global Society"
	"AI-Driven Chatbot for Human Job Content Creation: A Future Trend"
	"AI-Powered Chatbots: Global Knowledge Networking & Human-Tech Interaction"
	"AI-Driven Journalism: Revolutionizing News Accura