# Setup and Imports

In [1]:
# Install the pinned project dependencies into the *kernel's* environment.
# `%pip` targets the interpreter running this notebook, whereas `!pip3` runs
# whichever pip3 happens to be first on PATH.
%pip install -qq -r ../requirements.txt

In [2]:
import json
import os
import re
import string
import textwrap
from datetime import datetime, timedelta

import requests
import pandas as pd
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz
import spacy
import keras
from keybert import KeyBERT
from langdetect import detect
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import openai
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage

  from tqdm.autonotebook import tqdm, trange





In [3]:
# Download the small English pipeline by its full package name; the bare 'en'
# shortcut is deprecated since spaCy v3.0 and the pipeline is later loaded as
# "en_core_web_sm".
!python -m spacy download en_core_web_sm

[38;5;3m⚠ As of spaCy v3.0, shortcuts like 'en' are deprecated. Please use the
full pipeline package name 'en_core_web_sm' instead.[0m
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     --------------------------------------- 0.0/12.8 MB 131.3 kB/s eta 0:01:38
     --------------------------------------- 0.0/12.8 MB 163.8 kB/s eta 0:01:18
     --------------------------------------- 0.0/12.8 MB 195.7 kB/s eta 0:01:06
     --------------------------------------- 0.1/12.8 MB 374.1 kB/s eta 0:00:34
      -------------------------------------- 0.2/12.8 MB 615.9 kB/s eta 0:00:21
      -------------------------------------- 0.2/12.8 MB 758.5 kB/s eta 0:00:17
     - ------------------------------------- 0.4/12.8 MB 967.1 


[notice] A new release of pip is available: 24.0 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
# Let python-dotenv populate the process environment before the keys are read.
load_dotenv()

# API credentials are taken from the environment; each is None when unset.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Data Ingestion (Pipeline)

In [5]:
class NewsAPILoader:
    """Small client for the NewsAPI ``/v2/everything`` endpoint plus a CSV exporter."""

    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://newsapi.org/v2/everything"

    def fetch_news(self, query, from_date=None, to_date=None, page_size=10, language="en", sort_by="publishedAt", timeout=10):
        """
        Fetches news articles from NewsAPI based on the given parameters.

        Parameters:
            query (str): Search query string (e.g., 'AI', '"Elon Musk"', 'crypto AND bitcoin NOT ethereum').
            from_date (str): Start date for news articles (format: YYYY-MM-DD).
            to_date (str): End date for news articles (format: YYYY-MM-DD).
            page_size (int): Number of articles to retrieve per request (max 100).
            language (str): Language of articles (default: 'en').
            sort_by (str): Sorting order ('relevancy', 'popularity', 'publishedAt').
            timeout (float): Seconds to wait for the server before giving up, so a
                stalled connection cannot hang the notebook indefinitely.

        Returns:
            list | None: The retrieved article dictionaries, or None when the
            request fails or the API reports a non-"ok" status (the reason is printed).
        """
        params = {
            "q": query,  # Supports Boolean operators like AND, OR, NOT
            "apiKey": self.api_key,
            "pageSize": page_size,
            "language": language,
            "sortBy": sort_by,
        }

        # Optional date filters are only sent when provided.
        if from_date:
            params["from"] = from_date
        if to_date:
            params["to"] = to_date

        try:
            response = requests.get(self.base_url, params=params, timeout=timeout)
            response.raise_for_status()  # Raising an error for HTTP issues
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"API Request Failed: {e}")
            return None

        if data.get("status") == "ok":
            return data.get("articles", [])

        print(f"Error: {data.get('message', 'Unknown error')}")
        return None

    def save_to_csv(self, articles, filename="news_data.csv"):
        """
        Flattens per-keyword article lists and saves them to a CSV file.

        Parameters:
            articles (list): One entry per keyword, each entry being the list of
                article dictionaries returned by ``fetch_news``. Entries that are
                None (a failed fetch) or empty are skipped.
            filename (str): Output CSV filename.

        Nothing is written when there are no articles at all; a message is
        printed instead.
        """
        rows = []

        for keyword_based_articles in articles or []:
            # fetch_news returns None on failure; treat that as "no articles".
            for article in keyword_based_articles or []:
                source = article.get("source") or {}
                rows.append({
                    "Title": article.get("title", ""),
                    "Content": article.get("content", ""),  # Full content if available
                    "Description": article.get("description", ""),
                    "URL": article.get("url", ""),
                    "Source": source.get("name") or "Unknown",
                    "Published Date": article.get("publishedAt", ""),
                    "Image URL": article.get("urlToImage", ""),
                    "Author": article.get("author", "Unknown"),
                })

        if not rows:
            print("No articles found!")
            return

        # Converting to DataFrame and save
        pd.DataFrame(rows).to_csv(filename, index=False)

        print(f"Data saved to {filename}")

In [6]:
class DataIngestionPipeline:
    """Fetches recent news for a list of keywords and stores the results as CSV."""

    def __init__(self, api_key):
        self.api_loader = NewsAPILoader(api_key)

    def ingest_data(self, keywords=None, file_path=None):
        """
        Queries the news API once per keyword and saves all results to
        ``../data/news_data.csv``.

        Parameters:
            keywords (list[str] | None): Search queries. Empty strings are
                skipped; None or an empty list makes this call a no-op.
            file_path (str | None): Currently unused (file ingestion is not
                implemented); kept so existing callers keep working.
        """
        if not keywords:
            return

        # One cutoff shared by every query: articles from the last three days.
        from_date = (datetime.now() - timedelta(days=3)).strftime("%Y-%m-%d")

        # One result list per non-empty keyword; a failed fetch contributes None.
        articles = [
            self.api_loader.fetch_news(query=query, from_date=from_date, page_size=5)
            for query in keywords
            if query
        ]

        # Saving to CSV
        self.api_loader.save_to_csv(articles, "../data/news_data.csv")


In [7]:
# The question this notebook run is trying to answer.
user_query = "What has been the impact of deepseek on the stock markets worldwide?"

# KeyBERT yields (phrase, score) pairs; only the phrases are used as search queries.
kw_model = KeyBERT()
scored_phrases = kw_model.extract_keywords(
    user_query,
    keyphrase_ngram_range=(1, 2),
    stop_words="english",
    top_n=5,
)
keywords = [phrase for phrase, _score in scored_phrases]

# Fetch articles for every extracted phrase and write them to the ingestion CSV.
data_ingestion_pipeline = DataIngestionPipeline(NEWS_API_KEY)
data_ingestion_pipeline.ingest_data(keywords=keywords)

Data saved to ../data/news_data.csv


In [8]:
# Sanity check: reload the CSV written by the ingestion step and preview it.
data = pd.read_csv("../data/news_data.csv")

data.head()
# Other quick checks, left disabled:
# data.shape
# data["Content"][0]

Unnamed: 0,Title,Content,Description,URL,Source,Published Date,Image URL,Author
0,Alibaba denies it has made $1 billion investme...,"On Feb. 7, reports circulated online that Alib...","On Feb. 7, reports circulated online that Alib...",http://technode.com/2025/02/08/alibaba-denies-...,TechNode,2025-02-08T09:38:00Z,https://i0.wp.com/technode.com/wp-content/uplo...,TechNode Feed
1,Apple Stock Jumps on Artificial Intelligence (...,The news of DeepSeek's launch last week spread...,The news of DeepSeek's launch last week spread...,https://biztoc.com/x/519f28455f9d8f33,Biztoc.com,2025-02-08T09:35:30Z,https://biztoc.com/cdn/808/og.png,aol.com
2,Apple Stock Jumps on Artificial Intelligence (...,"If you click 'Accept all', we and our partners...",,https://consent.yahoo.com/v2/collectConsent?se...,Yahoo Entertainment,2025-02-08T09:05:00Z,,
3,Stock-market concentration rivals 1929 and 199...,How much longer can the Magnificent Seven stoc...,How much longer can the Magnificent Seven stoc...,https://www.businessinsider.com/stock-market-c...,Business Insider,2025-02-08T09:00:01Z,https://i.insider.com/67a667f06630eb10385bf7fe...,wedwards@businessinsider.com (William Edwards)
4,"Cloudastructure, Inc. Class A Common Stock (CS...",We recently compiled a list of the 10 AI News ...,We recently compiled a list of the 10 AI News ...,https://finance.yahoo.com/news/cloudastructure...,Yahoo Entertainment,2025-02-08T07:24:19Z,https://s.yimg.com/ny/api/res/1.2/jES.4QWlDHc9...,Affan Mir


# Data Preparation (Pipeline)

In [10]:
class DataPreparationPipeline:
    """Deduplicates, language-filters, annotates, chunks, cleans and saves articles.

    Parameters:
        file_paths (list[str]): Output files written by ``data_saving``; paths
            ending in ".json" are written as JSON, everything else as CSV.
    """

    def __init__(self, file_paths):
        self.file_paths = file_paths
        self.nlp = spacy.load("en_core_web_sm")
        self.kw_model = KeyBERT()

    def clean_text(self, text):
        """Normalizes free text: strips brackets, URLs, ellipses and punctuation, then lowercases.

        Missing values (NaN/None) are returned as an empty string.
        """
        if pd.isna(text):
            return ""

        text = re.sub(r'\s+', ' ', text)  # Removing extra whitespace
        text = re.sub(r'\[.*?\]', '', text)  # Removing text in square brackets
        text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Removing URLs

        # Removing words ending in 3 or more dots, ellipses, or standalone dots
        text = re.sub(r'\b\w*\.\.\.+\b', '', text)
        text = re.sub(r'\b\w*\…\b', '', text)
        text = re.sub(r'\s*\.{2,}\s*', ' ', text)
        text = re.sub(r'\b\w*…\w*\b', '', text)

        # Dropping any remaining token that still contains an ellipsis
        words = text.split()
        filtered_words = [word for word in words if not re.search(r'\.\.\.|…', word)]
        text = ' '.join(filtered_words)

        # Removing punctuation
        text = text.translate(str.maketrans('', '', string.punctuation))

        return text.lower().strip()

    def remove_duplicates(self, df):
        """Removes duplicate articles that share the same Title, URL and Author.

        The first occurrence is kept. The input frame is left untouched; a new
        frame (with the original index labels) is returned.
        """
        # Comparing on stringified values so missing entries compare as equal text,
        # without adding a helper column to the caller's frame.
        key_columns = df[['Title', 'URL', 'Author']].astype(str)
        duplicated_rows = key_columns.duplicated(keep='first')

        return df[~duplicated_rows].copy()

    def detect_language(self, text):
        """Returns the detected language code of ``text``, or "unknown" if detection fails."""
        try:
            return detect(text)
        except Exception:
            # Detection fails on empty/non-text input; such rows are simply
            # labelled "unknown" rather than aborting the pipeline.
            return "unknown"

    def extract_entities(self, text):
        """Runs spaCy NER on ``text`` and groups entity strings by person, organization and location."""
        doc = self.nlp(text)

        return {
            "Persons": [ent.text for ent in doc.ents if ent.label_ == "PERSON"],
            "Organizations": [ent.text for ent in doc.ents if ent.label_ == "ORG"],
            "Locations": [ent.text for ent in doc.ents if ent.label_ in ["GPE", "LOC"]]
        }

    def extract_keywords(self, text):
        """Returns the top 5 one- or two-word key phrases of ``text`` as produced by KeyBERT."""
        return self.kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words="english", top_n=5)

    def chunk_text(self, text, chunk_size=512, overlap=50):
        """Splits ``text`` into whitespace-token chunks of at most ``chunk_size`` words.

        Consecutive chunks share ``overlap`` words. Chunking stops as soon as a
        chunk reaches the end of the text, so no trailing chunk consisting only
        of already-covered words is produced.

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
                in ``[0, chunk_size)`` (the window would never advance).
        """
        if chunk_size <= 0 or not 0 <= overlap < chunk_size:
            raise ValueError("chunk_size must be positive and overlap must satisfy 0 <= overlap < chunk_size")

        words = text.split()
        step = chunk_size - overlap
        chunks = []

        for start in range(0, len(words), step):
            chunks.append(" ".join(words[start:start + chunk_size]))

            if start + chunk_size >= len(words):
                break

        return chunks

    def data_saving(self, df):
        """Writes ``df`` to every path in ``self.file_paths``, replacing existing files."""
        for file_path in self.file_paths:

            # Make sure the target directory exists before writing.
            parent_dir = os.path.dirname(file_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            if os.path.exists(file_path):

                try:
                    os.remove(file_path)
                except OSError as e:
                    print(f"Error deleting file '{file_path}': {e}")

            if file_path.endswith(".json"):
                df.to_json(file_path, orient="records", indent=4)
            else:
                df.to_csv(file_path, index=False)

    def prepare_data(self, df):
        """Runs the full preparation flow on ``df`` and returns the prepared frame.

        Steps: deduplicate, keep English articles, extract entities and keywords,
        chunk the content, attach a metadata dict, clean the text columns, save.
        Entities, keywords and chunks are computed from the raw ``Content``
        before it is cleaned. The input frame is not modified.
        """
        df = self.remove_duplicates(df)

        print(f"✅ Deduplication Completed! {len(df)} unique articles remain. Remaining articles: {len(df)}")

        df["Language"] = df["Content"].apply(self.detect_language)
        # .copy() so the column assignments below act on an independent frame
        # instead of a slice of the filtered one.
        df = df[df["Language"] == "en"].copy()

        print(f"✅ Language Filtering Completed! Remaining articles: {len(df)}")

        df["Entities"] = df["Content"].apply(self.extract_entities)
        df["Keywords"] = df["Content"].apply(self.extract_keywords)

        print(f"✅ Named Entity Recognition (NER) & Topic Extraction Completed! Remaining articles: {len(df)}")

        df["Chunks"] = df["Content"].apply(self.chunk_text)

        print(f"✅ Chunking Completed! Remaining articles: {len(df)}")

        # Built column-wise so an empty frame yields an empty column instead of
        # the DataFrame that a row-wise apply returns in that case.
        df["Metadata"] = [
            {
                "Source": source,
                "Published Date": published,
                "Keywords": keywords,
                "Entities": entities
            }
            for source, published, keywords, entities in zip(
                df["Source"], df["Published Date"], df["Keywords"], df["Entities"]
            )
        ]

        print(f"✅ Annotation Completed! Remaining articles: {len(df)}")

        df["Title"] = df["Title"].apply(self.clean_text)
        df["Content"] = df["Content"].apply(self.clean_text)
        df["Description"] = df["Description"].apply(self.clean_text)

        print(f"✅ Text Cleaning Completed! Remaining articles: {len(df)}")

        self.data_saving(df)

        print(f"✅ Data Preparation Completed! Remaining articles: {len(df)}")

        return df

In [11]:
# Destinations for the prepared dataset: one JSON copy and one CSV copy.
file_paths = [
    "../data/processed_news_data.json",
    "../data/processed_news_data.csv",
]

In [12]:
data_preparation_pipeline = DataPreparationPipeline(file_paths)

# Hand the pipeline its own copy so the raw `data` frame stays exactly as loaded,
# which keeps this cell safe to re-run.
processed_data = data_preparation_pipeline.prepare_data(data.copy())

✅ Deduplication Completed! 17 unique articles remain. Remaining articles: 17
✅ Language Filtering Completed! Remaining articles: 17
✅ Named Entity Recognition (NER) & Topic Extraction Completed! Remaining articles: 17
✅ Chunking Completed! Remaining articles: 17
✅ Annotation Completed! Remaining articles: 17
✅ Text Cleaning Completed! Remaining articles: 17
✅ Data Preparation Completed! Remaining articles: 17


In [13]:
# Preview the prepared frame. Only a cell's last expression is rendered
# automatically, so the table has to be displayed explicitly.
display(processed_data.head())

# Positional access: preparation drops rows without resetting the index, so the
# label 0 is not guaranteed to exist.
processed_data["Content"].iloc[0]

'on feb 7 reports circulated online that alibaba was looking to invest 1 billion in deepseek to take a 10 stake valuing the ai company at 10 billion and potentially forging a strong partnership'

# Data Embedding and Indexing (Pipeline)

In [28]:
# Reload the prepared articles from the JSON file written by the preparation step.
# NOTE: this rebinds `data`, which previously held the raw ingestion CSV.
data = pd.read_json("../data/processed_news_data.json")

data.head()

Unnamed: 0,Title,Content,Description,URL,Source,Published Date,Image URL,Author,Language,Entities,Keywords,Chunks,Metadata
0,alibaba denies it has made 1 billion investmen...,on feb 7 reports circulated online that alibab...,on feb 7 reports circulated online that alibab...,http://technode.com/2025/02/08/alibaba-denies-...,TechNode,2025-02-08T09:38:00Z,https://i0.wp.com/technode.com/wp-content/uplo...,TechNode Feed,en,"{'Persons': [], 'Organizations': ['AI'], 'Loca...","[[billion deepseek, 0.6047], [deepseek, 0.5423...","[On Feb. 7, reports circulated online that Ali...","{'Source': 'TechNode', 'Published Date': '2025..."
1,apple stock jumps on artificial intelligence a...,the news of deepseeks launch last week spread ...,the news of deepseeks launch last week spread ...,https://biztoc.com/x/519f28455f9d8f33,Biztoc.com,2025-02-08T09:35:30Z,https://biztoc.com/cdn/808/og.png,aol.com,en,"{'Persons': [], 'Organizations': ['DeepSeek', ...","[[deepseek launch, 0.6475000000000001], [deeps...",[The news of DeepSeek's launch last week sprea...,"{'Source': 'Biztoc.com', 'Published Date': '20..."
2,apple stock jumps on artificial intelligence a...,if you click accept all we and our partners in...,,https://consent.yahoo.com/v2/collectConsent?se...,Yahoo Entertainment,2025-02-08T09:05:00Z,,,en,"{'Persons': ['Consent Framework'], 'Organizati...","[[access information, 0.5177], [iab transparen...","[If you click 'Accept all', we and our partner...","{'Source': 'Yahoo Entertainment', 'Published D..."
3,stockmarket concentration rivals 1929 and 1999...,how much longer can the magnificent seven stoc...,how much longer can the magnificent seven stoc...,https://www.businessinsider.com/stock-market-c...,Business Insider,2025-02-08T09:00:01Z,https://i.insider.com/67a667f06630eb10385bf7fe...,wedwards@businessinsider.com (William Edwards),en,"{'Persons': ['Glenmede'], 'Organizations': [],...","[[seven stocks, 0.6125], [stocks continue, 0.5...",[How much longer can the Magnificent Seven sto...,"{'Source': 'Business Insider', 'Published Date..."
4,cloudastructure inc class a common stock csai ...,we recently compiled a list of the 10 ai news ...,we recently compiled a list of the 10 ai news ...,https://finance.yahoo.com/news/cloudastructure...,Yahoo Entertainment,2025-02-08T07:24:19Z,https://s.yimg.com/ny/api/res/1.2/jES.4QWlDHc9...,Affan Mir,en,"{'Persons': [], 'Organizations': ['Cloudastruc...","[[nasdaq csai, 0.5572], [stock nasdaq, 0.5526]...",[We recently compiled a list of the 10 AI News...,"{'Source': 'Yahoo Entertainment', 'Published D..."


### Utility function(s)

In [56]:
def checking_for_file_existence(file_path):
    """Delete ``file_path`` when it exists; deletion errors are printed, not raised."""
    if not os.path.exists(file_path):
        return

    try:
        os.remove(file_path)
    except OSError as err:
        print(f"Error deleting file '{file_path}': {err}")

In [60]:
class EmbeddingGenerator:
    """Holds a SentenceTransformer model and exposes a single encode helper."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def generate_embedding(self, text):
        """Encode ``text`` with the wrapped model, requesting NumPy output."""
        encoded = self.model.encode(text, convert_to_numpy=True)
        return encoded

class VectorDatabase:
    """FAISS L2 index with a parallel list holding one metadata entry per vector.

    Parameters:
        embedding_dim (int): Dimensionality of the stored vectors.
        index_file (str): Path the FAISS index is saved to / loaded from.
        metadata_file (str): Path the metadata list is saved to / loaded from (JSON).
        reset (bool): When True (default, the original behaviour) any existing
            index and metadata files are deleted on construction, so a following
            ``load_index`` finds nothing. Pass False to keep previously saved
            files and be able to load them.
    """

    def __init__(self, embedding_dim=384, index_file="../data/faiss_index.bin", metadata_file="../data/metadata.json", reset=True):
        self.index = faiss.IndexFlatL2(embedding_dim)  # L2 (Euclidean) distance index
        self.metadata = []
        self.index_file = index_file
        self.metadata_file = metadata_file

        if reset:
            checking_for_file_existence(index_file)
            checking_for_file_existence(metadata_file)

    def add_embedding(self, embedding, metadata):
        """Adds a new embedding and its metadata."""

        self.index.add(np.array([embedding], dtype=np.float32))
        self.metadata.append(metadata)

    def search(self, query_embedding, top_k=5):
        """Performs similarity search and retrieves up to top_k results.

        Returns:
            list[tuple]: ``(metadata, distance)`` pairs in the order returned by
            the index. Slots the index could not fill (reported with a negative
            label when fewer than ``top_k`` vectors are stored) are skipped.
        """

        distances, indices = self.index.search(np.array([query_embedding], dtype=np.float32), top_k)

        # The lower bound matters: a negative label would otherwise index the
        # metadata list from the end and return an unrelated entry.
        return [
            (self.metadata[i], distances[0][j])
            for j, i in enumerate(indices[0])
            if 0 <= i < len(self.metadata)
        ]

    def save_index(self):
        """Saves FAISS index and metadata to disk."""

        faiss.write_index(self.index, self.index_file)

        with open(self.metadata_file, "w", encoding="utf-8") as f:
            json.dump(self.metadata, f, indent=4)

        print(f"✅ FAISS index saved to {self.index_file}")
        print(f"✅ Metadata saved to {self.metadata_file}")

    def load_index(self):
        """Loads FAISS index and metadata from disk, reporting whichever file is missing."""

        if os.path.exists(self.index_file):
            self.index = faiss.read_index(self.index_file)
            print(f"✅ FAISS index loaded from {self.index_file}")
        else:
            print(f"⚠️ No FAISS index found at {self.index_file}")

        if os.path.exists(self.metadata_file):
            with open(self.metadata_file, "r", encoding="utf-8") as f:
                self.metadata = json.load(f)
            print(f"✅ Metadata loaded from {self.metadata_file}")
        else:
            print(f"⚠️ No metadata found at {self.metadata_file}")

In [61]:
class DataEmbeddingPipeline:
    """Embeds prepared articles, stores them in the vector database and queries it."""

    def __init__(self, model_name="all-MiniLM-L6-v2", embedding_dim=384):
        self.embedder = EmbeddingGenerator(model_name)
        self.vector_db = VectorDatabase(embedding_dim)
        # Attempt to load a previously saved index right away.
        self.vector_db.load_index()

    def process_and_store(self, articles):
        """Embed each article's title, content and description, store it with its
        metadata record, then save the index once all articles are added."""

        for article in articles:
            title = article.get('Title', '')
            content = article.get('Content', '')
            description = article.get('Description', '')

            # Keywords are stored as (phrase, score) pairs; keep the phrase only.
            keyword_phrases = [pair[0] for pair in article.get("Keywords", [])]
            entities = article.get("Entities", {})

            record = {
                "Source": article.get("Source", ""),
                "Published Date": article.get("Published Date", ""),
                "Keywords": keyword_phrases,
                "Entities": {
                    "Persons": entities.get("Persons", []),
                    "Organizations": entities.get("Organizations", []),
                    "Locations": entities.get("Locations", []),
                },
                "URL": article.get("URL", ""),
                "Title": title,
                "Content": content,
                "Description": description,
            }

            # The embedded text is the three text fields joined by single spaces.
            vector = self.embedder.generate_embedding(f"{title} {content} {description}")
            self.vector_db.add_embedding(vector, record)

        self.vector_db.save_index()

    def retrieve_similar_articles(self, query_text, top_k=5):
        """Embed ``query_text`` and return the vector database's top_k matches."""

        return self.vector_db.search(self.embedder.generate_embedding(query_text), top_k)

In [62]:
embedding_pipeline = DataEmbeddingPipeline()

# Load the prepared articles written by the data-preparation step.
with open("../data/processed_news_data.json", "r", encoding="utf-8") as f:
    articles = json.load(f)

embedding_pipeline.process_and_store(articles)  # Storing embeddings

query = "AI revolution in China"

results = embedding_pipeline.retrieve_similar_articles(query, top_k=5)

# Keep the (metadata, distance) pairs for the summarization step.
retrieved_articles = list(results)

print("\n🔍 Search Results:")
# A dedicated loop name avoids overwriting the `data` DataFrame defined earlier.
for article_meta, distance in retrieved_articles:
    print(f"📰 Source: {article_meta['Source']}")
    print(f"🌍 Published: {article_meta['Published Date']}")
    print(f"🔗 URL: {article_meta['URL']}")
    # The index reports an L2 distance, so smaller values mean closer matches.
    print(f"🎯 L2 Distance (lower is closer): {distance:.4f}\n")

⚠️ No FAISS index found at ../data/faiss_index.bin
⚠️ No metadata found at ../data/metadata.json
✅ FAISS index saved to ../data/faiss_index.bin
✅ Metadata saved to ../data/metadata.json

🔍 Search Results:
📰 Source: YLE News
🌍 Published: 2025-02-08T09:59:57Z
🔗 URL: https://yle.fi/a/74-20142384
🎯 Similarity Score: 0.9385

📰 Source: Fast Company
🌍 Published: 2025-02-08T10:00:00Z
🔗 URL: https://www.fastcompany.com/91273725/deepseek-benefit-apple-intelligence-china-ai-race-openai-google-microsoft-meta
🎯 Similarity Score: 1.0076

📰 Source: Yahoo Entertainment
🌍 Published: 2025-02-08T09:05:00Z
🔗 URL: https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_713cb470-d013-43f6-94ee-23b0db69939c
🎯 Similarity Score: 1.4240

📰 Source: Yahoo Entertainment
🌍 Published: 2025-02-08T07:24:19Z
🔗 URL: https://finance.yahoo.com/news/cloudastructure-inc-class-common-stock-072419300.html
🎯 Similarity Score: 1.4261

📰 Source: Biztoc.com
🌍 Published: 2025-02-08T09:35:30Z
🔗 URL: https://biztoc.com/x/

# Retrieval

**Note:** Retrieval is already handled by the `Data Ingestion` and `Data Embedding and Indexing` pipelines above.

# Summarization (Pipeline)

In [66]:
class SummarizationPipeline:
    """Summarizes retrieved news articles with an OpenAI chat model via LangChain."""

    def __init__(self, model_name="gpt-4o", api_key=None):
        # An explicit key wins; otherwise fall back to the environment.
        self.api_key = api_key if api_key else os.getenv("OPENAI_API_KEY")

        if not self.api_key:
            raise ValueError("❌ OpenAI API key is required!")

        self.llm = ChatOpenAI(model_name=model_name, openai_api_key=self.api_key)

        # Prompt: per-article summaries followed by a combined summary and trends.
        self.prompt_template = PromptTemplate.from_template(
            '''You are a professional journalist summarizing news articles. 
            - Summarize each article (provided in the form of a list; the list contains the title, content and description of each of the articles) in **3-4 sentences**, keeping key details.
            - At the end, provide a **brief summary** of all the articles.
            - Draw **insights into future trends** based on the summarized content.
            
            ### **Example Response Format**:
            
            **Article 1: (Title of it)**
            📌 summary_1
            
            **Article 2: (Title of it)**
            📌 summary_2
            
            ...
            
            ### **Combined Summary & Future Trends**
            📊 combined_summary
            📈 future_trends
            
            Now, summarize the following articles accordingly:
            
            {article_text}
            '''
        )

    def summarize_article(self, article_text):
        """Fill the prompt with ``article_text``, send it to the model and return the stripped reply.

        ``article_text`` is inserted into the prompt as-is; the pipeline passes
        the whole list of article strings in a single call.
        """
        rendered_prompt = self.prompt_template.format(article_text=article_text)
        reply = self.llm([HumanMessage(content=rendered_prompt)])

        return reply.content.strip()

    def summarize_retrieved_articles(self, retrieved_articles):
        """Build one text entry per ``(metadata, score)`` pair and summarize them in one model call.

        Only the metadata's Title, Content and Description are used; the score is ignored.
        """
        texts = [
            f"Title: {data.get('Title', '')}, Content:{data.get('Content', '')} and Description:{data.get('Description', '')}"
            for data, _score in retrieved_articles
        ]

        return self.summarize_article(texts)

In [68]:
# Build the summarizer with the OpenAI key read from the environment earlier in the notebook.
summarizer = SummarizationPipeline(api_key=OPENAI_API_KEY)  

# One model call covers every retrieved article and returns a single string.
summaries = summarizer.summarize_retrieved_articles(retrieved_articles)

# NOTE(review): `titlecase` is imported here but not used in this cell — confirm whether a later cell needs it.
from titlecase import titlecase
  
print("\n📜 **Summarized Articles:**")
print(summaries)


📜 **Summarized Articles:**
**Article 1: AI Expert Explores Finland’s Role in Global Tech Race**  
📌 Finland is positioning itself in the global AI competition by leveraging innovation that caters to its unique language and culture. Peter Sarlin, founder of Silo AI, emphasizes the importance of sovereignty, innovation, and education in enhancing Finland's competitiveness on the global stage. Finland's approach highlights the strategic use of cultural and linguistic assets to carve out a niche in the AI sector.

**Article 2: How Apple Could Work with DeepSeek to Pull Ahead in the AI Race**  
📌 The emergence of Chinese startup DeepSeek has sparked discussions about its impact on the AI strategies of major US companies like OpenAI, Microsoft, Google, and Meta. DeepSeek's accomplishments could potentially influence Apple’s strategy to collaborate or compete in the AI sector. There is speculation about how Apple might leverage partnerships to maintain its edge in the rapidly evolving AI lan

Propagation delay:
\(d_{prop} = \frac{\text{distance}}{\text{speed}} = \frac{2 \times 10^7}{2.5 \times 10^8} = 0.08 \text{ sec}\)

Bandwidth-delay product:
\(R \times d_{prop} = 5 \times 10^6 \times 0.08 = 400,000 \text{ bits}\)

Bit width:
\(\text{Bit width} = \frac{\text{speed}}{\text{rate}} = \frac{2.5 \times 10^8}{5 \times 10^6} = 50 \text{ meters}\)

Alternatively,
\(\text{Bit width} = \frac{s}{R}\)

Transmission delay:
\(\text{Transmission delay} = \frac{L}{R} = \frac{800,000}{5 \times 10^6} = 0.16 \text{ sec}\)

Total time:
\(0.16 \text{ sec}\)

End-to-end delay:
\(\text{End-to-end delay} = \frac{175}{100} = 1.75 \text{ hours}\)

For a 500-byte packet:
\(T_{trans} = \frac{500 \times 8}{R}\)

For a 125-byte packet:
\(T_{trans} = \frac{125 \times 8}{R}\)

Propagation delay formula:
\(T_{prop} = \frac{d}{s}\)

For Link 1:
\(T_{prop_{A \to B}}\)

For Link 2:
\(T_{prop_{B \to C}}\)

Total time:
\(T_{total} = T_{trans_{A \to B}} + T_{prop_{A \to B}} + T_{trans_{B \to C}} + T_{prop_{B \to C}}\)

