# Trending Wikipedia articles with current news context

- Removed LangChain dependency
- Updated the Wikipedia call to use the official Wikimedia API instead of the `featured_feed` call, which would periodically return without the `mostread` section
- Compare results between `claude-3-7-sonnet` and `gpt-4o-mini`

In [None]:
# !pip install anthropic openai

In [None]:
from dotenv import load_dotenv
import os

# Read settings via python-dotenv (presumably from a local .env file — confirm).
load_dotenv()

# Required settings. Each lookup raises KeyError when the variable is not set,
# so a missing key fails in this cell rather than in a later one.
SERPER_API_KEY = os.environ["SERPER_API_KEY"] 
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"] 
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] 
WIKIPEDIA_APP_NAME = os.environ["WIKIPEDIA_APP_NAME"] 

### Set date to query

In [None]:
# Choose the date whose Wikipedia featured feed will be requested.
# The value stays a datetime here; it is formatted as YYYY/MM/DD only when
# the request URL is built.
from datetime import datetime, timedelta

today = datetime.now()

# To query the previous day instead, swap the assignment below for:
# date_to_query = today - timedelta(days=1)
# NOTE(review): datetime.now() is local time — confirm the feed date is not
# expected in UTC.
date_to_query = today



### Get feed data from date specified

Create a Wikimedia API account here: https://api.wikimedia.org/wiki/Special:CreateAccount

In [None]:
import requests
import json

# Request the featured feed for `date_to_query` and derive the feed date in
# the three forms used by later cells (raw string, datetime, long text).
language_code = 'en'  # English
headers = {
    # 'Authorization': 'Bearer YOUR_ACCESS_TOKEN',
    # NOTE(review): the literal "YOUR_APP_NAME" looks like a leftover
    # placeholder — confirm the intended User-Agent value.
    'User-Agent': f"""YOUR_APP_NAME ({WIKIPEDIA_APP_NAME})"""
}

base_url = 'https://api.wikimedia.org/feed/v1/wikipedia/'
url = base_url + language_code + '/featured/' + date_to_query.strftime('%Y/%m/%d')
# Printed before the request so the URL is visible even when the call fails
print(f"API call: {url}")

# Bound the wait so an unresponsive server cannot hang the notebook, and
# surface HTTP errors here instead of as a confusing parse error below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

featured_feed = response.json()

# Everything downstream needs the `mostread` section; fail with a clear
# message (same exception type as the plain lookup) when it is absent.
if "mostread" not in featured_feed:
    raise KeyError(
        f"'mostread' section missing from the featured feed returned by {url}; "
        "try querying an earlier date"
    )

feed_date_str = featured_feed["mostread"]["date"].rstrip("Z")
print(f"Feed date string: {feed_date_str}")
feed_date = datetime.strptime(feed_date_str, '%Y-%m-%d')
print(f"Feed date: {feed_date}")
feed_date_long = feed_date.strftime('%B %d, %Y')
print(f"Feed date long: {feed_date_long}")

print(json.dumps(featured_feed, indent=2))

### Save raw Wikipedia data to file

In [None]:

# Ensure the 'data' folder exists
file_directory = "data"
os.makedirs(file_directory, exist_ok=True)

# Define the filename based on the date
# NOTE(review): feed_date is a datetime, so the name renders like
# "2024-01-31 00:00:00.json" (space and colons) — confirm that is intended;
# colons are not valid in Windows file names.
file_path = f'{file_directory}/{feed_date}.json'

# Save to JSON file (overwrite if it already exists)
with open(file_path, 'w', encoding='utf-8') as file:
    json.dump(featured_feed, file, indent=4, ensure_ascii=False)

print(f'Saved Wikipedia response to {file_path}')

### Build data structure with all relevant information and placeholders for LLM responses

In [None]:
def _build_article_record(entry):
    """Map one `mostread` feed entry to the record used by the later cells.

    Fields taken from the feed are copied over; the remaining fields are
    empty-string placeholders that later cells fill in.
    """
    entry_title = entry['title']
    entry_normalized_title = entry['titles']['normalized']
    entry_views = entry['views']
    entry_link = entry['content_urls']['desktop']['page']
    entry_extract = entry['extract_html']
    # Not every entry has a thumbnail; fall back to None
    entry_thumbnail = entry.get('thumbnail', {}).get('source', None)
    entry_view_history = entry['view_history']

    return {
        'title': entry_title,
        'summary': '',
        'normalized_title': entry_normalized_title,
        'views': entry_views,
        'link': entry_link,
        'thumbnail': entry_thumbnail,
        'extract': entry_extract,
        'text': '',
        'anthropic_trending_reason': '',
        'openai_trending_reason': '',
        'view_history': entry_view_history,
        'is_newly_trending': '',
        'raw_new_results': '',
        'anthropic_news_relation': '',
        'openai_news_relation': '',
    }


article_list = [
    _build_article_record(entry)
    for entry in featured_feed['mostread']['articles']
]

## Determine if the article is newly trending. If it is, add to new list

- Filter list to only articles with meaningful spike in views



In [None]:
def is_newly_trending(view_history, threshold=0.4):
    """Return True when the latest day's views spiked over the previous day's.

    Args:
        view_history: Sequence of dicts with a ``'views'`` number, ordered
            oldest to newest; only the last two entries are compared.
        threshold: Fraction of the latest day's views that the previous day
            must stay below. The default 0.4 means the latest count must be
            more than 2.5 times the previous one.

    Returns:
        True if ``latest * threshold > previous``; False otherwise, including
        when fewer than two data points are available.
    """
    # Without two data points there is nothing to compare against
    if len(view_history) < 2:
        return False

    yesterdays_views = view_history[-2]['views']
    todays_views = view_history[-1]['views']

    return todays_views * threshold > yesterdays_views

# Flag every article, and collect (and print) the ones whose views spiked
newly_trending_article_list = []

for article in article_list:
    article['is_newly_trending'] = is_newly_trending(article['view_history'])

    # Articles without a spike are flagged but not collected
    if not article['is_newly_trending']:
        continue

    print(
        article['title'],
        article['is_newly_trending'],
        article['view_history'],
        "",
        sep="\n",
    )
    newly_trending_article_list.append(article)
  

### Get first 5000 characters of article

In [None]:
for article in newly_trending_article_list:
    # Download the raw text of the article. Passing the title through
    # `params` URL-encodes it, so titles containing characters such as
    # '&', '?' or '#' no longer corrupt the query string.
    response = requests.get(
        "https://en.wikipedia.org/w/index.php",
        params={"title": article['title'], "action": "raw"},
        timeout=30,
    )
    print(response.url)

    # Do not feed an error page to the LLM as if it were article text
    if not response.ok:
        print(f"Could not download {article['title']} (HTTP {response.status_code}); leaving text empty")
        article['text'] = ''
        continue

    # Keep only the first 5000 characters
    article['text'] = response.text[:5000]

In [None]:
import anthropic

# Anthropic API client shared by the Claude cells below (summary, news
# relation and trending reason).
anthropiClient = anthropic.Anthropic(
    # defaults to os.environ.get("ANTHROPIC_API_KEY")
    api_key=ANTHROPIC_API_KEY,
)


In [None]:
from openai import OpenAI
# OpenAI API client shared by the gpt-4o-mini cells below (news relation and
# trending reason).
openAIClient = OpenAI(api_key=OPENAI_API_KEY)
    

In [None]:
def extract_text_from_message(message):
    """Return the text of the first content block that carries text.

    Args:
        message: Object with a ``content`` attribute holding a sequence of
            content blocks (as returned by the Anthropic client in this
            notebook).

    Returns:
        The ``text`` attribute of the first block that has one, or None when
        ``content`` is empty/None or no block has a ``text`` attribute.
    """
    # Scan every block rather than only the first, so a leading block
    # without text does not hide the text that follows it.
    for block in message.content or []:
        if hasattr(block, 'text'):
            return block.text
    return None

In [None]:

# Ask Claude for a short summary of each newly trending article
for article in newly_trending_article_list:

    title = article['normalized_title']
    text = article['text']

    # Prompt wording fixed: the original read "1-2 2 concise"
    summary_prompt = f"""Act as a professional news summarizer. Based on your knowledge of {title} 
    and the following extract. In 1-2 concise and confident sentences, explain what this article is about:\n\n{text}"""

    message = anthropiClient.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=1024,
        messages=[
            {"role": "user", "content": summary_prompt}
        ]
    )
    # Fall back to the empty-string placeholder so a missing reply is not
    # rendered as the text "None" in the HTML page
    summary = extract_text_from_message(message) or ''
    print(summary)

    article['summary'] = summary

In [None]:
# import requests
# import json

# Search recent news for each newly trending article via Serper
url = "https://google.serper.dev/news"

# Identical for every query, so built once; a distinct name keeps the
# Wikipedia request `headers` from being overwritten with the API key.
serper_headers = {
    'X-API-KEY': SERPER_API_KEY,
    'Content-Type': 'application/json'
}

for article in newly_trending_article_list:

    title = article['normalized_title']
    # NOTE(review): "qdr:w" presumably limits results to the past week —
    # confirm against the Serper documentation.
    payload = json.dumps({
        "q": title,
        "tbs": "qdr:w"
    })

    response = requests.post(url, headers=serper_headers, data=payload, timeout=30)
    print(response.text)

    # Do not hand an error body to the LLM as if it were search results
    if not response.ok:
        print(f"News search failed for {title} (HTTP {response.status_code}); storing no results")
        article['raw_new_results'] = {}
        continue

    article['raw_new_results'] = response.json()

In [None]:

# Reply the model is told to give when none of the news items match
llm_miss_response = "False"

# Ask Claude whether each article relates to the news results collected for it
for article in newly_trending_article_list:
    article_title = article['title']
    news_results = article['raw_new_results']
    print(f"Analyzing {article_title}")

    news_prompt = f"""Does {article_title} relate to any current news found in this list {news_results}?
     If it does not, reply with '{llm_miss_response}'
     
     If it does, reply with a consise description with no leading text. For example:

     instead of 'The xxxxxx article might be trending on Wikipedia due to ..... [reason]'
     you will return: 'reason'

     You will follow this with 3 links to relevant articles in the html format:
    <br>
    <ul>
     <li><a href="link">Title</a> snippet</li>
     <li><a href="link">Title</a> snippet</li>
     <li><a href="link">Title</a> snippet</li>
    </ul>
     """

    claude_reply = anthropiClient.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=1024,
        messages=[{"role": "user", "content": news_prompt}],
    )

    # Store the reply text on the article, then echo it
    article['anthropic_news_relation'] = extract_text_from_message(claude_reply)
    print(article['anthropic_news_relation'])



    

In [None]:

# Reply the model is told to give when none of the news items match
llm_miss_response = "False"

# Ask gpt-4o-mini whether each article relates to the news results collected for it
for article in newly_trending_article_list:
    article_title = article['title']
    news_results = article['raw_new_results']
    print(f"Analyzing {article_title}")

    news_prompt = f"""Does {article_title} relate to any current news found in this list {news_results}?
     If it does not, reply with '{llm_miss_response}'
     
     If it does, reply with a consise description with no leading text. For example:

     instead of 'The xxxxxx article might be trending on Wikipedia due to ..... [reason]'
     you will return: 'reason'

     You will follow this with 3 links to relevant articles in the html format:
    <br>
    <ul>
     <li><a href="link">Title</a> snippet</li>
     <li><a href="link">Title</a> snippet</li>
     <li><a href="link">Title</a> snippet</li>
    </ul>
     """

    completion = openAIClient.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": news_prompt}],
        temperature=1,
        max_tokens=2048,
        top_p=1,
    )

    # Store the first choice's text on the article, then echo it
    article['openai_news_relation'] = completion.choices[0].message.content
    print(article['openai_news_relation'])



    

In [None]:

# Ask Claude to explain, from the extract plus its own news relation, why
# each article is trending
for article in newly_trending_article_list:
    article_title = article['title']
    article_extract = article['extract']
    news_relation = article['anthropic_news_relation']

    print(f"Analyzing {article_title}")

    summary_and_trending_reason_prompt = f"""
        Based on the article extract: {article_extract}
        and the news relation: {news_relation}
        Give a 2-3 sentence description of why it is trending.
        Do not include any introductory phrases like 'Based on the provided article extract and news links,'
        Begin with something concise like '{article_title} is trending because'
        If there is no definitive answer, give a short summary of what you tried that didn't work.
    """

    claude_reply = anthropiClient.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=1024,
        messages=[{"role": "user", "content": summary_and_trending_reason_prompt}],
    )

    # Store the reply text on the article, then echo it
    article['anthropic_trending_reason'] = extract_text_from_message(claude_reply)
    print(article['anthropic_trending_reason'])

    

In [None]:

# Ask gpt-4o-mini to explain, from the extract plus its own news relation,
# why each article is trending
for article in newly_trending_article_list:
    article_title = article['title']
    article_extract = article['extract']
    news_relation = article['openai_news_relation']

    print(f"Analyzing {article_title}")

    summary_and_trending_reason_prompt = f"""
        Based on the article extract: {article_extract}
        and the news relation: {news_relation}
        Give a 2-3 sentence description of why it is trending.
        Do not include any introductory phrases like 'Based on the provided article extract and news links,'
        Begin with something concise like '{article_title} is trending because'
        If there is no definitive answer, give a short summary of what you tried that didn't work.
    """

    completion = openAIClient.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": summary_and_trending_reason_prompt}],
        temperature=1,
        max_tokens=2048,
        top_p=1,
    )

    # Echo the whole response object first, then just the generated text
    print(f"response: {completion}")
    trending_reason = completion.choices[0].message.content
    print(f"trendingreason: {trending_reason}")

    article['openai_trending_reason'] = trending_reason
    



In [None]:

# Ensure the 'data' folder exists
file_directory = "data"
os.makedirs(file_directory, exist_ok=True)

# Define the filename based on the date
# NOTE(review): feed_date is a datetime, so the name contains a space and
# colons (e.g. "2024-01-31 00:00:00_with_news.json") — confirm that is intended.
file_path = f'{file_directory}/{feed_date}_with_news.json'

# Save the enriched article list to a JSON file (overwrite if it already exists)
with open(file_path, 'w', encoding='utf-8') as file:
    json.dump(newly_trending_article_list, file, indent=4, ensure_ascii=False)

print(f'Dumped trending list to {file_path}')

#### Build HTML Page to display results

In [None]:
# Build the HTML page: a heading plus one numbered entry per newly trending
# article, or a short message when nothing is trending.
from html import escape

html_title = f"<h1>Newly Trending on {feed_date_long}</h1>\n"
if newly_trending_article_list:
    # Collect the entries and join once instead of growing a string with +=
    list_items = []

    for item in newly_trending_article_list:
        # Escape values that land in attributes or plain text so quotes or
        # '&' in a title/URL cannot break the markup
        title = escape(item['normalized_title'])
        link = escape(item['link'])
        thumbnail = item['thumbnail']
        anthropic_trending_reason = item['anthropic_trending_reason']
        openai_trending_reason = item['openai_trending_reason']
        anthropic_news_relation = item['anthropic_news_relation']
        openai_news_relation = item['openai_news_relation']

        # `extract` is the feed's HTML extract and the news relations are
        # requested from the models as HTML, so they are inserted as-is
        extract = item['extract']
        summary = item['summary']

        # Handle null thumbnail
        if thumbnail:
            thumbnail_html = f'<img src="{escape(thumbnail)}" alt="Thumbnail for {title}"/><br>'
        else:
            thumbnail_html = ''

        anthropic_news_relation_output = f"<strong>News related to this:</strong> {anthropic_news_relation}<br><br>"
        openai_news_relation_output = f"<strong>News related to this:</strong> {openai_news_relation}<br><br>"

        # Single quotes inside the f-string keep this valid before Python 3.12,
        # where reusing the enclosing quote character is a syntax error
        view_history_items = "".join(
            f"<li><strong>{view['date'].split('Z')[0]}:</strong> {view['views']:,}</li>"
            for view in item['view_history']
        )
        view_history_list = f"<ul>{view_history_items}</ul>"

        # Create a list item for each entry (the closing </strong> after
        # "Extract:" was previously missing its '>')
        list_items.append(f"""
        <li>
            <h2>
            <a href="{link}" target="_blank">{title}</a><br>
            </h2>
            {thumbnail_html}
            <strong>Views:</strong><br>
            {view_history_list}<br><br>

            <strong>Extract:</strong> {extract}<br>
            <strong>Summary:</strong> {summary}<br>

            <h3>Anthropic Claude 3.7</h3>
            <strong>Reason for Trending:</strong> {anthropic_trending_reason}<br><br>
            {anthropic_news_relation_output}

            <h3>GPT 4o mini</h3>
            <strong>Reason for Trending:</strong> {openai_trending_reason}<br><br>
            {openai_news_relation_output}

        </li>
        """)

    # Wrap the entries in the ordered list
    html_list = "<ol>\n" + "".join(list_items) + "\n</ol>"
else:
    html_list = "<p>No articles are trending today.</p>"
html_page = html_title + html_list



In [None]:
# Make sure the output folder is present
file_directory = "data"
os.makedirs(file_directory, exist_ok=True)

# Per-day page: rewritten from scratch on every run
file_path = f'{file_directory}/{feed_date}.html'
with open(file_path, 'w', encoding='utf-8') as daily_page:
    daily_page.write(html_page)

# The master page accumulates every run, newest first
master_file_path = f'{file_directory}/master.html'

# Start from whatever the master page already holds (nothing if it does not exist)
master_content = ''
if os.path.exists(master_file_path):
    with open(master_file_path, 'r', encoding='utf-8') as existing_master:
        master_content = existing_master.read()

# New page first, then a newline, then the earlier content
updated_master_content = '\n'.join([html_page, master_content])

with open(master_file_path, 'w', encoding='utf-8') as new_master:
    new_master.write(updated_master_content)

### Display generated html

In [7]:
# Display the HTML in the notebook (assuming Jupyter or similar).
# This renders the whole master page (the new page followed by earlier
# content), not just the page generated in this run.
from IPython.display import display, HTML
display(HTML(updated_master_content))