# Personalized News Digest Workflow

In [1]:
from dotenv import load_dotenv
import os
from agents import Agent, Runner, trace, function_tool
import asyncio
from typing import List, Optional, Dict
from IPython.display import display, Markdown
from agents.run_context import RunContextWrapper
from agents.mcp import MCPServerStdio
import chromadb
from chromadb.utils import embedding_functions
from typing import List, Dict
from pydantic import BaseModel, Field
from agents.mcp import MCPServerStdio
from openai.types.responses import ResponseTextDeltaEvent
import sendgrid
from sendgrid.helpers.mail import Mail, Email, To, Content
import requests

# Load settings from a .env file into the process environment. override=True is
# passed so that .env values take precedence over variables that are already
# set (per python-dotenv's documented behavior).
load_dotenv(override=True)

True

## Stage 1: Retrieve relevant headlines (RAG)

In [2]:
def process_chroma_query_result(chroma_result: dict) -> List[Dict[str, str]]:
    """
    Flatten the output of a ChromaDB collection.query() call into a list of
    dictionaries, each containing 'url' and 'text'.

    Only the first query's results are read (index 0 of each nested list),
    which matches a call made with a single query text.

    Args:
        chroma_result (dict): The dictionary returned by the collection.query() call.
                              Expected to have 'ids' and 'documents' keys, each
                              containing a nested list.

    Returns:
        list[dict]: One dictionary per hit with the keys 'url' (the result ID)
                    and 'text' (the stored document). An empty list is returned
                    when there are no hits or when the IDs and documents cannot
                    be paired one-to-one.
    """

    def first_batch(key: str) -> list:
        # The key may be absent, None, or an empty outer list; treat each of
        # those as "no results" instead of raising on the [0] lookup.
        batches = chroma_result.get(key) or [[]]
        return batches[0] or []

    ids = first_batch('ids')
    documents = first_batch('documents')

    if len(ids) != len(documents):
        print("Warning: Mismatch between number of IDs and documents. Check the query result.")
        return []

    return [{"url": unique_id, "text": text} for unique_id, text in zip(ids, documents)]

In [3]:
@function_tool
def get_headlines(query: str) -> List[Dict[str, str]]:
    """
    Retrieve up to 5 of the most relevant headlines from the Chroma database
    based on a semantic search using the provided query string.

    Args:
        query (str): The natural language query to search against stored headlines.

    Returns:
        List[Dict[str, str]]: A list of up to 5 matching headlines, where each
                              headline is represented as {'url': str, 'text': str}.
    """

    # Embedding function applied to the query text; the API key is read from
    # the OPENAI_API_KEY environment variable.
    ef = embedding_functions.OpenAIEmbeddingFunction(
        api_key=os.getenv("OPENAI_API_KEY"),
        model_name="text-embedding-3-large"
    )

    # Connect to Chroma persistent client
    chroma_client = chromadb.PersistentClient()

    # Load the existing "headlines" collection, attaching the embedding function
    headlines = chroma_client.get_collection(
        name="headlines",
        embedding_function=ef
    )

    # Single-query search; n_results caps the number of hits at 5
    results = headlines.query(
        query_texts=[query],
        n_results=5
    )

    # Flatten the nested query result into a list of {'url', 'text'} records
    return process_chroma_query_result(results)

In [4]:
# System prompt for the retrieval (RAG) agent. The result shape described in
# step 3 uses the "url"/"text" keys that get_headlines actually returns.
instructions = """
You are a News Retrieval Agent designed to use Retrieval-Augmented Generation (RAG) to provide users with relevant news headlines based on their expressed preferences.

Your Responsibilities:
1. Understand the User's Preferences:
   - The user will describe the kinds of news they are interested in.
   - Extract key topics, categories, or entities from the user's request.

2. Formulate Queries for Retrieval:
   - For each distinct topic or entity, generate one or more concise natural-language queries that best represent the user's interest.
   - Example: If the user says "I want to hear about Google and artificial intelligence," generate queries like "Google AI", "Google artificial intelligence", "AI research at Google".

3. Call the Retrieval Tool:
   - Use the function tool get_headlines(query: str) to retrieve candidate news articles.
   - Always request multiple queries if the user has multiple interests.
   - Expect up to 5 headlines per query, structured as {"url": str, "text": str}.

4. Filter and Rerank Results:
   - Carefully judge each headline's relevance only in the context of the user's stated interests.
   - Discard irrelevant or weakly related headlines.
   - Optionally rerank or cluster results if several headlines overlap.

5. Produce the Final Output:
   - Return only the filtered set of headlines that are highly relevant to the user's query.
   - Each headline should be listed clearly (e.g., as a bullet point).
   - Do not fabricate headlines — only return those retrieved from the tool.
   - If no relevant results are found, state that clearly.

Constraints:
- Always rely on get_headlines for retrieval. Do not generate news directly.
- Filter rigorously to avoid irrelevant content.
- Prioritize accuracy and user alignment over quantity of headlines.

Example:
User Prompt: "I'd like to hear about space exploration and Elon Musk."

Agent Steps:
- Extract interests: "space exploration", "Elon Musk".
- Generate queries: "space exploration", "NASA space missions", "Elon Musk news", "SpaceX updates".
- Call get_headlines with each query.
- Retrieve results and filter out irrelevant items.

Final Output:
- NASA Announces New Timeline for Artemis Moon Mission
- SpaceX Successfully Launches Starship Prototype
- Elon Musk Teases Mars Colonization Plans
"""

In [5]:
# Schema for one article reference (URL plus headline text). The uniqueness
# requirements are expressed only as text in the docstring and the field
# descriptions; nothing in this class validates them.
class Headline(BaseModel):
    """
    A single unique news headline.
    Each headline must be distinct across all categories.
    Do not repeat the same article URL or text in multiple places.
    """
    url: str = Field(
        description="The source URL of the news article. Must be unique across all headlines."
    )
    text: str = Field(
        description="The headline text of the news article. Must be unique across all headlines."
    )


# Schema for one named category and the Headline entries grouped under it.
# The merge/split guidance for categories is instruction text only; this class
# contains no code that enforces it.
class HeadlinesCategory(BaseModel):
    """
    A group of headlines that belong to a single topical category.
    Categories are only created when the user clearly expresses two or more distinct interests.
    Closely related interests (e.g., 'AI' and 'LLMs') should be merged into one category.
    Different interests (e.g., 'AI' vs. 'Trump tariffs') should be separated into different categories.
    """
    category: str = Field(
        description="The name of the category representing one distinct area of user interest."
    )
    headlines: List[Headline] = Field(
        description="A list of unique headlines relevant to this category. No duplicates within this list or across categories."
    )


# Top-level schema used as the retrieval agent's output_type. Note that the
# `headlines` field holds HeadlinesCategory objects (each with its own
# `headlines` list), so consumers iterate two levels deep.
class HeadlinesOutput(BaseModel):
    """
    The final structured output of relevant news headlines grouped by category.
    
    Rules:
    - Each headline must be unique across all categories (no duplicate URLs or texts).
    - Categories should only be created if the user has specified two or more distinct interests.
      - Merge closely related interests into one category.
      - Separate unrelated interests into different categories.
    - If the user has only one interest, return a single category with all unique headlines.
    """
    headlines: List[HeadlinesCategory] = Field(
        description="A list of categories containing unique, relevant news headlines. No duplicates across categories."
    )

In [6]:
# Retrieval agent: get_headlines is its only tool, and output_type requests a
# HeadlinesOutput-typed final result instead of free text.
rag_agent = Agent(
    name="RAG Agent",
    tools=[get_headlines],
    instructions=instructions,
    model="gpt-4.1",
    output_type=HeadlinesOutput
)

In [7]:
# Free-text description of the reader's interests; used as the run input for rag_agent.
prompt = "Show me news on AI and on what Google is up to. Also include any news about Trump and the economy. Finally, include any news related to health and wellness."

In [8]:
with trace("News Semantic Search"):
    result = await Runner.run(rag_agent, input=prompt, max_turns=20)
    structured = result.final_output
    for category in structured.headlines:
        i = 1
        for headline in category.headlines:
            print(f"{i}. {headline.text.replace("\n", "")}: {headline.url}")
            i += 1

1. Duolingo CEO says controversial AI memo was misunderstood - TechCrunchWhile Duolingo CEO Luis von Ahn faced backlash after declaring that Duolingo would become an “AI-first company,” he suggested in a new interview the real issue was that he “did not give enough context.”: https://techcrunch.com/2025/08/17/duolingo-ceo-says-controversial-ai-memo-was-misunderstood/
2. This CEO laid off nearly 80% of his staff because they refused to adopt AI fast enough. 2 years later, he says he'd do it again - Fortune"It was extremely difficult," IgniteTech CEO Eric Vaughan tells Fortune. "But changing minds was harder than adding skills.": https://fortune.com/2025/08/17/ceo-laid-off-80-percent-workforce-ai-sabotage/
1. Duolingo CEO says controversial AI memo was misunderstood - TechCrunchWhile Duolingo CEO Luis von Ahn faced backlash after declaring that Duolingo would become an “AI-first company,” he suggested in a new interview the real issue was that he “did not give enough context.”: https://t

## Stage 2: Fetch full article text (MCP fetch server)

In [9]:
# Command and arguments handed to MCPServerStdio to start the fetch MCP server (`uvx mcp-server-fetch`).
params = {"command": "uvx", "args": ["mcp-server-fetch"]}

In [10]:
# System prompt for the article-fetching agent. This rebinds the module-level
# name `instructions`, replacing the retrieval prompt assigned earlier.
instructions = """
You are a News Article Extraction Agent. Your sole tool is the fetch MCP server from Anthropic, which you will use to retrieve content from web pages. Your task is to take a URL provided by the user and return the complete text of the news article located at that URL.

Follow these rules:

1. **Fetch the Page in Full**:
   - Use the fetch tool repeatedly if necessary to ensure you obtain the entire content of the webpage.
   - Handle cases where content may be paginated, loaded dynamically, or split across multiple fetch calls.

2. **Clean the Output**:
   - Remove all HTML tags, scripts, styles, ads, and navigation elements.
   - Remove any extra whitespace or formatting that is not part of the main article text.
   - Preserve the natural paragraphs and sentence structure of the article.

3. **Combine All Content**:
   - Merge all parts retrieved from multiple fetch calls into a single, coherent string representing the article.
   - Ensure that the final output reads as a smooth, continuous article without broken sentences or sections.

4. **Return the Result**:
   - Provide only the cleaned, combined article text as the output to the user.
   - Do not include any metadata, fetch URLs, or tool output details.

5. **Do Not Speculate**:
   - If the article cannot be fully retrieved or is missing, clearly indicate that you could not access the full content instead of guessing or summarizing.

Always prioritize accuracy and completeness of the extracted article text.
"""


In [11]:
async with MCPServerStdio(params=params) as fetch_server:
    await fetch_server.connect()

    news_agent = Agent(
        name="Fetch Agent",
        mcp_servers=[fetch_server],
        instructions=instructions,
        model="gpt-4.1"
    )

    articles = []

    for category in structured.headlines:
        for headline in category.headlines:
            with trace(f"Fetch news article {headline.url} - {category}"):
                result = await Runner.run(news_agent, input=headline.url, max_turns=20)
                articles.append(f"{headline.url}:" + "\n" + result.final_output)

In [12]:
# Bare expression: in a notebook this displays the list of fetched article strings.
articles

['https://techcrunch.com/2025/08/17/duolingo-ceo-says-controversial-ai-memo-was-misunderstood/:\nPosted: 9:34 AM PDT · August 17, 2025\n\nWhile Duolingo CEO Luis von Ahn was loudly criticized this year after declaring that Duolingo would become an “AI-first company,” he suggested in a new interview the real issue was that he “did not give enough context.”\n\n“Internally, this was not controversial,” von Ahn told The New York Times. “Externally, as a publicly traded company some people assume that it’s just for profit. Or that we’re trying to lay off humans. And that was not the intent at all.”\n\nOn the contrary, von Ahn said the company has “never laid off any full-time employees” and has no intention of doing so. And while he didn’t deny that Duolingo had cut its contractor workforce, he suggested that “from the beginning … our contractor workforce has gone up and down depending on needs.”\n\nDespite the criticism (which does not seem to have made a big impact on Duolingo’s bottom li

## Stage 3: Summarize articles into a Markdown digest

In [13]:
# System prompt for the summary-writing agent. This rebinds the module-level
# name `instructions`, replacing the fetch-agent prompt assigned earlier.
instructions = """
You are a News Summary Writer Agent. Your task is to take in a list of full news article contents and the user's preferences for news topics, styles, or focus, and produce a concise, accurate, and well-written news summary in Markdown format.

Follow these rules:

1. **Understand User Preferences**:
   - Carefully consider the user's stated interests, preferred topics, or areas of focus.
   - Prioritize including information from articles that match the user's preferences.
   - If an article is less relevant, include only key points that add context or value.

2. **Generate a Markdown News Summary**:
   - Summarize the actual content of each article, capturing key facts, findings, or events.
   - Use clear headings and subheadings if necessary to organize the summary.
   - Include bullet points or numbered lists for clarity when summarizing multiple points.
   - Maintain clean Markdown syntax throughout (e.g., `#`, `##`, `###`, `-`, `*`, etc.).

3. **Write in a Readable, Engaging Style**:
   - Ensure the text is coherent, smooth, and easy to read.
   - Avoid repeating content unnecessarily.
   - Highlight the most important and relevant aspects of the articles according to user preferences.

4. **Return Only the Markdown Summary**:
   - Do not include full raw articles, tool outputs, metadata, or internal instructions.
   - Only include the synthesized summary based on the article content.

5. **Respect Accuracy**:
   - Do not fabricate information or speculate beyond what the articles provide.
   - Ensure the summary accurately reflects the content of each article.

Always aim to create a concise, user-friendly news summary that uses the actual content of the articles while aligning with the user's interests.
"""


In [14]:
def create_user_prompt(articles: list[str], user_preferences: str) -> str:
    """
    Assemble the user message for the News Digest Writer Agent.

    Parameters:
    - articles: Article texts; they are joined with a "---" separator line.
    - user_preferences: The reader's stated news interests.

    Returns:
    - The prompt text: a fixed template (each template line indented by four
      spaces) with the joined articles and the preferences substituted in.
    """
    article_block = "\n\n---\n\n".join(articles)

    # Each entry is one line of the final prompt. The empty first entry and
    # the whitespace-only last entry reproduce the template's leading newline
    # and trailing indent.
    template_lines = [
        "",
        "    You are given the following news articles:",
        "",
        f"    {article_block}",
        "",
        "    The user has expressed the following preferences for their news digest:",
        "",
        f"    {user_preferences}",
        "",
        "    Please create a well-organized, engaging news digest in Markdown format based on these articles and the user's preferences. ",
        "    - Summarize each article in 1-3 sentences.",
        "    - Use headings, subheadings, and lists where appropriate.",
        "    - Focus on articles most relevant to the user's preferences.",
        "    - Do not include raw article text beyond summaries or any metadata.",
        "    - Ensure the digest is readable, coherent, and professional.",
        "    ",
    ]
    return "\n".join(template_lines)


In [15]:
# Reader preferences for the digest; this is the same text as the Stage 1 request, repeated verbatim.
user_preferences = "Show me news on AI and on what Google is up to. Also include any news about Trump and the economy. Finally, include any news related to health and wellness."

In [16]:
# Build the summarizer input from the fetched articles and the stated
# preferences. This rebinds `prompt`, which previously held the Stage 1 request.
prompt = create_user_prompt(articles=articles, user_preferences=user_preferences)

# Bare expression: displays the assembled prompt in the notebook.
prompt

'\n    You are given the following news articles:\n\n    https://techcrunch.com/2025/08/17/duolingo-ceo-says-controversial-ai-memo-was-misunderstood/:\nPosted: 9:34 AM PDT · August 17, 2025\n\nWhile Duolingo CEO Luis von Ahn was loudly criticized this year after declaring that Duolingo would become an “AI-first company,” he suggested in a new interview the real issue was that he “did not give enough context.”\n\n“Internally, this was not controversial,” von Ahn told The New York Times. “Externally, as a publicly traded company some people assume that it’s just for profit. Or that we’re trying to lay off humans. And that was not the intent at all.”\n\nOn the contrary, von Ahn said the company has “never laid off any full-time employees” and has no intention of doing so. And while he didn’t deny that Duolingo had cut its contractor workforce, he suggested that “from the beginning … our contractor workforce has gone up and down depending on needs.”\n\nDespite the criticism (which does not

In [17]:
# Summarizer agent with no tools or MCP servers configured. This rebinds
# `news_agent`, which earlier referred to the fetch agent.
news_agent = Agent(
    name="News Summarizer",
    instructions=instructions,
    model="gpt-4.1",
)

In [18]:
with trace("News Summary"):
    result = await Runner.run(news_agent, input=prompt)
    news_markdown = result.final_output
    display(Markdown(result.final_output))

# Weekly News Digest

## Artificial Intelligence

### Radical Workforce Overhaul at IgniteTech in AI Push
- **IgniteTech CEO Eric Vaughan** replaced nearly 80% of his staff in 2023-2024 to transform the enterprise software company into an AI-driven organization.
- Significant internal resistance, especially from technical staff, led to widespread layoffs, with new roles focused on "AI innovation specialists."
- The move resulted in strong financial performance, rapid product launches, and a restructured company reporting directly through the AI division.
- Industry-wide, resistance and even sabotage of AI rollouts is common as workers fear job loss or experience frustration with unproven tools; experts agree that company culture and buy-in are crucial for successful AI adoption.
- Other companies like **Ikea** are opting for augmentation rather than automation, reskilling workers instead of replacing them.

### Duolingo CEO Addresses Controversy Over AI Memo
- **Luis von Ahn**, CEO of Duolingo, clarified that the company’s shift to being "AI-first" was misunderstood; the move was not about laying off full-time staff.
- While contractor numbers have fluctuated, no full-time employees have been let go, and AI adoption remains a key focus with company-wide experimentation.
- Duolingo’s embrace of AI has not negatively impacted its business performance.

## Trump and International Affairs

### Trump and Putin: From Ceasefire to “Permanent Peace” in Ukraine
- President **Trump** reversed his stance after meeting with Russian President Putin in Alaska, abandoning his ceasefire push for Ukraine in favor of seeking a “permanent peace agreement.”
- The shift aligns Trump with Putin and contrasts with Ukrainian President **Zelenskyy’s** preferred approach of an immediate ceasefire, which Ukraine views as essential to halting bloodshed and opening diplomatic channels.
- The differing views set the stage for further diplomatic tension as Zelenskyy and European leaders visit the White House to push for Western support.

## Health and Wellness

### New Blood Pressure Guidelines Issued by Heart Experts
- The American Heart Association and the American College of Cardiology released updated blood pressure guidelines, stressing regular monitoring, early recognition of emergencies, sodium-restricted diets, weight management, and combined physical activity and healthy eating patterns (DASH diet).
- Losing just 5% of body weight can significantly reduce blood pressure; routine checks and stress management techniques like yoga or meditation are recommended.

### Supermodel Barbara Palvin Opens Up About Endometriosis
- Barbara Palvin revealed she underwent surgery for endometriosis after years of undiagnosed pain and difficult periods, encouraging others to seek early medical advice.
- Her candid disclosure drew widespread support and increased awareness of a condition affecting millions of women worldwide.

### Urban Mosquito Surges Linked to Climate Change in Las Vegas
- Las Vegas is experiencing a sharp rise in mosquito populations, including species carrying West Nile and dengue, due to urban development, climate change, and increased insecticide resistance.
- Experts warn of growing public health risks as mosquitoes adapt to new environments and existing control measures become less effective, highlighting the need for coordinated local government responses.

---

*For further in-depth articles on Google, inflation, or the wider economy, full article texts were unavailable this week. Please provide alternative sources if you’d like additional coverage on these topics.*

## Stage 4: Email the digest (SendGrid)

In [19]:
def send_test_email():
    """
    Send a fixed plain-text test message through SendGrid and print the
    HTTP status code of the response.

    The API key is read from the SENDGRID_API_KEY environment variable; the
    sender and recipient addresses are hard-coded below.
    """
    client = sendgrid.SendGridAPIClient(api_key=os.environ.get('SENDGRID_API_KEY'))
    message = Mail(
        Email("arnav.thakrar@gmail.com"),  # Change to your verified sender
        To("arnav.thakrar@gmail.com"),  # Change to your recipient
        "Test email",
        Content("text/plain", "This is an important test email"),
    )
    # Mail.get() produces the request body passed to the mail/send endpoint.
    response = client.client.mail.send.post(request_body=message.get())
    print(response.status_code)

send_test_email()

202


In [20]:
@function_tool
def send_html_email(subject: str, html_body: str, recipient: str) -> Dict[str, str]:
    """
    Send an HTML email through SendGrid.

    Args:
        subject (str): Subject line of the email.
        html_body (str): HTML content used as the email body.
        recipient (str): Email address of the recipient.

    Returns:
        Dict[str, str]: Outcome of the send attempt, with a "status" of
        "success" or "error" and an explanatory "message".
    """
    try:
        sendgrid_key = os.environ.get("SENDGRID_API_KEY")
        if not sendgrid_key:
            return {"status": "error", "message": "SendGrid API key not set in environment"}

        client = sendgrid.SendGridAPIClient(api_key=sendgrid_key)
        message = Mail(
            Email("arnav.thakrar@gmail.com"),  # Must be verified in SendGrid
            To(recipient),
            subject,
            Content("text/html", html_body),
        )
        response = client.client.mail.send.post(request_body=message.get())

        # Guard clause: anything outside 2xx is reported back with the raw body.
        if not (200 <= response.status_code < 300):
            raw_body = response.body
            return {
                "status": "error",
                "message": f"SendGrid responded with status {response.status_code}",
                "body": raw_body.decode() if hasattr(raw_body, 'decode') else str(raw_body)
            }

        return {"status": "success", "message": f"Email sent to {recipient}"}

    except Exception as e:
        # Any failure is returned to the calling agent as an error result
        # rather than raised.
        return {"status": "error", "message": str(e)}

In [21]:
# System prompt for the email agent. This rebinds the module-level name
# `instructions`, replacing the summary-writer prompt assigned earlier.
instructions = """
You are a News Email Agent. Your task is to take a Markdown-formatted news summary provided by the user, 
convert it to a clean HTML email, and send it to the recipient email address specified in the user's prompt.

Follow these rules carefully:

1. **Input Handling**:
   - The user will provide a news summary in Markdown format.
   - The user will also provide a recipient email address where the email should be sent.

2. **Markdown to HTML Conversion**:
   - Convert all Markdown elements to proper HTML:
       - Headings (#, ##, ###) → <h1>, <h2>, <h3>
       - Bold (**text**) → <strong>
       - Italics (*text*) → <em>
       - Links ([text](url)) → <a href="url">text</a>
       - Lists (- item or * item) → <ul><li>item</li></ul>
       - Paragraphs → <p>
   - Ensure the resulting HTML is clean and readable in an email client.

3. **Email Composition**:
   - Set the email subject line as "Daily News Summary".
   - Use the HTML content generated from the Markdown conversion as the body.

4. **Call the Tool**:
   - Use the `send_html_email` function tool.
   - Pass the following arguments:
       - `subject`: "Daily News Summary"
       - `html_body`: the HTML you generated
       - `recipient`: the email address provided by the user

5. **Error Handling**:
   - If the tool reports an error, provide a clear message to the user with the reason.
   - Confirm successful email sending to the user with the recipient address.

6. **Output**:
   - Never send raw Markdown; always convert it to HTML before sending.
   - Provide a short confirmation message to the user about email delivery status.
"""


In [22]:
# Address the digest is sent to; interpolated into the email agent's prompt.
recipient_email = "arnav.thakrar@gmail.com"

In [23]:
# User message for the email agent: embeds the Markdown digest (news_markdown)
# and the recipient address. This rebinds `prompt` once more.
prompt = f"""
You are receiving a personalized news summary in Markdown format and need to send it as an HTML email.

Here is the news summary (Markdown format):
{news_markdown}

Please send this news summary as an HTML email to the recipient email address below:

Recipient Email: {recipient_email}

Instructions for yourself:
1. Convert the Markdown content above into proper HTML suitable for an email.
2. Set the email subject to "Daily News Summary".
3. Call the `send_html_email` function tool with the following parameters:
   - subject: "Daily News Summary"
   - html_body: the HTML content you generated
   - recipient: {recipient_email}
4. Report back whether the email was successfully sent or if there was an error.
"""


In [24]:
# Email agent: send_html_email is its only tool; the Markdown-to-HTML
# conversion is left to the model, as directed by the instructions.
email_agent = Agent(
    name="Email Agent",
    instructions=instructions,
    tools=[send_html_email],
    model="gpt-4.1",
)

In [25]:
with trace("Email Agent"):
    # Run the email agent on the prompt that embeds the digest and recipient.
    result = await Runner.run(email_agent, input=prompt)
    # Show the agent's final message (its delivery report) as Markdown.
    display(Markdown(result.final_output))

The news summary was successfully converted to a clean HTML email and sent to arnav.thakrar@gmail.com with the subject "Daily News Summary".

If you need another summary sent or wish to add more recipients, just let me know!