# Run Agent
- Implement a workflow to write a daily AI newsletter
- see README.md for details


In [1]:
import os
import yaml
import dotenv
import logging
import json
import yaml
from datetime import datetime
import time
import random
import glob
import pickle
import sqlite3

from pathlib import Path

import asyncio
import nest_asyncio

import pydantic
from pydantic import BaseModel, Field, RootModel
from typing import Dict, TypedDict, Type, List, Optional, Any, Iterable, Text
from dataclasses import dataclass, field
from enum import Enum

import numpy as np
import pandas as pd

import pandas as pd
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
import hdbscan

import openai
from openai import AsyncOpenAI

import agents
from agents.exceptions import InputGuardrailTripwireTriggered
from agents import (Agent, Runner, Tool, OpenAIResponsesModel, 
                    ModelSettings, FunctionTool, InputGuardrail, GuardrailFunctionOutput,
                    SQLiteSession, set_default_openai_api, set_default_openai_client
                   )


import tenacity
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type

from IPython.display import HTML, Image, Markdown, display

from log_handler import SQLiteLogHandler, setup_sqlite_logging, sanitize_error_for_logging
from config import LOGDB
from llm import LLMagent, LangfuseClient  # methods to apply prompts async to large batches
from db import Url 

from fetch import Fetcher # fetch news urls
from newsletter_state import NewsletterAgentState, StepStatus
from news_agent import NewsletterAgent


In [2]:
# Report the installed versions of the key libraries in a single write,
# one library per output line.
print(
    f"OpenAI:            {openai.__version__}\n"
    f"OpenAI Agents SDK  {agents.__version__}\n"
    f"Pydantic           {pydantic.__version__}"
)


OpenAI:            1.109.0
OpenAI Agents SDK  0.3.1
Pydantic           2.11.9


In [3]:
# Load environment variables from a .env file (if one is found) into os.environ
dotenv.load_dotenv()

# to run async in jupyter notebook
# (nest_asyncio patches asyncio so an event loop can be re-entered while the
# notebook's own loop is already running)
nest_asyncio.apply()

# verbose OpenAI console logging if something doesn't work
# logging.basicConfig(level=logging.DEBUG)
# openai_logger = logging.getLogger("openai")
# openai_logger.setLevel(logging.DEBUG)


In [4]:
# modules create a default logger, or we can pass this logger

def setup_logging(session_id: str = "default", db_path: str = "agent_logs.db") -> logging.Logger:
    """Set up logging to console and SQLite database.

    Builds (or reconfigures) the logger named ``NewsletterAgent.<session_id>``
    with exactly two INFO-level handlers: a console stream handler and a
    ``SQLiteLogHandler`` writing to ``db_path``.

    Args:
        session_id: Suffix used in the logger name.
        db_path: Path passed to ``SQLiteLogHandler``.

    Returns:
        The configured logger. Propagation to the root logger is disabled so
        records are not emitted a second time by root handlers.
    """
    # Ensure the root logger has a baseline configuration; this does nothing
    # if the root logger already has handlers.
    logging.basicConfig(level=logging.INFO)

    logger = logging.getLogger(f"NewsletterAgent.{session_id}")
    logger.setLevel(logging.INFO)

    # Remove AND close handlers left over from a previous call (e.g. when the
    # notebook cell is re-run). Merely clearing the list would leave the old
    # handlers' underlying resources open.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # Console handler
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s | %(name)s | %(levelname)s | %(message)s',
            datefmt='%H:%M:%S'
        )
    )

    # SQLite handler: only the bare message is formatted
    sqlite_handler = SQLiteLogHandler(db_path)
    sqlite_handler.setLevel(logging.INFO)
    sqlite_handler.setFormatter(logging.Formatter('%(message)s'))

    # Add handlers to logger
    logger.addHandler(console_handler)
    logger.addHandler(sqlite_handler)

    # Prevent propagation to root logger
    logger.propagate = False

    return logger

logger = setup_logging(session_id="newsletter_agent", db_path="test_logs.db")

# Emit one record at each of INFO / WARNING / ERROR, attaching the extra
# fields (step_name, agent_session) to every record.
logger.info(
    "Test info message",
    extra=dict(step_name='test_step', agent_session='demo_session'),
)

logger.warning(
    "Test warning message",
    extra=dict(step_name='test_step', agent_session='demo_session'),
)

logger.error(
    "Test error message",
    extra=dict(step_name='error_step', agent_session='demo_session'),
)

# Last expression of the cell: the notebook displays the sanitized string.
sanitize_error_for_logging(
    "log with some bad stuff for the filter: sk-proj-123456789012345678901234567890123456789012345678"
)

17:05:06 | NewsletterAgent.newsletter_agent | INFO | Test info message
17:05:06 | NewsletterAgent.newsletter_agent | ERROR | Test error message


'log with some bad stuff for the filter: [API_KEY_REDACTED]'

# Run Agent Workflow

In [5]:
print("🚀 Creating NewsletterAgent...")

do_download = True
process_since = None
# process_since='2025-10-05 18:30:00'

# Resume toggle: with session_id / step_name assigned, the cell resumes that
# session from the named step. Uncomment the `del` line to start a new session.
try:
    # set up state
    session_id = 'test_newsletter_20251011082816006041'
    step_name = 'step_08_draft_sections'
#     del session_id, step_name
except Exception as e:
    print(e)

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable not set")

# Set up OpenAI client for the agents SDK
set_default_openai_client(AsyncOpenAI(api_key=api_key))

# Create agent with persistent state
if 'session_id' not in vars():
    # No session to resume: create a new one, named after the current timestamp
    print("session_id is not defined")
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    session_id = f"test_newsletter_{timestamp}"
    print(session_id)
    state = NewsletterAgentState(
        session_id=session_id,
        db_path="newsletter_agent.db",
        do_download=do_download,
        process_since=process_since,
        verbose=False,
    )
    agent = NewsletterAgent(session_id=session_id, state=state, verbose=False, timeout=30)
    state.serialize_to_db("initialize")
else:
    # Existing session: load the state saved for step_name from the database
    print("session_id is defined")
    print(session_id)
    state = NewsletterAgentState(
        session_id=session_id,
        db_path="newsletter_agent.db",
        do_download=do_download,
        process_since=process_since,
        verbose=False,
    ).load_from_db(step_name)
    agent = NewsletterAgent(session_id=session_id, state=state, verbose=True, timeout=30)

17:05:06 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Using provided state with 106 articles


🚀 Creating NewsletterAgent...
session_id is defined
test_newsletter_20251011082816006041
Initialized NewsletterAgent with persistent state and 9-step workflow
Session ID: test_newsletter_20251011082816006041


In [None]:
# Query the agent's state object for its status; as the cell's last expression
# the returned value is displayed by the notebook.
agent.state.get_status()


In [None]:
# User prompt to run workflow
user_prompt = "Show the workflow status"

print(f"\n📝 User prompt: '{user_prompt}'")
print("=" * 80)

# Run the agent with persistent state
start_time = time.time()
result = await agent.run_step(user_prompt)
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)

In [None]:
# User prompt to run a workflow step
user_prompt = "Run step 1, fetch urls"

print(f"\n📝 User prompt: '{user_prompt}'")
print("=" * 80)

# Run the agent with persistent state
start_time = time.time()
result = await agent.run_step(user_prompt)
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


In [None]:
# Render state.headline_data as a DataFrame for inspection in the notebook
pd.DataFrame(state.headline_data) 


In [None]:
# Count headline records per source (non-null 'id' values), largest first.
countdf = (
    pd.DataFrame(state.headline_data)
    .groupby("source")[["id"]]
    .count()
    .reset_index()
    .rename(columns={'id': 'count'})
    .sort_values("count", ascending=False)
)
countdf


In [None]:
# Run tool directly without LLM processing an input prompt or results
# user_prompt = "Run step 2, filter urls"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

# Run the agent with persistent state
start_time = time.time()
result = await agent.run_tool_direct("filter_urls")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


In [None]:
# User prompt to run workflow
# user_prompt = "Run step 3, download full articles"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

# Run the agent with persistent state
start_time = time.time()
result = await agent.run_tool_direct("download_articles")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)

In [None]:
# Show the rows whose 'status' is anything other than 200, with pandas
# display truncation switched off for the duration of the block.
headline_df = state.headline_df
with pd.option_context(
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
):
    display(
        headline_df.loc[
            headline_df["status"] != 200,
            ['id', 'source', 'title', 'status', 'final_url'],
        ]
    )




In [None]:
# Show the rows whose 'html_path' is an empty string, with pandas display
# truncation switched off for the duration of the block.
headline_df = state.headline_df
with pd.option_context(
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
):
    display(
        headline_df.loc[
            headline_df["html_path"] == "",
            ['id', 'source', 'title', 'status', 'final_url'],
        ]
    )



In [None]:
# User prompt to run workflow
# user_prompt = "Run step 4, Summarize articles"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

start_time = time.time()
result = await agent.run_tool_direct("extract_summaries")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)

In [None]:
# If we get a refusal, examine the prompt and list the offending rows here.
# Each entry is matched against the first 20 characters of the title.
bad_stuff = [
    "What's the Best Way ",
    "AI could make it eas",
    "AI can design toxic ",
]
# Print each prefix's length (matching uses the first 20 characters of the title).
print(list(map(len, bad_stuff)))
headline_df = state.headline_df
headline_df[headline_df["title"].str[:20].isin(bad_stuff)]



In [None]:
# remove
# Keep only the rows whose 20-character title prefix is NOT listed in bad_stuff
# and pass them to state.headline_df_to_dict.
# NOTE(review): presumably this stores the filtered rows back into state — confirm.
state.headline_df_to_dict(headline_df.loc[~headline_df["title"].str[:20].isin(bad_stuff)])


In [None]:
# User prompt to run workflow
# user_prompt = "Run step 5, Rate articles"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

start_time = time.time()
result = await agent.run_tool_direct("rate_articles")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


In [None]:
# List articles from highest to lowest rating, with pandas display
# truncation switched off for the duration of the block.
headline_df = state.headline_df
with pd.option_context(
    'display.max_columns', None,
    'display.width', None,
    'display.max_colwidth', None,
):
    display(
        headline_df
        .sort_values("rating", ascending=False)
        .loc[:, ['site_name', 'title', 'rating', 'short_summary']]
    )


In [None]:
# Render every article as a Markdown snippet, highest rating first:
# "<rating> - [title](url) - site_name", then a blank line and the short summary.
# The f-string's line breaks are part of the rendered Markdown; leave them as-is.
for row in headline_df.sort_values("rating", ascending=False).itertuples():
    display(Markdown(f"""
{row.rating:.1f} - [{row.title}]({row.url}) - {row.site_name}

{row.short_summary}
    """))
    

In [None]:
# User prompt to run workflow
# user_prompt = "Run step 6, Cluster articles by topic"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

start_time = time.time()
result = await agent.run_tool_direct("cluster_by_topic")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


In [None]:
# User prompt to run workflow
# user_prompt = "Run step 7, select section topics"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

start_time = time.time()
result = await agent.run_tool_direct("select_sections")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


In [None]:
# User prompt to run workflow
# user_prompt = "Run step 7, select section topics"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

start_time = time.time()
result = await agent.run_tool_direct("draft_sections")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


In [6]:
# User prompt to run workflow
# user_prompt = "Run step 7, select section topics"
# print(f"\n📝 User prompt: '{user_prompt}'")
# print("=" * 80)

start_time = time.time()
result = await agent.run_tool_direct("finalize_newsletter")
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)


17:05:20 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Step 9a: Critiquing and optimizing individual sections
17:05:20 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Moving 2 singleton categories to Other: ['ChatGPT Bias Reduction', 'OpenAI Safety Issues']
INFO:llm:Initialized LangfuseClient
INFO:llm:Successfully retrieved prompt 'newsagent/critique_section' from Langfuse
INFO:llm:Parsed prompt 'newsagent/critique_section': model=gpt-5, system_len=1142, user_len=123
17:05:20 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Processing 17 sections
17:05:20 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Initialized LLMagent:
system_prompt:   You are an expert newsletter editor specializing in technology news curation. Your task is to critique individual newsletter sections
  and provide actionable recommendations to improve quality, coherence, and readability.

  For each section, you will:
  1. Assess thematic coherence -

▶ Starting Step 9: step_09_finalize_newsletter


17:05:21 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | User message: **Section Title:** Other News

**Available target_category values**: 
['Workforce AI Impact\nOpenAI Deal Network\nOpenAI Governance And Legal\nSam Altman AI Expansion\nCross-Sector AI Adoption\nLLM Reasoning Pretraining\nAI Cloud Infrastructure Surge\nBanking AI Impact\nHigh-Bandwidth Memory Growth\nLLM Inference Benchmarking\nTech Firms AI Hiring\nSoftware Development AI Tools\nAI Investment Growth\nAI Startup Funding\nAI Emerging Risks\nSemantic Search Across Data']

**Headlines:**
[{'id': 41, 'headline': 'AI video app piece lacks summary.', 'rating': 3.12589485, 'links': '[The Wall Street Journal](https://www.wsj.com/tech/personal-tech/i-tried-the-hot-new-ai-video-app-it-made-me-lonelier-than-ever-c9fdcceb)'}, {'id': 6, 'headline': "OpenAI touts GPT-5's 30% political-bias reduction after 100-topic stress test; adds tone controls and guidelines, as studies flag persistent biases across other AI.", 

17:05:21 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | User message: **Section Title:** AI Levels Up Reasoning

**Available target_category values**: 
['Workforce AI Impact\nOther\nOpenAI Deal Network\nOpenAI Governance And Legal\nSam Altman AI Expansion\nCross-Sector AI Adoption\nAI Cloud Infrastructure Surge\nBanking AI Impact\nHigh-Bandwidth Memory Growth\nLLM Inference Benchmarking\nTech Firms AI Hiring\nSoftware Development AI Tools\nAI Investment Growth\nAI Startup Funding\nAI Emerging Risks\nSemantic Search Across Data']

**Headlines:**
[{'id': 64, 'headline': 'Nvidia unveils RLP training that prompts LLMs to generate reasoning chains first, lifting reasoning benchmarks up to 17% and stabilizing enterprise fine-tuning.', 'rating': 8.0229878524, 'links': '[VentureBeat](https://venturebeat.com/ai/nvidia-researchers-boost-llms-reasoning-skills-by-getting-them-to-think)'}, {'id': 84, 'headline': 'Google DeepMind debuts Gemini Robotics 1.5 and ER 1.5, enabling robot

17:05:21 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | User message: **Section Title:** AI Talent Arms Race

**Available target_category values**: 
['Workforce AI Impact\nOther\nOpenAI Deal Network\nOpenAI Governance And Legal\nSam Altman AI Expansion\nCross-Sector AI Adoption\nLLM Reasoning Pretraining\nAI Cloud Infrastructure Surge\nBanking AI Impact\nHigh-Bandwidth Memory Growth\nLLM Inference Benchmarking\nSoftware Development AI Tools\nAI Investment Growth\nAI Startup Funding\nAI Emerging Risks\nSemantic Search Across Data']

**Headlines:**
17:05:21 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | User message: **Section Title:** When Software Writes Software

**Available target_category values**: 
['Workforce AI Impact\nOther\nOpenAI Deal Network\nOpenAI Governance And Legal\nSam Altman AI Expansion\nCross-Sector AI Adoption\nLLM Reasoning Pretraining\nAI Cloud Infrastructure Surge\nBanking AI Impact\nHigh-Bandwidth Memory Growth\nLLM Inference Be

17:05:21 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | User message: **Section Title:** AI Hype, Risk, Reality

**Available target_category values**: 
['Workforce AI Impact\nOther\nOpenAI Deal Network\nOpenAI Governance And Legal\nSam Altman AI Expansion\nCross-Sector AI Adoption\nLLM Reasoning Pretraining\nAI Cloud Infrastructure Surge\nBanking AI Impact\nHigh-Bandwidth Memory Growth\nLLM Inference Benchmarking\nTech Firms AI Hiring\nSoftware Development AI Tools\nAI Investment Growth\nAI Startup Funding\nSemantic Search Across Data']

**Headlines:**
[{'id': 96, 'headline': 'Dimon, BoE, Bezos warn AI bubble could burst within 6 to 24 months', 'rating': 7.4225531267, 'links': '[RTÉ](https://www.rte.ie/news/business/2025/1010/1537856-ai-stock-tech/)'}, {'id': 31, 'headline': 'Ex-Twitter/Meta policy chief: AI repeats social-media mistakes, needs regulation, cross-sector testing, and global standards', 'rating': 7.1389981821, 'links': '[Fortune](https://fortune.com/2025/

17:05:52 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Result: section_name='AI Funding Roundup' section_title='AI Funding Roundup' overall_coherence=9.0 overall_quality=8.0 should_split=False split_recommendation=None item_actions=[SectionItemAction(id=55, action='rewrite', reason='Strong fit and source; headline is long and cluttered. Lead with investor and tighten language under 20 words.', rewritten_headline='Bain invests $150M in Govini as ARR tops $100M to scale AI Ark for defense logistics', target_category=None), SectionItemAction(id=39, action='rewrite', reason='Good fit; headline is wordy and repeats company names. Tighten and clarify acquisition strategy.', rewritten_headline='Prezent raises $30M at $400M valuation to acquire AI services firms, starting with Prezentium', target_category=None), SectionItemAction(id=24, action='rewrite', reason='Relevant funding round with notable backers; simplify and keep under 20 words.', rewritten_headline='Worktrace AI r

17:06:06 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Result: section_name='AI Talent Arms Race' section_title='AI Talent Arms Race' overall_coherence=8.0 overall_quality=7.0 should_split=False split_recommendation=None item_actions=[SectionItemAction(id=83, action='rewrite', reason='Strong fit with theme (mass AI hiring); headline is long and splits focus. Tighten for clarity and brevity.', rewritten_headline='TCS doubles AI staff to 160,000, commits $5–6B for 1GW AI data centers.', target_category=None), SectionItemAction(id=94, action='rewrite', reason='Fits talent expansion narrative; headline is wordy and includes extraneous financial detail.', rewritten_headline='TCS opens London AI hub and design studio, targets 5,000 UK jobs in three years.', target_category=None), SectionItemAction(id=26, action='rewrite', reason='Clear acqui-hire aligns with talent arms race; shorten and clarify.', rewritten_headline='Apple to acqui-hire Prompt AI, integrate Seemour vision 

17:06:11 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Result: section_name='Sam Altman AI Expansion' section_title="OpenAI's Expanding Orbit" overall_coherence=8.0 overall_quality=7.5 should_split=False split_recommendation=None item_actions=[SectionItemAction(id=2, action='rewrite', reason="Tighten wording, specify the scope ($1T AI deals) and avoid vague phrasing like 'cementing dependencies'.", rewritten_headline='OpenAI sits at center of $1T AI deals, deepening industry dependencies', target_category=None), SectionItemAction(id=73, action='rewrite', reason='Clarify the stake and outcome in active voice; remove hype and keep under 20 words.', rewritten_headline='OpenAI takes 10% stake in AMD; shares jump 34% after multibillion-dollar deal', target_category=None), SectionItemAction(id=79, action='drop', reason='Speculative valuation from a low-authority source; overlaps with stronger dealmaking items and weakens section credibility.', rewritten_headline=None, targe

17:06:36 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | Result: section_name='AI Goes Mainstream' section_title='AI Goes Mainstream' overall_coherence=5.5 overall_quality=6.2 should_split=True split_recommendation='Split into two tighter arcs: 1) Consumer AI platforms and devices (browsers, consoles, retail shopping) and 2) Enterprise adoption and operations (TCS, Chevron, Gap, hospitals). Move investment- and risk-focused items to AI Investment Growth and AI Emerging Risks.' item_actions=[SectionItemAction(id=16, action='rewrite', reason='Strong, timely consumer-platform story; tighten and reduce jargon for clarity.', rewritten_headline='AI browser wars intensify: Google embeds Gemini in Chrome; Comet and Neon launch agent-powered, privacy-focused browsers.', target_category=None), SectionItemAction(id=82, action='rewrite', reason='Core enterprise adoption example; condense and keep concrete details.', rewritten_headline='TCS makes AI default across projects; trains 1

17:06:36 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 99, leaving 99
17:06:36 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       MOVE id=57 to Cross-Sector AI Adoption: Education-focused investment and training initiative is better framed as sectoral adoption than white-collar labor impacts.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 99, leaving 99
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       REWRITE id=74: Policy response to displacement complements the section; tighten and remove redundancy.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         Old: Sanders to propose robot tax on large firms replacing workers with AI, aiming to offset lost tax revenue and deter displacement.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         New: Sanders will propose a robot tax on firms replacin

17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         Old: Court ends ChatGPT log preservation after joint motion by OpenAI and news orgs; OpenAI can stop saving most deleted/temporary chats after Sept. 26.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         New: Court lifts order to preserve ChatGPT logs; OpenAI can stop saving most deleted or temporary chats.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 91, leaving 91
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       MOVE id=45 to AI Emerging Risks: Opinion piece blends copyright backlash with broad dealmaking; dilutes legal focus of this section. Better framed as risk context than legal action.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 91, leaving 91
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       Coherence: 7.0/1

17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 1 stories of 90, leaving 91
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       REWRITE id=12: Compelling consumer hardware shift; make active and concrete.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         Old: AMD and Sony tease Project Amethyst, a machine learning–driven PlayStation chipset rethinking the graphics pipeline.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         New: AMD and Sony unveil Project Amethyst, an ML-driven PlayStation chipset that rethinks the graphics pipeline.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 90, leaving 90
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       DROP id=62: Low rating and niche Kickstarter gadget; not central to mainstream adoption.
17:06:37 | NewsletterAgent.test_newsletter_202510110828

17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 84, leaving 84
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       MOVE id=85 to AI Emerging Risks: Focuses on metric inflation and environmental accounting, which fits risk/impact more than infrastructure buildout.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 84, leaving 84
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       Coherence: 7.5/10,       Quality: 7.0/10
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       REWRITE id=89: Anchors the section with credible regulatory focus; tighten for clarity and brevity. Consider sourcing from FSB/BIS release or Reuters for authority.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         Old: FSB, BIS tighten AI risk oversight in finance, warn of shared-model systemic threats, cyber and fr

17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         New: TCS opens London AI hub and design studio, targets 5,000 UK jobs in three years.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 84, leaving 84
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       REWRITE id=26: Clear acqui-hire aligns with talent arms race; shorten and clarify.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         Old: Apple nears acqui-hire of Prompt AI to fold Seemour vision tech into Apple Intelligence; Seemour app to be retired.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         New: Apple to acqui-hire Prompt AI, integrate Seemour vision tech into Apple Intelligence.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 84, leaving 84
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO | 

17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 1 stories of 81, leaving 82
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       DROP id=106: Portfolio composition story is weakly connected to AI strategy; low-authority source; adds little to the narrative.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 1 stories of 80, leaving 81
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       DROP id=124: Rating < 3.0 and generic ETF roundup; not aligned with strategic power plays.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 1 stories of 79, leaving 80
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       Coherence: 9.0/10,       Quality: 8.0/10
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       REWRITE id=55: Strong fit and source; headline is long and cluttered. Lead with investo

17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |         New: AI-generated homeless-man TikTok triggers false 911 calls in U.S. and U.K.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 78, leaving 78
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       MOVE id=92 to Other: Governance/ethics of political figure advising AI firms fits better outside this theme and lacks strong sourcing.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 0 stories of 78, leaving 78
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       DROP id=75: Generic opinion piece with low authority and limited specifics; dilutes section focus.
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |     Pruned 1 stories of 77, leaving 78
17:06:37 | NewsletterAgent.test_newsletter_20251011082816006041 | INFO |       DROP id=80: Niche parenting work

⏱️  Total execution time: 82.48s
📊 Final result:
❌ Step 9 failed: RetryError[<Future at 0x12d991b10 state=finished raised Exception>]


In [None]:
# Work on a copy so that edits made while debugging do not modify the
# DataFrame object held by state.
newsletter_section_df = state.newsletter_section_df.copy()


In [None]:
# Per (cat, section_title) pair: mean rating and story count,
# ordered from highest to lowest mean rating.
cat_df = (
    newsletter_section_df
    .groupby(["cat", "section_title"])
    .agg(rating=('rating', 'mean'), count=('id', 'count'))
    .sort_values('rating', ascending=False)
    .reset_index()
)


In [None]:
            # Move singleton categories to "Other"
            # A singleton is a cat whose cat_df row has count == 1.
            singleton_cats = cat_df[cat_df['count'] == 1]['cat'].tolist()
            if singleton_cats:
                logger.info(
                    f"Moving {len(singleton_cats)} singleton categories to Other: {singleton_cats}")
                # Relabel every singleton row in one vectorized pass instead of
                # looping over the categories one at a time.
                is_singleton = newsletter_section_df['cat'].isin(singleton_cats)
                newsletter_section_df.loc[is_singleton, 'cat'] = 'Other'
                # Set the shared title once, after relabeling; this covers both
                # the rows just moved and any rows that were already 'Other'.
                newsletter_section_df.loc[newsletter_section_df['cat']
                                          == 'Other', 'section_title'] = 'Other News'


In [None]:
                # Rebuild the per-(cat, section_title) summary: mean rating and
                # story count (left in groupby key order, not sorted by rating).
                cat_df = (
                    newsletter_section_df
                    .groupby(["cat", "section_title"])
                    .agg(rating=('rating', 'mean'), count=('id', 'count'))
                    .reset_index()
                )


In [None]:
# Map each cat to its section_title; if a cat appears in more than one row,
# the last row wins.
cat_df_dict = {
    cat: title
    for cat, title in zip(cat_df['cat'], cat_df['section_title'])
}
cat_df_dict


In [None]:
            # Distinct category labels, in order of first appearance in the frame;
            # the log line reports one section per distinct category.
            unique_cats = newsletter_section_df['cat'].unique()
            logger.info(f"Processing {len(unique_cats)} sections")


In [None]:
# Print the category labels, one per line
print("\n".join(unique_cats))

In [None]:

# Pydantic model for one per-story recommendation inside a section critique.
# NOTE(review): pydantic puts the class docstring and the Field descriptions
# into the generated JSON schema; if that schema is given to an LLM, rewording
# them changes what the model sees — edit with care.
class SectionItemAction(BaseModel):
    """Action to take on a specific story within a section"""
    id: int = Field(description="Story ID from newsletter_section_df")
    # Plain str: the four values named in the description are not enforced
    # by validation.
    action: str = Field(
        description="Action: 'keep', 'drop', 'rewrite', 'move'"
    )
    reason: str = Field(description="Why this action is recommended")
    # Optional, defaults to None; per its description only used for 'rewrite'.
    rewritten_headline: Optional[str] = Field(
        default=None,
        description="New headline text if action=='rewrite'"
    )
    # Optional, defaults to None; per its description only used for 'move'.
    target_category: Optional[str] = Field(
        default=None,
        description="Target category name if action=='move'"
    )


# Pydantic model for the critique of one section: two 0-10 scores, a split
# recommendation, per-story actions, and free-text notes.
# NOTE(review): pydantic puts the class docstring and the Field descriptions
# into the generated JSON schema; if that schema is given to an LLM, rewording
# them changes what the model sees — edit with care.
class SectionCritique(BaseModel):
    """Quality critique for a single newsletter section"""
    section_name: str = Field(description="Category name being critiqued")
    section_title: str = Field(description="Section title")

    # Scores are plain floats; the 0-10 range is stated in the description
    # only and is not enforced by validation.
    overall_coherence: float = Field(
        description="0-10: How well stories fit together thematically"
    )
    overall_quality: float = Field(
        description="0-10: Overall section quality (considering ratings, headlines, coherence)"
    )

    should_split: bool = Field(
        description="True if section is too heterogeneous and should be split"
    )
    # Optional, defaults to None.
    split_recommendation: Optional[str] = Field(
        default=None,
        description="Explanation of how to split if should_split==True"
    )

    # Defaults to a fresh empty list per instance (default_factory).
    item_actions: List[SectionItemAction] = Field(
        description="Recommended action for each story in section",
        default_factory=list
    )

    # No default: required.
    summary_notes: str = Field(
        description="Overall assessment of section strengths and weaknesses"
    )

    # No default: required.
    should_iterate: bool = Field(
        description="True if changes needed and section should be re-critiqued"
    )


# Pydantic model for a section after critique recommendations are applied.
# NOTE(review): pydantic puts the class docstring and the Field descriptions
# into the generated JSON schema — edit their wording with care.
class OptimizedSection(BaseModel):
    """Optimized section after applying critique recommendations"""
    section_name: str = Field(description="Category name")
    section_title: str = Field(description="Section title (may be updated)")
    # Stories are untyped dicts; the expected keys are named in the
    # description only and are not validated.
    stories: List[Dict[str, Any]] = Field(
        description="List of story dicts with keys: id, headline, rating, links"
    )



# Newsletter critique models for quality evaluation

# One pair of headlines flagged as duplicates; all five fields are required strings.
class DuplicateIssue(BaseModel):
    """Identified duplicate or near-duplicate story across sections"""
    headline_1: str = Field(description="First headline text")
    section_1: str = Field(description="Section containing first headline")
    headline_2: str = Field(description="Second headline text")
    section_2: str = Field(description="Section containing second headline")
    explanation: str = Field(description="Why these are considered duplicates")


# One headline-level finding inside a NewsletterCritique.
class HeadlineIssue(BaseModel):
    """Quality issue with a specific headline"""
    headline: str = Field(description="The problematic headline")
    section: str = Field(description="Section containing this headline")
    # Free-form string: the allowed values are listed in the description only,
    # not enforced by the type.
    issue_type: str = Field(
        description="Type of issue: too_long, passive_voice, unclear, missing_specifics, jargon"
    )
    suggestion: str = Field(
        description="Specific suggestion to improve this headline")

# One section-level finding inside a NewsletterCritique.
class SectionIssue(BaseModel):
    """Quality issue with a section"""
    section_title: str = Field(description="Title of the problematic section")
    # Free-form string: the allowed values are listed in the description only,
    # not enforced by the type.
    issue_type: str = Field(
        description="Type of issue: too_small, too_large, incoherent, title_mismatch"
    )
    suggestion: str = Field(
        description="Specific suggestion to improve this section")


# Whole-newsletter critique schema: an overall score, three issue lists,
# four per-dimension scores, recommendations and an iterate flag.
# Score fields are plain floats; the 0-10 range is stated in the descriptions
# but not enforced by the schema.
class NewsletterCritique(BaseModel):
    """Comprehensive quality evaluation of newsletter draft"""
    overall_score: float = Field(
        description="Overall quality score 0-10 (9-10 excellent, 8-9 good, 7-8 acceptable, <7 needs work)"
    )

    # Specific issues (empty lists if none found)
    duplicate_issues: List[DuplicateIssue] = Field(
        default_factory=list,
        description="List of duplicate or near-duplicate stories found"
    )
    headline_issues: List[HeadlineIssue] = Field(
        default_factory=list,
        description="List of headline quality issues"
    )
    section_issues: List[SectionIssue] = Field(
        default_factory=list,
        description="List of section quality issues"
    )

    # Dimension scores (0-10 each)
    theme_coherence: float = Field(
        description="0-10: How well H1 title reflects content and sections cluster thematically"
    )
    headline_quality: float = Field(
        description="0-10: Clarity, conciseness, specificity, active voice"
    )
    source_quality: float = Field(
        description="0-10: Use of authoritative sources (Reuters, Bloomberg, FT, etc.)"
    )
    format_compliance: float = Field(
        description="0-10: Adherence to markdown format rules"
    )

    # Actionable feedback
    recommendations: List[str] = Field(
        description="Top 3-5 specific, actionable improvements needed",
        default_factory=list
    )
    # Required flag; the 8.5 threshold lives only in the description text.
    should_iterate: bool = Field(
        description="True if score < 8.5 and issues are fixable through iteration"
    )

In [None]:
            # Agent that critiques one newsletter section and returns a
            # SectionCritique. The prompts and model name come from variables
            # defined in earlier cells.
            critique_agent = LLMagent(
                system_prompt=section_critique_system,
                user_prompt=section_critique_user,
                output_type=SectionCritique,
                model=section_critique_model,
                verbose=True,
                logger=logger
            )


In [None]:
            async def critique_wrapper(cat):
                """Critique one category's section and return ``(cat, critique)``.

                Selects the rows of ``newsletter_section_df`` whose ``cat`` equals
                ``cat``, sends their id/headline/rating/links records to
                ``critique_agent`` together with the section title and the
                newline-joined list of all categories, and pairs the result with
                the category name.
                """
                in_category = newsletter_section_df['cat'] == cat
                section_rows = newsletter_section_df.loc[in_category]
                # The title is read from the first row of the category.
                title = section_rows['section_title'].iloc[0]
                story_records = section_rows[
                    ['id', 'headline', 'rating', 'links']
                ].to_dict('records')
                all_categories = "\n".join(unique_cats)
                result = await critique_agent.run_prompt(
                    section_title=title,
                    target_categories=all_categories,
                    input_text=story_records,
                )
                return (cat, result)

In [None]:
            tasks = [critique_wrapper(cat) for cat in unique_cats]

            critiques = await asyncio.gather(*tasks)


In [None]:
# Inspect the gathered (category, SectionCritique) pairs.
critiques 


In [None]:

            # Log the scores of the first critique only (critiques[0] is a
            # (category, critique) pair).
            logger.info(f"      Coherence: {critiques[0][1].overall_coherence:.1f}/10, "
                             f"Quality: {critiques[0][1].overall_quality:.1f}/10")

In [None]:
# Apply each critic's per-story recommendations to newsletter_section_df in place.
#   drop    -> mark the row for pruning and park it under 'Other' / 'Other News'
#   rewrite -> replace the headline text
#   move    -> reassign the row to another known category and its section title
# Any other action (e.g. 'keep') leaves the row untouched.
newsletter_section_df['prune'] = False
changes_made = False  # becomes True as soon as any action is applied
for cat, critique in critiques:
    for action in critique.item_actions:
        story_mask = newsletter_section_df['id'] == action.id
        if not story_mask.any():
            # The critic referenced an id that is not in the frame; without this
            # guard the 'rewrite' branch would raise IndexError on .iloc[0].
            logger.warning(
                f"      SKIP {action.action} id={action.id}: id not found (critique for '{cat}')")
            continue

        if action.action == 'drop':
            logger.info(
                f"      DROP id={action.id}: {action.reason}")
            newsletter_section_df.loc[story_mask, 'prune'] = True
            newsletter_section_df.loc[story_mask, 'cat'] = 'Other'
            newsletter_section_df.loc[story_mask, 'section_title'] = 'Other News'
            changes_made = True
        elif action.action == 'rewrite' and action.rewritten_headline:
            old_headline = newsletter_section_df.loc[story_mask, 'headline'].iloc[0]
            logger.info(
                f"      REWRITE id={action.id}: {action.reason}")
            logger.info(f"        Old: {old_headline}")
            logger.info(
                f"        New: {action.rewritten_headline}")
            newsletter_section_df.loc[story_mask, 'headline'] = action.rewritten_headline
            changes_made = True
        elif action.action == 'move' and action.target_category:
            if action.target_category not in cat_df_dict:
                # The critic invented a category with no known section title;
                # leave the story where it is instead of raising KeyError.
                logger.warning(
                    f"      SKIP MOVE id={action.id}: unknown target category "
                    f"'{action.target_category}'")
                continue
            logger.info(
                f"      MOVE id={action.id} to {action.target_category}: {action.reason}")
            newsletter_section_df.loc[story_mask, 'cat'] = action.target_category
            newsletter_section_df.loc[story_mask,
                                      'section_title'] = cat_df_dict[action.target_category]
            changes_made = True


In [None]:
# Row count before and after removing the stories flagged with prune=True.
print(len(newsletter_section_df))
keep_mask = ~newsletter_section_df['prune']
newsletter_section_df = newsletter_section_df[keep_mask]
print(len(newsletter_section_df))


In [None]:
# Pull out the first critique whose category is the hard-coded name below
# (raises IndexError when no critique matches).
matching_critiques = [
    critique for category, critique in critiques
    if category == "AI Cloud Infrastructure Surge"
]
x = matching_critiques[0]
x



In [None]:
# Tabulate the selected critique's item actions, one row per story
# (the columns are unnamed and appear as 0..4).
pd.DataFrame([(xx.id, xx.action, xx.reason, xx.rewritten_headline, xx.target_category) for xx in x.item_actions])

In [None]:
# Render one markdown section per row of cat_df: an H2 title followed by a
# bullet per story, with stories ordered by rating (highest first).
sections_md = []
for cat, section_title in zip(cat_df['cat'], cat_df['section_title']):
    in_category = newsletter_section_df['cat'] == cat
    cat_stories = newsletter_section_df[in_category].sort_values(
        'rating', ascending=False)

    bullets = [
        f"- {headline} - {links}\n"
        for headline, links in zip(cat_stories['headline'], cat_stories['links'])
    ]
    sections_md.append(f"## {section_title}\n\n" + "".join(bullets))

In [None]:
# Preview all rendered sections, separated by blank lines.
display(Markdown("\n\n".join(sections_md)))

In [None]:
                    # Array of embedding_df's values; passed to mmr_selection
                    # below as `embeddings`.
                    # NOTE(review): mmr_selection maps row i of this array to
                    # row i of the frame it is given — confirm that
                    # embedding_df and `candidates` are row-aligned.
                    embeddings = embedding_df.values


In [None]:
# Number of tier-2 stories still needed to reach 100 after the must-include set.
# This goes negative when must_include holds more than 100 rows.
target_tier2 = 100 - len(must_include)
target_tier2

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def mmr_selection(
    df: pd.DataFrame,
    embeddings: np.ndarray,
    n: int = 100,
    lambda_param: float = 0.5
) -> pd.DataFrame:
    """
    Select diverse, high-quality stories using Max Marginal Relevance.

    Balances story quality (rating) with diversity (embedding similarity)
    to avoid redundant coverage of the same story angles. The highest-rated
    story is picked first; every following pick maximises

        lambda_param * normalized_rating
        + (1 - lambda_param) * (1 - max cosine similarity to picks so far)

    Args:
        df: DataFrame with a 'rating' column. Row i of ``df`` corresponds to
            row i of ``embeddings`` (positional, independent of ``df.index``).
        embeddings: 2-D array of embeddings with at least ``len(df)`` rows;
            only the first ``len(df)`` rows are used.
        n: Number of stories to select.
        lambda_param: Tradeoff between relevance (rating) and diversity
                     1.0 = pure rating, 0.0 = pure diversity, 0.5 = balanced

    Returns:
        ``df`` itself when it has at most ``n`` rows, an empty slice of ``df``
        when ``n <= 0``, otherwise the ``n`` selected rows in selection order.

    Raises:
        ValueError: if ``embeddings`` is not 2-D or has fewer rows than ``df``.
    """
    if n <= 0:
        # Nothing requested; previously this still returned the top-rated story.
        return df.iloc[0:0]
    if len(df) <= n:
        return df

    n_rows = len(df)
    embeddings = np.asarray(embeddings, dtype=float)
    if embeddings.ndim != 2 or embeddings.shape[0] < n_rows:
        raise ValueError(
            f"embeddings must be 2-D with at least {n_rows} rows, "
            f"got shape {embeddings.shape}"
        )

    # Unit-normalise once so cosine similarity becomes a plain dot product.
    # Zero vectors are left as zeros, i.e. similarity 0 to everything.
    vectors = embeddings[:n_rows]
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    unit_vectors = vectors / norms

    # Normalize ratings to 0-1 scale (all-equal ratings map to 0.5).
    ratings = df['rating']
    min_rating = ratings.min()
    rating_range = ratings.max() - min_rating
    if rating_range == 0:
        relevance = np.full(n_rows, 0.5)
    else:
        relevance = ((ratings - min_rating) / rating_range).to_numpy(dtype=float)
    # A missing rating counts as least relevant instead of poisoning the scores.
    relevance = np.nan_to_num(relevance, nan=0.0)

    # Work with row positions throughout, so non-default or duplicate index
    # labels cannot confuse the row <-> embedding mapping.
    first_pos = int(np.argmax(relevance))
    selected_positions = [first_pos]
    is_candidate = np.ones(n_rows, dtype=bool)
    is_candidate[first_pos] = False

    # max_similarity[i] = highest cosine similarity between row i and any
    # already selected row; updated incrementally after each pick.
    max_similarity = unit_vectors @ unit_vectors[first_pos]

    while len(selected_positions) < n and is_candidate.any():
        mmr_scores = (lambda_param * relevance
                      + (1 - lambda_param) * (1 - max_similarity))
        mmr_scores[~is_candidate] = -np.inf

        # argmax returns the first maximum, i.e. ties go to the earlier row.
        best_pos = int(np.argmax(mmr_scores))
        selected_positions.append(best_pos)
        is_candidate[best_pos] = False
        np.maximum(max_similarity, unit_vectors @ unit_vectors[best_pos],
                   out=max_similarity)

    return df.iloc[selected_positions]


In [None]:
                    # Fill the remaining slots from `candidates` with an equal
                    # weighting of rating and diversity.
                    tier2_selected = mmr_selection(
                        df=candidates,
                        embeddings=embeddings,
                        n=target_tier2,
                        lambda_param=0.5  # 50% rating, 50% diversity
                    )


In [None]:
# Debug check: confirm what kind of object must_include is before concatenating.
type(must_include)



In [None]:

# Stack the must-include stories on top of the MMR-selected tier-2 stories.
tiers = [must_include, tier2_selected]
selected_df = pd.concat(tiers)
logger.info(f"Total selected stories: {len(selected_df)} (target: ~100-120)")
selected_df

In [None]:
            # Work on a copy so later edits do not touch selected_df.
            headline_df = selected_df.copy()


In [None]:
            # Distinct category names in first-seen order, leaving out "Other".
            categories = [
                name for name in headline_df['cat'].unique().tolist()
                if name != "Other"
            ]


In [None]:
# One source link of a story; str() renders it as a markdown link.
class SectionStoryLink(BaseModel):
    url: str = Field(description="URL of the article")
    site_name: str = Field(description="Name of the website/source")

    def __str__(self):
        # "[site_name](url)"
        return f"[{self.site_name}]({self.url})"


# One story in a drafted section; str() renders it as a markdown bullet.
class SectionStory(BaseModel):
    headline: str = Field(description="Summary of the story")
    links: List[SectionStoryLink] = Field(
        description="List of links related to this story")
    prune: bool = Field(description="Whether to prune/exclude this story")

    def __str__(self):
        # "- <headline> - <link> <link> ..." (the prune flag is not consulted here)
        rendered_links = " ".join(map(str, self.links))
        return f"- {self.headline} - {rendered_links}"


# A drafted newsletter section; used as the write_section agent's output type.
class Section(BaseModel):
    section_title: str = Field(description="Title of the newsletter section")
    headlines: List[SectionStory] = Field(
        description="List of stories in this section")

    def __str__(self):
        # H2 title, blank line, then one bullet per story not flagged for pruning.
        header = f"## {self.section_title}\n\n"
        kept_bullets = [str(story) for story in self.headlines if not story.prune]
        return header + "\n".join(kept_bullets)



In [None]:
            # Fetch the section-writing prompts and model name from Langfuse,
            # then build an agent whose structured output is a Section.
            write_section_system_prompt, write_section_user_prompt, model = \
                LangfuseClient().get_prompt("newsagent/write_section")

            write_section_agent = LLMagent(
                system_prompt=write_section_system_prompt,
                user_prompt=write_section_user_prompt,
                output_type=Section,
                model=model,
                verbose=True,
                logger=logger
            )


In [None]:
            async def draft_section(cat, agent):
                """Ask ``agent`` to draft the section for ``cat``; return ``(cat, response)``.

                The prompt input is a JSON array of the category's stories from
                ``headline_df``, highest rating first, with the fields
                rating, summary, site_name and url.
                """
                in_category = headline_df["cat"] == cat
                ranked_stories = headline_df.loc[in_category].sort_values(
                    "rating", ascending=False)

                # Pick the prompt fields and rename them to the names the prompt sees.
                source_columns = ["rating", "short_summary", "site_name", "final_url"]
                prompt_names = {"short_summary": "summary", "final_url": "url"}
                payload = ranked_stories[source_columns].rename(columns=prompt_names)
                input_text = payload.to_json(orient="records")

                response = await agent.run_prompt(input_text=input_text)
                return (cat, response)

In [None]:
            # Draft all sections asynchronously
            # return_exceptions=True: a failed section shows up as an exception
            # object in draft_results instead of aborting the whole gather.
            draft_tasks = [draft_section(cat, write_section_agent)
                           for cat in categories]
            draft_results = await asyncio.gather(*draft_tasks, return_exceptions=True)


In [None]:
# Inspect the first draft result (a (cat, response) tuple, or an exception).
draft_results[0]


In [None]:
            # Store each successfully drafted section on the state, keyed by
            # category, and count how many sections were drafted.
            sections_drafted = 0
            for result in draft_results:
                # gather(..., return_exceptions=True) returns exception objects
                # in place of results. BaseException also covers CancelledError,
                # which is not an Exception subclass.
                if isinstance(result, BaseException):
                    # Use the notebook-level `logger`, as the other cells do;
                    # `self` does not exist outside the original method.
                    logger.error(f"Error drafting section: {result}")
                    continue

                cat, content = result
                # state.newsletter_section_obj[cat] = content
                state.newsletter_section_text[cat] = content
                sections_drafted += 1

In [None]:
# Preview every drafted section. Dollar signs are prefixed with backslashes so
# the notebook does not treat "$...$" as math. "\\\\$" spells out the same
# replacement text the old "\\\$" produced, without the invalid "\$" escape.
for section in state.newsletter_section_text.values():
    display(Markdown(str(section).replace("$", "\\\\$")))


In [None]:
# Categories ordered by story count (largest first); the 'source' column of
# the grouped count is used as the per-category count.
cat_df = (
    state.headline_df.groupby("cat")
    .count()
    .reset_index()[['cat', 'source']]
    .sort_values('source', ascending=False)
)

# Concatenate the drafted sections in that order and preview each one.
output_str = ""
for cat in cat_df["cat"]:
    # "Other" is not drafted as a section. A category whose draft failed has no
    # entry in newsletter_section_text and is skipped rather than raising
    # KeyError.
    if cat == "Other" or cat not in state.newsletter_section_text:
        continue
    section_text = str(state.newsletter_section_text[cat])
    output_str += section_text + "\n\n"
    # "\\\\$" is the same replacement text as the old "\\\$", minus the
    # invalid "\$" escape sequence.
    display(Markdown(section_text.replace("$", "\\\\$")))

# first do a full rewrite.
# check vs. objects, not showing the ones marked for pruning
# move prune=True to Other

In [None]:
# Raw text of the assembled sections, as it is passed to the draft-newsletter agent.
print(output_str)


In [None]:
# Fetch the final-newsletter prompts and model name from Langfuse.
# The method is get_prompt, as in the other cells; "get_probmpt" was a typo.
draft_newsletter_system_prompt, draft_newsletter_user_prompt, model = \
    LangfuseClient().get_prompt("newsagent/draft_newsletter")


In [None]:
# Minimal wrapper model so the draft-newsletter agent can return plain text
# through a structured output type; the text is read back via `.mystr`.
class Mystr(BaseModel):
    """A string"""
    mystr: str = Field(
        description="a string")


In [None]:
# Agent that turns the assembled sections into the final newsletter text,
# returned wrapped in a Mystr.
draft_newsletter_agent = LLMagent(
    system_prompt=draft_newsletter_system_prompt,
    user_prompt=draft_newsletter_user_prompt,
    output_type=Mystr,
    model=model,
    verbose=True,
    logger=logger
)


In [None]:
# Apply prompt to generate final newsletter
# NOTE(review): this call passes `input_str`, while the other run_prompt calls
# in this notebook pass `input_text` — confirm the draft_newsletter prompt
# template really uses a variable named input_str.
newsletter_content = await draft_newsletter_agent.run_prompt(input_str=output_str)


In [None]:
# Unwrap the Mystr result to a plain string and preview it as Markdown.
# Re-running this cell alone fails, because newsletter_content is then already
# a str and has no .mystr attribute.
newsletter_content = newsletter_content.mystr
display(Markdown(newsletter_content))


In [None]:
# Hand the same string (previewed as Markdown in the previous cell) to
# IPython's HTML display; it has not been converted to HTML at this point.
HTML(newsletter_content)

In [None]:
%pip install markdown 
from utilities import send_gmail
import markdown

In [None]:
# Convert the newsletter Markdown to HTML for the email body below.
newsletter_content_html = markdown.markdown(newsletter_content)

In [None]:
                # Build the dated subject line and wrap the rendered newsletter
                # HTML in a styled header / body / footer, then send it.
                today = datetime.now().strftime("%B %d, %Y")
                subject = f"AI News Digest - {today}"

                # Apply HTML styling
                # (a stray literal "a" before the date <p> was removed; it was
                # rendered as text in the email header)
                html_content = f"""
                <div style="max-width: 800px; margin: 0 auto; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;">
                    <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 40px 20px; text-align: center; border-radius: 8px 8px 0 0;">
                        <h1 style="color: white; margin: 0; font-size: 32px;">AI News Digest</h1>
                        <p style="color: rgba(255,255,255,0.9); margin: 10px 0 0 0; font-size: 16px;">{today}</p>
                    </div>
                    <div style="background: #ffffff; padding: 30px; border-radius: 0 0 8px 8px; box-shadow: 0 4px 6px rgba(0,0,0,0.1);">
                        {newsletter_content_html}
                    </div>
                    <div style="text-align: center; padding: 20px; color: #666; font-size: 14px;">
                        <p>Generated on {today} by AI Newsletter Agent</p>
                    </div>
                </div>
                """

                send_gmail(subject, html_content)

In [None]:
use mmr 
do the pruning 
rewrite other after pruning
need to pass thinking effort
critic optimizer loop

In [None]:
# Build the JSON prompt input for one hard-coded category, highest rating first.
# NOTE(review): this uses the "url" column and keeps the name "short_summary",
# whereas draft_section sends "final_url" renamed to "url" and "short_summary"
# renamed to "summary" — confirm which field names the prompt expects.
input_text = headline_df.loc[headline_df["cat"]=="AI Business Value Gap"].sort_values("rating", ascending=False)[["rating", "short_summary", "site_name", "url"]].to_json(orient="records")
input_text


In [None]:
# Scratch copy of the agent setup: fetch the write_section prompts and model
# name, then build an agent that returns a Section.
write_section_system_prompt, write_section_user_prompt, model = \
    LangfuseClient().get_prompt("newsagent/write_section")

write_section_agent = LLMagent(
    system_prompt=write_section_system_prompt,
    user_prompt=write_section_user_prompt,
    output_type=Section,
    model=model,
    verbose=True,
    logger=logger
)


In [None]:
# Draft a single section from the JSON built above and inspect the result.
response = await write_section_agent.run_prompt(input_text=input_text)
response

In [None]:
# Render the drafted section through its __str__ markdown form.
display(Markdown(str(response)))

In [None]:
# One source link of a story; str() renders it as a markdown link.
class Link(BaseModel):
    url: str = Field(description="URL of the article")
    site_name: str = Field(description="Name of the website/source")
    def __str__(self):
        # "[site_name](url)"
        return f"[{self.site_name}]({self.url})"


# One story in a drafted section; str() renders it as a markdown bullet.
class Story(BaseModel):
    headline: str = Field(description="Summary of the story")
    links: List[Link] = Field(description="List of links related to this story")
    prune: bool = Field(description="Whether to prune/exclude this story")
    def __str__(self):
        # A pruned story renders as the empty string; otherwise
        # "- <headline> - <link> <link> ...".
        return "" if self.prune else f"- {self.headline} - " + " ".join([str(s) for s in self.links])


# A drafted newsletter section; str() renders it as markdown.
class Section(BaseModel):
    section_title: str = Field(description="Title of the newsletter section")
    headlines: List[Story] = Field(description="List of stories in this section")
    def __str__(self):
        # H2 title, blank line, then one bullet per story. Pruned stories are
        # filtered out here: a pruned Story stringifies to "", and joining
        # those left blank lines inside the markdown list.
        return f"## {self.section_title}\n\n" + "\n".join(
            [str(h) for h in self.headlines if not h.prune]
        )
        

In [None]:
# Rebind headline_df to the frame stored on the state, then show the JSON
# payload for one hard-coded category, highest rating first.
headline_df=state.headline_df
headline_df.loc[headline_df["cat"]=="AI Business Value Gap"].sort_values("rating", ascending=False)[["rating", "short_summary", "site_name", "url"]].to_json(orient="records")


In [None]:
def _print_escaped(text):
    """Print ``text`` with every "$" prefixed by two backslashes."""
    print(text.replace("$", "\\\\$"))


# Categories, most populated first (the 'source' count column is the sort key).
print("# SUGGESTED TOPICS:")
per_category = headline_df.groupby("cat").count().reset_index()
catcount = per_category[['cat', 'source']].sort_values('source', ascending=False)
for c in catcount["cat"]:
    print(c)
print()

# Every story, ordered by category then rating (both descending),
# with "~~~" as the separator between stories.
print("# RAW NEWS ITEMS:")
i = 0
ordered_stories = headline_df.sort_values(["cat", "rating"], ascending=False)
for row in ordered_stories.itertuples():
    _print_escaped(f"[{row.title}]({row.url}) - {row.site_name}\n")
    row_topics = ", ".join(row.topics)
    _print_escaped(f"Topics: {row_topics}\n")
    print(f"Rating: {row.rating:.1f}\n")
    _print_escaped(f"{row.short_summary}\n")
    _print_escaped(f"{row.summary}\n")
    print("~~~\n")
    i += 1


In [None]:
# List the distinct category labels present in headline_df.
headline_df['cat'].unique() 


In [None]:
# NOTE(review): `astate` is not defined in the visible part of the notebook
# (other cells use `state`) — confirm it exists or is a typo.
astate.get_completed_steps() 



In [None]:
# User prompt to run workflow
user_prompt = "Show the workflow status"

print(f"\n📝 User prompt: '{user_prompt}'")
print("=" * 80)

# Run the agent with persistent state
start_time = time.time()
result = await agent.run_step(user_prompt)
duration = time.time() - start_time

print("=" * 80)
print(f"⏱️  Total execution time: {duration:.2f}s")
print(f"📊 Final result:")
print(result)

In [None]:
def _show_md(text):
    """Display ``text`` as Markdown with every "$" prefixed by two backslashes."""
    display(Markdown(text.replace("$", "\\\\$")))


# Walk through all stories from highest to lowest rating and render each
# one's rating, linked title, topics, short summary and full summary.
headline_df = state.headline_df
i = 0
ranked_stories = headline_df.sort_values("rating", ascending=False)
for row in ranked_stories.itertuples():
    display(Markdown(f"{row.rating:.1f}"))
    _show_md(f"[{row.title}]({row.url}) - {row.site_name}")
    row_topics = ", ".join(row.topics)
    _show_md(f"Topics: {row_topics}")
    _show_md(f"{row.short_summary}")
    _show_md(f"{row.summary}")
    i += 1
#     if i>=30:
#         break

In [None]:
# Structured output for the item_distiller prompt: one story boiled down to a
# single sentence. The field description is sent to the model as schema text;
# the previous "List of StoryRating" did not match this single str field.
class DistilledStory(BaseModel):
    """DistilledStory class for structured output distillation into a single sentence """
    item: str = Field(description="Single-sentence distillation of the story")
        
# Fetch the distiller prompts and model name, then build an agent that
# returns a DistilledStory. Note this rebinds the notebook-level `model`.
system, user, model = LangfuseClient().get_prompt("newsagent/item_distiller")

distill_agent = LLMagent(
            system_prompt=system,
            user_prompt=user,
            output_type=DistilledStory,
            model=model,
            verbose=False,
            logger=logger
        )

In [None]:
# Smoke-test the distiller on one hand-pasted story (title, topics, rating,
# short summary and bullet summary) and inspect the structured result.
response = await distill_agent.run_prompt(input_text="""AI 'Homeless Man' Challenge Sparks Outrage as Police Called Over Dangerous Viral Trend - International Business Times

Topics: AI Pranks, Public Safety, Ethical Concerns, Disinformation, Policy And Regulation, Snapchat Challenges, Gen AI

Rating: 1.5

Topics: AI Pranks, Public Safety, Ethical Concerns, Disinformation, Policy And Regulation, Snapchat Challenges, Gen AI

Parents in shock after dangerous 'homeless man' Snapchat AI prank goes horribly wrong, police called to calm the viral chaos.

A viral AI-driven prank called the 'homeless man' challenge on Snapchat deceived parents into thinking a homeless person had broken into their home, prompting police intervention.
The prank caused significant public backlash due to the misuse of AI technology to create real emergency scares, leading to police questioning the pranksters and debates about the legal consequences.
Experts emphasize the ethical concerns and potential emotional harm caused by such digital pranks, highlighting the need for responsible use of AI to avoid wasting emergency resources and creating community panic.
""")
response

In [None]:
def _print_escaped(text):
    """Print ``text`` with every "$" prefixed by two backslashes."""
    print(text.replace("$", "\\\\$"))


# Dump every story as plain text, highest rating first, with "~~~" as the
# separator between stories.
headline_df = state.headline_df
i = 0
ranked_stories = headline_df.sort_values("rating", ascending=False)
for row in ranked_stories.itertuples():
    _print_escaped(f"[{row.title}]({row.url}) - {row.site_name}\n")
    row_topics = ", ".join(row.topics)
    _print_escaped(f"Topics: {row_topics}\n")
    _print_escaped(f"{row.short_summary}\n")
    _print_escaped(f"{row.summary}\n")
    print("~~~\n")

    i += 1

TODO:
- update final prompt
- output sections using short summary
# SUGGESTED TOPICS
AI Agents And Reliability
AI Creative Industry Impact
AI Development Tools And Standards
AI Market Valuations
AI Phishing Surge
AI Security Risks
AI Workforce Impact
C2PA Image Provenance
Circular Deal Inflation
Cross-Industry AI Adoption
Crunch Lab Decentralized AI
Data Center Environmental Impact
Deepfake Video Ethics
Dell Raises AI Forecasts
EU AI Strategy
Enterprise AI Data Leakage
Enterprise AI Partnerships
Google Gemini 2.5
Healthcare AI Investments
OpenAI Platform Issues
Other
Qualcomm Acquires Arduino
SoftBank Acquires ABB Robotics
Youth Support AI Ethics

- take each summary and boil it down to 1 sentence, output in the correct format
- initial write sections - prompt and output JSON for each section asynchronously
- check and rewrite each section for format asynchronously
- assemble sections
- do a critic loop