In [1]:
from typing import List, Dict, Any, TypedDict, Optional
from pm_market_getter import get_markets_for_date_range
from langchain.llms import OpenAI
from langchain.tools import Tool
from langchain.agents import AgentExecutor, OpenAIFunctionsAgent
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langgraph.graph import StateGraph, END
from langgraph.prebuilt import ToolInvocation
from operator import add
from typing import Annotated
from datetime import datetime, timedelta
from langgraph.prebuilt import ToolExecutor
import requests
import json
import re
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
import operator
from typing import  Annotated
from langgraph.constants import Send
import threading


For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [2]:
class Counter:
    """Lock-protected source of consecutive integers, starting at 1."""

    def __init__(self):
        self._value = 0
        self._lock = threading.Lock()

    def get_next(self):
        """Add one to the stored value and return the new value."""
        self._lock.acquire()
        try:
            self._value = self._value + 1
            return self._value
        finally:
            # Always release, even if something above raised.
            self._lock.release()


# Module-wide counter instance shared by everything in this notebook.
article_counter = Counter()

# One collected news article: its title, its description and the name of the
# outlet it came from. All three fields are required strings.
class NewsArticles(BaseModel):
    title: str
    description: str
    source_name: str
# One newsletter entry: headline, subheader, optional blurb and metadata.
class ArticleOutput(BaseModel):
    # Unique id taken from the shared article_counter when the instance is built.
    id: int = Field(default_factory=article_counter.get_next)
    headline: str
    subheader: str
    blurb: Optional[str] = Field(default=None)
    score: Optional[int] = Field(default=None)
    # These default to None, so they are declared Optional to keep the
    # annotation truthful.
    ticker: Optional[str] = Field(default=None)
    links: Optional[List[str]] = Field(default=None)
    # Set after construction by generate_newsletter_content; it has to be a
    # declared field because pydantic models reject assignment to unknown names.
    image_url: Optional[str] = Field(default=None)

# Per-market record. It is also passed to StateGraph as the state schema of the
# per-market graph built in define_workflow.
class MarketInfo(BaseModel):
    # Every field except `articles` defaults to None.
    interest_score: float = Field(default=None)
    title: str = Field(default=None)
    ticker: str = Field(default=None)
    description: str = Field(default=None)
    end_date: str = Field(default=None)
    volume: float = Field(default=None)
    featured: float = Field(default=None)
    volume24hr: float = Field(default=None)
    commentCount: float = Field(default=None)
    options: List[Dict[str, Any]] = Field(default=None)
    tags: List[Dict[str, Any]] = Field(default=None)
    # operator.add is attached as Annotated metadata on the next two fields —
    # presumably LangGraph uses it to merge updates by list concatenation; confirm.
    tavily_context: Annotated[list, operator.add] = Field(default=None)
    links: Annotated[list, operator.add] = Field(default=None)
    # The only field that starts as an empty list rather than None.
    articles: List[NewsArticles] = Field(default_factory=list, description="Collected news articles")
    newsletter_content: List[ArticleOutput] = Field(default=None)
    skipped_newsletter_content: List[ArticleOutput] = Field(default=None)

# Top-level state: the markets to process plus the accumulated newsletter
# output. Passed to StateGraph as the outer graph's schema in define_workflow.
class OverallGraph(BaseModel):
    # The only required field.
    markets_list: List[MarketInfo]
    # Both article lists carry operator.add as Annotated metadata and default to None.
    newsletter_content: Annotated[List[ArticleOutput], operator.add] = Field(default=None)
    skipped_newsletter_content: Annotated[List[ArticleOutput], operator.add] = Field(default=None)
    # NOTE(review): generate_groups returns result.groups (a list of ArticleGroup)
    # under this key, which does not match the declared Dict[str, List[int]] —
    # confirm which shape consumers expect.
    article_groups: Dict[str, List[int]] = Field(
        default_factory=dict,
        description="Dictionary of lists, where each list contains ids of a group of articles. The string is the title of the group"
    )
    # Read by check_if_done and send_redundant_groups to decide whether to route to END.
    done: bool = Field(default = False)

# Typed dictionary describing the state of a single article experiment.
# NOTE(review): no code visible in this section reads or writes it — confirm it is still needed.
class ExperimentState(TypedDict):
    market: dict  # The current market being processed
    article: dict  # The article being generated/refined
    previous_articles: List[dict]  # List of previously processed articles
    keep_article: bool

# Structured-output schema used by tavily_search to obtain a single web-search
# query from the model. The field defaults to None, so callers must be prepared
# for a missing query.
class SearchQuery(BaseModel):
    search_query: str = Field(None, description="Search query for retrieval.")

In [3]:
from dotenv import load_dotenv
import os
import sys
from pathlib import Path

# Print current working directory for debugging
current_dir = Path.cwd()
print(f"Current working directory: {current_dir}")

# Locate the .env file by walking up from the working directory (nearest match
# wins) rather than relying on an absolute path that only exists on one
# machine. When nothing is found, fall back to the working-directory candidate
# so env_path is always a Path.
env_path = next(
    (
        directory / '.env'
        for directory in (current_dir, *current_dir.parents)
        if (directory / '.env').is_file()
    ),
    current_dir / '.env',
)

# Load the .env file if one was located
if env_path.is_file():
    print(f"Found .env file at: {env_path}")
    load_dotenv(env_path)
    env_loaded = True
else:
    print(f"Could not find a .env file in {current_dir} or any of its parent directories")
    env_loaded = False

# Get environment variables (each is None when the variable is not set)
news_api_key = os.getenv('NEWS_API_KEY')
openai_api = os.getenv('OPENAI_API_KEY')
tavily_api_key = os.getenv('TAVILY_API_KEY')
smtp_pass = os.getenv('SMTP_PASS')

# Report only presence/absence so the secrets themselves never reach the cell output
print(f"SMTP_PASS: {'Found' if smtp_pass else 'Not found'}")
print(f"NEWS_API_KEY: {'Found' if news_api_key else 'Not found'}")
print(f"OPENAI_API_KEY: {'Found' if openai_api else 'Not found'}")
print(f"TAVILY_API_KEY: {'Found' if tavily_api_key else 'Not found'}")


Current working directory: /Users/ethangoldberg/Desktop/CS-Proj/PolyNewsDaily/PolyNewsDaily/agent
Found .env file at: /Users/ethangoldberg/Desktop/CS-Proj/PolyNewsDaily/PolyNewsDaily/.env
SMTP_PASS: Found
NEWS_API_KEY: Not found
OPENAI_API_KEY: Found
TAVILY_API_KEY: Found


In [13]:
# Sanity check that the OpenAI key is available; prints only Found / Not found,
# never the key itself. (The environment-loading cell prints the same status.)
print('OPENAI_API_KEY:', 'Found' if openai_api else 'Not found')


OPENAI_API_KEY: Found


In [14]:
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
def generate_news_query(title, description) -> List[str]:
    """Ask the chat model for 1-3 news-search keywords describing a market.

    Args:
        title: Title of the prediction market.
        description: Description of the prediction market.

    Returns:
        A list of keyword strings parsed from the model's ``Keywords:`` line.
        When the model does not follow the requested format, or the line is
        empty, the market title is returned as the only keyword (or an empty
        list if the title is empty too).
    """
    llm = ChatOpenAI(model="gpt-5", api_key=openai_api, temperature=1)  # gpt-5 requires default temp=1
    
    prompt = f"""
    You are an expert in information retrieval and natural language processing. Your task is to analyze the title and description of a prediction market and generate optimal keywords for a news API search. These keywords should maximize the retrieval of relevant and informative news articles about the market's subject.

    Given:
    - Title: {title}
    - Description: {description}

    Instructions:
    1. Carefully read and understand the title and description.
    2. Identify the core subject matter, key entities, and central concepts.
    3. Consider potential synonyms, related terms, and broader categories that might yield relevant results.
    4. Avoid overly generic terms that might lead to irrelevant results.
    5. Include any specific dates, events, or proper nouns mentioned that are crucial to the market.
    6. Consider the timeframe of the prediction, if specified, and how it might affect keyword relevance.
    7. Aim for a balance between specificity (to ensure relevance) and breadth (to capture a range of potentially useful articles).
    8. Generate 1-3 keyword phrases, separated by commas if more than one.
    9. Each keyword should always be ONE word. There are exceptions for when two or three words are closely linked like "Donald Trump" is two words but would be allowed.

    Output your result in the following format. DO NOT say any other words or give any other explanation.
    Keywords: [Your generated keywords]
    """
    
    response = llm.invoke([HumanMessage(content=prompt)])

    fallback_keywords = [title] if title else []

    # Find the first line that carries the keywords; tolerate leading whitespace
    # and a model that ignores the requested format altogether.
    keywords_line = next(
        (
            line.strip()
            for line in response.content.split('\n')
            if line.strip().startswith('Keywords:')
        ),
        None,
    )
    if keywords_line is None:
        print("No 'Keywords:' line in the model response; falling back to the market title.")
        return fallback_keywords

    # The format line in the prompt shows the keywords inside square brackets,
    # so drop brackets if the model echoed them.
    keywords = keywords_line.split(':', 1)[1].strip().strip('[]')
    keywords_array = [item.strip() for item in keywords.split(',') if item.strip()]
    return keywords_array or fallback_keywords

In [15]:
# Fail early with a readable message: assigning None into os.environ raises a
# TypeError that does not say which key is missing.
if not tavily_api_key:
    raise EnvironmentError(
        "TAVILY_API_KEY is not set; add it to the .env file before creating the Tavily search tool."
    )
# Export the key through the environment before building the search tool.
os.environ["TAVILY_API_KEY"] = tavily_api_key
# Shared search tool used by tavily_search; returns at most 10 results per query.
tavily_getter = TavilySearchResults(max_results=10)

In [16]:
def tavily_search(state: MarketInfo):
    """Gather web context for one market through a model-written search query.

    The chat model turns the market's title, description and end date into a
    search query, which is run through ``tavily_getter``.

    Args:
        state: The market to research; ``title``, ``description`` and
            ``end_date`` are read.

    Returns:
        A dict with ``tavily_context`` (a one-element list holding all result
        documents joined into a single string) and ``links`` (the result URLs).
        Both lists are empty when no query could be produced or the search tool
        returned a string instead of a list of documents.
    """
    llm = ChatOpenAI(model="gpt-5", api_key=openai_api, temperature=1)
    search_instructions = """You will be given the title, description, and end_date for a polymarket prediction market. 

Your goal is to generate a well-structured query for use in retrieval and / or web-search related to the market to provide context to the market. For example, there may be a market on whether Taylor Swift gets married in 2024. A good search query might be "Taylor Swift Pregnancy Rumors 2024".
        
First, analyze the market.

Pay particular attention to the title, description, and dates for the market.

Convert this final question into a well-structured web search query that will give important context to understand it.

Title : {title}          

Description : {description}

End Date : {end_date}
                                        """
    structured_llm = llm.with_structured_output(SearchQuery)
    system_message = search_instructions.format(title=state.title, description=state.description, end_date = state.end_date)
    search_query = structured_llm.invoke([SystemMessage(content=system_message)])

    # SearchQuery.search_query defaults to None, so the model may return no
    # query at all; fall back to the market title rather than searching for None.
    query_text = getattr(search_query, "search_query", None) or state.title
    if not query_text:
        print("Error: No search query could be generated for this market.")
        return {"tavily_context": [], "links": []}

    search_docs = tavily_getter.invoke(query_text)

    # A string here is treated as an error report from the search tool.
    if isinstance(search_docs, str):
        print("Error: Expected a list of documents but got a string.")
        print("Response content:", search_docs)
        return {"tavily_context": [], "links": []} 

    # Keep only well-formed results so one odd entry cannot abort the whole search.
    usable_docs = [
        doc for doc in search_docs
        if isinstance(doc, dict) and "url" in doc and "content" in doc
    ]

    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document href="{doc["url"]}"/>\n{doc["content"]}\n</Document>'
            for doc in usable_docs
        ]
    )
    links = [doc["url"] for doc in usable_docs]
    print(links)
    return {"tavily_context": [formatted_search_docs],
            "links": links} 


In [17]:
def call_news_api(state: MarketInfo) -> MarketInfo:
    """Collect recent news articles for a market and append them to ``state.articles``.

    Keywords come from ``generate_news_query``; each keyword is sent to the
    NewsAPI "everything" endpoint for the last three days (2 results per
    keyword). Failed requests are reported and skipped so one bad keyword does
    not abort the others.

    Args:
        state: The market being processed; ``title`` and ``description`` are
            read and ``articles`` is extended in place.

    Returns:
        The same ``state`` object, with any articles found appended.
    """
    keywords = generate_news_query(state.title, state.description)
    print(keywords)
    today = datetime.now().date()
    from_date = (today - timedelta(days=3)).isoformat()
    to_date = today.isoformat()
    url = "https://newsapi.org/v2/everything"
    for keyword in keywords:
        params = {
            "from": from_date,
            "to": to_date,
            "apiKey": news_api_key,
            # NOTE(review): verify that the "everything" endpoint accepts a
            # country filter; it may only apply to top-headlines.
            "country" : "us",
            "q" : keyword,
            "pageSize" : 2
        }
        try:
            # A timeout keeps a stalled connection from hanging the pipeline.
            response = requests.get(url, params=params, timeout=10)
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"News API request failed for keyword {keyword!r}: {exc}")
            continue

        if not isinstance(data, dict) or not response.ok:
            detail = data.get("message") if isinstance(data, dict) else data
            print(f"News API returned HTTP {response.status_code} for keyword {keyword!r}: {detail}")
            continue

        for article in data.get("articles") or []:
            # Any of these may come back as null; NewsArticles requires strings.
            source = article.get("source") or {}
            state.articles.append(
                NewsArticles(
                    title=article.get("title") or "",
                    description=article.get("description") or "",
                    source_name=source.get("name") or "",
                )
            )

    return state


In [18]:
# Structured summary of a market: what it asks, how it resolves, whether it is
# effectively settled, and relevant context.
class market_key_info(BaseModel):
    # `default` was misspelled, which silently left this field without a default.
    what_is_the_market_asking_or_answering: str = Field(default=None, description= "this should be a synposis of what this market is asking or answering")
    # typing.List cannot be instantiated; the built-in list is the correct factory.
    important_criteria: List[str] = Field(default_factory=list, description="a list of strings that describe important critera for a market and how it is resolved. includes timeline")
    completed: bool = Field(default=None, description="boolean indicating whether or not a market is essentially resolved. If the probability of all the options adds up to 0 or if the probability of any of the options is about 100, it is essentially guaranteed to already have happened or will happen")
    important_insights_context_or_reasoning : str = Field(default=None, description= "summary of important context or insights that could be related to this market. if there is important context from the sources from the internet regarding a probability or a change")

In [19]:
# Structured score for an article; defaults to 5 when none is provided.
class article_score(BaseModel):
    # `description` was misspelled, so the text never reached the field's schema.
    score: int = Field(default=5, description="score for the article 0-10")

In [20]:
# Module-level chat model.
# NOTE(review): the warning printed under this cell says parallel_tool_calls was
# transferred to model_kwargs — confirm the flag is intended. The functions
# visible in this notebook each build their own ChatOpenAI instance, so this
# object may be unused.
llm = ChatOpenAI(model="gpt-5", api_key=openai_api, temperature=1, parallel_tool_calls=False)

                parallel_tool_calls was transferred to model_kwargs.
                Please confirm that parallel_tool_calls is what you intended.
  if await self.run_code(code, result, async_=asy):


In [None]:
def generate_newsletter_content(market: MarketInfo):
    """
    Single-pass pipeline that turns one market into one newsletter article:
      1) LLM extracts & normalizes facts directly from full MarketInfo (including description, options, tavily_context, articles).
      2) LLM writes headline / subheader / blurb using those facts, the few-shot examples, and an internally suggested angle.
      3) The text is parsed into an ArticleOutput and an image is generated for it.
    No post-generation checks. No outside references. No multi-variant generation.

    Args:
        market: The market to write about.

    Returns:
        ``{"newsletter_content": [article]}`` where ``article`` is an ArticleOutput.

    Note:
        ``generate_and_upload_image`` is defined in a later cell and must exist
        by the time this function is called.
    """
    import json
    import re

    # ---- 0) Prep ----------------------------------------------------------------
    full_context = {
        "title": market.title,
        "ticker": market.ticker,
        "description": market.description,
        "end_date": market.end_date,
        "volume": market.volume,
        "featured": market.featured,
        "volume24hr": market.volume24hr,
        "commentCount": market.commentCount,
        "options": market.options,
        "tags": market.tags,
        "tavily_context": market.tavily_context,
        "links": market.links,
        "articles": [a.dict() for a in market.articles] if market.articles else [],
    }

    # ---- 1) Extract & normalize facts ------------------------------------------
    extractor_llm = ChatOpenAI(model="gpt-5", api_key=openai_api, temperature=1)

    extraction_prompt = (
        "You are a precise fact extractor for a prediction-market newsletter. Extract ONLY from the JSON provided.\n"
        "Return STRICT JSON (no prose). If unknown, use null or [].\n\n"
        "Schema:\n"
        "{\n"
        '  "what_market_asks": "one sentence synopsis of the question, plain",\n'
        '  "timebox": "short human-readable end_date or \'unknown\'",\n'
        '  "criteria_bullets": ["1–3 bullets from description with concrete resolution/timing details"],\n'
        '  "primary_outcome": "Yes/No or option name or null",\n'
        '  "primary_probability_pct": 0,\n'
        '  "one_day_change": null,\n'
        '  "secondary_outcomes": [{"outcome":"name","probability_pct":0}],\n'
        '  "context_bullets": ["0–3 short items from tavily_context or articles"],\n'
        '  "salient_numbers": ["e.g., Volume: $X, 24h: $Y"],\n'
        '  "subject": "primary named entity (company/person/government) that the market is about, from title/description",\n'
        '  "counterparty": "the other side in deals (acquirer/target, team/opponent, bill/chamber), if present; else null",\n'
        '  "deal_object": "what is being acquired/approved/passed (e.g., \'Acme Corp\', \'BetaCo assets\') or null",\n'
        '  "action_verb": "canonical verb for the event (e.g., finalize, approve, pass, launch)",\n'
        '  "display_subject": "If subject and counterparty exist, format \'Subject–Counterparty\'; else subject",\n'
        '  "stance": "one of: likely | unlikely | knife-edge (derive from primary_probability_pct: >=60 likely, <=40 unlikely, else knife-edge)",\n'
        '  "angle_suggestion": "one of: clock | momentum | divergence | criteria"\n'
        "}\n\n"
        "Use ONLY this MarketInfo JSON:\n"
        + json.dumps(full_context, ensure_ascii=False)
    )

    extraction_resp = extractor_llm.invoke([HumanMessage(content=extraction_prompt)])

    # Minimal facts used whenever the extractor's reply cannot be used as JSON.
    fallback_facts = {
        "what_market_asks": market.title or "Unknown",
        "timebox": "unknown",
        "criteria_bullets": [],
        "primary_outcome": None,
        "primary_probability_pct": None,
        "one_day_change": None,
        "secondary_outcomes": [],
        "context_bullets": [],
        "salient_numbers": [],
        "subject": market.title or None,
        "counterparty": None,
        "deal_object": None,
        "action_verb": None,
        "display_subject": market.title or "This market",
        "stance": "knife-edge",
        "angle_suggestion": "clock",
    }

    raw_facts = extraction_resp.content
    if isinstance(raw_facts, str):
        # Models often wrap JSON in a Markdown code fence; unwrap it so a valid
        # payload is not thrown away.
        fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", raw_facts.strip(), re.DOTALL | re.IGNORECASE)
        if fenced:
            raw_facts = fenced.group(1)
    try:
        facts = json.loads(raw_facts)
    except (ValueError, TypeError) as exc:
        print(f"Fact extraction did not return valid JSON ({exc}); using fallback facts.")
        facts = fallback_facts
    else:
        # The writer prompt expects a JSON object, not a bare list or scalar.
        if not isinstance(facts, dict):
            print("Fact extraction returned JSON that is not an object; using fallback facts.")
            facts = fallback_facts

    # ---- 2) Write headline / subheader / blurb --------------------------------
    # One-pass writer with strong, compact, in-prompt constraints and few-shots.
    writer_llm = ChatOpenAI(model="gpt-5", api_key=openai_api, temperature=1)

    few_shot_examples = (
        "EXAMPLE 1\n"
        "FACTS:\n"
        '- display_subject: "Acme–BetaCo"\n'
        '- action_verb: "finalize"\n'
        '- stance: "unlikely"\n'
        '- timebox: "by Sunday 11:59 pm ET"\n'
        '- primary_probability_pct: 32\n'
        '- one_day_change: -6.0\n'
        '- criteria_bullets: ["Resolves on public completion notice before deadline"]\n'
        '- context_bullets: ["Regulator flagged data issues"]\n'
        "OUTPUT:\n"
        "**Acme–BetaCo unlikely to finalize by Sunday**\n"
        "Resolution requires a public completion notice; reviewer flagged issues.\n"
        "Price sits near one-in-three after a 6-point drop. Watch for a formal notice or an extension filing before the deadline.\n\n"
        "EXAMPLE 2\n"
        "FACTS:\n"
        '- display_subject: "Country A central bank"\n'
        '- action_verb: "cut"\n'
        '- stance: "likely"\n'
        '- timebox: "this week"\n'
        '- primary_probability_pct: 72\n'
        '- one_day_change: +9.0\n'
        '- criteria_bullets: ["Resolves on decision statement"]\n'
        '- context_bullets: ["Inflation cooled"]\n'
        "OUTPUT:\n"
        "**Rate cut likely this week for Country A**\n"
        "Outcome hinges on the decision statement; softer inflation supports a move.\n"
        "Odds rose 9 points to 72%. Watch wording and any split votes."
    )

    # The few-shot examples sit between the rules and the real facts so the
    # model sees them before it writes.
    writer_prompt = (
        "You are a newsroom editor. Use ONLY the FACTS JSON. No outside info.\n\n"
        "HEADLINE RULES:\n"
        "- 6–12 words; active; no questions.\n"
        "- Must include display_subject (exact) and a stance word that matches stance:\n"
        "  likely → “likely/on track”; unlikely → “unlikely/at risk”; knife-edge → “too close to call/split odds”.\n"
        "- If timebox is known, include a time cue (e.g., “by Friday”, “this week”).\n"
        "- If action_verb exists, state it (finalize/approve/pass/etc.).\n"
        "- Avoid generic nouns alone (e.g., “acquisition” without whose acquisition).\n\n"
        "SUBHEADER RULES:\n"
        "- One line; must add *why this matters* (criteria, trigger, or decisive context), not repeat headline nouns.\n"
        "- Prefer criteria if present; otherwise a single strongest context bullet.\n\n"
        "BLURB RULES (≤3 short paragraphs, 12–18 words per sentence):\n"
        "1) Lead sentence: subject + action + timebox/criteria.\n"
        "2) Second: numeric specifics (probability and 24h change) and one concrete fact.\n"
        "3) Third: what to watch next (a document, docket, meeting, or deadline).\n"
        "No filler; newsroom tone; write for a general reader.\n\n"
        "EXAMPLES (format and tone reference only; do not reuse their facts):\n"
        + few_shot_examples + "\n\n"
        "FACTS:\n" + json.dumps(facts, ensure_ascii=False) + "\n\n"
        "Format exactly:\n"
        "**[HEADLINE]**\n"
        "[SUBHEADER]\n"
        "[BLURB]"
    )

    writer_resp = writer_llm.invoke([HumanMessage(content=writer_prompt)])
    final_text = writer_resp.content


    # ---- 3) Parse into ArticleOutput ------------------------------------------
    # Robust parsing:
    # - Headline is the first non-empty line, ideally wrapped in **...**; strip the asterisks if present.
    # - Subheader is the next non-empty line.
    # - Blurb is everything after the subheader (preserve original newlines), trimmed of leading blank lines.
    lines = final_text.splitlines()

    # Helper to find next non-empty index from start index (inclusive)
    def next_non_empty(idx):
        while idx < len(lines) and lines[idx].strip() == "":
            idx += 1
        return idx

    i = next_non_empty(0)
    if i >= len(lines):
        # Fallback: if formatting is unexpected, put everything in blurb
        headline = (market.title or "Market update").strip()
        subheader = "Automated summary"
        blurb_text = final_text.strip()
    else:
        raw_headline = lines[i].strip()
        # Strip bold markers if present at both ends
        m = re.match(r"^\*\*(.+)\*\*$", raw_headline)
        headline = m.group(1).strip() if m else raw_headline.strip("* ").strip()
        # Find subheader
        j = next_non_empty(i + 1)
        if j >= len(lines):
            subheader = "Automated summary"
            blurb_text = ""
        else:
            subheader = lines[j].strip()
            # Blurb is everything after j
            k = j + 1
            # Trim leading blank lines in blurb
            while k < len(lines) and lines[k].strip() == "":
                k += 1
            blurb_text = "\n".join(lines[k:]).strip()

    article = ArticleOutput(
        headline=headline,
        subheader=subheader,
        blurb=blurb_text if blurb_text else None,
        score=None,
        ticker=market.ticker,
        links=market.links or []
    )

    # ---- 4) Return structure ---------------------------------------------------
    # Generate the image, then attach its URL. Pydantic models reject assignment
    # to fields they do not declare, so a model without image_url must not cost
    # us the whole article.
    image_url = generate_and_upload_image(article.headline, article.blurb)
    try:
        article.image_url = image_url
    except (ValueError, AttributeError) as exc:
        print(f"Could not attach image_url to the article: {exc}")

    return {"newsletter_content": [article]}


Test passed. Headline: Widget Corp likely to finalize its acquisition by June 30, 2024
Subheader: Resolves on official confirmation by June 30; any delay means No.
Blurb: Widget Corp aims to finalize its acquisition by June 30, 2024, per market criteria.

Odds sit at 80%, up 2.5 points in 24 hours, with the CEO saying on track.

Watch for official confirmation before June 30; any delay beyond that date resolves to No.
Ticker: WIDGET-Q2-MERGER
Links: ['https://widgetnews.com/merger', 'https://finance.example.com/widget']


In [None]:
import os
import cloudinary
import cloudinary.uploader
from openai import OpenAI

# Configure Cloudinary (use your credentials)
# All three values are read from environment variables; each is None when the
# corresponding variable is not set.
cloudinary.config(
    cloud_name=os.getenv('CLOUDINARY_CLOUD_NAME'),
    api_key=os.getenv('CLOUDINARY_API_KEY'),
    api_secret=os.getenv('CLOUDINARY_API_SECRET')
)

# OpenAI client
# `OpenAI` here is the class imported from the `openai` package at the top of
# this cell, which rebinds the `OpenAI` name imported from langchain.llms in
# the first cell. The key is read from the environment, not from `openai_api`.
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def generate_and_upload_image(headline, blurb):
    """Generate an illustration for an article and upload it to Cloudinary.

    Args:
        headline: Article headline used in the image prompt.
        blurb: Article blurb; only its first 100 characters are used. May be
            None or empty, in which case the prompt is built from the headline
            alone.

    Returns:
        The ``secure_url`` of the uploaded image, or None if generation or
        upload failed (the error is printed; this step is best-effort).
    """
    # Slicing None would raise and skip the image for every blurb-less article.
    blurb_excerpt = (blurb or "")[:100]
    try:
        # Generate image with DALL-E
        response = openai_client.images.generate(
            model="dall-e-3",
            prompt=f"A realistic news-style image for: {headline}. {blurb_excerpt}",
            size="1024x1024",
            quality="standard",
            n=1
        )
        image_url = response.data[0].url

        # Upload to Cloudinary
        upload_result = cloudinary.uploader.upload(image_url)
        return upload_result['secure_url']
    except Exception as e:
        # Deliberately broad: a missing image must never abort article generation.
        print(f"Error generating/uploading image: {e}")
        return None


In [28]:
def continue_to_markets(state: OverallGraph):
    """Build one Send to "parallel_article_writing" for each market in ``state.markets_list``."""
    dispatches = []
    for market in state.markets_list:
        dispatches.append(Send("parallel_article_writing", market))
    return dispatches

In [29]:
from typing import List, Dict, Optional
from pydantic import BaseModel, Field
from dataclasses import dataclass
import math
# One newsletter section: a title, an optional subtitle and the ids of the
# articles it contains.
class ArticleGroup(BaseModel):
    # NOTE(review): this description allows 1–5 words, while the system prompt in
    # generate_groups asks for 2–4-word titles — confirm which limit is intended.
    title: str = Field(..., description="Concise, 1–5 words, Title Case, no emojis, no dates")
    subtitle: Optional[str] = Field(
        default=None,
        description="Optional 1-sentence descriptor clarifying the theme; omit if unnecessary"
    )
    # NOTE(review): this description allows a single-article group, while the
    # system prompt in generate_groups requires at least 2 articles per group.
    article_ids: List[int] = Field(
        ...,
        description="IDs of the articles in this group, 1–8 items, each ID used exactly once globally"
    )

# Structured-output schema used by generate_groups: an optional free-text note
# plus the ordered list of groups.
class ArticleGroups(BaseModel):
    thinking: Optional[str] = Field(
        default=None,
        description="Short note on how the groupings and titles were derived"
    )
    # Required; generate_groups returns this list under the "article_groups" key.
    groups: List[ArticleGroup] = Field(
        ...,
        description="Ordered list of groups; order will be used in the newsletter"
    )


In [30]:
def enumerate_articles(articles):
    """Render every article with its default formatting, one per line."""
    rendered = [format(entry, "") for entry in articles]
    return "\n".join(rendered)
import math
def generate_groups(state: OverallGraph):
    """Ask the chat model to cluster the generated articles into titled sections.

    Args:
        state: Overall graph state; ``newsletter_content`` is read.

    Returns:
        ``{"article_groups": groups}`` where ``groups`` is the list of
        ArticleGroup objects produced by the model, or an empty list when
        there are no articles to group (the model is not called in that case).
    """
    # newsletter_content defaults to None, so guard before iterating.
    articles = state.newsletter_content or []
    if not articles:
        print("result: no articles to group")
        # NOTE(review): kept as a list to match what the normal path returns;
        # OverallGraph declares article_groups as a dict — confirm the intended shape.
        return {"article_groups": []}

    llm = ChatOpenAI(model="gpt-5", api_key=openai_api,seed=42, temperature=1)
    structured_llm = llm.with_structured_output(ArticleGroups)

    formatted_articles = "\n".join([f"{a.id}: {a.headline}" for a in articles])
    system_rules = """
You are an expert news curator and section editor. Group related articles and name the sections.

HARD REQUIREMENTS (do not violate):
- Use ONLY the provided integer IDs; do not invent or skip any.
- Use EVERY ID exactly once across all groups; no duplicates, no leftovers.
- Each group MUST contain at least 2 articles (inclusive).
- Output MUST follow the provided JSON schema exactly (no extra keys).
- Titles: 2–4 words, Title Case, no emojis, no quotes, no dates, no source names.
- Avoid near-duplicate titles across groups; make them distinct.
- If a group’s theme is obvious, you may omit the subtitle; otherwise, include a short clarifier.

Editorial guidance:
- Prefer thematic cohesion (topic, actor, geography, sector, or consequence).
- Balance group sizes; if one group becomes too large, split it along a clear subtheme.
- For naming, aim for compact, newsroom-style labels (e.g., "US Politics", "Global Economy", "AI & Chips", "Climate Policy").
- Order groups from broad/global to specific/niche when reasonable.

Return ONLY valid structured output for the ArticleGroups schema. No extra explanation outside the JSON.
""".strip()
    example = {
        "thinking": "Clustered by geopolitical, domestic policy, tech/AI, and culture themes.",
        "groups": [
            {
                "title": "Global Affairs",
                "subtitle": "Conflicts, diplomacy, and macro shocks shaping international dynamics.",
                "article_ids": [1, 4, 7]
            },
            {
                "title": "US Politics",
                "subtitle": "Elections, legislation, and federal policy shifts.",
                "article_ids": [2, 5, 8]
            },
            {
                "title": "Culture & Media",
                "subtitle": None,
                "article_ids": [3, 6, 9]
            }
        ]
    }
    # Render the example as real JSON (null, double quotes) rather than a Python
    # repr, since the rules above demand JSON output.
    messages = [
        SystemMessage(content=system_rules + "\n\nExample (illustrative only):\n" + json.dumps(example, indent=2)),
        HumanMessage(content=f"Articles (id: headline):\n{formatted_articles}\n")
    ]
    result: ArticleGroups = structured_llm.invoke(messages)
    print("result: ", result)
    return {"article_groups": result.groups}

def send_redundant_groups(state : OverallGraph):
    """Route to END when the state is done, else fan out one Send per redundant group.

    Args:
        state: Overall graph state. ``done`` and ``newsletter_content`` are
            read, plus ``redundant_groups`` (lists of indices into
            ``newsletter_content``) when the state provides it.

    Returns:
        ``END`` when ``state.done`` is set; otherwise a list with one
        ``Send("combine_articles", articles)`` per redundant group (empty when
        the state carries no redundant groups).
    """
    if state.done:
        print("ending")
        print("state before ending: ", state)
        return END
    # OverallGraph declares no `redundant_groups` field in this notebook, so read
    # it defensively instead of raising AttributeError.
    # NOTE(review): add the field to the state model if this stage is re-enabled.
    redundant_groups = getattr(state, "redundant_groups", None) or []
    list_of_lists_of_articles = [
        [state.newsletter_content[index] for index in redundant_group]
        for redundant_group in redundant_groups
    ]
    print(list_of_lists_of_articles)
    return [Send("combine_articles", articles) for articles in list_of_lists_of_articles]
def combine_articles(articles):
    """Merge several newsletter articles into one via the chat model.

    The model is asked for an ArticleOutput; its headline, subheader and blurb
    are copied into a fresh ArticleOutput, returned under ``newsletter_content``.
    """
    print('fixing this redundant group')
    chat_model = ChatOpenAI(model="gpt-5", api_key=openai_api, temperature=1)
    article_writer = chat_model.with_structured_output(ArticleOutput)
    prompt = f"""
    You are a skilled AI editor tasked with condensing multiple prediction market newsletter articles into a single, comprehensive article. Your goal is to distill the most crucial information from each input article while maintaining the engaging style and format of the original pieces.
    Input:
    You will receive a list of newsletter articles, each following this structure:
    Headline: [Attention-grabbing headline]
    Subheader: [Supporting subheader adding context]
    Blurb: [2-3 sentence paragraph expanding on the headline and subheader]
    Instructions:

    Analyze all input articles, identifying the most significant and intriguing information from each.
    Create a new, overarching headline that captures the essence of the combined articles.
    Craft a subheader that provides additional context for the main headline.
    Write a concise blurb that synthesizes the key points from all input articles. This blurb should:

    Highlight the most interesting prediction market probabilities and movements.
    Mention any relevant time frames or end dates for the markets.
    Include any notable contrasts with traditional news reporting, if applicable.
    Preserve the confident, slightly irreverent tone of the original articles.


    Ensure your writing is engaging and assumes an audience familiar with prediction markets.
    Be as concise as possible while encoding the maximum amount of relevant information.

    Your output should follow this format:
    Headline: [Your new, overarching headline]
    Subheader: [A supporting subheader that adds context]
    Blurb: [A 3-4 sentence paragraph synthesizing key information from all input articles]
    Remember to use the exact formatting with the bold markers (**) as shown above.
    Now, using the provided input articles, generate a single, comprehensive newsletter article that captures the most crucial information while maintaining the engaging style and format of the original pieces.
    Here are the articles:
    {articles}"""
    merged = article_writer.invoke([SystemMessage(content=prompt)])
    print("made this new article out of ", len(articles), " articles with the headline: ", merged.headline)

    # Only the three text fields are carried over; every other field takes its default.
    combined = ArticleOutput(
        headline=merged.headline,
        subheader=merged.subheader,
        blurb=merged.blurb,
    )
    return {"newsletter_content": [combined]}
def remove_redundants(state: OverallGraph):
    """Drop the articles whose indices appear in any redundant group.

    Args:
        state: Overall graph state; ``newsletter_content`` is rewritten in
            place, and ``redundant_groups`` is read when the state provides it.

    Returns:
        The same ``state`` object with the redundant articles removed.
    """
    print("removing redundants")
    # OverallGraph declares no `redundant_groups` field in this notebook, so read
    # it defensively instead of raising AttributeError.
    redundant_groups = getattr(state, "redundant_groups", None) or []
    flat_indices = {index for sublist in redundant_groups for index in sublist}
    # newsletter_content defaults to None; treat that as "no articles".
    # NOTE(review): newsletter_content carries an operator.add reducer on
    # OverallGraph — verify that returning the whole state from a node replaces
    # the list rather than appending to it.
    state.newsletter_content = [
        item
        for i, item in enumerate(state.newsletter_content or [])
        if i not in flat_indices
    ]
    return state

In [31]:
def check_if_done(state : OverallGraph):
    """Return END when ``state.done`` equals True, else the string 'combine_similar'."""
    finished = state.done == True
    return END if finished else 'combine_similar'
    
def deduplicate_articles(state: OverallGraph):
    """Build one Send to "parallel_article_writing" per market in ``state.markets_list``.

    Despite the name, the body performs no de-duplication; it only fans out.
    """
    return list(
        map(lambda market: Send("parallel_article_writing", market), state.markets_list)
    )

In [32]:
# Define the graph
from IPython.display import Image, display
def define_workflow():
    """Build and compile the two-level newsletter graph.

    The inner graph (state: MarketInfo) runs ``tavily_search`` and then
    ``generate_newsletter_content`` for one market. The outer graph (state:
    OverallGraph) enters through ``continue_to_markets``, runs the compiled
    inner graph as the "parallel_article_writing" node, then ``generate_groups``,
    and ends.

    Returns:
        The compiled outer graph.
    """
    # Inner, per-market pipeline: fetch context, then write the article.
    market_graph = StateGraph(MarketInfo)
    market_graph.add_node("get_context", tavily_search)
    market_graph.add_node("generate_newsletter", generate_newsletter_content)
    market_graph.set_entry_point("get_context")
    market_graph.add_edge("get_context", "generate_newsletter")
    market_graph.add_edge("generate_newsletter", END)

    # Outer pipeline: fan out over markets, then group the resulting articles.
    newsletter_graph = StateGraph(OverallGraph)
    newsletter_graph.add_node("parallel_article_writing", market_graph.compile())
    newsletter_graph.set_conditional_entry_point(continue_to_markets, ["parallel_article_writing"])
    newsletter_graph.add_node("generate_groups", generate_groups)
    newsletter_graph.add_edge("parallel_article_writing", "generate_groups")
    # Disabled redundancy-merging stage, kept for reference:
    # newsletter_graph.add_node("combine_articles", combine_articles)
    # newsletter_graph.add_conditional_edges("generate_groups", send_redundant_groups, ["combine_articles", END])
    # newsletter_graph.add_node("remove_redundants", remove_redundants)
    # newsletter_graph.add_edge("combine_articles", "remove_redundants")
    newsletter_graph.add_edge("generate_groups", END)
    compiled_graph = newsletter_graph.compile()

    # Show
    #display(Image(compiled_graph.get_graph(xray=True).draw_mermaid_png()))

    return compiled_graph

In [33]:
def articles_wrapper(days_in_past, limit=75, min_interest_score=100,
                     excluded_tags=("Crypto", "Weather", "Mentions", "Sports")):
    """Fetch recent markets, filter and rank them, and run the newsletter graph.

    Steps:
      1. Fetch up to ``limit`` markets via ``get_markets_for_date_range``.
      2. Drop any market that has a tag whose ``'label'`` is in
         ``excluded_tags``.
      3. Keep markets whose ``interest_score`` is strictly greater than
         ``min_interest_score``; markets sharing a ``title`` collapse to the
         last one seen.
      4. Sort by ``interest_score`` descending, wrap each market dict in
         ``MarketInfo`` and invoke the graph built by ``define_workflow``.

    Args:
        days_in_past: Forwarded to ``get_markets_for_date_range``.
        limit: Maximum number of markets requested (default 75).
        min_interest_score: Exclusive lower bound on ``interest_score``
            (default 100).
        excluded_tags: Tag labels that disqualify a market.

    Returns:
        Whatever ``graph.invoke`` returns for the assembled ``OverallGraph``.
    """
    def _interest(market):
        # A missing or None score counts as 0 so the comparison and the sort
        # key never see None (None > 100 raises TypeError).
        return market.get('interest_score') or 0

    graph = define_workflow()
    markets = get_markets_for_date_range(days_in_past=days_in_past, limit=limit)

    filtered_markets = [
        market for market in markets
        if not any(tag['label'] in excluded_tags for tag in market.get('tags', []))
    ]

    # Keyed by title: a later market with the same title replaces an earlier one.
    unique_by_title = {
        market['title']: market
        for market in filtered_markets
        if _interest(market) > min_interest_score
    }
    sorted_markets_by_interest = sorted(
        unique_by_title.values(),
        key=_interest,
        reverse=True,
    )
    print("len of markets post score cutoff: ", len(sorted_markets_by_interest))

    market_info_list = [MarketInfo(**market) for market in sorted_markets_by_interest]
    print("number of markets going into the graph: ", len(market_info_list))

    markets_model = OverallGraph(markets_list=market_info_list)
    return graph.invoke(markets_model)

In [34]:
# Run the full pipeline over markets from the last day.
result = articles_wrapper(days_in_past=1)


len of markets post score cutoff:  7
number of markets going into the graph:  7
['https://www.polygon.com/awards/546327/game-of-the-year-game-awards-2025-frontrunners/', 'https://www.imdb.com/list/ls590425749/', 'https://www.youtube.com/watch?v=jpLUdgffcLU', 'https://thegameawards.com/', 'https://www.youtube.com/watch?v=xWhXiNXf4dk', 'https://duniagames.co.id/discover/article/the-game-awards-2025-goty-predictions-who-will-take-the-crown/en', 'https://www.thegamer.com/goty-game-of-the-year-2025-predictions-unsure-silksong-expedition-33/', 'https://spritecell.com/mp8-the-game-awards/', 'https://www.reddit.com/r/TheGamerLounge/comments/1o0kzrr/the_game_awards_kick_off_on_december_11_whos_your/', 'https://steamcommunity.com/discussions/forum/0/592912124322331194/']
['https://www.kiplinger.com/investing/when-is-the-next-fed-meeting', 'https://www.bankrate.com/banking/federal-reserve/fomc-what-to-expect/', 'https://growbeansprout.com/tools/fedwatch', 'https://www.federalreserve.gov/monetaryp

In [19]:
# Hand-built sample payload: a list of ArticleOutput objects plus a mapping of
# group title -> list of article ids.
# The list literal must NOT be followed by a comma: a trailing comma would bind
# `newsletter_content` to a 1-tuple wrapping the list instead of the list itself.
newsletter_content = [ArticleOutput(id=23, headline='Iran Military Action Against Israel Likely in 2024', subheader='Prediction market signals 58% chance of Iran-initiated conflict', blurb="The prediction market now assigns a 58% probability to Iran launching a military action against Israel by the end of 2024. This slight uptick in probability highlights the ever-present tensions between the two nations and reflects traders' concerns about potential escalation. With active trading volumes, the market underscores the significance of geopolitical developments in the Middle East.", score=5, ticker='another-iran-strike-on-israel-in-2024', links=['nothing.com']),
  ArticleOutput(id=29, headline='Kamala Harris Faces 8.45% Odds for Exact 270 Electoral Votes', subheader="Prediction market highlights challenging path through 'blue wall' states", blurb="As the 2024 U.S. Presidential Election approaches, prediction markets give Kamala Harris an 8.45% chance of securing exactly 270 electoral votes by winning a specified set of states, including the pivotal 'blue wall' states of Michigan, Pennsylvania, and Wisconsin. Despite a slight increase in confidence, market skepticism remains high. The outcome depends on her ability to navigate shifting political dynamics and voter turnout in these key regions.", score=6, ticker='kamala-wins-with-blue-wall-mi-pa-wi-270-evs', links=['nothing.com']),
  ArticleOutput(id=13, headline='Israel Withdrawal from Lebanon Faces Uncertainty', subheader="Market shows 42% chance of Israel's withdrawal by November 30", blurb='The prediction market suggests a 42% likelihood that Israel will announce a full withdrawal from Lebanon before the end of November 2024. This probability has decreased by 12.5% recently, reflecting growing skepticism amid ongoing regional tensions. Despite the challenges, the outcome remains closely watched as geopolitical dynamics continue to evolve.', score=5, ticker='israel-withdraws-from-lebanon-before-december', links=['nothing.com']),
  ArticleOutput(id=28, headline='Ethereum Unlikely to Hit $2,600 by November 1', subheader='Market shows low confidence in Ethereum price surge', blurb="With a mere 10.5% probability, the prediction market indicates skepticism about Ethereum reaching $2,600 by noon ET on November 1, 2024. The slight 0.6% drop in confidence over the last day further emphasizes a cautious sentiment among traders. As the resolution date approaches, investors should stay vigilant of any market shifts that could impact Ethereum's trajectory.", score=6, ticker='ethereum-above-2600-on-november-1', links=['nothing.com']),
  ArticleOutput(id=26, headline='Solana Faces Uncertainty for $170 Mark by November 2024', subheader="Crypto Traders Show Mixed Sentiment on Solana's Future", blurb="As of now, Solana's chances of trading above $170 on November 1, 2024, stand at a moderate 43.5%. The slight decrease in probability over the past day highlights growing uncertainty in the market, possibly due to recent crypto trends or news affecting investor confidence. With the volatile nature of cryptocurrencies like Solana, the final outcome remains unpredictable, making this market a captivating watch for crypto enthusiasts.", score=5, ticker='solana-above-170-on-november-1', links=['nothing.com']),
  ArticleOutput(id=32, headline='Trump Albuquerque Rally Highlights China, Biden Amid Election Campaign', subheader="Prediction market expects Trump to focus on China, Biden, and 'Garbage' in rally", blurb="As Donald Trump prepares for his Albuquerque rally, prediction markets are buzzing with expectations. Traders overwhelmingly predict he will spotlight issues like China and Biden, with probabilities soaring to 99.95%. This aligns with his ongoing narrative as he gears up for the 2024 presidential election campaign. Meanwhile, topics such as 'Native American' and 'Puerto Rico' are seen as unlikely to feature prominently, reflecting a strategic focus in his messaging.", score=7, ticker='what-will-trump-say-during-albuquerque-nm-rally', links=['nothing.com']),
  ArticleOutput(id=14, headline='Bitcoin eyes record high before US election', subheader='Market predicts 30% chance amidst price decline', blurb='As the US presidential election approaches, Bitcoin faces a potential new all-time high, with markets assigning a 30% probability of surpassing $73,777 by November 4, 2024. This prediction comes after a notable 33.5% drop in probability, suggesting shifting sentiments. With cryptocurrency dynamics intertwined with political events, investors should brace for potential volatility.', score=6, ticker='bitcoin-new-all-time-high-before-election', links=['nothing.com']),
  ArticleOutput(id=27, headline='Bitcoin poised to surpass $68000 by November 1', subheader='Market predicts strong bullish sentiment despite slight dip', blurb="The Polymarket prediction indicates a 92% likelihood that Bitcoin will exceed $68,000 by November 1, 2024, according to Binance's trading data. This optimistic forecast reflects a robust bullish sentiment in the crypto market, even though the probability dipped slightly by 1.85% recently. Investors are banking on historical trends and favorable market conditions to push Bitcoin past this significant price threshold.", score=8, ticker='bitcoin-above-68000-on-november-1', links=['nothing.com']),
  ArticleOutput(id=22, headline='Bitcoin unlikely to hit $80k before election', subheader='Market skepticism grows as probability drops to 6%', blurb='The Polymarket prediction market indicates a mere 6% chance of Bitcoin reaching $80,000 before the U.S. election. This skepticism is reflected in a recent drop in probability, suggesting traders doubt significant price surges amidst current market conditions. Factors such as market sentiment and potential regulatory changes could be influencing this outlook.', score=6, ticker='bitcoin-hits-80k-before-election', links=['nothing.com']),
  ArticleOutput(id=30, headline="Trump Rally Likely Features 'Women's Sports' and 'Pocahontas'", subheader='Prediction markets signal key phrases for Nevada event', blurb="As former President Donald Trump prepares for his rally in Henderson, Nevada, prediction markets are buzzing with expectations of what he might say. Top contenders include 'Women's Sports' and 'Pocahontas,' each with a high probability of being mentioned. These phrases align with Trump's previous campaign themes, suggesting they will resonate with his audience as he gears up for the 2024 election. Meanwhile, terms like 'Crypto/Bitcoin' and 'Tampon' are far less likely, reflecting a strategic focus on traditional issues.", score=5, ticker='what-will-trump-say-during-nevada-rally-oct-31', links=['nothing.com']),
  ArticleOutput(id=2, headline='Trump Faces Steep Challenge in Florida', subheader='Prediction Market Shows Declining Confidence in 12-point Margin', blurb="As the 2024 U.S. Presidential Election approaches, the Polymarket prediction market indicates a mere 20.5% chance that Donald Trump will secure Florida by a 12-point margin, a key battleground state. This reflects a notable decrease in confidence, with probabilities dropping 0.11 in just one day. With Florida's pivotal role in the election, this shift highlights the uncertainty and fierce competition expected in the upcoming race.", score=6, ticker='will-trump-win-florida-by-12-points', links=['nothing.com']),
  ArticleOutput(id=20, headline='Texans Face Tight Odds Against Jets', subheader='Market Predicts Competitive Matchup in Upcoming NFL Game', blurb='As the Houston Texans prepare to face the New York Jets on October 31, prediction markets reflect a closely contested game with the Texans holding a 45.5% chance of victory. Recent trading activity shows a slight dip in confidence for the Texans, dropping 2% in probability. With the game still days away, factors like player health and team dynamics could sway the odds further.', score=5, ticker='nfl-hou-nyj-2024-10-31', links=['nothing.com']),
  ArticleOutput(id=19, headline='Harris Expected to Win New Jersey by 10% Margin', subheader="Prediction market shows strong confidence in Harris's lead", blurb='With over a year until the 2024 U.S. Presidential Election, prediction markets show a 79.5% probability that Kamala Harris will secure a 10% or greater margin of victory in New Jersey. This reflects strong confidence in her candidacy despite the potential for political developments to influence public opinion before the election date. The market will remain open as it awaits the official vote count and certification in New Jersey.', score=8, ticker='will-harris-win-new-jersey-by-10-points', links=['nothing.com']),
  ArticleOutput(id=4, headline='Trump faces uphill battle in Rust Belt sweep', subheader='Prediction market shows 26% chance for Trump victory in key states', blurb='As the 2024 US Presidential Election approaches, prediction markets indicate a challenging path for Donald Trump in the Rust Belt swing states of Wisconsin, Michigan, and Pennsylvania. With only a 26% probability of sweeping these crucial states, the market reflects skepticism about his ability to replicate past successes. This development is critical, as these states often play a pivotal role in election outcomes. Stay tuned as the dynamics evolve closer to election day.', score=6, ticker='trump-rust-belt-swing-state-sweep', links=['nothing.com']),
  ArticleOutput(id=18, headline='Trump Faces Uphill Battle in Iowa Poll', subheader="Prediction Market Shows Low Confidence in Trump's Dominance", blurb="As the Iowa 2024 presidential election approaches, the prediction market shows only a 13.5% probability that Donald Trump will lead by a 12% margin or more in Anne Selzer's final poll. Despite a slight 0.5% increase in confidence, market participants remain skeptical of Trump's ability to secure a commanding lead. This reflects a cautious stance among observers, potentially due to recent campaign dynamics or shifts in voter opinion.", score=6, ticker='trump-12-in-selzer-iowa-poll', links=['nothing.com']),
  ArticleOutput(id=25, headline='Chase Oliver Leads 2024 Third-Party Race', subheader='Prediction market shows Oliver ahead of Stein and RFK Jr.', blurb='In the latest insights from Polymarket, Chase Oliver emerges as the frontrunner among third-party candidates for the 2024 US Presidential election, holding a 38% probability of winning the most votes. Jill Stein follows closely with 33%, while RFK Jr. has a 27% chance. Interestingly, all candidates have experienced a dip in their probabilities, indicating recent shifts in voter sentiment or polling data. Despite their presence, Cornel West, Vermin Supreme, and other candidates are unlikely to make a significant impact, as reflected in their low probabilities.', score=7, ticker='which-3rd-party-candidate-wins-most-votes', links=['nothing.com'])]

# Group title -> ids of the ArticleOutput entries above that belong to it.
article_groups = {'Middle East Politics': [23, 13],
  'US Election Predictions': [29, 32, 2, 19, 4, 18, 25],
  'Cryptocurrency Forecasts': [28, 26, 14, 27, 22],
  'Trump Rallies': [30, 4, 18],
  'Sports and Betting': [20]}

In [35]:
# Pull the two pieces of the graph result that the sender cells consume.
articles = result["newsletter_content"]
groups = result["article_groups"]

# Single test recipient; other addresses currently disabled:
# "vikmanocha@gmail.com", "jameswmarren@gmail.com"
email_list = ["ethanagoldberg@gmail.com"]

# Display the generated articles as the cell output.
articles


[ArticleOutput(id=2, headline='Federal Reserve funds-rate change too close to call by March 18, 2026', subheader='Resolves to basis-point change in the upper bound, rounded to nearest 25, per FOMC statement.', blurb='The Federal Reserve decision to change the upper bound resolves by March 18, 2026 under stated criteria.\nNo change leads at 55.5% probability, down 0.01 in 24 hours, with 332 comments posted.\nWatch the FOMC statement released after the March 17–18 meeting for the definitive resolution.', score=None, ticker='fed-decision-in-march-885', links=['https://www.kiplinger.com/investing/when-is-the-next-fed-meeting', 'https://www.bankrate.com/banking/federal-reserve/fomc-what-to-expect/', 'https://growbeansprout.com/tools/fedwatch', 'https://www.federalreserve.gov/monetarypolicy/fomcminutes20250917.htm', 'https://www.aa.com.tr/en/economy/us-federal-reserve-expected-to-continue-easing-cycle-in-2026-experts/3727598', 'https://www.federalreserve.gov/monetarypolicy/fomcminutes2025061

In [36]:
from newsletter_sender import NewsletterSender, test_smtp_connection
from typing import List
# Connection settings passed to `test_smtp_connection` and `send_newsletter`.
# NOTE(review): `smtp_pass` is not assigned in this cell; it must already exist
# in the kernel namespace -- confirm it is loaded from an environment variable
# or secret store rather than typed into the notebook.
# NOTE(review): port 587 is normally used with STARTTLS; how "secure" is
# interpreted depends on newsletter_sender -- verify.
smtp_config = {
    "host": "smtp.gmail.com",
    "port": 587,
    "secure": True,
    "auth": {
        "user": "polynewsdailynewsletter@gmail.com",  # Full Gmail address of the sending account
        "pass": smtp_pass    # The 16-character Gmail app password generated for that account
    },
    "from": '"PolyNewsDaily Update" <polynewsdailynewsletter@gmail.com>'  # Display name plus the same Gmail address
}

In [37]:
# Flatten the group objects into a plain {title: article_ids} mapping.
group_dict = dict((group.title, group.article_ids) for group in groups)

In [23]:
# Send to every subscriber: check SMTP first, then persist and mail the issue.
# NOTE(review): this cell passes `groups`, while the later single-recipient
# cell passes `group_dict` -- confirm which shape the sender expects.
if test_smtp_connection(smtp_config):
    print("Connection successful! Sending test newsletter...")

    sender = NewsletterSender()
    # Fall back to a single test recipient when no subscribers come back.
    email_list = sender.get_subscriber_emails() or ["ethanagoldberg@gmail.com"]
    save_result = sender.save_newsletter_to_db(articles, groups)
    results = sender.send_newsletter(smtp_config, email_list, articles, groups)

✅ SMTP connection successful!
Connection successful! Sending test newsletter...
Connecting to: postgres://u8tpitnkehg4bm@c1i13pt05ja4ag.cluster-czrs8kj4isg7.us-east-1.rds.amazonaws.com:5432/d8714o8m60rj7k


Found 45 subscribers
Connecting to: postgres://u8tpitnkehg4bm@c1i13pt05ja4ag.cluster-czrs8kj4isg7.us-east-1.rds.amazonaws.com:5432/d8714o8m60rj7k



    Database summary:
    - Articles: 10
    - Groups: 4
    - Group-Article associations: 13
            
Newsletter saved to database successfully


Sending batch 1/1


Successfully sent to: lgoldbergcourt@gmail.com


Successfully sent to: vikmanocha@gmail.com


Successfully sent to: zglassband@gmail.com


Successfully sent to: elijg235@gmai.com


Successfully sent to: charlie.oestreicher@gmail.com


Successfully sent to: ethan.goldberg.pike@gmail.com


Successfully sent to: max.huber@gmail.com


Successfully sent to: elijg235@gmail.com


Successfully sent to: noah.costa.pike@gmail.com


Successfully sent to: leonicastro3@gmail.com


Successfully sent to: rcolon1331@yahoo.com


Successfully sent to: joe.zakielarz.pike@gmail.com


Successfully sent to: echristianr13@gmail.com


Successfully sent to: sftorres56@gmail.com


Successfully sent to: dxstinyai@gmail.com


Successfully sent to: ethanagoldberg@gmail.com


Successfully sent to: perooo88+poly@gmail.com


Successfully sent to: sanjayginde@gmail.com


Successfully sent to: lgrzybowska212@gmail.com


Successfully sent to: sao2162002@gmail.com


Successfully sent to: kristenw@nyu.edu


Successfully sent to: vinson1900@hey.com


Successfully sent to: henry.yang@yale.edu


Successfully sent to: emmawj@protonmail.com


Successfully sent to: snovik@gmail.com


Successfully sent to: bv@vufund.vc


Successfully sent to: christianmaxwellwright@gmail.com


Successfully sent to: enternetdesign@gmail.com


Successfully sent to: jsuss.101@gmail.com


Successfully sent to: ethanagoldberg1@gmail.com


Successfully sent to: leekk980816@gmail.com


Successfully sent to: trey@hewell.net


Successfully sent to: abiyslee1011@gmail.com


Successfully sent to: andrew.keys@thetimes.co.uk


Successfully sent to: com231562@gmail.com


Successfully sent to: angieyvonnegoldberg@gmail.com


Successfully sent to: ban46291@gmail.com


Successfully sent to: francesco@siliconroundabout.ventures


Successfully sent to: seanjfis@gmail.com


Successfully sent to: sophiecmunro@icloud.com


Successfully sent to: dylanhurwitz@gmail.com


Successfully sent to: maniya.chowdhary21@gmail.com


Successfully sent to: billweica@yahoo.com


Successfully sent to: mli0118999@gmail.com


Successfully sent to: maxwellyoas@gmail.com


In [43]:
# Single-recipient test send using the {title: article_ids} mapping.
# NOTE(review): the recorded run of this cell ended with
# "'>' not supported between instances of 'NoneType' and 'int'", and the
# articles displayed earlier have score=None -- presumably the sender compares
# scores; verify in newsletter_sender.
email_list = ['ethanagoldberg@gmail.com']
sender = NewsletterSender()
save_result = sender.save_newsletter_to_db(articles, group_dict)
results = sender.send_newsletter(smtp_config, email_list, articles, group_dict)



Connecting to database...

    Database summary:
    - Articles: 7
    - Groups: 3
    - Group-Article associations: 7
            
Newsletter saved to database successfully
Error sending newsletter: '>' not supported between instances of 'NoneType' and 'int'


In [41]:
# Display the current recipient list as the cell output.
email_list


['ethanagoldberg@gmail.com']