# Module 3: LLM Headline Generation

Specify the week of meetings to process.

In [None]:
#### EDIT THIS
# First and last day of the week of meetings to process (both inclusive).
# Replace each placeholder with an eight-digit date string in YYYYMMDD form,
# e.g. "20250106"; the values are parsed later with the "%Y%m%d" format.
MONDAY_DATE = "YYYYMMDD"
FRIDAY_DATE = "YYYYMMDD"
#### EDIT THIS

In [2]:
from datetime import datetime
# (first day, last day) of the week to process, as YYYYMMDD strings.
WEEK = (MONDAY_DATE, FRIDAY_DATE)
# Parse both bounds in one pass; an invalid date string raises ValueError here.
START_DATE, END_DATE = (datetime.strptime(day, "%Y%m%d") for day in WEEK)

Define the LLM prompt for generating a headline from each combined segment.

In [None]:
import time
import pandas as pd
import anthropic
from openai import OpenAI
import google.generativeai as genai
from pathlib import Path
import os

  from .autonotebook import tqdm as notebook_tqdm


The agenda segments folder `_interim/agenda_segments/` should contain one `.csv` file per meeting, each holding a combined segment for every agenda topic.

In [None]:
# Folder expected to hold one CSV of combined agenda segments per meeting.
AGENDA_SEGMENTS_PATH = Path("../_interim/agenda_segments/")

# Raise explicitly instead of using `assert`: asserts are stripped when Python
# runs with -O and give no hint about what is missing. `is_dir()` also rejects
# a regular file at this location, which would otherwise make the later
# `glob("*.csv")` silently match nothing.
if not AGENDA_SEGMENTS_PATH.is_dir():
    raise FileNotFoundError(
        f"Agenda segments folder not found: {AGENDA_SEGMENTS_PATH.resolve()}"
    )

Function to build the headline generating prompt for each combined segment.

In [4]:
def build_headline_prompt(combined_segment: str) -> str:
    """Return the full headline-writing prompt for one combined segment.

    The prompt consists of fixed reporter instructions, a ``---`` separator,
    the given segment text, and a trailing ``Headline:`` cue for the model to
    complete.

    Args:
        combined_segment: Text combining the agenda, legislation, and
            transcript sections for a single agenda topic.

    Returns:
        The prompt string to send to an LLM.
    """
    instructions = """
You are a local government reporter covering city council meetings.

You will receive:
- A section from a **city council meeting agenda** (note: this may be vague or generic)
- A section from the related **official legislation**
- A section from the **meeting transcript**

Your task is to write a **clear, one-sentence headline** that:
- Focuses on the **most newsworthy action or decision**
- Summarizes what the **council actually did**, proposed, debated, or approved
- Highlights **specific outcomes**, impacts, or controversial statements
- Is written at an **eighth-grade reading level**
- Contains **no commentary** or extra background

Do *not* copy or paraphrase the agenda title. Use the transcript and legislation instead.

---
"""
    # The segment goes directly under the separator, followed by the cue line.
    return f"{instructions}{combined_segment}\nHeadline:\n"

Load API keys and set up clients.

In [None]:
# load API keys from environment variables
# (each provider's key is read from its own variable; `os.getenv` returns
# None when the variable is not set)
claude_key = os.getenv("CLAUDE_KEY")
openai_key = os.getenv("OPENAI_KEY")
gemini_key = os.getenv("GEMINI_KEY")

# setup API clients
claude_client = anthropic.Anthropic(api_key=claude_key)
openai_client = OpenAI(api_key=openai_key)
genai.configure(api_key=gemini_key)



# helper functions to prompt LLMs
def generate_headline_claude(prompt):
    """Send ``prompt`` to Claude and return the stripped text of its reply.

    Uses the module-level ``claude_client`` with temperature 0 and a
    64-token output cap. Only the first content block of the response is read.
    """
    user_turn = {"role": "user", "content": prompt}
    reply = claude_client.messages.create(
        messages=[user_turn],
        model="claude-sonnet-4-20250514",
        temperature=0,
        max_tokens=64,
    )
    first_block = reply.content[0]
    return first_block.text.strip()

def generate_headline_gemini(prompt):
    """Send ``prompt`` to Gemini and return the stripped text of its reply.

    Temperature is pinned to 0 so this helper matches the Claude and OpenAI
    helpers, which both request temperature 0. No output-token cap is set,
    as in the original call.
    """
    model = genai.GenerativeModel("gemini-2.5-pro")
    response = model.generate_content(
        prompt,
        generation_config={"temperature": 0},
    )
    return response.text.strip()

def generate_headline_openai(prompt):
    """Send ``prompt`` to the OpenAI chat API and return the stripped reply.

    Uses the module-level ``openai_client`` with temperature 0 and a
    64-token output cap. Only the first returned choice is read.
    """
    request = {
        "model": "gpt-4.1-mini",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0,
        "max_tokens": 64,
    }
    completion = openai_client.chat.completions.create(**request)
    top_choice = completion.choices[0]
    return top_choice.message.content.strip()

For each combined segment, prompt each of the three LLMs (Claude Sonnet 4, Gemini 2.5 Pro, and GPT-4.1 mini) to generate a headline.

In [None]:
LLM_CODES = {"C": "claude", "G": "gemini", "O": "openai"}

# Maps each LLM name in LLM_CODES to the helper that queries it.
HEADLINE_GENERATORS = {
    "claude": generate_headline_claude,
    "gemini": generate_headline_gemini,
    "openai": generate_headline_openai,
}

# Seconds to wait after every API attempt, successful or not, so that a
# failing request (e.g. a rate-limit error) is not immediately followed by
# another one.
REQUEST_PAUSE_SECONDS = 10

for llm_name in LLM_CODES.values():
    generate_headline = HEADLINE_GENERATORS[llm_name]
    headline_col = f"{llm_name}_headline"

    for aseg_file in AGENDA_SEGMENTS_PATH.glob("*.csv"):

        # check if meeting took place in specified week; the first eight
        # characters of the file name are parsed as the meeting date
        try:
            file_date = datetime.strptime(aseg_file.stem[:8], "%Y%m%d")
        except ValueError:
            # skip, file name does not start with a valid date
            continue
        if not START_DATE <= file_date <= END_DATE:
            # skip, meeting did not take place in specified week
            continue

        df = pd.read_csv(aseg_file)
        # create the headline column if missing; "NO_HEADLINE" marks rows
        # that still need a headline, so reruns skip rows already filled in
        if headline_col not in df.columns:
            df[headline_col] = "NO_HEADLINE"

        for idx, row in df.iterrows():
            combined_segment = row.get("combined_segment", "")
            # Empty CSV cells are read back as NaN (a float) and a missing
            # column yields ""; neither is worth an API call.
            if not isinstance(combined_segment, str) or not combined_segment.strip():
                continue
            if combined_segment == "NO_SEGMENT":
                continue
            if row[headline_col] != "NO_HEADLINE":
                continue

            try:
                # Wrap the segment in the reporter instructions; sending the
                # bare segment would give the model no task description.
                prompt = build_headline_prompt(combined_segment)
                df.at[idx, headline_col] = generate_headline(prompt)
            except Exception as e:
                # Best effort: report the failure and move on, leaving the
                # row at "NO_HEADLINE" so a later run can retry it.
                print(f"!!!! ERROR GENERATING ROW {idx}: {e}")
            finally:
                time.sleep(REQUEST_PAUSE_SECONDS)

        # write the headlines back into the same CSV file
        df.to_csv(aseg_file, index=False)

This concludes Module 3: LLM Headline Generation.