# Chapter 8 Guide 

## 8.1

In [1]:
#cell 1
import requests  #A
import pandas as pd  
import logging  
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta  
pd.set_option("display.max_colwidth", None)

load_dotenv()

NEWS_API_KEY = os.getenv("NEWS_API_KEY")  #B

# Dynamic date calculation: today minus one day  
today = datetime.now().date()  #C  
yesterday = today - timedelta(days=1)  

# Function to extract articles from NewsAPI  
def extract_articles(query, from_date=yesterday, api_key=NEWS_API_KEY):  
    url = f'https://newsapi.org/v2/everything?q={query}&from={from_date}&to={today}&apiKey={api_key}'  #D
    response = requests.get(url)  
    
    if response.status_code == 200:  
        articles = response.json().get('articles', [])  #E
        logging.info(f"Successfully extracted {len(articles)} articles.")  
        return articles  
    else:  
        logging.error(f"Failed to fetch articles. Status code: {response.status_code}")  
        return []  

# Example use case  
articles = extract_articles('Tesla')  #F

# Build a DataFrame with one row per article and full JSON blob
df = pd.DataFrame({'article': articles})  #G
df



Unnamed: 0,article
0,"{'source': {'id': None, 'name': 'Journal du geek'}, 'author': 'Olivier', 'title': 'Comme Tesla, Xpeng veut prouver qu’une voiture peut se passer du lidar', 'description': 'Le constructeur chinois Xpeng mise tout sur la vision et l'IA pour ses véhicules autonomes, abandonnant de fait les coûteux capteurs laser. Une approche radicale, inspirée par Tesla, mais qui suscite encore pas mal de doutes.', 'url': 'https://www.journaldugeek.com/2025/10/11/comme-tesla-xpeng-veut-prouver-quune-voiture-peut-se-passer-du-lidar/', 'urlToImage': 'https://www.journaldugeek.com/app/uploads/2025/10/xpeng-p7-1600x900.jpg', 'publishedAt': '2025-10-11T14:02:33Z', 'content': 'Xpeng a confirmé un changement technologique d’importance durant le salon IAA Mobility 2025 de Munich : fini les lidars dans les voitures autonomes ! Ces capteurs laser, jugés essentiels par la plupa… [+2690 chars]'}"
1,"{'source': {'id': None, 'name': 'Caschys Blog'}, 'author': 'Felix Frank', 'title': 'Tesla: Neues Software-Update bringt 3D-Gebäude in die Navigation und mehr', 'description': 'Tesla hat mit dem Model 3 Standard und dem Model Y Standard neue Einsteigermodelle vorgestellt. Letztgenanntes Model Y Standard ist zu einem Preis ab 39.990 Euro ab sofort auch hierzulande erhältlich. Neuerungen gibt es aber auch für Bestandsfahrzeuge mit dem…', 'url': 'https://stadt-bremerhaven.de/tesla-neues-software-update-bringt-3d-gebaeude-in-die-navigation-und-mehr/', 'urlToImage': 'https://stadt-bremerhaven.de/wp-content/uploads/2024/07/Tesla-Model-3-2023.jpeg', 'publishedAt': '2025-10-11T09:00:09Z', 'content': 'Tesla hat mit dem Model 3 Standard und dem Model Y Standard neue Einsteigermodelle vorgestellt. Letztgenanntes Model Y Standard ist zu einem Preis ab 39.990 Euro ab sofort auch hierzulande erhältlich… [+3305 chars]'}"
2,"{'source': {'id': None, 'name': 'Frandroid'}, 'author': 'Jean-Baptiste Passieux', 'title': 'Nouvelles Tesla abordables, une Xiaomi vraiment autonome et usines chinoises en Europe – Récap’ Survoltés', 'description': 'Entre polémiques sur les Tesla low-cost, les déploiements industriels géants et les bidouilles qui changent tout, l’actualité électrisée de cette semaine a de quoi faire réagir. Place au grand écart de la mobilité numérique et durable, dans ce nouveau Récap' …', 'url': 'https://www.frandroid.com/survoltes/2827515_recap-survoltes-202541', 'urlToImage': 'https://c0.lestechnophiles.com/images.frandroid.com/wp-content/uploads/2025/10/g23xywuw0aaisa7.jpeg?resize=1600,900&key=c7805b99&watermark', 'publishedAt': '2025-10-11T18:02:00Z', 'content': 'Entre polémiques sur les Tesla low-cost, les déploiements industriels géants et les bidouilles qui changent tout, l’actualité électrisée de cette semaine a de quoi faire réagir. Place au grand écart … [+3878 chars]'}"
3,"{'source': {'id': None, 'name': 'Gizmodo.jp'}, 'author': '岡本玄介', 'title': 'テスラの半額くらいで買えそうなルーマニア出身の軽バンEV', 'description': 'Image:DACIA機能性もデザインも良くて、テスラの半額。電動自動車のTESLA（テスラ）をはじめ、EVの市販化は割と最近の流行でクルマ好きには気になる存在かと思います。でも新しいモノだけにお高いので、なかなか手が出せないんですよね。ダチア知ってる？ ルーマニアの大衆車のイメージが強く、今はルノーグループ傘下のDACIA（ダチア）。彼らは購買層がEVのに求めているものをリサーチし、まったく新し', 'url': 'https://www.gizmodo.jp/2025/10/dacia-hipster-concept.html', 'urlToImage': 'https://media.loom-app.com/gizmodo/dist/images/2025/10/08/251009_dacia.jpg?w=1280&h=630&f=jpg', 'publishedAt': '2025-10-11T02:00:00Z', 'content': 'TESLAEV DACIAEVDACIA HIPSTER CONCEPT Video: YOUCAR/YouTube 3m4 SF EV 70L500L Image: DACIA Bluetooth DACIA HIPSTER CONCEPT15000260EVDACIA HIPSTER CONCEPT EVModel 3530Model Y560DACIA HIPSTER … [+48 chars]'}"
4,"{'source': {'id': None, 'name': 'Guessingheadlights.com'}, 'author': 'Olivia Richman', 'title': 'The Most Expensive Cars and Trucks Made in America', 'description': 'When you think of luxury vehicles, your mind might wander to Italian exotics or German engineering. But some of the priciest rides on the road are actually...', 'url': 'https://guessingheadlights.com/most-expensive-cars-and-trucks-from-us/', 'urlToImage': 'https://s.yimg.com/ny/api/res/1.2/7phYapDW1vstvfUVBS6bKw--/YXBwaWQ9aGlnaGxhbmRlcjt3PTEyMDA7aD02NzU-/https://media.zenfs.com/en/guessing_headlights_178/46ec365c303edd28b75d13f597634dbb', 'publishedAt': '2025-10-11T13:00:21Z', 'content': 'When you think of luxury vehicles, your mind might wander to Italian exotics or German engineering. But some of the priciest rides on the road are actually built right here in the United States. From… [+5926 chars]'}"
...,...
94,"{'source': {'id': None, 'name': 'Novinky.cz'}, 'author': 'Milan Lažanský, Novinky, ČTK', 'title': 'Před rokem představila Tesla samořízené taxi Cybercab, na silnice zatím nevyjelo', 'description': 'Přesně před rokem, 11. října 2024, představil šéf americké automobilky Tesla Elon Musk na akci pro investory samořízené taxi Cybercab. „Myslím, že náklady na autonomní dopravu budou tak nízké, že si ji můžete představit jako individualizovanou hromadnou dopra…', 'url': 'https://www.novinky.cz/clanek/auto-pred-rokem-predstavila-tesla-samorizene-taxi-cybercab-na-silnice-zatim-nevyjelo-40543238', 'urlToImage': 'https://d15-a.sdn.cz/d_15/c_img_m1_A/nO6B5UBUaPvURsody/tesla-cybercab?fl=cro,0,213,4096,2304%7Cres,1200,,1%7Cwebp,75', 'publishedAt': '2025-10-11T10:05:00Z', 'content': 'Pedstavené vozy, které bhem akce pepravovaly návtvníky areálu, mly dvoje dvee otevírané nahoru, ale nemly ádné kliky, protoe se dvee otevíraly automaticky. Chyblo jim i zadní okno a boní zrcátka. Mus… [+1129 chars]'}"
95,"{'source': {'id': 'the-times-of-india', 'name': 'The Times of India'}, 'author': 'Reuters', 'title': 'Crypto race to tokenize stocks raises investor protection flags', 'description': 'Crypto firms are racing to launch stock-pegged tokens, sparking concerns among financial giants and regulators. While proponents tout 24/7 trading and instant settlement, critics warn of investor risks and market fragmentation. Many tokens lack traditional ri…', 'url': 'https://economictimes.indiatimes.com/markets/cryptocurrency/crypto-news/crypto-race-to-tokenize-stocks-raises-investor-protection-flags/articleshow/124471348.cms', 'urlToImage': 'https://img.etimg.com/thumb/msid-124471366,width-1200,height-630,imgsize-196688,overlay-etmarkets/articleshow.jpg', 'publishedAt': '2025-10-11T06:28:41Z', 'content': 'A race by crypto companies to sell tokens pegged to stocks is raising alarm bells among traditional financial firms and regulatory experts who warn that the fast-growing novel products pose risks to … [+5919 chars]'}"
96,"{'source': {'id': None, 'name': 'Naftemporiki.gr'}, 'author': 'Νατάσα Στασινού', 'title': 'Η ανάρτηση του Τραμπ που στοίχισε 2 τρισ. δολάρια – Και τι περιμένουμε τη Δευτέρα', 'description': 'Ήταν Παρασκευή πρωί, και ο S&P 500 απείχε μόλις λίγα σημεία από ακόμη ένα ιστορικό υψηλό. Μέσα σε λίγα λεπτά, όλα ανετράπησαν. Μια ανάρτηση 500 λέξεων του Ντόναλντ Τραμπ στο… Η ανάρτηση του Τραμπ που στοίχισε 2 τρισ. δολάρια – Και τι περιμένουμε τη Δευτέρα - …', 'url': 'https://www.naftemporiki.gr/finance/markets/2018617/i-anartisi-toy-tramp-poy-stoichise-2-tris-dolaria-kai-ti-perimenoyme-ti-deytera/', 'urlToImage': 'https://www.naftemporiki.gr/wp-content/uploads/2025/08/ipa_tramp_apografi_metanastes.jpg', 'publishedAt': '2025-10-11T16:24:09Z', 'content': ', S&amp;P 500 . , .  500 Truth Social 2 . .  « » , « » «» , .  -, Wall Street, : « ».  .  Bespoke Investment Group, . Nasdaq 3,56%, , Dow Jones 879 (-1,9%).  500 S&amp;P, 424 , funds .  33 «»… [+383 chars]'}"
97,"{'source': {'id': 'the-times-of-india', 'name': 'The Times of India'}, 'author': 'ETtech', 'title': 'Apple eyeing top talent, technology from visual intelligence startup Prompt AI: Report', 'description': 'According to a CNBC report, employees who chose not to join Apple would receive lower pay and were urged to apply for open roles at the company. The startup told its employees about the pending deal at an all-hands meeting on Thursday, saying those not joinin…', 'url': 'https://economictimes.indiatimes.com/tech/artificial-intelligence/apple-eyeing-top-talent-technology-from-visual-intelligence-startup-prompt-ai-report/articleshow/124479848.cms', 'urlToImage': 'https://img.etimg.com/thumb/width-1200,height-900,imgsize-23722,resizemode-75,msid-124479848/tech/artificial-intelligence/apple-eyeing-top-talent-technology-from-visual-intelligence-startup-prompt-ai-report.jpg', 'publishedAt': '2025-10-11T13:24:09Z', 'content': 'Apple is in advanced stages of discussions to acquire top talent and technology from computer vision startupPrompt AI, CNBC reported Friday.The startup told its employees about the pending deal at an… [+3004 chars]'}"


## 8.2

In [2]:
import os
import logging
import openai
import pandas as pd
from dotenv import load_dotenv
from pydantic import BaseModel

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

class ExtractedArticle(BaseModel):
    source: str
    title: str
    short_summary: str
    publish_date: str

system_prompt = f"""
You are a data extraction agent. For each input article JSON, return a single object matching this schema:
{ExtractedArticle.schema_json(indent=2)}

Use the raw JSON to guide extraction with natural language hints:
- source: use article['source']['name'] when present.
- title: use article['title'].
- short_summary: 1–2 sentences summarizing the article in plain English.
- publish_date: use article['publishedAt'] (ISO-8601 timestamp).

Return exactly one object that matches the schema.
""".strip()

# Sentiment agent (adapted from news_api_tsla_full_pipeline)
def perform_sentiment_analysis(text: str):
    prompt = (
        "Analyze the sentiment of the following text and return a numerical sentiment "
        "score from -1 (very negative) to 1 (very positive). Return only the number: "
        f"{text}"
    )
    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=50,
            temperature=0.3
        )
        sentiment_str = response.choices[0].message.content.strip()
        return float(sentiment_str)
    except Exception as e:
        logging.error(f"Error performing sentiment analysis: {e}")
        return None

results = []
input_articles = articles  # from prior cell

# Limit for quick iteration; adjust/remove as needed
for idx, article in enumerate(input_articles[:5]):
    try:
        completion = openai.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"{article}"}
            ],
            response_format=ExtractedArticle
        )
        parsed = completion.choices[0].message.parsed
        if parsed:
            item = parsed.dict()
            item["sentiment"] = perform_sentiment_analysis(item["short_summary"])  # agent call
            results.append(item)
    except Exception as e:
        print(f"Error on article {idx}: {e}")

extracted_df = pd.DataFrame(results)
extracted_df


/var/folders/h0/ckkxq40s70vc524w2v0_myw00000gp/T/ipykernel_2109/3097910003.py:19: PydanticDeprecatedSince20: The `schema_json` method is deprecated; use `model_json_schema` and json.dumps instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  {ExtractedArticle.schema_json(indent=2)}
/var/folders/h0/ckkxq40s70vc524w2v0_myw00000gp/T/ipykernel_2109/3097910003.py:69: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  item = parsed.dict()


Unnamed: 0,source,title,short_summary,publish_date,sentiment
0,Journal du geek,"Comme Tesla, Xpeng veut prouver qu’une voiture peut se passer du lidar","Xpeng, a Chinese car manufacturer, is following Tesla's lead by relying on vision and AI for autonomous vehicles, instead of using expensive laser sensors. This bold move is unconventional and raises skepticism within the industry.",2025-10-11T14:02:33Z,0.1
1,Caschys Blog,Tesla: Neues Software-Update bringt 3D-Gebäude in die Navigation und mehr,"Tesla has introduced new entry-level models for the Model 3 Standard and Model Y Standard, with the latter available in the market at a starting price of 39,990 Euro. Additionally, Tesla has released a new software update for existing vehicles that includes 3D buildings in navigation features.",2025-10-11T09:00:09Z,0.5
2,Frandroid,"Nouvelles Tesla abordables, une Xiaomi vraiment autonome et usines chinoises en Europe – Récap’ Survoltés","The article discusses recent developments in the electric vehicle and technology industry, including affordable Tesla models, autonomous Xiaomi projects, and the expansion of Chinese factories in Europe. These topics highlight ongoing trends and changes in sustainable mobility.",2025-10-11T18:02:00Z,0.5
3,Gizmodo.jp,テスラの半額くらいで買えそうなルーマニア出身の軽バンEV,"The article discusses the Dacia Hipster Concept, an affordable electric vehicle from Romania that costs about half the price of a Tesla. The Dacia Hipster is positioned as a functional and well-designed option for those interested in purchasing an EV at a lower cost.",2025-10-11T02:00:00Z,0.8
4,Guessingheadlights.com,The Most Expensive Cars and Trucks Made in America,"The article explores luxury vehicles that are manufactured in the United States, highlighting how some of the most expensive cars and trucks on the road are made domestically rather than overseas.",2025-10-11T13:00:21Z,0.2


## 8.3

In [3]:
import logging
import openai
import pandas as pd
from pydantic import BaseModel

class QualityCategorization(BaseModel):
    short_date: str            # YYYY-MM-DD (no timezone)
    publish_est: str           # ISO-8601 datetime in America/New_York
    publish_pst: str           # ISO-8601 datetime in America/Los_Angeles
    publish_gmt: str           # ISO-8601 datetime in GMT/UTC (+00:00)
    topic: str                 # One of: Financial, Operations, Product/Technology, Regulatory/Legal, Market/Competition, Executive/Personnel, Strategy/M&A, Customers/Partnerships, Supply Chain/Manufacturing, ESG/Sustainability, Risk/Incidents, Marketing/PR
    region: str                # One of: North America, South America, Europe, Africa, Middle East, Asia, Oceania

qc_system_prompt = f"""
You are a data quality and categorization agent. For each input article, return a single object matching this schema:
{QualityCategorization.schema_json(indent=2)}

Instructions:
- short_date: Derive from the input publish_date by dropping time and timezone, format as YYYY-MM-DD.
- publish_est / publish_pst / publish_gmt: Convert the input publish_date to the specified timezone and return ISO-8601 (include timezone offset). Use the original timestamp as ground truth. Do not guess.
- topic: Choose the best label from [Financial, Operations, Product/Technology, Regulatory/Legal, Market/Competition, Executive/Personnel, Strategy/M&A, Customers/Partnerships, Supply Chain/Manufacturing, ESG/Sustainability, Risk/Incidents, Marketing/PR]. If none is perfect, pick the closest and be consistent.
- region: Infer using language cues, source, and content (country/city mentions). Map to one of:
  [North America, South America, Europe, Africa, Middle East, Asia, Oceania]. Always use exactly these labels.
- Return strictly valid JSON with exactly these keys and no extra text.
""".strip()

qc_results = []

for idx, row in extracted_df.iterrows():
    article_input = {
        "source": row.get("source", ""),
        "title": row.get("title", ""),
        "short_summary": row.get("short_summary", ""),
        "publish_date": row.get("publish_date", "")
    }
    try:
        completion = openai.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": qc_system_prompt},
                {"role": "user", "content": f"{article_input}"}
            ],
            response_format=QualityCategorization
        )
        parsed = completion.choices[0].message.parsed
        if parsed:
            qc_results.append(parsed.dict())
        else:
            qc_results.append({
                "short_date": "",
                "publish_est": "",
                "publish_pst": "",
                "publish_gmt": "",
                "topic": "",
                "region": ""
            })
    except Exception as e:
        logging.error(f"QC error on row {idx}: {e}")
        qc_results.append({
            "short_date": "",
            "publish_est": "",
            "publish_pst": "",
            "publish_gmt": "",
            "topic": "",
            "region": ""
        })

qc_df = pd.DataFrame(qc_results)
enriched_df = pd.concat([extracted_df.reset_index(drop=True), qc_df], axis=1)
enriched_df


/var/folders/h0/ckkxq40s70vc524w2v0_myw00000gp/T/ipykernel_2109/1513746606.py:16: PydanticDeprecatedSince20: The `schema_json` method is deprecated; use `model_json_schema` and json.dumps instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  {QualityCategorization.schema_json(indent=2)}
/var/folders/h0/ckkxq40s70vc524w2v0_myw00000gp/T/ipykernel_2109/1513746606.py:47: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  qc_results.append(parsed.dict())


Unnamed: 0,source,title,short_summary,publish_date,sentiment,short_date,publish_est,publish_pst,publish_gmt,topic,region
0,Journal du geek,"Comme Tesla, Xpeng veut prouver qu’une voiture peut se passer du lidar","Xpeng, a Chinese car manufacturer, is following Tesla's lead by relying on vision and AI for autonomous vehicles, instead of using expensive laser sensors. This bold move is unconventional and raises skepticism within the industry.",2025-10-11T14:02:33Z,0.1,2025-10-11,2025-10-11T10:02:33-04:00,2025-10-11T07:02:33-07:00,2025-10-11T14:02:33Z,Product/Technology,Asia
1,Caschys Blog,Tesla: Neues Software-Update bringt 3D-Gebäude in die Navigation und mehr,"Tesla has introduced new entry-level models for the Model 3 Standard and Model Y Standard, with the latter available in the market at a starting price of 39,990 Euro. Additionally, Tesla has released a new software update for existing vehicles that includes 3D buildings in navigation features.",2025-10-11T09:00:09Z,0.5,2025-10-11,2025-10-11T05:00:09-04:00,2025-10-11T02:00:09-07:00,2025-10-11T09:00:09+00:00,Product/Technology,Europe
2,Frandroid,"Nouvelles Tesla abordables, une Xiaomi vraiment autonome et usines chinoises en Europe – Récap’ Survoltés","The article discusses recent developments in the electric vehicle and technology industry, including affordable Tesla models, autonomous Xiaomi projects, and the expansion of Chinese factories in Europe. These topics highlight ongoing trends and changes in sustainable mobility.",2025-10-11T18:02:00Z,0.5,2025-10-11,2025-10-11T14:02:00-04:00,2025-10-11T11:02:00-07:00,2025-10-11T18:02:00+00:00,Product/Technology,Europe
3,Gizmodo.jp,テスラの半額くらいで買えそうなルーマニア出身の軽バンEV,"The article discusses the Dacia Hipster Concept, an affordable electric vehicle from Romania that costs about half the price of a Tesla. The Dacia Hipster is positioned as a functional and well-designed option for those interested in purchasing an EV at a lower cost.",2025-10-11T02:00:00Z,0.8,2025-10-11,2025-10-10T22:00:00-04:00,2025-10-10T19:00:00-07:00,2025-10-11T02:00:00+00:00,Product/Technology,Europe
4,Guessingheadlights.com,The Most Expensive Cars and Trucks Made in America,"The article explores luxury vehicles that are manufactured in the United States, highlighting how some of the most expensive cars and trucks on the road are made domestically rather than overseas.",2025-10-11T13:00:21Z,0.2,2025-10-11,2025-10-11T09:00:21-04:00,2025-10-11T06:00:21-07:00,2025-10-11T13:00:21Z,Supply Chain/Manufacturing,North America


# 8.4 + 8.5

In [7]:
import os
import json
import logging
import pandas as pd
import openai
from pydantic import BaseModel

# Ensure psycopg is available
import sys, subprocess
try:
    import psycopg
except Exception:
    subprocess.run([sys.executable, "-m", "pip", "install", "psycopg[binary]>=3.1"], check=False)
    import psycopg

openai.api_key = os.getenv("OPENAI_API_KEY")

# Pydantic for DDL contract
class TableDDL(BaseModel):
    ddl: str  # CREATE TABLE ... statement only

# Model-friendly schema of enriched_df
sample_fields = {
    "source": "text",
    "title": "text",
    "short_summary": "text",
    "publish_date": "timestamptz",
    "sentiment": "numeric",
    "short_date": "date",
    "publish_est": "timestamptz",
    "publish_pst": "timestamptz",
    "publish_gmt": "timestamptz",
    "topic": "text",
    "region": "text"
}

# Compose prompt to generate DDL
ddl_prompt = f"""
You are a SQL DDL assistant. Return only a single valid PostgreSQL CREATE TABLE statement for table name news_articles.
Use these columns and suggested types. Adjust types conservatively if needed, add NOT NULL only if obviously safe.
Columns:
{json.dumps(sample_fields, indent=2)}

Rules:
- Include a surrogate primary key id BIGSERIAL PRIMARY KEY.
- Add created_at TIMESTAMPTZ DEFAULT NOW().
- Use snake_case column names exactly as provided.
- Return strictly the SQL, no comments or extra text.
""".strip()

# Ask AI for DDL
completion = openai.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": ddl_prompt},
        {"role": "user", "content": "Generate the DDL now."}
    ],
    response_format=TableDDL
)
TABLE_DDL = completion.choices[0].message.parsed.ddl
print(TABLE_DDL)

# Connect to Postgres
conn = psycopg.connect(
    host=os.getenv("PGHOST", "localhost"),
    port=os.getenv("PGPORT", "5432"),
    dbname=os.getenv("PGDATABASE", "news_db"),
    user=os.getenv("PGUSER", "news_user"),
    password=os.getenv("PGPASSWORD", "")
)

# Create table if not exists (idempotent):
with conn.cursor() as cur:
    try:
        cur.execute(TABLE_DDL)
    except Exception as e:
        # If table already exists, ignore
        msg = str(e).lower()
        if "already exists" not in msg:
            raise
conn.commit()

# Prepare insert (upsert optional)
cols = [
    "source", "title", "short_summary", "publish_date", "sentiment",
    "short_date", "publish_est", "publish_pst", "publish_gmt", "topic", "region"
]

placeholders = ",".join(["%s"] * len(cols))
insert_sql = f"INSERT INTO news_articles ({','.join(cols)}) VALUES ({placeholders})"

# Convert dataframe rows to tuples
rows = []
for _, r in enriched_df.iterrows():
    rows.append(tuple(r.get(c) for c in cols))

# Batch insert
with conn.cursor() as cur:
    if rows:
        cur.executemany(insert_sql, rows)
        print(f"Inserted {len(rows)} rows into news_articles")
    else:
        print("No rows to insert")
conn.commit()

conn.close()



CREATE TABLE news_articles (
    id BIGSERIAL PRIMARY KEY,
    source TEXT,
    title TEXT,
    short_summary TEXT,
    publish_date TIMESTAMPTZ,
    sentiment NUMERIC,
    short_date DATE,
    publish_est TIMESTAMPTZ,
    publish_pst TIMESTAMPTZ,
    publish_gmt TIMESTAMPTZ,
    topic TEXT,
    region TEXT,
    created_at TIMESTAMPTZ DEFAULT NOW()
);
Inserted 5 rows into news_articles


# Optional Verification

In [None]:
import os
import pandas as pd

# Reuse psycopg from previous cell (installed if needed there)
import psycopg

conn = psycopg.connect(
    host=os.getenv("PGHOST", "localhost"),
    port=os.getenv("PGPORT", "5432"),
    dbname=os.getenv("PGDATABASE", "news_db"),
    user=os.getenv("PGUSER", "news_user"),
    password=os.getenv("PGPASSWORD", "")
)

# Show total rows
with conn.cursor() as cur:
    cur.execute("SELECT COUNT(*) FROM news_articles;")
    total_rows = cur.fetchone()[0]
print(f"news_articles rows: {total_rows}")

# Preview last 5 rows
with conn.cursor() as cur:
    cur.execute(
        """
        SELECT id, source, title, publish_date, topic, region, sentiment, created_at
        FROM news_articles
        ORDER BY id DESC
        LIMIT 5;
        """
    )
    rows = cur.fetchall()
    cols = [c[0] for c in cur.description]

conn.close()

pd.DataFrame(rows, columns=cols)
