# *Imports*

In [1]:
import os, json, math, time, hashlib
import numpy as np
import pandas as pd
import requests
from pinecone import Pinecone, ServerlessSpec
import os
from dotenv import load_dotenv
import textwrap
load_dotenv()

True

# *Configurations*

In [2]:
# Chunking restrictions
# NOTE: MAX_TOKENS is passed to the chunker as a *word* count (max_words), so it
# only approximates a token budget.
MAX_TOKENS = 1024 # candidate values: [512, 1024, 2048]
MAX_OVERLAP_RATIO = 0.2 # fraction of a chunk shared with the next one; candidate values: [0.05, 0.15, 0.25]
EMBED_DIMS = 1536  # embedding vector length; assigned again with the same value in the LLMOD settings cell

# Overlap expressed in words; int() truncates (1024 * 0.2 = 204.8 -> 204)
OVERLAP_WORDS = int(MAX_TOKENS * MAX_OVERLAP_RATIO)
START_WITH_N_TALKS = 30  # number of leading CSV rows to process; set None to do all data

# Output artifact files (cached embeddings)
OUT_META_JSONL = "ted_chunks_meta.jsonl"  # one JSON object per chunk, appended across runs
OUT_EMB_NPY = "ted_chunks_embeds.npy"  # embeddings matrix, rows in the same order as the JSONL lines
OUT_IDMAP_JSON = "ted_chunks_idmap.json"  # chunk_uids already embedded, to avoid duplicates across runs

# API keys (read from the environment / .env; os.getenv returns None when a key is unset)
LLMOD_API_KEY = os.getenv("LLMOD_API_KEY")  # keep the key in .env rather than pasting it into the notebook
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
pc = Pinecone(api_key=PINECONE_API_KEY)
INDEX_NAME = "ted"

# Headers shared by every llmod.ai request (embeddings and chat)
HEADERS = {"Authorization": f"Bearer {LLMOD_API_KEY}", "Content-Type": "application/json"}

In [3]:
# LLMOD API settings
BASE_URL = "https://api.llmod.ai/v1"  # endpoint paths ("/embeddings", "/chat/completions") are appended to this
EMBED_MODEL = "RPRTHPB-text-embedding-3-small"
# Same value as the EMBED_DIMS assigned in the chunking config cell; keep the two in sync.
EMBED_DIMS = 1536

In [4]:
# ====== RAG parameters ======
CHAT_MODEL = "RPRTHPB-gpt-5-mini"

# Must choose & report (per assignment)
# These mirror the chunking config, so they resolve to 1024 and 0.2 in this
# notebook (earlier notes here claimed 2048 and 0.30, which did not match).
RAG_CHUNK_SIZE_TOKENS = MAX_TOKENS          # follows MAX_TOKENS (1024 here)
RAG_OVERLAP_RATIO = MAX_OVERLAP_RATIO       # follows MAX_OVERLAP_RATIO (0.2 here)
RAG_TOP_K = 2                               # default number of matches retrieved per question (<= 30)

# When True, build_context includes metadata["text"] in each passage block.
RETRIEVE_INCLUDE_TEXT = True  # we stored snippet in metadata["text"]

# System message sent verbatim with every chat request.
REQUIRED_SYSTEM_PROMPT = """You are a TED Talk assistant that answers questions strictly and
only based on the TED dataset context provided to you (metadata
and transcript passages). You must not use any external
knowledge, the open internet, or information that is not explicitly
contained in the retrieved context. If the answer cannot be
determined from the provided context, respond: “I don't know
based on the provided TED data.” Always explain your answer
using the given context, quoting or paraphrasing the relevant
transcript or metadata when helpful.
"""


# *Chunks*

In [10]:
def stable_hash(text: str) -> str:
    """Return the SHA-256 hex digest of `text` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()

def approx_word_chunks(text: str, max_words: int, overlap_words: int):
    """
    Split `text` into overlapping windows of whitespace-separated words.

    Each window holds at most `max_words` words and starts
    `max_words - overlap_words` words after the previous one, so consecutive
    windows share `overlap_words` words. Word counts stand in for token counts.

    Returns a list of chunk strings (words re-joined with single spaces);
    an empty or whitespace-only `text` yields [].
    Raises ValueError when `overlap_words >= max_words` (non-positive stride).
    """
    tokens = text.split()
    n_tokens = len(tokens)
    if n_tokens == 0:
        return []

    stride = max_words - overlap_words
    if stride <= 0:
        raise ValueError("overlap_words too large; step must be > 0.")

    pieces = []
    for begin in range(0, n_tokens, stride):
        stop = min(begin + max_words, n_tokens)
        piece = " ".join(tokens[begin:stop]).strip()
        if piece:
            pieces.append(piece)
        # The window reached the final word: later windows would only repeat the tail.
        if stop == n_tokens:
            break
    return pieces

def embed_texts_batch(texts, model=EMBED_MODEL, dims=EMBED_DIMS, max_retries=6):
    """
    Embed a batch of texts via the llmod.ai embeddings endpoint (OpenAI-compatible).

    Args:
        texts: list of strings sent as the request "input".
        model: embedding model name.
        dims: value sent as the request "dimensions".
        max_retries: total number of attempts before giving up.

    Returns:
        float32 array with one row per item in the response's "data" list.

    Raises:
        RuntimeError: on a non-retryable HTTP status, or when every attempt
            failed (the message carries the last HTTP status or exception).
    """
    url = f"{BASE_URL}/embeddings"
    payload = {
        "model": model,
        "input": texts,
        "dimensions": dims,  # aligned with the model default (1536)
    }

    # Must be bound before the loop: when every attempt ends in a retryable
    # HTTP status no exception is ever caught, and the final raise used to
    # fail with UnboundLocalError instead of the intended RuntimeError.
    last_err = None
    for attempt in range(max_retries):
        try:
            r = requests.post(url, headers=HEADERS, data=json.dumps(payload), timeout=60)
            if r.status_code == 200:
                data = r.json()
                # data["data"] is list of {embedding: [...]}
                embs = [np.array(item["embedding"], dtype=np.float32) for item in data["data"]]
                return np.vstack(embs)
            # Retry on rate limit / transient server errors, with exponential backoff capped at 30s
            if r.status_code in (429, 500, 502, 503, 504):
                last_err = f"HTTP {r.status_code}: {r.text[:200]}"
                sleep_s = min(2 ** attempt, 30)
                time.sleep(sleep_s)
                continue
            # Any other status is treated as permanent: fail immediately with an explanation
            raise RuntimeError(f"Embeddings error {r.status_code}: {r.text[:500]}")
        except requests.RequestException as e:
            last_err = e
            sleep_s = min(2 ** attempt, 30)
            time.sleep(sleep_s)
    raise RuntimeError(f"Embeddings failed after retries. Last error: {last_err}")

In [11]:
# Load the TED talks CSV; the path is relative to the notebook's working directory.
CSV_PATH = "ted_talks_en.csv"  
df = pd.read_csv(CSV_PATH)

# start small for budget: keep only the first START_WITH_N_TALKS rows (None keeps everything)
# NOTE: `df` is rebound here, so re-running only this cell always reloads and re-trims.
if START_WITH_N_TALKS is not None:
    df = df.head(START_WITH_N_TALKS).copy()

# Preview of the first rows (rendered as the cell output)
df.head()

Unnamed: 0,talk_id,title,speaker_1,all_speakers,occupations,about_speakers,views,recorded_date,published_date,event,native_lang,available_lang,comments,duration,topics,related_talks,url,description,transcript
0,1,Averting the climate crisis,Al Gore,{0: 'Al Gore'},{0: ['climate advocate']},{0: 'Nobel Laureate Al Gore focused the world’...,3523392,2006-02-25,2006-06-27,TED2006,en,"['ar', 'bg', 'cs', 'de', 'el', 'en', 'es', 'fa...",272.0,977,"['alternative energy', 'cars', 'climate change...","{243: 'New thinking on the climate crisis', 54...",https://www.ted.com/talks/al_gore_averting_the...,With the same humor and humanity he exuded in ...,"Thank you so much, Chris. And it's truly a gre..."
1,92,The best stats you've ever seen,Hans Rosling,{0: 'Hans Rosling'},{0: ['global health expert; data visionary']},"{0: 'In Hans Rosling’s hands, data sings. Glob...",14501685,2006-02-22,2006-06-27,TED2006,en,"['ar', 'az', 'bg', 'bn', 'bs', 'cs', 'da', 'de...",628.0,1190,"['Africa', 'Asia', 'Google', 'demo', 'economic...","{2056: ""Own your body's data"", 2296: 'A visual...",https://www.ted.com/talks/hans_rosling_the_bes...,You've never seen data presented like this. Wi...,"About 10 years ago, I took on the task to teac..."
2,7,Simplicity sells,David Pogue,{0: 'David Pogue'},{0: ['technology columnist']},{0: 'David Pogue is the personal technology co...,1920832,2006-02-24,2006-06-27,TED2006,en,"['ar', 'bg', 'de', 'el', 'en', 'es', 'fa', 'fr...",124.0,1286,"['computers', 'entertainment', 'interface desi...","{1725: '10 top time-saving tech tips', 2274: '...",https://www.ted.com/talks/david_pogue_simplici...,New York Times columnist David Pogue takes aim...,"(Music: ""The Sound of Silence,"" Simon & Garfun..."
3,53,Greening the ghetto,Majora Carter,{0: 'Majora Carter'},{0: ['activist for environmental justice']},{0: 'Majora Carter redefined the field of envi...,2664069,2006-02-26,2006-06-27,TED2006,en,"['ar', 'bg', 'bn', 'ca', 'cs', 'de', 'en', 'es...",219.0,1116,"['MacArthur grant', 'activism', 'business', 'c...",{1041: '3 stories of local eco-entrepreneurshi...,https://www.ted.com/talks/majora_carter_greeni...,"In an emotionally charged talk, MacArthur-winn...",If you're here today — and I'm very happy that...
4,66,Do schools kill creativity?,Sir Ken Robinson,{0: 'Sir Ken Robinson'},"{0: ['author', 'educator']}","{0: ""Creativity expert Sir Ken Robinson challe...",65051954,2006-02-25,2006-06-27,TED2006,en,"['af', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs...",4931.0,1164,"['children', 'creativity', 'culture', 'dance',...","{865: 'Bring on the learning revolution!', 173...",https://www.ted.com/talks/sir_ken_robinson_do_...,Sir Ken Robinson makes an entertaining and pro...,Good morning. How are you? (Audience) Good. It...


In [12]:
# Build chunk records (metadata + text): one dict per transcript chunk.
records = []
for _, row in df.iterrows():
    talk_id = str(row.get("talk_id", ""))
    title = str(row.get("title", ""))

    # A missing transcript is read from the CSV as NaN. NaN is truthy, so
    # `str(value or "")` used to produce the literal text "nan", which was then
    # chunked and embedded. Treat missing values as empty so the row is skipped.
    raw_transcript = row.get("transcript", "")
    transcript = "" if pd.isna(raw_transcript) else str(raw_transcript or "")
    if not transcript.strip():
        continue

    # MAX_TOKENS is applied as a word budget by the chunker.
    chunks = approx_word_chunks(transcript, max_words=MAX_TOKENS, overlap_words=OVERLAP_WORDS)

    for ci, chunk_text in enumerate(chunks):
        # The uid covers talk, title, position and content, so any change in
        # chunking parameters or text yields a new id (and a fresh embedding).
        chunk_uid = stable_hash(talk_id + "|" + title + "|" + str(ci) + "|" + chunk_text)
        records.append({
            "chunk_uid": chunk_uid,
            "talk_id": talk_id,
            "title": title,
            "chunk_id": ci,
            "text": chunk_text,
        })

# Cell output: number of chunks and the record keys (None when nothing was built)
len(records), records[0].keys() if records else None

(118, dict_keys(['chunk_uid', 'talk_id', 'title', 'chunk_id', 'text']))

In [13]:
# Read the chunk_uids embedded by earlier runs so they are not embedded again.
seen = set()
if os.path.exists(OUT_IDMAP_JSON):
    with open(OUT_IDMAP_JSON, "r", encoding="utf-8") as f:
        seen.update(json.load(f))

# Keep only the records whose uid has not been embedded yet.
new_records = [rec for rec in records if rec["chunk_uid"] not in seen]

print("Total records:", len(records))
print("Already embedded:", len(records) - len(new_records))
print("To embed now:", len(new_records))

Total records: 118
Already embedded: 118
To embed now: 0


In [14]:
# Embed new chunks in batches + append to cache.
# Three artifacts are written below: the embeddings matrix (.npy), the chunk
# metadata (.jsonl) and the list of embedded chunk_uids (.json). Row i of the
# matrix and line i of the JSONL describe the same chunk, because both are
# appended in the same order; the upsert cell relies on that alignment.
BATCH_SIZE = 64 

# Embeddings saved by earlier runs (None on the first run)
if os.path.exists(OUT_EMB_NPY):
    old_embs = np.load(OUT_EMB_NPY)
else:
    old_embs = None

new_emb_list = []  # one (batch, EMBED_DIMS) array per API call
to_write_meta = []  # records embedded in this run, in embedding order

# NOTE(review): nothing is written to disk until every batch has succeeded, so
# an exception inside this loop discards the embeddings computed so far.
for i in range(0, len(new_records), BATCH_SIZE):
    batch = new_records[i:i+BATCH_SIZE]
    texts = [b["text"] for b in batch]
    embs = embed_texts_batch(texts)
    # Sanity check dimensions
    if embs.shape[1] != EMBED_DIMS:
        raise ValueError(f"Unexpected embedding dims: {embs.shape[1]} (expected {EMBED_DIMS})")

    new_emb_list.append(embs)
    to_write_meta.extend(batch)

    print(f"Embedded {min(i+BATCH_SIZE, len(new_records))}/{len(new_records)}")

# Append embeddings: new rows go after the previously cached rows
if new_emb_list:
    new_embs = np.vstack(new_emb_list)
    all_embs = new_embs if old_embs is None else np.vstack([old_embs, new_embs])
else:
    # Nothing new: reuse the cache, or an empty (0, EMBED_DIMS) matrix when there is none
    all_embs = old_embs if old_embs is not None else np.zeros((0, EMBED_DIMS), dtype=np.float32)

# Save embeddings matrix (the file is rewritten in full on every run, even with no new rows)
np.save(OUT_EMB_NPY, all_embs)

# Append metadata (jsonl), one line per newly embedded chunk, same order as the new rows
if to_write_meta:
    with open(OUT_META_JSONL, "a", encoding="utf-8") as f:
        for r in to_write_meta:
            f.write(json.dumps({
                "chunk_uid": r["chunk_uid"],
                "talk_id": r["talk_id"],
                "title": r["title"],
                "chunk_id": r["chunk_id"],
                "text": r["text"],
            }, ensure_ascii=False) + "\n")

# Update seen set and save (written last, after the embeddings and metadata)
for r in to_write_meta:
    seen.add(r["chunk_uid"])
with open(OUT_IDMAP_JSON, "w", encoding="utf-8") as f:
    json.dump(sorted(list(seen)), f)

# The reported shape counts every cached row, including chunks from earlier runs
print("Done.")
print("Embeddings shape:", all_embs.shape)
print("Saved:", OUT_EMB_NPY, OUT_META_JSONL, OUT_IDMAP_JSON)


Done.
Embeddings shape: (157, 1536)
Saved: ted_chunks_embeds.npy ted_chunks_meta.jsonl ted_chunks_idmap.json


# Pinecone Index

In [15]:
# Names of the indexes that already exist in this Pinecone project
existing = [idx["name"] for idx in pc.list_indexes()]

# Create the index only when no index with this name is listed, so re-running the cell is harmless.
if INDEX_NAME not in existing:
    pc.create_index(
        name=INDEX_NAME,
        dimension=EMBED_DIMS, # using the dimensions returned by embedding model
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        )
    )

# Handle used by the upsert and retrieval cells
index = pc.Index(INDEX_NAME)
print("Ready:", INDEX_NAME)

Ready: ted


In [16]:
# Local record of the vector ids already sent to Pinecone
OUT_PINECONE_IDMAP_JSON = "pinecone_upserted_ids.json"

# Both cached artifacts must exist; they are produced by the embedding cell.
if not os.path.exists(OUT_EMB_NPY):
    raise FileNotFoundError(f"Missing embeddings file: {OUT_EMB_NPY}")
embs = np.load(OUT_EMB_NPY)

if not os.path.exists(OUT_META_JSONL):
    raise FileNotFoundError(f"Missing metadata file: {OUT_META_JSONL}")

# Read the metadata JSONL, skipping blank lines
meta_rows = []
with open(OUT_META_JSONL, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if line:
            meta_rows.append(json.loads(line))

# Row i of `embs` is paired with meta_rows[i] below, so the counts must match.
if len(meta_rows) != embs.shape[0]:
    raise ValueError(
        f"Mismatch: meta rows={len(meta_rows)} vs embeddings={embs.shape[0]}. "
        "These must be aligned (same append order)."
    )
print("Loaded embeddings:", embs.shape)
print("Loaded meta rows:", len(meta_rows))

# using local cache to avoid re-upserting
# NOTE(review): this reflects only what this machine recorded, not the actual index contents.
if os.path.exists(OUT_PINECONE_IDMAP_JSON):
    with open(OUT_PINECONE_IDMAP_JSON, "r", encoding="utf-8") as f:
        upserted = set(json.load(f))
else:
    upserted = set()

print("Already upserted IDs (local cache):", len(upserted))

def safe_metadata(m: dict) -> dict:
    """
    Reduce a chunk record to the metadata stored alongside its vector.

    Only simple types are kept: `talk_id` and `title` as strings, `chunk_id`
    as an int, and `text` cut to its first 2000 characters so the metadata
    stays small. Missing keys fall back to "" / 0; a falsy `text` becomes "".
    """
    full_text = m.get("text", "")
    if not full_text:
        full_text = ""
    # Only a leading snippet is stored; adjust the limit if more context is needed.
    snippet = full_text[:2000]

    cleaned = {}
    cleaned["talk_id"] = str(m.get("talk_id", ""))
    cleaned["title"] = str(m.get("title", ""))
    cleaned["chunk_id"] = int(m.get("chunk_id", 0))
    cleaned["text"] = snippet
    return cleaned

# Build items to upsert (skip already upserted IDs).
# Each item is an (id, vector, metadata) tuple; the vector is taken from the
# embeddings row at the same position as the metadata line.
items = []
for i, m in enumerate(meta_rows):
    _id = m["chunk_uid"]
    if _id in upserted:
        continue

    vec = embs[i]
    if vec.shape[0] != EMBED_DIMS:
        raise ValueError(f"Bad dims at row {i}: got {vec.shape[0]} expected {EMBED_DIMS}")

    items.append((_id, vec.tolist(), safe_metadata(m)))

print("To upsert now:", len(items))

# Upsert in batches
UPSERT_BATCH = 100
for start in range(0, len(items), UPSERT_BATCH):
    batch = items[start:start+UPSERT_BATCH]
    index.upsert(vectors=batch)
    # update local cache (in memory only; it is written to disk after the loop)
    for _id, _, _ in batch:
        upserted.add(_id)

    # Progress is reported for every 5th batch only (batch 0, 5, 10, ...)
    if (start // UPSERT_BATCH) % 5 == 0:
        print(f"Upserted {min(start+UPSERT_BATCH, len(items))}/{len(items)}")

# Persist local upsert cache
# NOTE(review): this runs only if every batch succeeded; after a failure the ids
# upserted so far are not recorded and are sent again on the next run.
with open(OUT_PINECONE_IDMAP_JSON, "w", encoding="utf-8") as f:
    json.dump(sorted(list(upserted)), f)

print("✅ Upsert complete.")
print("Local upsert cache saved to:", OUT_PINECONE_IDMAP_JSON)

# describing index stats
stats = index.describe_index_stats()
print("Index stats:", stats)

Loaded embeddings: (157, 1536)
Loaded meta rows: 157
Already upserted IDs (local cache): 39
To upsert now: 118
Upserted 100/118
✅ Upsert complete.
Local upsert cache saved to: pinecone_upserted_ids.json
Index stats: {'_response_info': {'raw_headers': {'connection': 'keep-alive',
                                    'content-length': '186',
                                    'content-type': 'application/json',
                                    'date': 'Sun, 28 Dec 2025 13:36:00 GMT',
                                    'grpc-status': '0',
                                    'server': 'envoy',
                                    'x-envoy-upstream-service-time': '33',
                                    'x-pinecone-request-id': '7029018262805765821',
                                    'x-pinecone-request-latency-ms': '33',
                                    'x-pinecone-response-duration-ms': '35'}},
 'dimension': 1536,
 'index_fullness': 0.0,
 'memoryFullness': 0.0,
 'metric': 'cosine

# RAG

In [19]:
def embed_query(text: str) -> np.ndarray:
    """
    Embed a single query string.

    Sends `text` as a one-item batch and returns its vector as a float32
    array of length EMBED_DIMS. Raises ValueError when the embedding call
    returns anything other than a (1, EMBED_DIMS) array.
    """
    batch = embed_texts_batch([text], model=EMBED_MODEL, dims=EMBED_DIMS)
    expected_shape = (1, EMBED_DIMS)
    if batch.shape == expected_shape:
        return batch[0].astype(np.float32)
    raise ValueError(f"Bad query embedding shape: {batch.shape}")

def retrieve_from_pinecone(query: str, top_k: int = RAG_TOP_K):
    """
    Embed `query` and return the `top_k` nearest vectors from the Pinecone index.

    Returns:
        A list of plain dicts, one per match, with keys:
        "id", "score" (float, or None when the match carries no score) and
        "metadata" (dict, possibly empty). The list is empty when the
        response holds no matches.
    """
    qvec = embed_query(query)
    res = index.query(
        vector=qvec.tolist(),
        top_k=top_k,
        include_metadata=True
    )
    # The response may be a plain dict or an object with attributes; handle both.
    # The previous `res.get("matches", res["matches"])` evaluated its default
    # eagerly, so a response without "matches" raised KeyError anyway.
    if isinstance(res, dict):
        matches = res.get("matches")
    else:
        matches = getattr(res, "matches", None)
    matches = matches or []

    out = []
    for m in matches:
        if isinstance(m, dict):
            mid = m.get("id", None)
            score = m.get("score", None)
            meta = m.get("metadata", {}) or {}
        else:
            mid = m.id
            score = m.score
            meta = m.metadata or {}
        out.append({"id": mid, "score": float(score) if score is not None else None, "metadata": meta})
    return out

def build_context(matches, max_chars: int = 12000) -> str:
    """
    Build a compact context block from retrieved matches.

    Each match becomes a header line (rank, talk_id, title, chunk_id, score)
    followed by its passage text; blocks are joined with a "---" separator.
    Matches are added in order until the next one would push the context past
    `max_chars` (separators included), which keeps the prompt size bounded.

    Args:
        matches: list of dicts with "score" (float or None) and "metadata".
        max_chars: upper bound on the length of the returned string.

    Returns:
        The joined context string, or "" when no block fits / no matches.
    """
    separator = "\n---\n"
    parts = []
    total = 0

    for rank, item in enumerate(matches, start=1):
        md = item.get("metadata") or {}
        title = str(md.get("title", ""))
        talk_id = str(md.get("talk_id", ""))
        chunk_id = md.get("chunk_id", "")
        text = str(md.get("text", "")) if RETRIEVE_INCLUDE_TEXT else ""

        # A match can carry score=None; formatting None with ".4f" raises
        # TypeError, so fall back to a placeholder.
        score = item.get("score")
        score_txt = f"{score:.4f}" if score is not None else "n/a"

        block = (
            f"[{rank}] talk_id={talk_id} | title={title} | chunk_id={chunk_id} | score={score_txt}\n"
            f"PASSAGE:\n{text}\n"
        )
        # Count the separator that join() inserts before every block but the
        # first, so the final string really stays within max_chars.
        cost = len(block) + (len(separator) if parts else 0)
        if total + cost > max_chars:
            break
        parts.append(block)
        total += cost

    return separator.join(parts).strip()

def print_retrieved(matches, text_chars: int = 400):
    """
    Print a readable listing of retrieved matches (debug aid).

    For each match: rank, score, talk_id, chunk_id and title, followed by the
    first `text_chars` characters of the stored passage ("..." marks a cut).
    A missing score is shown as "n/a" instead of raising on float formatting.
    """
    print("\n=== RETRIEVED MATCHES ===")
    for i, m in enumerate(matches, start=1):
        md = m.get("metadata", {}) or {}
        title = md.get("title", "")
        talk_id = md.get("talk_id", "")
        chunk_id = md.get("chunk_id", "")

        # score may be None; "{None:.4f}" would raise TypeError
        score = m.get("score", None)
        score_txt = f"{score:.4f}" if score is not None else "n/a"

        passage = (md.get("text", "") or "")
        passage = passage[:text_chars] + ("..." if len(passage) > text_chars else "")

        print(f"\n[{i}] score={score_txt} | talk_id={talk_id} | chunk_id={chunk_id} | title={title}")
        print("PASSAGE:", passage)


def call_chat_model(question: str, context: str) -> str:
    """
    Ask the chat model to answer `question` using only `context`.

    Sends REQUIRED_SYSTEM_PROMPT as the system message and a user message
    holding the context followed by the question to the llmod.ai
    chat completions endpoint.

    Returns:
        The content of the first choice's message.

    Raises:
        RuntimeError: on a non-retryable HTTP status, or when every attempt
            failed (the message carries the last HTTP status or exception).
    """
    url = f"{BASE_URL}/chat/completions"
    messages = [
        {"role": "system", "content": REQUIRED_SYSTEM_PROMPT},
        {"role": "user", "content": f"TED DATA CONTEXT:\n{context}\n\nQUESTION:\n{question}"},
    ]
    payload = {
        "model": CHAT_MODEL,
        "messages": messages,
    }

    # Retry transient failures with exponential backoff capped at 30s
    max_retries = 6
    last_err = None
    for attempt in range(max_retries):
        try:
            r = requests.post(url, headers=HEADERS, data=json.dumps(payload), timeout=90)
            if r.status_code == 200:
                data = r.json()
                return data["choices"][0]["message"]["content"]
            if r.status_code in (429, 500, 502, 503, 504):
                # Remember the status so the final error is informative
                # (it used to report "Last error: None" in this case).
                last_err = f"HTTP {r.status_code}: {r.text[:200]}"
                time.sleep(min(2 ** attempt, 30))
                continue
            # Any other status is treated as permanent
            raise RuntimeError(f"Chat error {r.status_code}: {r.text[:500]}")
        except requests.RequestException as e:
            last_err = e
            time.sleep(min(2 ** attempt, 30))
    raise RuntimeError(f"Chat failed after retries. Last error: {last_err}")

def rag_answer(question: str, top_k: int = RAG_TOP_K, min_matches: int = 1, debug: bool = True):
    """
    End-to-end RAG: retrieve -> build context -> ask model.

    Args:
        question: the user question.
        top_k: number of matches requested from the index.
        min_matches: minimum number of matches required to call the model.
        debug: when True, print the retrieved matches, a context preview and
            the hyperparameters to report.

    Returns:
        The model's answer, or the fixed "I don't know" sentence when fewer
        than `min_matches` matches were retrieved (the model is not called).
    """
    matches = retrieve_from_pinecone(question, top_k=top_k)
    # Printed once (the count used to be printed twice in debug mode)
    if debug:
        print(f"Retrieved: {len(matches)} matches (top_k={top_k})")
        print_retrieved(matches, text_chars=600)

    if len(matches) < min_matches:
        # Same wording and straight apostrophe as the refusal sentence in the
        # system prompt, so both refusal paths produce an identical string.
        return "I don't know based on the provided TED data."

    context = build_context(matches)
    if debug:
        print("\nContext preview (first 600 chars):")
        print(context[:600] + ("..." if len(context) > 600 else ""))
        print("\n---\nRAG hyperparameters to report:")
        print(f"chunk_size_tokens={RAG_CHUNK_SIZE_TOKENS}, overlap_ratio={RAG_OVERLAP_RATIO}, top_k={top_k}")

    answer = call_chat_model(question, context)
    return answer

In [20]:
# # ====== QUICK TEST ======
# q = "What is the main message of the talk about education and creativity?"
# print(rag_answer(q, top_k=RAG_TOP_K, debug=True))

# Testing

In [21]:
# One question per required category. "expected_hint" is shown next to the
# model answer for manual comparison; nothing is checked automatically.
test_questions = [
    # 1. Precise Fact Retrieval
    {
        "category": "Precise Fact Retrieval",
        "question": "In the talk about the 'birth of Wikipedia', how many employees does Jimmy Wales say were managing the site at the time of the recording?",
        "expected_hint": "Should mention a very small number (often cited as 'one' or a small group) or the specific role of volunteers."
    },

    # 2. Multi-Result Topic Listing (Up to 3 Results)
    {
        "category": "Multi-Result Topic Listing",
        "question": "Which talks focus on the environment, climate change, or sustainable design? Return a list of exactly 3 talk titles.",
        "expected_hint": "Likely candidates in first 30: 'Averting the climate crisis' (Al Gore), 'Greening the ghetto' (Majora Carter), 'Global warming's ruinous afterlife' (checking if in top 30) or other eco-focused talks."
    },

    # 3. Key Idea Summary Extraction
    {
        "category": "Key Idea Summary Extraction",
        "question": "Find the talk by Tony Robbins. Provide the title and a short summary of his explanation for 'why we do what we do'.",
        "expected_hint": "Title: 'Why we do what we do'. Summary: Focuses on the 'six human needs' and the 'invisible forces' that drive behavior."
    },

    # 4. Recommendation with Evidence-Based Justification
    {
        "category": "Recommendation with Evidence-Based Justification",
        "question": "I am interested in user interface design and how simplicity is better than complexity. Which talk should I watch?",
        "expected_hint": "Recommendation: 'Simplicity sells' by David Pogue. Justification: Mentions his critique of software interfaces and the importance of simplicity in technology."
    }
]

print(f"Running tests...\n")

# Each question runs with top_k=3 (overriding the RAG_TOP_K default) and debug output off.
for i, test in enumerate(test_questions, 1):
    print(f"\n{'='*60}")
    print(f"TEST {i}: {test['category']}")
    print(f"QUESTION: {test['question']}")
    print(f"{'-'*60}")

    try:
        response = rag_answer(test['question'], top_k=3, debug=False)

        print(f"MODEL ANSWER:\n{response}")
        print(f"\n[Expected Logic/Hint]: {test['expected_hint']}")

    # A failure on one question is printed and the loop moves on to the next.
    except Exception as e:
        print(f"ERROR: {e}")

Running tests...


TEST 1: Precise Fact Retrieval
QUESTION: In the talk about the 'birth of Wikipedia', how many employees does Jimmy Wales say were managing the site at the time of the recording?
------------------------------------------------------------
MODEL ANSWER:
I don't know based on the provided TED data. The supplied transcript passages say Wikipedia was “managed by virtually an all‑volunteer staff” and note that “we actually hired [Brian]” after he worked part‑time and then full‑time, but none of the provided excerpts states how many paid employees were managing the site at the time. For example: “everything about Wikipedia is managed by virtually an all‑volunteer staff” and “we actually hired him,” but no total employee number is given in the supplied passages.

[Expected Logic/Hint]: Should mention a very small number (often cited as 'one' or a small group) or the specific role of volunteers.

TEST 2: Multi-Result Topic Listing
QUESTION: Which talks focus on the environme

In [22]:
# Second set of questions, one per category, in the same format as the first set.
# "expected_hint" is shown for manual comparison only.
additional_questions = [
    {
        "category": "Precise Fact Retrieval",
        "question": "In the talk 'The freakonomics of crack dealing', what does Steven Levitt say about the actual hourly wage of a street-corner crack dealer?",
        "expected_hint": "He states it is very low, specifically 'below minimum wage' (or compares it to working at McDonald's)."
    },
    {
        "category": "Multi-Result Topic Listing",
        "question": "Which talks discuss 'happiness', 'psychology', or 'choice'? Return a list of exactly 3 talk titles.",
        "expected_hint": "Should find titles like: 'The surprising science of happiness' (Dan Gilbert), 'The paradox of choice' (Barry Schwartz), 'Happiness in body and soul' (Eve Ensler), or 'Choice, happiness and spaghetti sauce' (Malcolm Gladwell)."
    },
    {
        "category": "Key Idea Summary Extraction",
        "question": "Find the talk by Malcolm Gladwell about spaghetti sauce. Provide the title and summarize his main conclusion about 'perfection' and 'choice'.",
        "expected_hint": "Title: 'Choice, happiness and spaghetti sauce'. Summary: The key idea is that there is no single perfect product for everyone, but rather 'perfect swarms' or segments (horizontal segmentation)."
    },
    {
        "category": "Recommendation with Evidence-Based Justification",
        "question": "I am interested in architecture and want to see a deep dive into the design process of a specific building. Which talk would you recommend?",
        "expected_hint": "Recommendation: 'Behind the design of Seattle's library' by Joshua Prince-Ramus. Justification: He gives a tour of the Seattle Central Library and explains the hyper-rational design process."
    }
]

print(f"Running ADDITIONAL tests ...\n")

# Same procedure as the first test cell: top_k=3, debug output off.
for i, test in enumerate(additional_questions, 1):
    print(f"\n{'='*60}")
    print(f"ADDITIONAL TEST {i}: {test['category']}")
    print(f"QUESTION: {test['question']}")
    print(f"{'-'*60}")

    try:
        response = rag_answer(test['question'], top_k=3, debug=False)
        print(f"MODEL ANSWER:\n{response}")
        print(f"\n[Expected Logic/Hint]: {test['expected_hint']}")

    # A failure on one question is printed and the loop moves on to the next.
    except Exception as e:
        print(f"ERROR: {e}")

Running ADDITIONAL tests ...


ADDITIONAL TEST 1: Precise Fact Retrieval
QUESTION: In the talk 'The freakonomics of crack dealing', what does Steven Levitt say about the actual hourly wage of a street-corner crack dealer?
------------------------------------------------------------
MODEL ANSWER:
I don't know based on the provided TED data.

The supplied transcript excerpts describe Levitt's access to the gang's financial records, compare gang pay to McDonald's (saying "the money looks about the same"), and discuss compensating differentials and foot‑soldier pay, but none of the provided passages state the actual hourly wage figure for a street‑corner crack dealer. For example, chunk 0 notes he looked at "the financial records of the gang," chunk 3 says "the relationship to McDonald's breaks down here. The money looks about the same," and chunk 4 discusses pay adjustments in wartime, but no numeric hourly wage appears in the provided snippets.

[Expected Logic/Hint]: He states it is ver