Self-Correcting RAG

In [2]:
import os
import argparse
import json
import textwrap
import re
from typing import List, Dict, Any, Tuple
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline
import spacy
from dateutil import parser as dateparser

In [139]:
import warnings
import logging
import os
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from the ML libraries,
# which makes API drift harder to notice.
warnings.filterwarnings("ignore")

# Only let ERROR-level (and above) records through for these library loggers.
logging.getLogger("transformers").setLevel(logging.ERROR)
logging.getLogger("torch").setLevel(logging.ERROR)
logging.getLogger("sentence_transformers").setLevel(logging.ERROR)

# Presumably read by the Hugging Face tokenizers library to disable its
# parallelism (and the related fork warning) - confirm against its docs.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

print("‚úÖ All noisy logs silenced.")


‚úÖ All noisy logs silenced.


In [140]:
# ---------------- CONFIG ----------------
# Model identifiers and tuning knobs shared by the cells below.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # sentence-embedding model name
GEN_LOCAL_MODEL = "google/flan-t5-large"   # text2text model name used for answer generation
QA_MODEL = "deepset/roberta-base-squad2"  # extractive question-answering model name
MNLI_MODEL = "roberta-large-mnli"  # NLI (entailment) model name
TOP_K = 8  # default number of chunks requested from the retriever
RELEVANCE_THRESHOLD = 0.62  # presumably the minimum retrieval score for a chunk to count as relevant
FACT_SUPPORT_THRESHOLD = 0.5  # presumably the minimum supported-claim fraction for accepting an answer
USE_OPENAI = False  # OpenAI backend switch (off)
OPENAI_MODEL = None  # no OpenAI model configured

In [141]:
import sys

# upgrade tooling first
#!{sys.executable} -m pip install -U pip setuptools wheel

# ensure spaCy itself is up-to-date in this kernel
#!{sys.executable} -m pip install -U spacy

# download & install the small English model into this environment
#!{sys.executable} -m spacy download en_core_web_sm


In [142]:
spacy.load('en_core_web_sm')

<spacy.lang.en.English at 0x10962e69b90>

In [192]:
import re

def extract_year_from_question(question: str):
    """
    Extract every standalone 4-digit year (1900-2099) mentioned in the question.

    Args:
        question: Free-text question. ``None`` or an empty string is treated
            as "no year mentioned".

    Returns:
        A set of year strings, e.g. ``{"2018", "2024"}``. The set is empty
        (never ``None``) when no year is found.
    """
    if not question:
        return set()
    # The digit look-arounds stop a "year" from being carved out of a longer
    # number such as an ID ("120195") while still accepting "2010s" or "2018-19".
    return set(re.findall(r"(?<!\d)(19\d{2}|20\d{2})(?!\d)", question))



def parse_doc_year(date_str: str):
    """
    Read the leading 4-digit year from a document date value.

    Args:
        date_str: Date value whose text form starts with the year, e.g.
            ``"2018-02-03 00:00:00"``. Non-string values are converted with
            ``str()`` first.

    Returns:
        The year as an ``int``, or ``None`` when the value is missing or does
        not start with four digits.
    """
    if not date_str:
        return None
    # Require exactly four leading digits: a bare int() on the first four
    # characters would accept things like "18", "+201" or "1_00" as years.
    match = re.match(r"\s*(\d{4})", str(date_str))
    return int(match.group(1)) if match else None

def apply_strict_date_filter(retrieved_docs, question: str):
    """
    Keep only the retrieved docs whose year matches a year named in the question.

    Args:
        retrieved_docs: List of retrieval results; each is a dict with a
            ``"meta"`` dict that may carry a ``"date"`` entry.
        question: The user question.

    Returns:
        - the input list unchanged when the question names no year;
        - otherwise the docs whose year is one of the question's years.
          This may be an empty list: there is deliberately no fallback.
    """
    # extract_year_from_question returns a set of year *strings* while
    # parse_doc_year returns an int, so normalise to ints before comparing.
    target_years = {int(y) for y in (extract_year_from_question(question) or ())}
    if not target_years:
        print("üìÖ No year found in question ‚Üí Skipping date filter.")
        return retrieved_docs

    filtered = [
        r for r in retrieved_docs
        if parse_doc_year(r["meta"].get("date", "")) in target_years
    ]

    year_label = ", ".join(str(y) for y in sorted(target_years))
    print(f"üìÖ Date filter for year {year_label}: kept {len(filtered)} docs")

    return filtered

def filter_docs_by_year(docs, question):
    """
    Lenient year filter: keep docs dated within one year of a year in the question.

    Args:
        docs: List of retrieval results; each is a dict with a ``"meta"`` dict
            that may carry a ``"date"`` entry.
        question: The user question.

    Returns:
        - ``docs`` unchanged when the question names no year;
        - ``docs`` unchanged (with a warning) when the filter would remove
          everything;
        - otherwise the docs whose year is within +/-1 of any question year.
    """
    # The question years come back as a set of strings; compare as ints.
    q_years = {int(y) for y in (extract_year_from_question(question) or ())}
    if not q_years:
        return docs

    filtered = []
    for d in docs:
        doc_year = parse_doc_year(d["meta"].get("date", ""))
        if doc_year is None:
            continue
        if any(abs(doc_year - y) <= 1 for y in q_years):
            filtered.append(d)

    if not filtered:
        print("‚ö†Ô∏è Date filter removed all docs. Using original retrieved set.")
        return docs

    year_label = ", ".join(str(y) for y in sorted(q_years))
    print(f"‚úÖ Date Filter Applied ‚Üí {len(filtered)} / {len(docs)} docs kept for year {year_label}")
    return filtered


In [169]:
nlp = spacy.load("en_core_web_sm")

def split_sentences(text: str) -> List[str]:
    """
    Split text into sentence-like pieces.

    The (stripped) text is cut at every run of whitespace that follows a
    '.', '?' or '!'; empty pieces are discarded.
    """
    pieces = re.split(r'(?<=[\.\?\!])\s+', text.strip())
    return list(filter(None, pieces))

def normalize_dates(text: str) -> List[str]:
    """
    Collect ISO-formatted dates for the DATE entities spaCy finds in the text.

    Each DATE entity is handed to dateutil's fuzzy parser; entities that
    cannot be parsed are skipped. Returns the ISO date strings in entity order.
    """
    iso_dates = []
    for entity in nlp(text).ents:
        if entity.label_ != "DATE":
            continue
        try:
            parsed = dateparser.parse(entity.text, fuzzy=True)
            if parsed:
                iso_dates.append(parsed.date().isoformat())
        except Exception:
            # Unparseable entity text: ignore it and move on.
            continue
    return iso_dates
    
def build_index(docs: List[str], embed_model_name=EMBED_MODEL):
    """
    Embed the documents and load them into a flat inner-product FAISS index.

    Args:
        docs: Texts to embed.
        embed_model_name: SentenceTransformer model name.

    Returns:
        ``(index, embeddings, embedder)`` - the FAISS index, the L2-normalised
        embedding matrix that was added to it, and the embedding model.
    """
    model = SentenceTransformer(embed_model_name)
    vectors = model.encode(docs, convert_to_numpy=True, show_progress_bar=True)
    # Normalises the rows in place (the return value is not used).
    faiss.normalize_L2(vectors)
    flat_index = faiss.IndexFlatIP(vectors.shape[1])
    flat_index.add(vectors)
    return flat_index, vectors, model

def ingest_ccnews(num_docs=2000, max_chars=6000, chunk_size=2000):
    """
    Load a slice of the CC-NEWS dataset and cut each article into text chunks.

    Args:
        num_docs: Number of articles requested from the start of the train split.
        max_chars: Each article's text is truncated to this many characters.
        chunk_size: Maximum number of characters per chunk (must be positive).

    Returns:
        ``(docs, meta)`` - two parallel lists. ``docs[i]`` is the article title,
        a blank line, and one chunk of text; ``meta[i]`` is a dict with
        ``doc_id``, ``chunk_id``, ``title``, ``url``, ``date`` and ``source``.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    print("Loading CC-NEWS subset from HuggingFace (vblagoje/cc_news)...")
    ds = load_dataset("vblagoje/cc_news", split=f"train[:{num_docs}]")
    docs = []
    meta = []
    for doc_id, ex in enumerate(ds):
        text = (ex.get("text") or "")[:max_chars]
        title = ex.get("title") or ""
        url = ex.get("url") or ""
        date = ex.get("publish_date") or ex.get("date") or ""
        if not text.strip():
            # Articles without body text contribute no chunks.
            continue

        # Distinct names for the article index and the chunk offset: the
        # original reused `i` for both, which only worked because
        # comprehension variables do not leak in Python 3.
        for chunk_id, start in enumerate(range(0, len(text), chunk_size)):
            chunk = text[start:start + chunk_size]
            docs.append(title + "\n\n" + chunk)
            meta.append({"doc_id": doc_id, "chunk_id": chunk_id, "title": title, "url": url, "date": date, "source": ex.get("source", "")})
    print(f"Prepared {len(docs)} chunks from {num_docs} articles.")
    return docs, meta

In [170]:
class Retriever:
    """Bundles a FAISS index with its embedder, chunk texts and chunk metadata."""

    def __init__(self, index, embeddings, embedder, docs, meta):
        self.index, self.embeddings, self.embedder = index, embeddings, embedder
        self.docs, self.meta = docs, meta

    def query(self, q: str, k=TOP_K):
        """
        Return up to ``k`` hits for the query string.

        Each hit is a dict with the chunk's ``meta``, its ``text``, the
        search ``score`` (as a float) and the chunk's position ``idx``.
        Slots the index reports as -1 are skipped.
        """
        query_vec = self.embedder.encode([q], convert_to_numpy=True)
        faiss.normalize_L2(query_vec)
        scores, positions = self.index.search(query_vec, k)

        return [
            {
                "meta": self.meta[pos],
                "text": self.docs[pos],
                "score": float(score),
                "idx": int(pos),
            }
            for score, pos in zip(scores[0], positions[0])
            if pos != -1
        ]


In [171]:
from datetime import datetime

def relevance_filter_with_year(question, retrieved, threshold=0.35, fallback_k=3):
    """
    Keep retrieved docs that score at least ``threshold`` and, when the
    question names a year, are dated in one of those years.

    Args:
        question: The user question.
        retrieved: List of retrieval results (dicts with ``"score"`` and ``"meta"``).
        threshold: Minimum retrieval score to keep a doc.
        fallback_k: How many top results to return when nothing passes.

    Returns:
        The surviving docs, or the first ``fallback_k`` retrieved docs when the
        filters remove everything.
    """
    # The question years arrive as a set of strings; compare as ints.
    years = {int(y) for y in (extract_year_from_question(question) or ())}

    kept = []
    for r in retrieved:
        if r["score"] < threshold:
            continue

        if years:
            try:
                doc_year = int(str(r["meta"].get("date", ""))[:4])
            except ValueError:
                # Missing or malformed date: cannot satisfy a year constraint.
                doc_year = None
            if doc_year not in years:
                continue

        kept.append(r)

    if not kept:
        print("‚ö†Ô∏è Date filter removed all docs. Using original retrieved set.")
        kept = retrieved[:fallback_k]

    return kept


In [172]:
import requests

def fallback_entity_lookup(entity: str) -> str | None:
    if not entity:
        return None
    
    url = "https://en.wikipedia.org/api/rest_v1/page/summary/" + entity.replace(" ", "%20")
    
    try:
        r = requests.get(url, timeout=10)
        if r.status_code != 200:
            return None
        
        data = r.json()
        return data.get("extract")
    
    except Exception:
        return None


In [186]:
import re

def answer_type_matches_question(question: str, answer: str) -> bool:
    """
    Heuristic check that the answer's shape fits the kind of question asked.

    Rules, applied in order:
      - "which team/club/country" -> answer has at most 5 words;
      - "who" / "whom" / "whose"  -> answer has at least 2 words;
      - "when" / "which year"     -> answer contains a 19xx or 20xx year;
      - anything else             -> always accepted.

    Args:
        question: The user question.
        answer: The generated answer.

    Returns:
        ``True`` when the answer passes the rule for the question type.
    """
    q = question.lower()
    words = answer.strip().split()

    if any(x in q for x in ["which team", "which club", "which country"]):
        return len(words) <= 5

    # Match whole words only: a plain substring test also fired on
    # "whole" (contains "who") and "whenever" (contains "when").
    if re.search(r"\bwho(?:m|se)?\b", q):
        return len(words) >= 2

    if re.search(r"\bwhen\b", q) or "which year" in q:
        return bool(re.search(r"\b(19|20)\d{2}\b", answer))

    return True


In [174]:
def extract_main_entity(question: str) -> str:
    """
    Very simple heuristic for the main entity of a question.

    Joins, in order, every token that starts with an uppercase letter, after
    stripping surrounding punctuation and dropping capitalised question or
    function words ("Who", "Which", "The", ...). Acronyms such as "UEFA" count.

    Args:
        question: The user question.

    Returns:
        The joined capitalised tokens, or the original question when none
        are found.
    """
    # Sentence-initial words are capitalised regardless of meaning, so they
    # must not be mistaken for part of a name.
    skip_words = {
        "who", "whom", "whose", "what", "which", "when", "where", "why", "how",
        "did", "does", "do", "is", "are", "was", "were", "the", "a", "an", "in",
    }

    candidates = []
    for raw in question.split():
        token = raw.strip(".,;:!?\"'()[]")
        if not token or not token[0].isupper():
            continue
        if token.lower() in skip_words:
            continue
        candidates.append(token)

    return " ".join(candidates) if candidates else question

In [193]:

USE_OPENAI = False  #  disabled

from transformers import pipeline as hf_pipeline

# Lazily created text2text pipeline, shared by every generate_answer() call.
_local_generator = None

def generate_answer(question: str, docs_texts: List[str]):
    """
    Generate an answer to the question from the supplied document texts.

    Args:
        question: The user question.
        docs_texts: Candidate document texts; only the first 6 go into the prompt.

    Returns:
        The generated text, stripped of surrounding whitespace.
    """
    global _local_generator

    prompt = "Use the following documents to answer the question.\n\n"
    prompt += "\n\n---\n\n".join(docs_texts[:6])
    prompt += f"\n\nQuestion: {question}\n\nAnswer:"

    # Build the pipeline once and reuse it: constructing it on every call
    # reloads the whole model for each question.
    if _local_generator is None:
        _local_generator = pipeline(
            "text2text-generation",
            model=GEN_LOCAL_MODEL,
            # GPU 0 only when CUDA_VISIBLE_DEVICES is set, otherwise CPU.
            device=0 if os.getenv("CUDA_VISIBLE_DEVICES") else -1
        )

    out = _local_generator(prompt, max_length=256, do_sample=False)[0]["generated_text"]
    return out.strip()



def relevance_filter(question, retrieved, threshold=None, fallback_k=3):
    """
    Score-only relevance filter used by run_pipeline.

    No cell in this notebook defined this name, so run_pipeline failed with a
    NameError on a fresh kernel. Year handling is left to run_pipeline's own
    year-mismatch check.

    Args:
        question: The user question (unused; kept for the call signature).
        retrieved: List of retrieval results (dicts with a ``"score"``).
        threshold: Minimum score to keep; defaults to RELEVANCE_THRESHOLD.
        fallback_k: How many top results to keep when nothing passes.

    Returns:
        Docs scoring at least ``threshold``, or the first ``fallback_k`` docs.
    """
    if threshold is None:
        threshold = RELEVANCE_THRESHOLD
    kept = [r for r in retrieved if r["score"] >= threshold]
    return kept if kept else retrieved[:fallback_k]


def run_pipeline(question: str, retriever, verbose=True):
    """
    Retrieve, generate, fact-check and validate an answer to the question.

    Steps: retrieve TOP_K chunks, filter by relevance, generate an answer from
    the kept chunks, fact-check it against them, then reject it if the answer
    type or the document years do not fit the question.

    Args:
        question: The user question.
        retriever: Object exposing ``query(question, k=...)``.
        verbose: Print progress information when true.

    Returns:
        A dict with ``final_answer``, ``source`` and ``fact_check``. Rejected
        answers additionally carry ``initial_answer``.
    """
    retrieved = retriever.query(question, k=TOP_K)

    if verbose:
        print("\nüîç Retrieved:")
        for r in retrieved:
            print(f" - {r['score']:.3f} | {r['meta'].get('date','')} | {r['meta'].get('title','')[:60]}")

    kept = relevance_filter(question, retrieved)

    if verbose:
        print(f"\n‚úÖ Kept {len(kept)} docs after relevance filtering.")

    if not kept:
        # Nothing to ground an answer on: do not ask the generator to guess.
        if verbose:
            print("No documents retrieved; skipping generation.")
        return {
            "final_answer": "No verified evidence found.",
            "source": "RAG-Rejected",
            "initial_answer": "",
            "fact_check": {
                "claim_results": [],
                "support_fraction": 0.0,
                "num_claims": 0,
                "type_mismatch": False,
                "year_mismatch": False,
            },
        }

    kept_texts = [k["text"] for k in kept]

    answer = generate_answer(question, kept_texts)

    if verbose:
        print("\nüß† Generated Answer:\n", answer)

    fc = fact_check_answer(answer, kept)

    if verbose:
        print("\nüìä Fact-check support fraction:", fc["support_fraction"], f"({fc['num_claims']} claims)")

    fc["type_mismatch"] = False
    if answer and not answer_type_matches_question(question, answer):
        fc["support_fraction"] = 0.0
        fc["type_mismatch"] = True
        if verbose:
            print("‚ùå Answer type does not match question intent.")

    # Year validation: at least one kept doc must be dated in a year that the
    # question mentions. Both sides are compared as 4-character strings.
    question_years = extract_year_from_question(question)
    doc_years = set()

    for d in kept:
        year = str(d["meta"].get("date", "") or "")[:4]
        if len(year) == 4:
            doc_years.add(year)

    fc["year_mismatch"] = False

    if question_years:
        if not doc_years:
            fc["year_mismatch"] = True
            fc["support_fraction"] = 0.0
            if verbose:
                print("‚ùå Year mismatch: No year found in retrieved documents.")
        elif not (question_years & doc_years):
            fc["year_mismatch"] = True
            fc["support_fraction"] = 0.0
            if verbose:
                print(f"‚ùå Year mismatch: question={question_years}, docs={doc_years}")

    rejected = (
        fc["support_fraction"] < FACT_SUPPORT_THRESHOLD
        or fc.get("type_mismatch")
        or fc.get("year_mismatch")
    )
    if rejected:
        return {
            "final_answer": "No verified evidence found.",
            "source": "RAG-Rejected",
            "initial_answer": answer,
            "fact_check": fc,
        }

    return {
        "final_answer": answer,
        "source": "RAG (Verified)",
        "fact_check": fc,
    }
    



In [176]:
_qa = None
_mnli = None
def get_qa(device=-1):
    """
    Return the shared question-answering pipeline, creating it on first use.

    ``device`` is only used when the pipeline is first created; later calls
    return the cached instance regardless of the value passed.
    """
    global _qa
    if _qa is not None:
        return _qa
    _qa = pipeline("question-answering", model=QA_MODEL, tokenizer=QA_MODEL, device=device)
    return _qa

def get_mnli(device=-1):
    """
    Return the shared NLI text-classification pipeline, creating it on first use.

    ``device`` is only used when the pipeline is first created; later calls
    return the cached instance regardless of the value passed.
    """
    global _mnli
    if _mnli is not None:
        return _mnli
    _mnli = pipeline("text-classification", model=MNLI_MODEL, tokenizer=MNLI_MODEL, device=device, return_all_scores=True)
    return _mnli

In [177]:
def extractive_spans_for_claim(claim: str, candidate_docs: List[Dict], top_k_spans=4):
    """
    Find the best extractive QA spans for a claim across the candidate docs.

    The claim is used as the "question" for the QA pipeline against each
    doc's text; the single best span per doc is kept when its score is
    above 0.01.

    Args:
        claim: The claim text.
        candidate_docs: Docs as dicts with ``"text"`` and ``"meta"``.
        top_k_spans: Maximum number of spans to return.

    Returns:
        Up to ``top_k_spans`` dicts (``span``, ``score``, ``doc_meta``,
        ``context_sentence``, ``context_snippet``), best score first.
    """
    qa = get_qa()
    spans = []
    for d in candidate_docs:
        context = d["text"]
        try:
            # `top_k` is the current keyword; the old `topk` spelling is
            # deprecated, and because errors are swallowed below a rejected
            # keyword would silently yield no spans at all.
            out = qa(question=claim, context=context, top_k=1)
            if isinstance(out, list):
                out = out[0]
            ans = out.get("answer", "").strip()
            score = float(out.get("score", 0.0))
            if ans and score > 0.01:
                sent = extract_sentence(context, ans)
                spans.append({"span": ans, "score": score, "doc_meta": d["meta"], "context_sentence": sent, "context_snippet": context[:500]})
        except Exception:
            # Best effort: a doc the QA model cannot process is skipped.
            continue
    spans_sorted = sorted(spans, key=lambda x: x["score"], reverse=True)
    return spans_sorted[:top_k_spans]

In [178]:
def extract_sentence(context: str, span: str):
    """
    Return the sentence of ``context`` that contains ``span``.

    Sentence boundaries are '.', '?' and '!' (the same terminators used by
    split_sentences).

    Args:
        context: The full text.
        span: The substring to locate.

    Returns:
        The stripped sentence around the first occurrence of ``span``, or the
        first 200 characters of ``context`` when the span is not found.
    """
    idx = context.find(span)
    if idx == -1:
        return context[:200]

    terminators = ".?!"

    # Sentence start: just after the closest terminator before the span.
    start = max(context.rfind(ch, 0, idx) for ch in terminators) + 1

    # Sentence end: first terminator at or after the span's LAST character,
    # so a terminator inside the span (e.g. the dot in "1.5 million") does
    # not cut the sentence short.
    search_from = max(idx, idx + len(span) - 1)
    hits = [pos for pos in (context.find(ch, search_from) for ch in terminators) if pos != -1]
    end = min(hits) + 1 if hits else len(context)

    return context[start:end].strip()

def mnli_score(span: str, claim: str):
    """
    Score how strongly ``span`` entails ``claim`` with the shared NLI pipeline.

    Returns:
        ``(entailment, label_scores)`` - the entailment score and a dict of
        every label's score. If classification fails, returns ``0.0`` and a
        placeholder dict with NEUTRAL set to 1.0.
    """
    classifier = get_mnli()
    try:
        raw = classifier(f"{span} </s></s> {claim}")
        # The pipeline output is either a list of score dicts or that list
        # wrapped in one more list; unwrap the latter.
        is_nested = isinstance(raw, list) and isinstance(raw[0], list)
        rows = raw[0] if is_nested else raw
        scores_by_label = {row['label']: float(row['score']) for row in rows}
        entailment = scores_by_label.get("ENTAILMENT", scores_by_label.get("entailment",0.0))
        return entailment, scores_by_label
    except Exception:
        return 0.0, {"ENTAILMENT":0.0,"NEUTRAL":1.0,"CONTRADICTION":0.0}

In [179]:
# Lazily created NLI pipeline, shared by every fact_check_answer() call.
_fact_check_nli = None


def fact_check_answer(answer: str, candidate_docs: list, entail_threshold: float = 0.60):
    """
    Check each sentence of the answer against the candidate documents with NLI.

    - The answer is split into claims (one per sentence).
    - Each document is split into sentences.
    - The NLI model is run on every (document sentence, claim) pair.
    - A claim is supported as soon as one sentence reaches ``entail_threshold``.

    Args:
        answer: The generated answer.
        candidate_docs: Docs as dicts with ``"text"`` and ``"meta"``.
        entail_threshold: Minimum entailment score for a claim to count as
            supported.

    Returns:
        A dict with ``claim_results`` (per claim: ``claim``, ``supported``,
        ``best_evidence``), ``support_fraction`` and ``num_claims``.
    """
    global _fact_check_nli
    # Build the classifier once: constructing it on every call reloads the
    # whole model for each answer.
    if _fact_check_nli is None:
        _fact_check_nli = pipeline("text-classification", model=MNLI_MODEL, top_k=None)
    nli = _fact_check_nli

    claims = split_sentences(answer)

    # Split every document once instead of once per claim.
    doc_sentences = [
        (doc["meta"], split_sentences(doc["text"]))
        for doc in candidate_docs
    ]

    results = []

    for claim in claims:
        claim = claim.strip()
        supported = False
        best_match = None
        best_score = 0.0

        for meta, sentences in doc_sentences:
            for sent in sentences:
                out = nli(f"{sent} </s></s> {claim}")
                # Accept both output shapes: a list of score dicts, or that
                # list wrapped in one more list.
                rows = out[0] if out and isinstance(out[0], list) else out

                label_scores = {x["label"]: x["score"] for x in rows}
                entail = label_scores.get("ENTAILMENT", label_scores.get("entailment", 0.0))

                if entail > best_score:
                    best_score = entail
                    best_match = {
                        "sentence": sent,
                        "entailment": entail,
                        "doc_meta": meta
                    }

                if entail >= entail_threshold:
                    supported = True
                    break

            if supported:
                break

        results.append({
            "claim": claim,
            "supported": supported,
            "best_evidence": best_match
        })

    # max(1, ...) avoids a division by zero when the answer has no claims.
    support_fraction = sum(1 for r in results if r["supported"]) / max(1, len(results))

    return {
        "claim_results": results,
        "support_fraction": support_fraction,
        "num_claims": len(results)
    }


In [180]:
from IPython.display import display, Markdown

def rag_output(result):
    """
    Render a pipeline result as Markdown in the notebook.

    Shows the final answer and its source, then the fact-check summary and a
    per-claim breakdown. When there is no fact-check data, or it contains zero
    claims, a short notice is shown instead.
    """
    def show(md_text):
        # Every piece of output goes through the same Markdown renderer.
        display(Markdown(md_text))

    show("## üß† RAG Pipeline Result")

    show("### ‚úÖ Final Answer")
    show(result.get("final_answer", "No answer"))

    show(f"**Source:** {result.get('source','')}")

    fc = result.get("fact_check")
    has_claims = bool(fc) and fc.get("num_claims", 0) != 0

    if not has_claims:
        show("### ‚ö†Ô∏è No claim-level evidence available")
        show("This usually happens when the model produced a very short answer or failed validation.")
        return

    show("### üìä Fact-Check Summary")
    show(f"- Claims detected: {fc['num_claims']}")
    show(f"- Support fraction: {round(fc['support_fraction'], 2)}")

    mismatch_banners = (
        ("type_mismatch", "‚ùå **Answer type mismatch detected**"),
        ("year_mismatch", "‚ùå **Year mismatch detected**"),
    )
    for flag, banner in mismatch_banners:
        if fc.get(flag):
            show(banner)

    show("---")
    show("## üîç Claim-Level Breakdown")

    for number, claim_result in enumerate(fc["claim_results"], start=1):
        if claim_result["supported"]:
            status = "‚úÖ Supported"
        else:
            status = "‚ùå Unsupported"
        show(f"### Claim {number}: {status}")
        show(f"**Claim:** {claim_result['claim']}")

        best = claim_result.get("best_evidence")
        if not best:
            continue
        show("**Best Evidence:**")
        show(f"- Sentence: {best['sentence']}")
        show(f"- Title: {best['doc_meta']['title']}")
        show(f"- Date: {best['doc_meta']['date']}")
        show(f"- Entailment: {round(best['entailment'], 3)}")


In [181]:
# On-disk artefacts written by build_and_save_index() and read by load_index().
META_F = "ccnews_meta.jsonl"    # one JSON metadata object per line
INDEX_F = "ccnews_index.faiss"  # serialized FAISS index
DOCS_F = "ccnews_docs.jsonl"    # one {"text": ...} JSON object per line
EMBS_F = "ccnews_embs.npy"      # embedding matrix saved with np.save
EMBED_MODEL_NAME = EMBED_MODEL  # embedding model name used when reloading the index

In [190]:
def build_and_save_index(num_docs: int = 2000, overwrite: bool = False):
    """
    Build the CC-NEWS index and persist it, or reuse the one already on disk.

    When the index file exists and ``overwrite`` is false, nothing is rebuilt
    and the saved artefacts are loaded instead. Otherwise the articles are
    ingested, embedded and indexed, and the index, embeddings, metadata and
    chunk texts are written to disk.

    Returns:
        ``(index, embeddings, embedder, docs, meta)``
    """
    reuse_existing = (not overwrite) and os.path.exists(INDEX_F)
    if reuse_existing:
        print(f"Index file {INDEX_F} already exists. Set overwrite=True to rebuild.")
        return load_index(return_all=True)

    print(f"Building index with {num_docs} articles (this may take a while)...")
    docs, meta = ingest_ccnews(num_docs=num_docs)
    index, embeddings, embedder = build_index(docs, embed_model_name=EMBED_MODEL)

    # Persist the index and embeddings, then metadata and texts as JSON lines.
    faiss.write_index(index, INDEX_F)
    np.save(EMBS_F, embeddings)
    with open(META_F, "w", encoding="utf8") as meta_file:
        meta_file.writelines(json.dumps(entry) + "\n" for entry in meta)
    with open(DOCS_F, "w", encoding="utf8") as docs_file:
        docs_file.writelines(json.dumps({"text": chunk}) + "\n" for chunk in docs)

    print("Index built and saved to disk.")
    return index, embeddings, embedder, docs, meta

def load_index(return_all: bool = False):
    """
    Load the saved index, embeddings, chunk texts and metadata from disk.

    Args:
        return_all: When true, return the raw pieces instead of a Retriever.

    Returns:
        ``(index, embeddings, embedder, docs, meta)`` when ``return_all`` is
        true, otherwise a ``Retriever`` built from them.

    Raises:
        FileNotFoundError: if any of the saved artefacts is missing.
    """
    # Check every artefact up front so a partially written index fails with
    # one clear message instead of midway through loading.
    missing = [path for path in (INDEX_F, EMBS_F, DOCS_F, META_F) if not os.path.exists(path)]
    if missing:
        raise FileNotFoundError(
            "Index not found. Run build_and_save_index() first. Missing: " + ", ".join(missing)
        )

    print("Loading index & metadata from disk...")

    index = faiss.read_index(INDEX_F)
    embeddings = np.load(EMBS_F)
    embedder = SentenceTransformer(EMBED_MODEL_NAME)

    with open(DOCS_F, "r", encoding="utf8") as f:
        docs = [json.loads(line)["text"] for line in f if line.strip()]

    with open(META_F, "r", encoding="utf8") as f:
        meta = [json.loads(line) for line in f if line.strip()]

    print("Loaded index. Chunks:", len(docs))

    if return_all:
        return index, embeddings, embedder, docs, meta

    # Only build the Retriever when the caller actually wants one.
    return Retriever(index, embeddings, embedder, docs, meta)


def query_pipeline(question: str, retriever=None, verbose=True):
    """
    Run the full pipeline for one question and print a JSON summary.

    Args:
        question: The user question.
        retriever: Retriever to use; loaded from disk when ``None``.
        verbose: Forwarded to run_pipeline.

    Returns:
        The result dict produced by run_pipeline.
    """
    if retriever is None:
        retriever = load_index()

    out = run_pipeline(question, retriever, verbose=verbose)

    print("\n--- PIPELINE RESULT SUMMARY ---")
    try:
        # default=str keeps the summary printable when the result holds
        # values the json module cannot encode natively.
        print(json.dumps(out, indent=2, default=str))
    except (TypeError, ValueError) as exc:
        # Say why the summary is missing rather than leaving a bare header.
        print(f"(could not serialise result as JSON: {exc})")

    return out

def rebuild_index_and_reload(num_docs: int = 2000):
    """
    Force a rebuild of the on-disk index, then load it back as a Retriever.
    """
    build_and_save_index(overwrite=True, num_docs=num_docs)
    retriever = load_index()
    return retriever

print("Jupyter helper functions loaded: build_and_save_index, load_index, query_pipeline, rebuild_index_and_reload")

Jupyter helper functions loaded: build_and_save_index, load_index, query_pipeline, rebuild_index_and_reload


In [38]:
#index, embs, embedder, docs, meta = build_and_save_index(num_docs=1000, overwrite=True)

Building index with 1000 articles (this may take a while)...
Loading CC-NEWS subset from HuggingFace (vblagoje/cc_news)...
Prepared 1694 chunks from 1000 articles.


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 53/53 [01:19<00:00,  1.49s/it]

Index built and saved to disk.





In [36]:
retriever = rebuild_index_and_reload(num_docs=1000)

Building index with 1000 articles (this may take a while)...
Loading CC-NEWS subset from HuggingFace (vblagoje/cc_news)...
Prepared 1694 chunks from 1000 articles.


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 53/53 [01:27<00:00,  1.64s/it]


Index built and saved to disk.
Loading index & metadata from disk...
Loaded index. Chunks: 1694


In [77]:
out = query_pipeline("Who won the 2024 UEFA Champions League final?", retriever, verbose=True)
rag_output(out)


üîç Retrieved:
 - 0.421 | 2018-02-01 06:55:00 | Leganes salvages Copa draw with Sevilla goalkeeper's mistake
 - 0.398 | 2017-02-15 17:38:00 | Turkey to bid to host 2024 European Championship
 - 0.381 | 2018-02-03 00:00:00 | Higuain and Buffon lead Juventus past Atalanta 1-0 in Cup
 - 0.375 | 2018-02-03 00:00:00 | Valencia in better spot since 7-0 loss to Barcelona in Copa
 - 0.331 | 2018-02-01 06:15:00 | 10.5-second goal sets Spurs on way to win over Man United
 - 0.321 | 2018-05-31 02:00:00 | BC-TEN--French Open Results
 - 0.320 | 2018-01-31 07:08:00 | Depleted West Ham holds Crystal Palace to 1-1 draw
 - 0.316 | 2018-04-24 23:23:00 | WORLD CUP: Salah can rouse Egypt after 28-year wait
üìÖ Date filter for year 2024: kept 0 docs

‚ùå No documents match the required year.

--- PIPELINE RESULT SUMMARY ---
{
  "initial_answer": "No reliable recent information found for this question.",
  "initial_fc": {
    "support_fraction": 0.0,
    "num_claims": 0
  }
}


# üß† RAG Pipeline Result

---

## ‚úÖ Final Answer

**No reliable recent information found for this question.**

---

## üìä Fact-Check Summary

- **Claims detected:** `0`

- **Support fraction:** `0.00`

---

## ‚ö†Ô∏è No claim-level evidence available

- This usually happens when the model produced a very short answer (like a single word), or when no reliable documents were found.

In [78]:
out = query_pipeline("Who did Juventus beat in the 2018 Italian Cup semifinal?", retriever)
rag_output(out)


üîç Retrieved:
 - 0.639 | 2018-02-03 00:00:00 | Higuain and Buffon lead Juventus past Atalanta 1-0 in Cup
 - 0.546 | 2018-01-30 02:48:00 | After failed election, Italian FA faces emergency measures
 - 0.471 | 2018-01-30 02:48:00 | After failed election, Italian FA faces emergency measures
 - 0.441 | 2018-01-31 19:17:00 | Azeglio Vicini, Italy's coach at 1990 World Cup, dies at 84
 - 0.397 | 2018-02-03 00:00:00 | Valencia in better spot since 7-0 loss to Barcelona in Copa
 - 0.391 | 2018-04-24 23:23:00 | WORLD CUP: Salah can rouse Egypt after 28-year wait
 - 0.379 | 2018-05-31 02:00:00 | BC-TEN--French Open Results
 - 0.378 | 2018-05-31 03:10:00 | French-Italian crew wins Leg 1 of Atlantic Cup
üìÖ Date filter for year 2018: kept 8 docs

‚úÖ Kept 1 docs after relevance + date filtering.

üß† Generated Answer:
 Atalanta

üìä Fact-check support fraction: 1.0 (1 claims)

--- PIPELINE RESULT SUMMARY ---
{
  "initial_answer": "Atalanta",
  "initial_fc": {
    "claim_results": [
      {
 

# üß† RAG Pipeline Result

---

## ‚úÖ Final Answer

**Atalanta**

---

## üìä Fact-Check Summary

- **Claims detected:** `1`

- **Support fraction:** `1.00`

---

## üîç Claim-Level Breakdown

### Claim 1: ‚úÖ Supported

**Claim:** Atalanta

**üîó Best Supporting Evidence**

- **Sentence:** Juventus is aiming to win its fourth straight Italian Cup while Atalanta is looking to reach the final for the first time in 22 years, since losing in 1996 to Fiorentina.

- **Title:** Higuain and Buffon lead Juventus past Atalanta 1-0 in Cup

- **Date:** 2018-02-03 00:00:00

- **Entailment Score:** `0.661`

---

In [80]:
out = query_pipeline("Which team did Barcelona defeat 7-0 in the Copa del Rey?", retriever)
rag_output(out)


üîç Retrieved:
 - 0.566 | 2018-02-01 06:55:00 | Leganes salvages Copa draw with Sevilla goalkeeper's mistake
 - 0.543 | 2018-02-03 00:00:00 | Valencia in better spot since 7-0 loss to Barcelona in Copa
 - 0.507 | 2018-02-03 00:00:00 | Valencia in better spot since 7-0 loss to Barcelona in Copa
 - 0.406 | 2018-05-31 13:37:00 | Hollingshead's goal, assist lead FC Dallas past Galaxy
 - 0.382 | 2017-02-15 18:51:00 | Madrid police talk to Maradona after altercation at hotel
 - 0.381 | 2018-02-03 00:00:00 | Higuain and Buffon lead Juventus past Atalanta 1-0 in Cup
 - 0.358 | 2018-02-01 06:28:00 | Sociedad signs Mexico defender Moreno in quiet day in Spain
 - 0.340 | 2018-07-05 04:36:00 | Costa Rica says coach out after disappointing World Cup
üìÖ No year found in question ‚Üí Skipping date filter.

‚úÖ Kept 3 docs after relevance + date filtering.

üß† Generated Answer:
 Valencia doesn't have fond memories of the last time it faced Barcelona in the Copa del Rey. The teams met two years a

# üß† RAG Pipeline Result

---

## ‚úÖ Final Answer

**Valencia doesn't have fond memories of the last time it faced Barcelona in the Copa del Rey. The teams met two years ago in the semifinals, with Valencia losing 7-0 in the first leg at Camp Nou Stadium.**

---

## üìä Fact-Check Summary

- **Claims detected:** `2`

- **Support fraction:** `1.00`

---

## üîç Claim-Level Breakdown

### Claim 1: ‚úÖ Supported

**Claim:** Valencia doesn't have fond memories of the last time it faced Barcelona in the Copa del Rey.

**üîó Best Supporting Evidence**

- **Sentence:** Valencia in better spot since 7-0 loss to Barcelona in Copa

MADRID (AP) ‚Äî Valencia doesn't have fond memories of the last time it faced Barcelona in the Copa del Rey.

- **Title:** Valencia in better spot since 7-0 loss to Barcelona in Copa

- **Date:** 2018-02-03 00:00:00

- **Entailment Score:** `0.994`

---

### Claim 2: ‚úÖ Supported

**Claim:** The teams met two years ago in the semifinals, with Valencia losing 7-0 in the first leg at Camp Nou Stadium.

**üîó Best Supporting Evidence**

- **Sentence:** The teams met two years ago in the semifinals, with Valencia losing 7-0 in the first leg at Camp Nou Stadium.

- **Title:** Valencia in better spot since 7-0 loss to Barcelona in Copa

- **Date:** 2018-02-03 00:00:00

- **Entailment Score:** `0.994`

---

In [183]:
out = query_pipeline("Which club did Mohamed Salah play for during the 2018 World Cup?", retriever)
rag_output(out)


üîç Retrieved:
 - 0.663 | 2018-04-24 23:23:00 | WORLD CUP: Salah can rouse Egypt after 28-year wait
 - 0.612 | 2018-04-24 23:23:00 | WORLD CUP: Salah can rouse Egypt after 28-year wait
 - 0.580 | 2018-04-24 23:23:00 | WORLD CUP: Salah can rouse Egypt after 28-year wait
 - 0.491 | 2018-02-03 00:00:00 | Aubameyang joins Arsenal for club-record $80 million
 - 0.477 | 2018-04-24 00:00:00 | WORLD CUP: Coaching changes unsettle lowest-ranked Saudis
 - 0.452 | 2018-04-24 00:00:00 | WORLD CUP: Coaching changes unsettle lowest-ranked Saudis
 - 0.432 | 2018-04-24 00:00:00 | WORLD CUP: Uruguay striker Suarez seeking redemption
 - 0.417 | 2018-02-03 00:00:00 | Athletic acts quickly, signs Martinez to replace Laporte

‚úÖ Kept 1 docs after relevance filtering.

üß† Generated Answer:
 Egypt

üìä Fact-check support fraction: 1.0 (1 claims)
‚ùå Answer type does not match question intent.
‚ùå Year mismatch between question and retrieved documents.

--- PIPELINE RESULT SUMMARY ---
{
  "final_answer"

## üß† RAG Pipeline Result

### ‚úÖ Final Answer

No verified evidence found.

**Source:** RAG-Rejected

### üìä Fact-Check Summary

- Claims detected: 1

- Support fraction: 0.0

‚ùå **Answer type mismatch detected**

‚ùå **Year mismatch detected**

---

## üîç Claim-Level Breakdown

### Claim 1: ‚úÖ Supported

**Claim:** Egypt

**Best Evidence:**

- Sentence: WORLD CUP: Salah can rouse Egypt after 28-year wait

 Kahrabah plays in Saudi Arabia.

- Title: WORLD CUP: Salah can rouse Egypt after 28-year wait

- Date: 2018-04-24 23:23:00

- Entailment: 0.66

In [195]:
out = query_pipeline("Which country planned to bid for the 2018 European Championship?", retriever)
rag_output(out)


üîç Retrieved:
 - 0.628 | 2017-02-15 17:38:00 | Turkey to bid to host 2024 European Championship
 - 0.466 | 2018-03-20 03:21:00 | Joint World Cup bidders: Trump hasn't sparked voter concerns
 - 0.447 | 2018-01-30 23:01:00 | Croatia to fulfil 2015 stadium ban by UEFA at England game
 - 0.407 | 2018-03-20 03:21:00 | Joint World Cup bidders: Trump hasn't sparked voter concerns
 - 0.372 | 2018-01-29 21:30:00 | EU ready to hit back if Trump imposes anti-EU trade measures
 - 0.356 | 2018-01-30 15:33:00 | The question at the Olympic Oval: Dutch domination, again?
 - 0.355 | 2018-01-29 21:30:00 | EU ready to hit back if Trump imposes anti-EU trade measures
 - 0.333 | 2018-02-01 08:03:00 | WORLD SPORTS at 0000 GMT

‚úÖ Kept 1 docs after relevance filtering.

üß† Generated Answer:
 Turkey had bid to host the final games of Euro 2020 but later withdrew its application saying it would concentrate efforts on 2024

üìä Fact-check support fraction: 1.0 (1 claims)
‚ùå Answer type does not match qu

## üß† RAG Pipeline Result

### ‚úÖ Final Answer

No verified evidence found.

**Source:** RAG-Rejected

### üìä Fact-Check Summary

- Claims detected: 1

- Support fraction: 0.0

‚ùå **Answer type mismatch detected**

‚ùå **Year mismatch detected**

---

## üîç Claim-Level Breakdown

### Claim 1: ‚úÖ Supported

**Claim:** Turkey had bid to host the final games of Euro 2020 but later withdrew its application saying it would concentrate efforts on 2024

**Best Evidence:**

- Sentence: Turkey had bid to host the final games of Euro 2020 but later withdrew its application saying it would concentrate efforts on 2024.

- Title: Turkey to bid to host 2024 European Championship

- Date: 2017-02-15 17:38:00

- Entailment: 0.993

In [194]:
out = query_pipeline("Which team held Crystal Palace to a 1-1 draw in 2018?", retriever)
rag_output(out)


üîç Retrieved:
 - 0.561 | 2018-01-31 07:08:00 | Depleted West Ham holds Crystal Palace to 1-1 draw
 - 0.446 | 2018-02-01 06:55:00 | Leganes salvages Copa draw with Sevilla goalkeeper's mistake
 - 0.439 | 2018-05-31 02:00:00 | BC-TEN--French Open Results
 - 0.437 | 2018-05-31 02:00:00 | BC-TEN--French Open Results
 - 0.428 | 2018-05-31 02:00:00 | BC-TEN--French Open Results
 - 0.418 | 2018-02-03 00:00:00 | Valencia in better spot since 7-0 loss to Barcelona in Copa
 - 0.415 | 2018-02-01 08:03:00 | WORLD SPORTS at 0000 GMT
 - 0.406 | 2018-02-03 00:00:00 | Higuain and Buffon lead Juventus past Atalanta 1-0 in Cup

‚úÖ Kept 3 docs after relevance filtering.

üß† Generated Answer:
 West Ham

üìä Fact-check support fraction: 1.0 (1 claims)

--- PIPELINE RESULT SUMMARY ---
{
  "final_answer": "West Ham",
  "source": "RAG (Verified)",
  "fact_check": {
    "claim_results": [
      {
        "claim": "West Ham",
        "supported": true,
        "best_evidence": {
          "sentence": "De

## üß† RAG Pipeline Result

### ‚úÖ Final Answer

West Ham

**Source:** RAG (Verified)

### üìä Fact-Check Summary

- Claims detected: 1

- Support fraction: 1.0

---

## üîç Claim-Level Breakdown

### Claim 1: ‚úÖ Supported

**Claim:** West Ham

**Best Evidence:**

- Sentence: Depleted West Ham holds Crystal Palace to 1-1 draw

LONDON (AP) ‚Äî Mark Noble's penalty canceled out a rare goal from Christian Benteke as injury-hit West Ham battled to a 1-1 draw at home to Crystal Palace in the Premier League on Tuesday.

- Title: Depleted West Ham holds Crystal Palace to 1-1 draw

- Date: 2018-01-31 07:08:00

- Entailment: 0.798