In [None]:
import sys, subprocess
# Install the notebook's dependencies only when running inside Google Colab,
# detected by the presence of the already-imported `google.colab` module.
if "google.colab" in sys.modules:
    # Go through the running interpreter (`sys.executable -m pip`) so the packages
    # are installed into the same environment the kernel imports from, rather
    # than whichever `pip` happens to be first on PATH.
    _pip_result = subprocess.run(
        [
            sys.executable, "-m", "pip", "install", "-q",
            "pandas", "numpy", "scikit-learn", "requests", "pydantic", "jsonschema",
        ]
    )
    # Best-effort: do not abort the notebook, but make a failed install visible
    # instead of letting it surface later as a confusing ImportError.
    if _pip_result.returncode != 0:
        print(
            f"pip install exited with status {_pip_result.returncode}; "
            "some dependencies may be missing.",
            file=sys.stderr,
        )


# RAG Evaluation

Goal: compare retrieval quality across a small set of example queries.

Why it matters: even simple offline scores help researchers iterate on prompts and indexing strategies before deploying assistants.

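How to run and adapt: make sure `DATA_DIR` points at the folder containing `sample_texts/articles_sample.csv`, then run the cells in order. The first evaluation cell builds and caches the TF-IDF index (`vector_index.pkl`) if it does not exist yet; the cell after it expects that file to be present. Expand the query set or add your own heuristics for judging relevance.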

In [None]:
import pickle
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Build (or reuse) a TF-IDF index over the article abstracts, then report the
# highest cosine similarity each example query reaches against that index.
# NOTE(review): DATA_DIR is not defined in this cell; it must already exist in
# the kernel as a pathlib-style path before this cell runs.
index_path = DATA_DIR / "vector_index.pkl"
articles = pd.read_csv(DATA_DIR / "sample_texts" / "articles_sample.csv")

if index_path.exists():
    # Cache hit: reuse the stored vectorizer and matrix.
    # NOTE(review): pickle.load can execute arbitrary code; only load an index
    # file that this project wrote itself.
    with open(index_path, "rb") as cache_file:
        payload = pickle.load(cache_file)
    vectorizer = payload["vectorizer"]
    tfidf_matrix = payload["tfidf_matrix"]
else:
    # Cache miss: fit TF-IDF on the abstracts (missing values become empty
    # strings) and persist both pieces for later runs.
    vectorizer = TfidfVectorizer(stop_words="english")
    abstracts = articles["abstract"].fillna("")
    tfidf_matrix = vectorizer.fit_transform(abstracts)
    with open(index_path, "wb") as cache_file:
        pickle.dump({"vectorizer": vectorizer, "tfidf_matrix": tfidf_matrix}, cache_file)


queries = [
    "quantitative study design",
    "community impacts of technology",
    "statistical methods for small samples",
]

# One record per query: the query text and its single best similarity score.
results = [
    {
        "query": query,
        "best_score": float(
            cosine_similarity(vectorizer.transform([query]), tfidf_matrix).ravel().max()
        ),
    }
    for query in queries
]

pd.DataFrame(results)


In [None]:
import pickle
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Score the example queries against the TF-IDF index already saved on disk.
# This cell does not build the index; it requires DATA_DIR / "vector_index.pkl"
# to exist (the preceding cell creates it when missing).
# NOTE(review): DATA_DIR is not defined in this cell; it must already exist in
# the kernel as a pathlib-style path before this cell runs.

# NOTE(review): `articles` is loaded here but not used by the scoring below;
# kept so the name stays bound for any later cells that rely on it.
articles = pd.read_csv(DATA_DIR / "sample_texts" / "articles_sample.csv")

index_path = DATA_DIR / "vector_index.pkl"
if not index_path.exists():
    # Same exception type the bare open() would raise, but with a message that
    # tells the reader how to recover.
    raise FileNotFoundError(
        f"No TF-IDF index found at {index_path}. "
        "Run the preceding cell, which builds and caches it, before this one."
    )

# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only
# load an index that this project wrote itself; never an untrusted download.
with open(index_path, "rb") as handle:
    payload = pickle.load(handle)
vectorizer = payload["vectorizer"]
tfidf_matrix = payload["tfidf_matrix"]

queries = [
    "quantitative study design",
    "community impacts of technology",
    "statistical methods for small samples",
]

# Vectorize and compare all queries in one call: cosine_similarity normalizes
# the corpus matrix on every invocation, so batching avoids repeating that work
# once per query. The result has one row per query and one column per indexed
# document; the row-wise maximum is each query's best score.
similarities = cosine_similarity(vectorizer.transform(queries), tfidf_matrix)
results = [
    {"query": query, "best_score": float(best)}
    for query, best in zip(queries, similarities.max(axis=1))
]

pd.DataFrame(results)
