In [None]:
from google.colab import userdata
# Read the API key from Colab's user secrets instead of hard-coding it in the notebook.
# The value is passed to the OpenAI client as `api_key` further down.
Api_key = userdata.get('OpenAI_OpenRouter_api_key')

In [None]:
!pip install faiss-cpu datasets transformers sentence-transformers accelerate torch openai tqdm

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.1-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.1


In [None]:
# ANSI escape sequences used to colour console output; RESET restores the default style.
RED, GREEN, YELLOW = "\033[91m", "\033[92m", "\033[93m"
BLUE, MAGENTA, CYAN = "\033[94m", "\033[95m", "\033[96m"
RESET = "\033[0m"

In [None]:
import os
import time
import math
import faiss
import torch
import numpy as np
import openai
from tqdm.auto import tqdm
from datasets import load_dataset
from transformers import DPRContextEncoder, DPRContextEncoderTokenizerFast
from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizerFast
from sentence_transformers import SentenceTransformer


In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Device: ", DEVICE)

Device:  cuda


In [None]:
# Hugging Face dataset whose question contexts become the retrieval passages.
DATASET_NAME = "rajpurkar/squad"
# Maximum number of unique passages kept for encoding and indexing.
MAX_PASSAGES = 5_000
# Token limit applied when passages are tokenized for the DPR encoder.
PASSAGE_MAXLEN = 256
# Expected DPR vector width. The visible cells do not read it; the FAISS
# dimension is taken from the encoded vectors' shape instead.
DPR_DIM = 768

In [None]:
# Load only the "train" split of the dataset named by DATASET_NAME.
dataset = load_dataset(DATASET_NAME, split="train")

README.md: 0.00B [00:00, ?B/s]

plain_text/train-00000-of-00001.parquet:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

plain_text/validation-00000-of-00001.par(…):   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [None]:
# Show the dataset summary (feature names and number of rows).
print(dataset)

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 87599
})


In [None]:
# Inspect one arbitrary record to see the fields each row carries
# (id, title, context, question, answers).
dataset[101]

{'id': '573387acd058e614000b5cb3',
 'title': 'University_of_Notre_Dame',
 'context': 'One of the main driving forces in the growth of the University was its football team, the Notre Dame Fighting Irish. Knute Rockne became head coach in 1918. Under Rockne, the Irish would post a record of 105 wins, 12 losses, and five ties. During his 13 years the Irish won three national championships, had five undefeated seasons, won the Rose Bowl in 1925, and produced players such as George Gipp and the "Four Horsemen". Knute Rockne has the highest winning percentage (.881) in NCAA Division I/FBS football history. Rockne\'s offenses employed the Notre Dame Box and his defenses ran a 7–2–2 scheme. The last game Rockne coached was on December 14, 1930 when he led a group of Notre Dame all-stars against the New York Giants in New York City.',
 'question': 'How many years was Knute Rockne head coach at Notre Dame?',
 'answers': {'text': ['13'], 'answer_start': [251]}}

In [None]:
# Collect unique, non-empty contexts in dataset order. Several questions share
# one context, so duplicates are skipped. A passage's "id" equals its position
# in `passages`, which is how search results are mapped back to text later.
passages = []
seen_contexts = set()

for row in dataset:
    # Stop scanning as soon as the cap is reached instead of walking every
    # remaining row only to discard the surplus afterwards.
    if MAX_PASSAGES is not None and 0 <= MAX_PASSAGES <= len(passages):
        break
    context = row["context"].strip()
    if context and context not in seen_contexts:
        passages.append({"id": len(passages), "text": context})
        seen_contexts.add(context)

# Kept so that a None or negative MAX_PASSAGES still slices as a plain list slice would.
passages = passages[:MAX_PASSAGES]
print(f"Number of unique passages used: {len(passages)}")


Number of unique passages used: 5000


In [None]:
print(RED+"Loading DPR passage encoder..."+RESET)
# Tokenizer and encoder come from the same checkpoint; the encoder is moved to DEVICE.
passage_tokenizer = DPRContextEncoderTokenizerFast.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
passage_encoder = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base").to(DEVICE)
# Switch to inference mode. The trailing semicolon stops the notebook from
# echoing the returned module (a very long layer-by-layer repr).
passage_encoder.eval();

[91mLoading DPR passage encoder...[0m


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DPRQuestionEncoderTokenizer'. 
The class this function is called from is 'DPRContextEncoderTokenizerFast'.
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRContextEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification

DPRContextEncoder(
  (ctx_encoder): DPREncoder(
    (bert_model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768,

In [None]:
print("Loading DPR question encoder...")
# Tokenizer and encoder come from the same checkpoint; the encoder is moved to DEVICE.
query_tokenizer = DPRQuestionEncoderTokenizerFast.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
query_encoder = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base").to(DEVICE)
# Switch to inference mode. The trailing semicolon stops the notebook from
# echoing the returned module (a very long layer-by-layer repr).
query_encoder.eval();

Loading DPR question encoder...


Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


DPRQuestionEncoder(
  (question_encoder): DPREncoder(
    (bert_model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSdpaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_feature

In [None]:
# Second embedding model, used as a baseline against the DPR encoders.
print("Loading sentence-transformers baseline (all-MiniLM-L6-v2)...")
st_model = SentenceTransformer("all-MiniLM-L6-v2", device=DEVICE)

Loading sentence-transformers baseline (all-MiniLM-L6-v2)...


In [None]:
def encode_passages_dpr(passages, tokenizer, encoder, device=DEVICE, batch_size=32):
    """Embed passages with a DPR context encoder and L2-normalise every vector.

    Args:
        passages: sequence of dicts, each holding its text under the "text" key.
        tokenizer: callable applied to a list of strings; must return a mapping
            of tensors (it is called with ``return_tensors="pt"``).
        encoder: model whose output exposes ``pooler_output``.
        device: device the tokenized tensors are moved to before the forward pass.
        batch_size: number of passages per forward pass.

    Returns:
        A 2-D numpy array with one row per passage, in input order. Rows are
        scaled to unit length; an all-zero embedding stays all-zero.

    Raises:
        ValueError: if ``passages`` is empty or ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")
    if len(passages) == 0:
        raise ValueError("passages is empty; there is nothing to encode")

    vectors = []
    for start in range(0, len(passages), batch_size):
        batch_texts = [p["text"] for p in passages[start:start + batch_size]]
        # Passages longer than PASSAGE_MAXLEN tokens are truncated.
        enc = tokenizer(batch_texts, padding=True, truncation=True, max_length=PASSAGE_MAXLEN, return_tensors="pt")
        enc = {k: v.to(device) for k, v in enc.items()}
        with torch.no_grad():
            emb = encoder(**enc).pooler_output.cpu().numpy()
        # Same epsilon guard as normalize_np: a zero-norm row yields zeros
        # instead of NaN from a division by zero.
        norms = np.linalg.norm(emb, axis=1, keepdims=True) + 1e-10
        vectors.append(emb / norms)
    return np.vstack(vectors)

def encode_passages_st(passages, st_model, batch_size=64):
    """Embed passage texts with a sentence-transformers style model.

    Args:
        passages: sequence of dicts, each holding its text under the "text" key.
        st_model: object exposing ``encode``.
        batch_size: batch size forwarded to ``encode``.

    Returns:
        Whatever ``st_model.encode`` returns when asked for numpy output with
        the progress bar enabled.
    """
    return st_model.encode(
        [entry["text"] for entry in passages],
        batch_size=batch_size,
        convert_to_numpy=True,
        show_progress_bar=True,
    )

In [None]:

# Embed the same passages twice: with the DPR context encoder, then with the
# sentence-transformer baseline.
print(f"{YELLOW}Encoding DPR passage vectors...{RESET}")
passage_vectors_dpr = encode_passages_dpr(
    passages,
    passage_tokenizer,
    passage_encoder,
    device=DEVICE,
    batch_size=32,
)
print("DPR passage vectors shape:", passage_vectors_dpr.shape)

print(f"{YELLOW}Encoding sentence-transformer passage vectors (baseline)...{RESET}")
passage_vectors_st = encode_passages_st(passages, st_model=st_model, batch_size=64)
print("Sentence-transformer passage vectors shape:", passage_vectors_st.shape)

[93mEncoding DPR passage vectors...[0m
DPR passage vectors shape: (5000, 768)
[93mEncoding sentence-transformer passage vectors (baseline)...[0m


Batches:   0%|          | 0/79 [00:00<?, ?it/s]

Sentence-transformer passage vectors shape: (5000, 384)


In [None]:
def normalize_np(v):
    """Scale every row of a 2-D array to (approximately) unit L2 length.

    A tiny constant is added to each row norm so that an all-zero row divides
    to zeros instead of producing NaN.
    """
    row_lengths = np.linalg.norm(v, axis=1, keepdims=True)
    return v / (row_lengths + 1e-10)

# Unit-normalise both embedding sets so inner product equals cosine similarity,
# and cast to float32, the dtype passed to the FAISS indexes.
passage_vectors_dpr = normalize_np(passage_vectors_dpr).astype(np.float32)
passage_vectors_st = normalize_np(passage_vectors_st).astype(np.float32)

In [None]:
# Width of the DPR vectors; the FAISS indexes over them are constructed with this dimension.
d = passage_vectors_dpr.shape[-1]
print("Vector dim:", d)

Vector dim: 768


In [None]:
# Brute-force baseline index over the DPR passage vectors; the approximate
# indexes are queried alongside it in run_query.
index_flat = faiss.IndexFlatIP(d)               # exact search using inner product (cosine after normalization)
index_flat.add(passage_vectors_dpr)
print("IndexFlatIP added:", index_flat.ntotal)

IndexFlatIP added: 5000


In [None]:
nlist = 256  # Number of coarse clusters (inverted lists)
m = 64       # Number of PQ sub-vectors the d-dimensional vector is split into
# Use inner product for both the coarse quantizer and the IVFPQ index. The
# vectors are unit-normalised, so the returned values are cosine-style
# similarities on the same scale as index_flat. Without the explicit metric the
# index would report L2 distances (smaller = better), which get printed as
# "score" next to FlatIP similarities (larger = better).
quantizer = faiss.IndexFlatIP(d)
index_ivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, m, 8, faiss.METRIC_INNER_PRODUCT)  # 8 bits per sub-vector code
index_ivfpq.train(passage_vectors_dpr)  # Learns the centroids and codebooks
index_ivfpq.add(passage_vectors_dpr)    # Assigns vectors to clusters
index_ivfpq.nprobe = 8                  # Clusters visited per query
print("IndexIVFPQ added:", index_ivfpq.ntotal)

IndexIVFPQ added: 5000


In [None]:
efConstruction = 200  # Candidate-list size while building the graph
M = 32                # Neighbours kept per graph node
# Build the graph with the inner-product metric. The vectors are unit-normalised,
# so scores are cosine-style similarities comparable with index_flat. The default
# metric is L2, whose distances (smaller = better) would be printed as "score"
# next to FlatIP similarities (larger = better).
index_hnsw = faiss.IndexHNSWFlat(d, M, faiss.METRIC_INNER_PRODUCT)
index_hnsw.hnsw.efConstruction = efConstruction  # must be set before add()
index_hnsw.add(passage_vectors_dpr)
index_hnsw.hnsw.efSearch = 64  # Candidate-list size at query time
print("IndexHNSWFlat added:", index_hnsw.ntotal)

IndexHNSWFlat added: 5000


In [None]:
def retrieve_with_index(index, query_emb, topk=5):
    """Search ``index`` for the ``topk`` nearest entries to a query embedding.

    Args:
        index: object exposing ``search(queries, k)``.
        query_emb: numpy array; a 1-D vector is promoted to a single-row batch,
            anything else is passed through with its shape unchanged.
        topk: number of neighbours requested per query row.

    Returns:
        The ``(distances, indices)`` pair returned by ``index.search``.
    """
    batch = query_emb if query_emb.ndim != 1 else query_emb.reshape(1, -1)
    distances, indices = index.search(batch.astype("float32"), topk)
    return distances, indices

In [None]:
# Holds the FlatIP index over the sentence-transformer vectors so it is built
# once instead of on every query.
_ST_INDEX_CACHE = {}


def _st_flat_index():
    """Return a FlatIP index over ``passage_vectors_st``, rebuilding only if that array object changed."""
    cached = _ST_INDEX_CACHE.get("entry")
    if cached is None or cached[0] is not passage_vectors_st:
        index = faiss.IndexFlatIP(passage_vectors_st.shape[1])
        index.add(passage_vectors_st)
        cached = (passage_vectors_st, index)
        _ST_INDEX_CACHE["entry"] = cached
    return cached[1]


def _collect_hits(scores, ids):
    """Package one query's scores and ids with passage texts, skipping FAISS's -1 "no result" slots."""
    # FAISS pads with label -1 when it finds fewer than topk neighbours; used
    # as a Python index, -1 would silently return the last passage.
    kept = [(float(score), int(pid)) for score, pid in zip(scores, ids) if pid >= 0]
    return {
        "D": [score for score, _ in kept],
        "I": [pid for _, pid in kept],
        "passages": [passages[pid]["text"] for _, pid in kept],
    }


def run_query(text_query, topk=5):
    """Retrieve the ``topk`` passages for a question from every index.

    The question is embedded with the DPR question encoder and searched in the
    FlatIP, IVFPQ and HNSW indexes. It is also embedded with the
    sentence-transformer model and searched in a FlatIP index over
    ``passage_vectors_st``.

    Args:
        text_query: the question text.
        topk: number of passages requested from each index.

    Returns:
        Dict keyed by "FlatIP", "IVFPQ", "HNSW" and "ST_FlatIP". Each value is
        a dict with aligned lists "D" (scores), "I" (passage ids) and
        "passages" (texts). A list holds fewer than ``topk`` entries when an
        index returned fewer hits.
    """
    # 1) DPR query encode
    enc = query_tokenizer(text_query, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    with torch.no_grad():
        q_out = query_encoder(**enc).pooler_output.cpu().numpy()
    q_emb = normalize_np(q_out).astype("float32")

    # 2) ST query encode
    q_emb_st = st_model.encode([text_query], convert_to_numpy=True)
    q_emb_st = normalize_np(q_emb_st).astype("float32")

    results = {}
    for name, idx in [("FlatIP", index_flat), ("IVFPQ", index_ivfpq), ("HNSW", index_hnsw)]:
        D, I = retrieve_with_index(idx, q_emb, topk=topk)
        results[name] = _collect_hits(D[0], I[0])

    Dst, Ist = _st_flat_index().search(q_emb_st, topk)
    results["ST_FlatIP"] = _collect_hits(Dst[0], Ist[0])

    return results

In [None]:
query = "What was the name of the Polish-Jewish lawyer who first described Nazi atrocities as genocide?"
print(f"{YELLOW}Query:{query}{RESET}")
res = run_query(query, topk=5)

# One section per index: rank, passage id, score and a 350-character preview of each hit.
for index_name, hits in res.items():
    print(f"{CYAN}\n--- {index_name} results ---{RESET}")
    ranked_hits = zip(hits["D"], hits["I"], hits["passages"])
    for rank, (score, pid, passage) in enumerate(ranked_hits, start=1):
        print(f"Rank {rank} — id={pid} {RED}score={score:.4f}{RESET}\n{GREEN}{passage[:350].strip()}...\n{RESET}")

[93mQuery:What was the name of the Polish-Jewish lawyer who first described Nazi atrocities as genocide?[0m
[96m
--- FlatIP results ---[0m
Rank 1 — id=183 [91mscore=0.6660[0m
[92mBecause the universal acceptance of international laws which in 1948 defined and forbade genocide with the promulgation of the Convention on the Prevention and Punishment of the Crime of Genocide (CPPCG), those criminals who were prosecuted after the war in international courts for taking part in the Holocaust were found guilty of crimes against hu...
[0m
Rank 2 — id=172 [91mscore=0.6582[0m
[92mGenocide has become an official term used in international relations. The word genocide was not in use before 1944. Before this, in 1941, Winston Churchill described the mass killing of Russian prisoners of war and civilians as "a crime without a name". In that year, a Polish-Jewish lawyer named Raphael Lemkin, described the policies of systematic...
[0m
Rank 3 — id=4550 [91mscore=0.6388[0m
[92mAbout that

In [None]:
# Build the LLM context from (up to) the first five HNSW hits of the query
# above, separating passages with a "---" rule on its own line.
top_passages = res["HNSW"]["passages"][:5]
context_text = "\n\n---\n\n".join(top_passages)

In [None]:
# Prompt template for the answer step. It has two str.format placeholders:
# {context} (the retrieved passages) and {question} (the user query).
PROMPT = """
You are a helpful assistant. Use ONLY the context below to answer the question.


Context:
{context}

Question:
{question}

Answer:
"""

In [None]:
# Fill the template with the retrieved context and the question, then echo the result.
final_prompt = PROMPT.format(question=query, context=context_text)
final_prompt

'\nYou are a helpful assistant. Use ONLY the context below to answer the question.\n\n\nContext:\nBecause the universal acceptance of international laws which in 1948 defined and forbade genocide with the promulgation of the Convention on the Prevention and Punishment of the Crime of Genocide (CPPCG), those criminals who were prosecuted after the war in international courts for taking part in the Holocaust were found guilty of crimes against humanity and other more specific crimes like murder. Nevertheless, the Holocaust is universally recognized to have been a genocide and the term, that had been coined the year before by Raphael Lemkin, appeared in the indictment of the 24 Nazi leaders, Count 3, which stated that all the defendants had "conducted deliberate and systematic genocide—namely, the extermination of racial and national groups..."\n\n---\n\nGenocide has become an official term used in international relations. The word genocide was not in use before 1944. Before this, in 1941

In [None]:
from openai import OpenAI

# OpenAI SDK client pointed at the OpenRouter endpoint and authenticated with
# the key held in Api_key.
llm = OpenAI(api_key=Api_key, base_url="https://openrouter.ai/api/v1")

In [None]:
# Send the filled-in prompt as a single user message and print the text of the first choice.
response = llm.chat.completions.create(
    messages=[{"role": "user", "content": final_prompt}],
    model="openai/gpt-oss-20b:free",
)
print(response.choices[0].message.content)

Raphael Lemkin.


In [None]:
# Full retrieve-then-answer pass for one question, using the exact FlatIP hits as context.
query = "In what year did Notre Dame Victory March get copyrighted?"
print(f"{YELLOW}Query:{query}{RESET}")

res = run_query(query, topk=5)
top_passages = res["FlatIP"]["passages"][:5]
context_text = "\n\n---\n\n".join(top_passages)
final_prompt = PROMPT.format(question=query, context=context_text)
print(final_prompt)

response = llm.chat.completions.create(
    messages=[{"role": "user", "content": final_prompt}],
    model="openai/gpt-oss-20b:free",
)
print(response.choices[0].message.content)

[93mQuery:In what year did Notre Dame Victory March get copyrighted?[0m

You are a helpful assistant. Use ONLY the context below to answer the question.


Context:
The "Notre Dame Victory March" is the fight song for the University of Notre Dame. It was written by two brothers who were Notre Dame graduates. The Rev. Michael J. Shea, a 1904 graduate, wrote the music, and his brother, John F. Shea, who earned degrees in 1906 and 1908, wrote the original lyrics. The lyrics were revised in the 1920s; it first appeared under the copyright of the University of Notre Dame in 1928. The chorus is, "Cheer cheer for old Notre Dame, wake up the echos cheering her name. Send a volley cheer on high, shake down the thunder from the sky! What though the odds be great or small, old Notre Dame will win over all. While her loyal sons are marching, onward to victory!"

---

Notre Dame rose to national prominence in the early 1900s for its Fighting Irish football team, especially under the guidance of th

In [None]:
# Full retrieve-then-answer pass for one question, using the exact FlatIP hits as context.
query = "Ronald Reagan played the role of whom in 1940's Knute Rockne?"
print(f"{YELLOW}Query:{query}{RESET}")

res = run_query(query, topk=5)
top_passages = res["FlatIP"]["passages"][:5]
context_text = "\n\n---\n\n".join(top_passages)
final_prompt = PROMPT.format(question=query, context=context_text)
print(final_prompt)

response = llm.chat.completions.create(
    messages=[{"role": "user", "content": final_prompt}],
    model="openai/gpt-oss-20b:free",
)
print(response.choices[0].message.content)

[93mQuery:Ronald Reagan played the role of whom in 1940's Knute Rockne?[0m

You are a helpful assistant. Use ONLY the context below to answer the question.


Context:
In the film Knute Rockne, All American, Knute Rockne (played by Pat O'Brien) delivers the famous "Win one for the Gipper" speech, at which point the background music swells with the "Notre Dame Victory March". George Gipp was played by Ronald Reagan, whose nickname "The Gipper" was derived from this role. This scene was parodied in the movie Airplane! with the same background music, only this time honoring George Zipp, one of Ted Striker's former comrades. The song also was prominent in the movie Rudy, with Sean Astin as Daniel "Rudy" Ruettiger, who harbored dreams of playing football at the University of Notre Dame despite significant obstacles.

---

His film appearances after becoming Governor of California included a three-second cameo appearance in The Rundown, and the 2004 remake of Around the World in 80 Days. In

In [None]:
# Full retrieve-then-answer pass for one question, using the exact FlatIP hits as context.
query = "Who believed that the titles given to Tibetan leaders did not confer authority?"
print(f"{YELLOW}Query:{query}{RESET}")

res = run_query(query, topk=5)
top_passages = res["FlatIP"]["passages"][:5]
context_text = "\n\n---\n\n".join(top_passages)
final_prompt = PROMPT.format(question=query, context=context_text)
print(final_prompt)

response = llm.chat.completions.create(
    messages=[{"role": "user", "content": final_prompt}],
    model="openai/gpt-oss-20b:free",
)
print(response.choices[0].message.content)

[93mQuery:Who believed that the titles given to Tibetan leaders did not confer authority?[0m

You are a helpful assistant. Use ONLY the context below to answer the question.


Context:
Dreyfus writes that after the Phagmodrupa lost its centralizing power over Tibet in 1434, several attempts by other families to establish hegemonies failed over the next two centuries until 1642 with the 5th Dalai Lama's effective hegemony over Tibet.

---

According to Tibetologist John Powers, Tibetan sources counter this narrative of titles granted by the Chinese to Tibetans with various titles which the Tibetans gave to the Chinese emperors and their officials. Tribute missions from Tibetan monasteries to the Chinese court brought back not only titles, but large, commercially valuable gifts which could subsequently be sold. The Ming emperors sent invitations to ruling lamas, but the lamas sent subordinates rather than coming themselves, and no Tibetan ruler ever explicitly accepted the role of bein