In [1]:
!pip install transformers datasets sentence-transformers faiss-cpu



In [2]:
from datasets import load_dataset

# Load the QNLI dataset from the GLUE benchmark
dataset = load_dataset("glue", "qnli")

# Use only the train split for simplicity
train_data = dataset["train"]

# Extract questions and answers
questions = train_data["question"]
contexts = train_data["sentence"]

# Combine questions and answers into a single context-answer pair
qa_pairs = [
    f"{question}? {contexts[idx]}"
    for idx, question in enumerate(questions)
]

In [3]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Load a pre-trained embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode QA pairs
qa_embeddings = embedding_model.encode(qa_pairs, convert_to_numpy=True)

# Build the FAISS index with QA embeddings
index = faiss.IndexFlatL2(qa_embeddings.shape[1])
index.add(qa_embeddings)

In [4]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load a generative model (T5 in this case)
gen_tokenizer = AutoTokenizer.from_pretrained("t5-small")
gen_model = AutoModelForSeq2SeqLM.from_pretrained("t5-small")

In [5]:
def retrieve_qa_pairs(question, top_k=5):
    # Encode the question
    question_embedding = embedding_model.encode([question], convert_to_numpy=True)

    # Retrieve the top-k most relevant QA pairs
    distances, indices = index.search(question_embedding, top_k)
    retrieved_qa_pairs = [qa_pairs[idx] for idx in indices[0]]
    
    return retrieved_qa_pairs


In [6]:
question = """Where did Jebe die? Genghis Khan recalled Subutai back to Mongolia soon afterwards, and Jebe died on the road back to Samarkand."""
answer = retrieve_qa_pairs(question)
print(answer)

['Who captured him during World War I?? After being seriously wounded and captured by the Imperial Russians during World War I, Josip was sent to a work camp in the Ural Mountains.', 'Where was the location of the Shahi defeat by the Muslims?? After the Battle of Peshawar, he committed suicide because his subjects thought he had brought disaster and disgrace to the Shahi dynasty.', 'The Jats assisted in building what state?? A number of Gurjars had been exterminated in Bhinmal and Ajmer areas fighting with the invaders.', "Who was Genghis Khan's successor?? However, when they ceased to pay tribute after Genghis Khan's death, his successor Ögedei Khan (r. 1229–1241) launched an invasion into Tibet.", "What happened as a result of Maximilian's death?? When Maximilian died, Albert's line died out, and the Duchy of Prussia passed to the Electors of Brandenburg, forming Brandenburg-Prussia."]
