In [39]:
# Import necessary libraries
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

In [40]:
# 1. Load a pre-trained sentence transformer model for encoding documents.
# The same model instance is reused later to embed incoming queries, so
# documents and queries are encoded by one and the same encoder.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


In [41]:
# Toy knowledge base (swap in your own corpus).
# Order matters: the retrieval step looks documents up by their position in
# this list, so it must stay aligned with the embeddings built from it.
documents = [
    "Paris is the capital of France, known for its art, fashion, and landmarks",
    "The Eiffel Tower is one of most famous landmarks in Paris, France",
    "The Louvre Museum in Paris is the world's largest art museum and a historic monument.",
    "Alexei Kitaev is Quantum Computing Researcher.",
]

In [42]:
# 2. Encode the documents into embeddings using the sentence transformer.
# One embedding is produced per entry of `documents`, in the same order.
# convert_to_tensor=True requests a torch tensor rather than a NumPy array,
# so the result has to be converted before it is handed to FAISS.
document_embeddings = sentence_model.encode(documents, convert_to_tensor=True)

In [43]:
# 3. Create a FAISS index for efficient retrieval
# FAISS only accepts C-contiguous float32 NumPy arrays. If the embeddings are
# a torch tensor they may live on a GPU, where np.array() raises, so move
# them to the CPU explicitly before converting.
if hasattr(document_embeddings, "detach"):
    document_vectors = document_embeddings.detach().cpu().numpy()
else:
    document_vectors = document_embeddings
document_vectors = np.ascontiguousarray(document_vectors, dtype=np.float32)

dimension = document_vectors.shape[1]  # Dimension of the embeddings
index = faiss.IndexFlatL2(dimension)   # Exact search using L2 distance for similarity
index.add(document_vectors)

# Retrieval maps FAISS ids straight back to positions in `documents`.
assert index.ntotal == len(documents), "index and documents are out of sync"

In [44]:
# 4. Load the pre-trained T5 tokenizer and generation model.
# Both come from the same checkpoint name so their vocabularies match.
T5_CHECKPOINT = 't5-small'
tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT)

Loading weights:   0%|          | 0/131 [00:00<?, ?it/s]

In [45]:
# 5. Define a function to retrieve the most relevant documents based on a query
def retrieve_documents(query, top_k=1):
    """Return the documents whose embeddings are nearest to ``query``.

    Args:
        query: Query string; it is embedded with ``sentence_model``.
        top_k: Maximum number of documents to return. Values larger than the
            number of indexed vectors are clamped, and values <= 0 yield an
            empty list.

    Returns:
        A list with at most ``top_k`` entries of ``documents``, in the order
        in which ``index.search`` returns their ids.
    """
    # FAISS pads missing neighbours with id -1, which Python list indexing
    # would silently treat as "the last document". Clamp k so no padding
    # is produced, and short-circuit when there is nothing to search for.
    k = min(int(top_k), index.ntotal)
    if k <= 0:
        return []

    # Ask for a NumPy result directly: FAISS needs a CPU float32 array, and
    # np.array() cannot convert a tensor that lives on a GPU.
    query_embedding = sentence_model.encode([query], convert_to_numpy=True)
    query_vector = np.ascontiguousarray(query_embedding, dtype=np.float32)

    # Perform retrieval
    _, indices = index.search(query_vector, k)

    # Map ids back to documents, defensively skipping any -1 padding
    return [documents[i] for i in indices[0] if i >= 0]

In [46]:
# 6. Define a function for RAG-based Question Answering (QA)
def rag_qa(query, top_k=1):
    """Answer ``query`` with the T5 model, conditioned on retrieved documents.

    Args:
        query: The question to answer.
        top_k: Number of documents to retrieve for the context. Defaults to
            1, which matches the previous single-document behaviour.

    Returns:
        The decoded text generated by the model, with special tokens removed.
    """
    # Retrieve the most relevant documents for the query
    retrieved_docs = retrieve_documents(query, top_k=top_k)

    # Separate documents with a space so adjacent sentences do not fuse
    # together when more than one document is retrieved.
    context = " ".join(retrieved_docs)

    # Prepare the input for the model (query + context)
    # NOTE(review): T5's question-answering task prefix is presumably the
    # lowercase "question: ... context: ..." form - confirm before relying on
    # answer quality; the prompt text is left unchanged here.
    input_text = f"Question: {query} Context: {context}"

    # truncation=True caps over-long contexts at the tokenizer's configured
    # maximum length; .to(model.device) keeps inputs on the model's device.
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    inputs = inputs.to(model.device)

    # Generate an answer using the T5 model (input_ids and attention_mask
    # are both forwarded via **inputs)
    generated_ids = model.generate(**inputs, max_length=50, num_beams=5, early_stopping=True)

    # Decode the generated output to text
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [47]:
# 7. Smoke-test the RAG-based QA system end to end with a question whose
# answer is present in the example documents.
query = "Who is Alexei Kitaev?"
answer = rag_qa(query)
print(f"Answer: {answer}")

Answer: Alexei Kitaev is Quantum Computing Researcher.
