### Explicit Routing 

In [None]:
pip install transformers torch

In [None]:
pip install PyPDF2

In [None]:
import torch
from transformers import BertTokenizer, BertModel
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration
import PyPDF2

# Initialize BERT model and tokenizer for semantic similarity scoring.
# These module-level objects are read by compute_similarity() further down,
# so they must be created before that function is called.
# NOTE(review): from_pretrained presumably downloads and caches the checkpoint
# on first use, which needs network access -- confirm for offline runs.
bert_model_name = "bert-base-uncased"
bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert_model = BertModel.from_pretrained(bert_model_name)

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_file_path: Path to the PDF file to read.

    Returns:
        A single string with the extracted text of all pages, in page
        order, with no separator inserted between pages. Pages that yield
        no text contribute an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_file_path, "rb") as f:
        # PdfReader / .pages / .extract_text() replace the legacy
        # PdfFileReader / numPages / getPage / extractText names, which
        # raise a deprecation error on PyPDF2 3.x.
        reader = PyPDF2.PdfReader(f)
        # Pages are parsed lazily from the open handle, so the text must be
        # collected before the `with` block closes the file.
        return "".join(page.extract_text() or "" for page in reader.pages)

# Function to compute similarity scores between input question and text
def compute_similarity(input_question, text):
    """Score how semantically close *text* is to *input_question* using BERT.

    Both strings are embedded with the module-level ``bert_tokenizer`` /
    ``bert_model`` and compared by cosine similarity of their pooled
    embeddings.

    Args:
        input_question: The question string.
        text: The text to compare against the question.

    Returns:
        A NumPy array of cosine similarities, one entry per input pair
        (a single entry when both arguments are plain strings).

    Note:
        The tokenizer is called with ``truncation=True``, so a *text* longer
        than the model's maximum sequence length is cut off and only its
        beginning contributes to the score.
    """
    question_embedding = _pooled_embedding(input_question)
    text_embedding = _pooled_embedding(text)

    # Compare along the hidden dimension, giving one score per batch row.
    similarity_scores = torch.nn.functional.cosine_similarity(
        question_embedding, text_embedding, dim=1
    )
    return similarity_scores.numpy()


def _pooled_embedding(text):
    """Return BERT's pooled output tensor for *text* (gradients disabled)."""
    encoded = bert_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = bert_model(**encoded)
    # Index rather than tuple-unpack: recent transformers versions return a
    # dict-like ModelOutput, and unpacking that yields its *key names*
    # (strings) instead of tensors. Integer indexing works for both the
    # ModelOutput and the plain tuple returned by older versions; position 1
    # is the pooled output.
    return outputs[1]

# Demo driver: score one example question against the text of a PDF.
input_question = "What is the capital of Canada?"

# Placeholder location -- point this at a real PDF before running.
pdf_file_path = "path/to/your/pdf/file.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)

# Similarity between the question and the extracted PDF text.
similarity_scores = compute_similarity(input_question, pdf_text)

# Header on the first line, scores on the next.
print("Similarity Scores:", similarity_scores, sep="\n")

# Initialize RAG tokenizer, retriever, and generator.
# NOTE(review): "facebook/rag-token-base" is a RAG-Token checkpoint, but it is
# loaded into the RAG-Sequence model class here. Confirm whether the matching
# class/checkpoint pair (and a fine-tuned checkpoint rather than the base one)
# was intended.
rag_tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-base")
rag_retriever = RagRetriever.from_pretrained("facebook/rag-token-base", index_name="exact", use_dummy_dataset=True)
rag_generator = RagSequenceForGeneration.from_pretrained("facebook/rag-token-base")

# Encode input question
input_ids = rag_tokenizer(input_question, return_tensors="pt").input_ids

with torch.no_grad():
    # The retriever searches its index with dense question embeddings, not
    # raw token ids, so the question must go through the question encoder
    # first.
    question_hidden_states = rag_generator.question_encoder(input_ids)[0]

    # Retrieve relevant passages. Calling the retriever (rather than its
    # lower-level .retrieve()) also builds the context_input_ids /
    # context_attention_mask tensors the generator consumes.
    retrieved_docs = rag_retriever(
        input_ids.numpy(),
        question_hidden_states.detach().numpy(),
        return_tensors="pt",
    )

    # Relevance of each retrieved passage: dot product between the question
    # embedding and every retrieved document embedding.
    doc_scores = torch.bmm(
        question_hidden_states.unsqueeze(1),
        retrieved_docs["retrieved_doc_embeds"].float().transpose(1, 2),
    ).squeeze(1)

    # Generate answer based on retrieved passages. generate() has no
    # `retrieved_docs` argument; pre-retrieved context is supplied through
    # these three keyword arguments instead.
    output = rag_generator.generate(
        context_input_ids=retrieved_docs["context_input_ids"],
        context_attention_mask=retrieved_docs["context_attention_mask"],
        doc_scores=doc_scores,
    )

# Decode and print the answer
answer = rag_tokenizer.decode(output[0], skip_special_tokens=True)
print("Answer:", answer)
