<a href="https://colab.research.google.com/github/aasrith72/Assignment-5/blob/main/Assignment_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q transformers datasets faiss-cpu
# !pip       : Shell command to install Python packages.
# -q         : Quiet mode (suppresses output).
# transformers: Python library for pre-trained models (Hugging Face).
# datasets   : Python library for loading datasets (Hugging Face).
# faiss-cpu  : Facebook AI Similarity Search library (for vector operations) on CPU.

# sample_text: The document/text that serves as our knowledge base.
# NOTE: this comment must stay outside the triple-quoted literal; anything
# inside the quotes becomes part of the text that is embedded and later
# placed in the prompt.
sample_text = """
Albert Einstein was a theoretical physicist who developed the theory of relativity,
 one of the two pillars of modern physics (alongside quantum mechanics). His work is also
 known for its influence on the philosophy of science. He is best known to the general public
 for his mass-energy equivalence formula E = mc².
"""

from transformers import AutoTokenizer, AutoModel
# AutoTokenizer: Automatically loads the correct tokenizer for a model.
# AutoModel   : Automatically loads the correct model architecture.

import torch
# torch      : PyTorch library for deep learning operations (tensors, neural networks).

import numpy as np
# numpy      : Library for numerical operations, especially array manipulation.
# np         : Common alias for numpy.

# Hugging Face identifier of the pre-trained embedding checkpoint; the same
# identifier is used to load both the tokenizer and the encoder below.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Tokenizer matching the checkpoint: converts text into model inputs.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pre-trained encoder whose token-level outputs are pooled into embeddings.
model = AutoModel.from_pretrained(model_name)

def get_embeddings(text):
    """Convert text into an embedding vector by mean-pooling token outputs.

    Args:
        text: Input passed straight to the module-level ``tokenizer``
            (a single string in this script).

    Returns:
        A NumPy array holding the pooled embedding. Size-1 dimensions are
        squeezed away, so a single input yields a 1-D vector.
    """
    # return_tensors='pt' -> PyTorch tensors; truncation cuts over-long text;
    # padding aligns lengths when more than one text is tokenized together.
    tokens = tokenizer(text, return_tensors='pt', truncation=True, padding=True)

    # Inference only: skip gradient tracking to save memory.
    with torch.no_grad():
        outputs = model(**tokens)

        # Average only over real tokens. Padded positions carry an attention
        # mask of 0, so weighting by the mask keeps them out of the mean; with
        # no padding this is the same value as a plain mean over dim=1.
        hidden = outputs.last_hidden_state
        mask = tokens['attention_mask'].unsqueeze(-1).to(hidden.dtype)
        summed = (hidden * mask).sum(dim=1)
        counts = mask.sum(dim=1).clamp(min=1e-9)  # guard against division by zero
        pooled = summed / counts

    # Drop size-1 dimensions and hand back a NumPy array.
    return pooled.squeeze().numpy()

import faiss
# faiss      : Library for efficient similarity search on vectors.

# chunks     : A list of text segments (our knowledge base documents).
chunks = [sample_text]

# embeddings : One embedding vector per chunk, in the same order as `chunks`,
#              so a FAISS result position maps directly back to chunks[i].
embeddings = [get_embeddings(chunk) for chunk in chunks]

# FAISS operates on a 2-D float32 matrix (one row per vector); build it once
# with an explicit dtype instead of relying on whatever dtype the model emits.
embedding_matrix = np.asarray(embeddings, dtype=np.float32)

# dim        : The dimensionality (length) of each embedding vector.
dim = embedding_matrix.shape[1]

# index      : Flat (exhaustive) FAISS index using L2 (Euclidean) distance.
index = faiss.IndexFlatL2(dim)

# Store every chunk embedding in the index.
index.add(embedding_matrix)

from transformers import pipeline
# pipeline   : A high-level helper function from transformers to simplify model usage for common tasks.

# qa_pipeline: text-to-text generation pipeline used to produce the final
# answer; the task name selects the pipeline type and `model` names the
# checkpoint that performs the generation.
qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-small",
)

def retrive_and_answer(query, top_k=1):
    """Run the RAG flow: retrieve the closest chunks, then generate an answer.

    Args:
        query: The user's question.
        top_k: Number of nearest chunks to request from the FAISS index.

    Returns:
        The ``generated_text`` of the first pipeline result.

    Side effects:
        Prints the raw pipeline result.
    """
    # FAISS searches a 2-D float32 matrix with one query vector per row.
    query_embedding = np.asarray(
        get_embeddings(query), dtype=np.float32
    ).reshape(1, -1)

    # Distances are not needed here; only the positions of the hits are used.
    _distances, indices = index.search(query_embedding, top_k)

    # When top_k exceeds the number of indexed vectors FAISS pads the labels
    # with -1. Skip those: chunks[-1] would silently repeat the last chunk.
    retrieved_texts = [chunks[i] for i in indices[0].tolist() if i >= 0]

    # Merge the retrieved chunks into a single context string.
    context = " ".join(retrieved_texts)

    # Augmented prompt given to the generator (kept verbatim).
    prompt = f'Context: {context} \nQuestion :{query} \nAnswer: '

    # do_sample=False keeps generation deterministic; max_length caps it.
    result = qa_pipeline([prompt], max_length=100, do_sample=False)

    print("Pipeline Result:", result) # Add this line to print the result

    return result[0]['generated_text']

# Question   : The query put to the RAG system.
Question = "What is Albert Einstein known for?"

# Answer     : Text returned by the retrieval + generation function.
Answer = retrive_and_answer(Question)

# Show the question/answer pair.
print(f"Q: {Question}")
print(f"A: {Answer}")