# RAG Workshop

# Implementation

In [1]:
pip install -r requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


## FAISS Library

### DECLARING GLOBAL VARIABLES + OBJECTS

In [1]:
import transformers
print(transformers.__version__)

4.52.4


In [3]:
import pandas as pd
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import torch
from transformers import pipeline

# EMBEDDING MODEL
embedding_model = SentenceTransformer("paraphrase-mpnet-base-v2") # bert-base-nli-mean-tokens

# DATA STORE THAT WILL ALSO BE STORED AS  VECTOR STORE
data = [
    ['What is the weather like today?', 'general'],
    ['Can you provide the latest stock market updates?', 'finance'],
    ['Recommend a good Italian restaurant nearby', 'food'],
    ['How do I reset my password?', 'tech support'],
    ['Tell me a joke', 'entertainment'],
    ['What are the symptoms of a flu?', 'health'],
    ['Book a flight to New York', 'travel'],
    ['How to make a chocolate cake?', 'cooking'],
    ['In todays football game, Barcelona beat Real Madrid 5-2', 'sports'],
    ['Im feeling happy today', 'personal emotion']
]
df = pd.DataFrame(data, columns=['text', 'category'])

# USER QUERY
USER_QUERY = "What was the score in today's football game"

# GENERATION MODEL
model_id = "meta-llama/Llama-3.2-1B"
generation_pipe = pipeline(
    "text-generation", 
    model=model_id, 
    torch_dtype=torch.bfloat16, 
    max_new_tokens=100,
    # num_return_sequences=3, 
    # do_sample=True, 
    # temperature=0.7
)

df

  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 199/199 [00:00<00:00, 372.40it/s, Materializing param=pooler.dense.weight]                        
MPNetModel LOAD REPORT from: sentence-transformers/paraphrase-mpnet-base-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
`torch_dtype` is deprecated! Use `dtype` instead!
Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 146/146 [00:00<00:00, 314.93it/s, Materializing param=model.norm.weight]                              
Passing `generation_config` together with generation-related arguments=({'max_new_tokens'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.


Unnamed: 0,text,category
0,What is the weather like today?,general
1,Can you provide the latest stock market updates?,finance
2,Recommend a good Italian restaurant nearby,food
3,How do I reset my password?,tech support
4,Tell me a joke,entertainment
5,What are the symptoms of a flu?,health
6,Book a flight to New York,travel
7,How to make a chocolate cake?,cooking
8,"In todays football game, Barcelona beat Real M...",sports
9,Im feeling happy today,personal emotion


### VectorDB creation

In [4]:
text = df['text']
embeddings = embedding_model.encode(text)

embeddings.shape # (10, 768)

embd_dim = embeddings.shape[1] # get embedding dimension

index = faiss.IndexFlatL2(embd_dim) # create faiss index of 768 dimension and use L2 distance as distance metric
faiss.normalize_L2(embeddings) # so only angle matters, not the magnitude

index.add(embeddings) # embeddings added into index/VectorDB

### Retrieval

In [5]:
search_vector = embedding_model.encode(USER_QUERY)
new_vector = np.array([search_vector])
faiss.normalize_L2(new_vector)

distances, indices = index.search(new_vector, k=1) # Fetch 1 Nearest Neighbours based on L2 distance
results = pd.DataFrame({'distances': distances[0], 'ann': indices[0]})
results

Unnamed: 0,distances,ann
0,1.404837,8


In [6]:
df_merged = pd.merge(results, df, left_on='ann', right_index=True)
df_merged.head()

Unnamed: 0,distances,ann,text,category
0,1.404837,8,"In todays football game, Barcelona beat Real M...",sports


### Augmentation

In [7]:
prompt_template = """
Give output to user question based on relvant context.

User Question: {USER_QUERY}
Context:
{Context}

Answer:
""".strip()

prompt = prompt_template.format(USER_QUERY=USER_QUERY, Context=df_merged["text"])
prompt

"Give output to user question based on relvant context.\n\nUser Question: What was the score in today's football game\nContext:\n0    In todays football game, Barcelona beat Real M...\nName: text, dtype: str\n\nAnswer:"

### Generation

In [8]:
generation_pipe(prompt)[0]["generated_text"]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Both `max_new_tokens` (=100) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


"Give output to user question based on relvant context.\n\nUser Question: What was the score in today's football game\nContext:\n0    In todays football game, Barcelona beat Real M...\nName: text, dtype: str\n\nAnswer: Barcelona\nName: text, dtype: str\n\nAnswer: Real M...\nName: text, dtype: str\n\nAnswer: Real Madrid\nName: text, dtype: str\n\nAnswer: Real Betis\nName: text, dtype: str\n\nAnswer: Real Sociedad\nName: text, dtype: str\n\nAnswer: Real Valladolid\nName: text, dtype: str\n\nAnswer: Real Madrid\nName: text, dtype: str\n\nAnswer: Real Sociedad\nName:"

## LangChain Framework

### DECLARING GLOBAL VARIABLES + OBJECT

In [1]:
import os
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from transformers import pipeline
import torch

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."

data = [
    ['What is the weather like today?', 'general'],
    ['Can you provide the latest stock market updates?', 'finance'],
    ['Recommend a good Italian restaurant nearby', 'food'],
    ['How do I reset my password?', 'tech support'],
    ['Tell me a joke', 'entertainment'],
    ['What are the symptoms of a flu?', 'health'],
    ['Book a flight to New York', 'travel'],
    ['How to make a chocolate cake?', 'cooking'],
    ['In todays football game, Barcelona beat Real Madrid 5-2', 'sports'],
    ['Im feeling happy today', 'personal emotion']
]
df = pd.DataFrame(data, columns=['text', 'category'])


embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-mpnet-base-v2")

model_id = "meta-llama/Llama-3.2-1B"
generation_pipe = pipeline(
    "text-generation", 
    model=model_id, 
    torch_dtype=torch.bfloat16, 
    max_new_tokens=100,
)

  from .autonotebook import tqdm as notebook_tqdm
Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 199/199 [00:00<00:00, 342.99it/s, Materializing param=pooler.dense.weight]                        
MPNetModel LOAD REPORT from: sentence-transformers/paraphrase-mpnet-base-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
`torch_dtype` is deprecated! Use `dtype` instead!
Loading weights: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 146/146 [00:00<00:00, 371.74it/s, Materializing param=model.norm.weight]                              
Passing `generation_config` together with generation-related arguments=({'max_new_tokens'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.


### VectorDB creation

In [2]:
vector_store = InMemoryVectorStore(embedding_model)
vector_store.add_texts(df["text"])

['4325fbe2-6270-4804-97ce-fd1ca438f610',
 '949190be-64ce-4731-8150-fb913f1491b9',
 'fa2019e7-3859-4d3c-8e01-7dbb1c4cdd9a',
 '3efb16b3-d13c-4b60-bb65-1476d748f972',
 '79e3ee51-2331-43ea-839e-1fd28c549c58',
 'a01639f0-55fb-4f46-ba1f-3cd1413552a1',
 'a1317727-5e84-44d2-9139-ab05ed93e89e',
 'e76c65aa-794d-492e-80fd-69eb79254136',
 'ced6c2fc-8c96-40f3-a405-5dcff3433d94',
 'e10c7da7-abf0-4c40-883f-a9f2eb3aacb8']

### Retrieval

In [3]:
query = "What's the score in the latest Barcelona game?"
retrieved_docs = vector_store.similarity_search(query, k=3)
print(retrieved_docs)

[Document(id='ced6c2fc-8c96-40f3-a405-5dcff3433d94', metadata={}, page_content='In todays football game, Barcelona beat Real Madrid 5-2'), Document(id='949190be-64ce-4731-8150-fb913f1491b9', metadata={}, page_content='Can you provide the latest stock market updates?'), Document(id='4325fbe2-6270-4804-97ce-fd1ca438f610', metadata={}, page_content='What is the weather like today?')]


### Augmentation

In [4]:
prompt_template = """
Give output to user question based on relvant context.

User Question: {USER_QUERY}
Context:
{Context}

Answer:
""".strip()

context = "\n".join([doc.page_content for doc in retrieved_docs])
prompt = prompt_template.format(USER_QUERY=query, Context=context)

### Generation

In [5]:
generation_pipe(prompt)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Both `max_new_tokens` (=100) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[{'generated_text': "Give output to user question based on relvant context.\n\nUser Question: What's the score in the latest Barcelona game?\nContext:\nIn todays football game, Barcelona beat Real Madrid 5-2\nCan you provide the latest stock market updates?\nWhat is the weather like today?\n\nAnswer: 5-2 (in todays game)\n5-2 (in todays stock market update)\n\nUser Question: What's the score in the latest Barcelona game?\nContext:\nIn todays football game, Barcelona beat Real Madrid 5-2\nCan you provide the latest stock market updates?\nWhat is the weather like today?\nContext: In todays football game, Barcelona beat Real Madrid 5-2\nWhat is the weather like today?\n\nAnswer: 5-2 (in tod"}]

# LangChain Framework vs FAISS Library

This notebook compares **LangChain** and **FAISS**, two commonly used tools in AI applications, highlighting their strengths and weaknesses.

| Tool        | Strengths | Weaknesses |
|------------|-----------|------------|
| **LangChain** | - Enables rapid development of LLM-based applications such as chatbots, RAG systems, and AI agents. <br> - Provides high-level abstractions, reducing the need for deep AI or programming knowledge. <br> - Integrates easily with external APIs and vector databases (like FAISS). | - Internal workings are abstracted, making it harder to fully understand or customize low-level behavior. <br> - Can introduce overhead compared to a lean, custom implementation. |
| **FAISS** | - Highly efficient and scalable library for vector similarity search.. <br> - Flexible low-level control for optimized performance. | - Purely a vector search engine; does not handle LLMs, prompts, or application workflows. <br> - Requires additional effort to integrate embeddings and LLMs for complete AI applications. |

---

**Summary:**  
- **FAISS** is the engine for vector search and similarity tasks.  
- **LangChain** is a higher-level framework for building LLM-powered applications, which can leverage FAISS (or other vector stores) for retrieval.  


### Other VectorDB alternatives
1) ChromaDB
2) Qdrant DB
3) Pinecone
4) Weaviate

# Retrieval For Images

![Alt](diagrams/RAG%20-%20Retrieval%20For%20Images.jpg)

### Practice 
1) Use FAISS library + an CLIP's embedding model for vision
2) Use cat/dog images in "images" directory(paths already defined below)
3) Create an image store and VectorDB, store them in images directory
4) Use the query image(path defined below)
5) Perform similarity search and retrieve top 2 images

In [1]:
img_paths = {
    0: "images/german_sheperd.jpg",
    1: "images/Golden_Retriever.jpg",
    2: "images/siberian_husky.jpg",
    3: "images/persian_cat.jpg",
    4: "images/scottish_fold_cat.jpg",
    5: "images/sphynx_cat.jpg"
}

QUERY_IMG = "images/query_german_sheperd.jpg"

In [None]:
# SAMPLE CODE TO GENERATE IMAGE EMBEDDINGS USING CLIP'S IMAGE ENCODER

from transformers import CLIPProcessor, CLIPModel

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

model.eval()

def get_img_embeddings_using_clip_img_encoder(img_path):
    img = Image.open(img_path)

    inputs = processor(images=img, return_tensors="pt")
    image_tensor = inputs['pixel_values']  # shape: (1, 3, 224, 224)

    # Encode
    with torch.no_grad():
        embeddings = model.get_image_features(image_tensor)  # Hugging Face
        embeddings = embeddings.cpu().numpy().astype('float32')  # convert to NumPy

In [None]:
# Write code

## Cross-model/Mulitmodal Retrieval

![Alt text](diagrams/RAG%20-%20Cross-Model%20RetrievalMultimodal%20Retrieval.jpg)

## Text-to-Image Retrieval

![Alt text](diagrams/RAG%20-%20Text-to-Image%20Retrieval.jpg)

### Practice 
1) Use FAISS library & CLIP's vision encoder + text decoder
2) Load the existing image store + VectorDB created previously
3) Use the sample query text given below
3) Perform similarity search and retrieve top 2 images

In [None]:
img_paths = {
    0: "images/german_sheperd.jpg",
    1: "images/Golden_Retriever.jpg",
    2: "images/siberian_husky.jpg",
    3: "images/persian_cat.jpg",
    4: "images/scottish_fold_cat.jpg",
    5: "images/sphynx_cat.jpg"
}

QUERY_TXT = "A sphynx cat"

In [None]:
# SAMPLE CODE TO GENERATE TEXT EMBEDDINGS USING CLIP'S TEXT ENCODER

def get_text_embeddings_using_clip_text_encoder(text):
    inputs = processor(text=text, return_tensors="pt", padding=True)

    with torch.no_grad():
        embeddings = model.get_text_features(**inputs)
        embeddings = embeddings.cpu().numpy().astype('float32')  # convert to NumPy

In [None]:
# Write code

## Image-to-Text Retreival

![Alt text](diagrams/RAG%20-%20Image-to-text%20Retrieval.jpg)

### Practice 
1) Use FAISS library & CLIP's vision encoder + text decoder.
2) load sentences from sentences.txt, make document store + VectorDB out of it.
3) Also store document store + VectorDB in texts folder. 
4) Use the query image(path defined below)
5) Perform similarity search and retrieve top 2 sentences.

In [1]:
QUERY_IMG = "images/query_german_sheperd.jpg"

In [None]:
# Write code

# Create Streamlit App
### Create an HR Chatbot that uses RAG in backend to answer employee queries

### Instructions:
1) use chatGPT to generate sample data and put it in a file
2) Create FAISS index using this data
1) Complete the code given below(Note that the streamlit UI code is complete, you just need to implement retrieve, augment, generate functions)
2) Copy the code into a new file: app.py
3) Run using terminal command: streamlit run app.py

In [None]:
prompt_template = """
You are acting as an HR chatbot for company 'Dense Fusion'. Answer user query using given context/
User Query: {user_query}
Context: {context}
""".strip()

def retrieve(user_query):
    return ""

def augment(user_query, context):
    return ""

def generate(prompt):
    return ""


def RAG(user_query):

    context = retrieve(user_query)
    prompt = augment(user_query, context)
    response = generate(prompt)
    return response


# STEAMLIT UI CODE
import streamlit as st

st.set_page_config(page_title="RAG Chat App", page_icon="ðŸ“š")

st.title("ðŸ“š RAG-powered Q&A")
st.write("Ask a question and get an answer using Retrieval-Augmented Generation.")

# User input
user_input = st.text_input("Enter your question:")

# Submit button
if st.button("Ask"):
    if not user_input.strip():
        st.warning("Please enter a question.")
    else:
        with st.spinner("Generating answer..."):
            response = RAG(user_input)

        st.subheader("Answer")
        st.write(response)