# RAG Workshop

# Implementation

In [None]:
!pip install -r requirements.txt -q

## FAISS Library

### DECLARING GLOBAL VARIABLES + OBJECTS

In [15]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

import torch
from transformers import pipeline

import pandas as pd

# EMBEDDING MODEL
# Sentence-embedding model used for both the stored texts and the user query.
embedding_model = SentenceTransformer("paraphrase-mpnet-base-v2") # bert-base-nli-mean-tokens

# DATA STORE: [text, category] rows. The "text" column is what gets embedded into the vector store.
data = [
    ['What is the weather like today?', 'general'],
    ['Can you provide the latest stock market updates?', 'finance'],
    ['Recommend a good Italian restaurant nearby', 'food'],
    ['How do I reset my password?', 'tech support'],
    ['Tell me a joke', 'entertainment'],
    ['What are the symptoms of a flu?', 'health'],
    ['Book a flight to New York', 'travel'],
    ['How to make a chocolate cake?', 'cooking'],
    ['In todays football game, Barcelona beat Real Madrid 5-2', 'sports'],
    ['Im feeling happy today', 'personal emotion']
]
df = pd.DataFrame(data, columns=['text', 'category'])

# USER QUERY
USER_QUERY = "What was the score in today's football game"

# GENERATION MODEL
model_id = "Qwen/Qwen2.5-1.5B-Instruct"

# Run on the first GPU when CUDA is available; otherwise fall back to CPU (-1)
# so the notebook still runs on machines without a GPU.
generation_device = 0 if torch.cuda.is_available() else -1

# high level API
generation_pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.float32,  # full precision: float32 carries ~7 significant decimal digits, float16 only ~3-4
    max_new_tokens=100,         # upper bound on the number of generated tokens
    do_sample=False,            # greedy decoding; sampling flags such as `temperature` are ignored in this mode, so none is passed
    device=generation_device,
    # num_return_sequences=3,
)
# output = generation_pipe(USER_QUERY)

df

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Unnamed: 0,text,category
0,What is the weather like today?,general
1,Can you provide the latest stock market updates?,finance
2,Recommend a good Italian restaurant nearby,food
3,How do I reset my password?,tech support
4,Tell me a joke,entertainment
5,What are the symptoms of a flu?,health
6,Book a flight to New York,travel
7,How to make a chocolate cake?,cooking
8,"In todays football game, Barcelona beat Real M...",sports
9,Im feeling happy today,personal emotion


### VectorDB creation

In [None]:
# Pass a plain list of strings to the encoder. A pandas Series only works by accident
# when its index is the default 0..n-1 range, because the encoder indexes it positionally.
text = df['text'].tolist() # ["What is the weather like today?", "Can you provide the latest stock market updates?", ...]

with torch.no_grad():
  embeddings = embedding_model.encode(text)

print(embeddings.shape) # (10, 768)
print(type(embeddings[0]))

embd_dim = embeddings.shape[1] # get embedding dimension (768)

# Flat (brute-force) FAISS index of dimension `embd_dim` that ranks neighbours by L2 distance.
index = faiss.IndexFlatL2(embd_dim)

# In-place normalization of all embeddings: every vector ends up with magnitude 1,
# so only the angle between vectors matters, not their length. On unit vectors the
# L2 ranking is therefore equivalent to a cosine-similarity ranking.
faiss.normalize_L2(embeddings)

index.add(embeddings) # normalized embeddings added into index/VectorDB

(10, 768)
<class 'numpy.ndarray'>


### Retrieval

In [17]:
# Embed the user query with the same model that produced the stored embeddings.
with torch.no_grad():
    search_vector = embedding_model.encode(USER_QUERY)
print(search_vector.shape, type(search_vector))

# FAISS expects a 2-D batch of queries, so copy the single vector into a (1, dim) array.
# A copy is used because the normalization below works in place.
new_vector = search_vector.reshape(1, -1).copy()
print(new_vector.shape)
faiss.normalize_L2(new_vector)

# Fetch the single nearest neighbour by L2 distance.
distances, indices = index.search(new_vector, k=1)
first_query_distances = distances[0]
first_query_neighbours = indices[0]
results = pd.DataFrame({"distances": first_query_distances, "ann": first_query_neighbours})
results

(768,) <class 'numpy.ndarray'>
(1, 768)


Unnamed: 0,distances,ann
0,1.404838,8


In [18]:
# Attach the original text/category to each hit: the "ann" column holds the
# row position returned by the index, which lines up with df's index.
df_merged = results.merge(df, left_on="ann", right_index=True)
df_merged.head()

Unnamed: 0,distances,ann,text,category
0,1.404838,8,"In todays football game, Barcelona beat Real M...",sports


### Augmentation

In [19]:
# Prompt skeleton with two slots: the user's question and the retrieved context.
prompt_template = """
Give output to user question based on relvant context.

User Question: {USER_QUERY}
Context:
{Context}

Answer:
""".strip()

# Join every retrieved text into a single space-separated context string,
# then fill both slots of the template.
retrieved_texts = df_merged["text"].tolist()
joined_context = " ".join(retrieved_texts)
prompt = prompt_template.format(USER_QUERY=USER_QUERY, Context=joined_context)
prompt

"Give output to user question based on relvant context.\n\nUser Question: What was the score in today's football game\nContext:\nIn todays football game, Barcelona beat Real Madrid 5-2\n\nAnswer:"

### Generation

In [20]:
# Run the augmented prompt through the generation pipeline and show the text
# of the first (only) returned sequence.
generation_result = generation_pipe(prompt)
generation_result[0]["generated_text"]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


"Give output to user question based on relvant context.\n\nUser Question: What was the score in today's football game\nContext:\nIn todays football game, Barcelona beat Real Madrid 5-2\n\nAnswer: The score in today's football game was Barcelona 5, Real Madrid 2. \n\nOutput: The score in today's football game was Barcelona 5, Real Madrid 2."

## LangChain Framework

### DECLARING GLOBAL VARIABLES + OBJECTS

In [23]:
import os
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from transformers import pipeline
import torch

# os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_..."

# Small in-memory corpus: each row is [text, category].
data = [
    ["What is the weather like today?", "general"],
    ["Can you provide the latest stock market updates?", "finance"],
    ["Recommend a good Italian restaurant nearby", "food"],
    ["How do I reset my password?", "tech support"],
    ["Tell me a joke", "entertainment"],
    ["What are the symptoms of a flu?", "health"],
    ["Book a flight to New York", "travel"],
    ["How to make a chocolate cake?", "cooking"],
    ["In todays football game, Barcelona beat Real Madrid 5-2", "sports"],
    ["Im feeling happy today", "personal emotion"],
]
df = pd.DataFrame(data=data, columns=["text", "category"])

# LangChain wrapper around the sentence-transformers embedding model.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-mpnet-base-v2",
)

# model_id = "Qwen/Qwen2.5-1.5B-Instruct"
# generation_pipe = pipeline(
#     "text-generation",
#     model=model_id,
#     torch_dtype=torch.float32,
#     max_new_tokens=100,
#     do_sample=False,
#     temperature=0.0,
#     device = 0
#     # num_return_sequences=3,
# )

### VectorDB creation

In [24]:
# Create an in-memory vector store backed by the embedding model, then add
# every entry of the "text" column; the call's return value is the cell output.
vector_store = InMemoryVectorStore(embedding_model)
document_texts = df["text"]
vector_store.add_texts(document_texts)

['33b76b88-e062-42ef-a0a4-a32e3b87f1f2',
 'a2d1eee8-5c48-4c9a-bc21-987dd964481c',
 'bc29a990-f692-4028-9b6f-0ad1f7aa3d76',
 'a3353947-96e4-47f0-8206-37c89787cab5',
 '1f13c892-1ee0-4bc6-8357-022cb86ed0d6',
 '06634e59-a839-4127-b41b-1a1fe126666f',
 'f9d6d822-f0f4-4848-9cd8-716a5960c63a',
 '3c945efd-e02d-4528-aa73-f544e93dd9ef',
 'b88f404b-e5da-4d0f-a3a2-ff8b043173ae',
 'cf276a48-85ee-4fcc-b1d1-f4e45e5fcfad']

### Retrieval

In [None]:
# Question to answer, and the 3 stored documents most similar to it.
query = "What's the score in the latest Barcelona game?"
retrieved_docs = vector_store.similarity_search(query=query, k=3)
print(retrieved_docs)

[Document(id='b88f404b-e5da-4d0f-a3a2-ff8b043173ae', metadata={}, page_content='In todays football game, Barcelona beat Real Madrid 5-2'), Document(id='a2d1eee8-5c48-4c9a-bc21-987dd964481c', metadata={}, page_content='Can you provide the latest stock market updates?'), Document(id='33b76b88-e062-42ef-a0a4-a32e3b87f1f2', metadata={}, page_content='What is the weather like today?')]


### Augmentation

In [26]:
# Prompt skeleton with two slots: the user's question and the retrieved context.
prompt_template = """
Give output to user question based on relvant context.

User Question: {USER_QUERY}
Context:
{Context}

Answer:
""".strip()

# One retrieved document per line, in the order they were returned.
context = "\n".join(doc.page_content for doc in retrieved_docs)
prompt = prompt_template.format(Context=context, USER_QUERY=query)

### Generation

In [27]:
# `generation_pipe` is the pipeline built in the FAISS section's setup cell
# (its re-declaration in this section is commented out).
generation_output = generation_pipe(prompt)
generation_output

The following generation flags are not valid and may be ignored: ['temperature', 'top_p', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


[{'generated_text': "Give output to user question based on relvant context.\n\nUser Question: What's the score in the latest Barcelona game?\nContext:\nIn todays football game, Barcelona beat Real Madrid 5-2\nCan you provide the latest stock market updates?\nWhat is the weather like today?\n\nAnswer: The score of the latest Barcelona game was 5-2 against Real Madrid. For the most up-to-date stock market updates, please visit our financial news section. And for current weather conditions, we recommend checking a reliable weather website or app. \n\nPlease let me know if there are any other questions I can assist with! üè´‚öΩÔ∏èüå§Ô∏è\n\nNote: This response provides the requested information while maintaining a professional tone and avoiding direct repetition from the original context. It also includes additional relevant"}]

# LangChain Framework vs FAISS Library

**LangChain** and **FAISS** are two commonly used tools in AI applications. The table below highlights their strengths and weaknesses.

| Tool        | Strengths | Weaknesses |
|------------|-----------|------------|
| **LangChain** | - Enables rapid development of LLM-based applications such as chatbots, RAG systems, and AI agents. <br> - Provides high-level abstractions, reducing the need for deep AI or programming knowledge. <br> - Integrates easily with external APIs and vector databases (like FAISS). | - Internal workings are abstracted, making it harder to fully understand or customize low-level behavior. <br> - Can introduce overhead compared to a lean, custom implementation. |
| **FAISS** | - Highly efficient and scalable library for vector similarity search. <br> - Flexible low-level control for optimized performance. | - Purely a vector search engine; does not handle LLMs, prompts, or application workflows. <br> - Requires additional effort to integrate embeddings and LLMs for complete AI applications. |

---

**Summary:**  
- **FAISS** is the engine for vector search and similarity tasks.  
- **LangChain** is a higher-level framework for building LLM-powered applications, which can leverage FAISS (or other vector stores) for retrieval.  


### Other VectorDB alternatives
1) ChromaDB
2) Qdrant DB
3) Pinecone
4) Weaviate

# Retrieval For Images

![Alt](diagrams/RAG%20-%20Retrieval%20For%20Images.jpg)

### Practice
1) Use FAISS library + CLIP's embedding model for vision
2) Use the cat/dog images in the "images" directory (paths already defined below)
3) Create an image store and VectorDB, and store them in the "images" directory
4) Use the query image (path defined below)
5) Perform similarity search and retrieve top 2 images

In [1]:
# Image store: integer id -> image file path. The ids follow the order of the list below.
img_paths = dict(
    enumerate(
        [
            "images/german_sheperd.jpg",
            "images/Golden_Retriever.jpg",
            "images/siberian_husky.jpg",
            "images/persian_cat.jpg",
            "images/scottish_fold_cat.jpg",
            "images/sphynx_cat.jpg",
        ]
    )
)

# Image used as the query for the similarity search.
QUERY_IMG = "images/query_german_sheperd.jpg"

In [None]:
# SAMPLE CODE TO GENERATE IMAGE EMBEDDINGS USING CLIP'S IMAGE ENCODER

from transformers import CLIPProcessor, CLIPModel
from PIL import Image

# Both the model weights and the matching pre-processor come from the same checkpoint.
clip_checkpoint = "openai/clip-vit-base-patch32"
processor = CLIPProcessor.from_pretrained(clip_checkpoint)

# .eval() switches the module to inference mode (e.g. dropout is disabled) and returns the model itself.
model = CLIPModel.from_pretrained(clip_checkpoint).eval()

def get_img_embeddings_using_clip_img_encoder(img_path):
    """Embed a single image file with CLIP's image encoder.

    Args:
        img_path: Path of the image file to embed.

    Returns:
        The image features as a float32 NumPy array, with one row for the
        single input image.
    """
    # Open the file in a context manager so its handle is released as soon as
    # the pixels have been read, and force RGB so grayscale/RGBA/palette
    # images reach the processor as 3-channel input.
    with Image.open(img_path) as img:
        inputs = processor(images=img.convert("RGB"), return_tensors="pt")
    image_tensor = inputs['pixel_values']  # expected shape for this checkpoint: (1, 3, 224, 224)

    # Encode without tracking gradients (inference only).
    with torch.no_grad():
        embeddings = model.get_image_features(pixel_values=image_tensor)  # Hugging Face
        embeddings = embeddings.cpu().numpy().astype('float32')  # convert to NumPy
        return embeddings

## Cross-modal/Multimodal Retrieval

![Alt text](diagrams/RAG%20-%20Cross-Model%20RetrievalMultimodal%20Retrieval.jpg)

## Text-to-Image Retrieval

![Alt text](diagrams/RAG%20-%20Text-to-Image%20Retrieval.jpg)

### Practice
1) Use the FAISS library & CLIP's vision encoder + text encoder
2) Load the existing image store + VectorDB created previously
3) Use the sample query text given below
4) Perform similarity search and retrieve the top 2 images

In [29]:
# Image store: integer id -> image file path. The ids follow the order of the list below.
img_paths = dict(
    enumerate(
        [
            "images/german_sheperd.jpg",
            "images/Golden_Retriever.jpg",
            "images/siberian_husky.jpg",
            "images/persian_cat.jpg",
            "images/scottish_fold_cat.jpg",
            "images/sphynx_cat.jpg",
        ]
    )
)

# Text used as the query for text-to-image retrieval.
QUERY_TXT = "A sphynx cat"

In [30]:
# SAMPLE CODE TO GENERATE TEXT EMBEDDINGS USING CLIP'S TEXT ENCODER

def get_text_embeddings_using_clip_text_encoder(text):
    """Embed text with CLIP's text encoder.

    Args:
        text: The text to embed, passed to the processor as its `text`
            argument (e.g. a query string).

    Returns:
        The text features as a float32 NumPy array, one row per input text.
    """
    # padding=True pads a batch to a common length; truncation=True clips
    # over-long inputs to the tokenizer's maximum length, since the CLIP text
    # encoder has a fixed context size (77 tokens per the CLIP documentation)
    # and fails on longer sequences.
    inputs = processor(text=text, return_tensors="pt", padding=True, truncation=True)

    # Encode without tracking gradients (inference only).
    with torch.no_grad():
        embeddings = model.get_text_features(**inputs)
        embeddings = embeddings.cpu().numpy().astype('float32')  # convert to NumPy
        return embeddings

## Image-to-Text Retrieval

![Alt text](diagrams/RAG%20-%20Image-to-text%20Retrieval.jpg)

### Practice
1) Use the FAISS library & CLIP's vision encoder + text encoder.
2) Load sentences from sentences.txt and make a document store + VectorDB out of them.
3) Also store the document store + VectorDB in the texts folder.
4) Use the query image (path defined below).
5) Perform similarity search and retrieve the top 2 sentences.

In [None]:
# Query image for the image-to-text retrieval exercise (same path as the query
# image used in the earlier image-retrieval exercise).
QUERY_IMG = "images/query_german_sheperd.jpg"

In [None]:
# Write code

# Create Streamlit App
### Create an HR Chatbot that uses RAG in backend to answer employee queries

### Instructions:
1) Use ChatGPT to generate sample data and put it in a file
2) Create a FAISS index using this data
3) Complete the code given below (note that the Streamlit UI code is complete; you just need to implement the retrieve, augment, and generate functions)
4) Copy the code into a new file: app.py
5) Run using the terminal command: streamlit run app.py

In [None]:
# Prompt skeleton for the HR chatbot with two slots: {user_query} and {context}.
# The instruction sentence now ends with a period (it previously ended in a stray "/").
prompt_template = """
You are acting as an HR chatbot for company 'Dense Fusion'. Answer user query using given context.
User Query: {user_query}
Context: {context}
""".strip()

def retrieve(user_query):
    """Return the context retrieved for `user_query`.

    Exercise placeholder: currently ignores its argument and returns an empty
    string. TODO: implement the retrieval step (the instructions call for a
    FAISS index built from the sample data).
    """
    return ""

def augment(user_query, context):
    """Build the prompt from `user_query` and the retrieved `context`.

    Exercise placeholder: currently ignores its arguments and returns an empty
    string. TODO: implement the augmentation step.
    """
    return ""

def generate(prompt):
    """Produce the answer for `prompt`.

    Exercise placeholder: currently ignores its argument and returns an empty
    string. TODO: implement the generation step.
    """
    return ""


def RAG(user_query):
    """Answer `user_query` by chaining the three pipeline steps.

    The query is passed to `retrieve`, the result is combined with the query
    by `augment`, and the resulting prompt is handed to `generate`, whose
    return value is returned unchanged.
    """
    retrieved_context = retrieve(user_query)
    augmented_prompt = augment(user_query, retrieved_context)
    return generate(augmented_prompt)


# STREAMLIT UI CODE
import streamlit as st

# "\U0001F4DA" is the books emoji. It is written as an escape sequence so the
# icon survives files being saved or copied with the wrong text encoding
# (the previous literal had turned into mojibake).
st.set_page_config(page_title="RAG Chat App", page_icon="\U0001F4DA")

st.title("\U0001F4DA RAG-powered Q&A")
st.write("Ask a question and get an answer using Retrieval-Augmented Generation.")

# User input
user_input = st.text_input("Enter your question:")

# Submit button
if st.button("Ask"):
    # Reject empty / whitespace-only questions before running the pipeline.
    if not user_input.strip():
        st.warning("Please enter a question.")
    else:
        # Show a spinner while the RAG pipeline produces the answer.
        with st.spinner("Generating answer..."):
            response = RAG(user_input)

        st.subheader("Answer")
        st.write(response)