# RAG with Phi-2 & ChromaDB

In [8]:
!pip install chromadb langchain sentence-transformers transformers torch langchain-community fitz frontend faiss-cpu rank_bm25



In [9]:
!pip install pymupdf==1.22.5



## Extract Only Text (Ignore Tables & Images)

In [10]:
import fitz  # PyMuPDF

pdf_path = "/kaggle/input/wagonr-usermanual/Wagon-R-Petrol-Revised.pdf"

doc = fitz.open(pdf_path)

text_content = []
for page in doc:
    text = page.get_text("text")  # Extract text only
    text_content.append(text)

full_text = "\n".join(text_content)
print(full_text[:1000])  # Preview first 1000 characters



FOREWORD
This manual is an essential part of your
vehicle and should remain with the vehicle
when resold or otherwise transferred to a
new owner or operator. Please read this
manual carefully before operating your
new MARUTI SUZUKI and review the
manual from time to time. It contains
important information on safety, operation
and maintenance. You are invited to avail
the three Free Inspection Services as
described in the manual. Three free
inspection coupons are attached to this
manual. Please show this manual to your
dealer workshop while you take your
MARUTI SUZUKI for any Service.
To prolong the life of your vehicle and
reduce maintenance cost, the periodic
maintenance must be carried out accord-
ing 
to 
“PERIODIC 
MAINTENANCE
SCHEDULE” described in “INSPECTION
AND MAINTENANCE” section of this man-
ual. It is essential for preventing trouble
and accidents to ensure your satisfaction
and safety.
Daily inspection and care as per “DAILY
INSPECTION CHECKLIST” described in
the “OPERAT

## Load & Split the User Guide

Instead of fixed-length chunks (500 characters), use semantic-aware splitting to avoid breaking important sections.

In [11]:
from langchain.text_splitter import TokenTextSplitter

# Use token-based splitting (better for LLMs like Phi-2)
token_splitter = TokenTextSplitter(chunk_size=256, chunk_overlap=50)
chunks = token_splitter.split_text(full_text)

print(f"Total Chunks: {len(chunks)}")


Total Chunks: 541


## Initialize BM25 (Keyword Search)

BM25 is a traditional keyword-based ranking algorithm that prioritizes exact matches.

In [12]:
from rank_bm25 import BM25Okapi
import nltk

nltk.download("punkt")
from nltk.tokenize import word_tokenize

# Tokenize all chunks
tokenized_chunks = [word_tokenize(chunk.lower()) for chunk in chunks]

# Initialize BM25 model
bm25 = BM25Okapi(tokenized_chunks)

def bm25_search(query, top_n=3):
    query_tokens = word_tokenize(query.lower())
    scores = bm25.get_scores(query_tokens)
    top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_n]
    return [chunks[i] for i in top_indices]

# Test BM25
query = "How to check engine oil level in WagonR?"
bm25_results = bm25_search(query)
print("BM25 Results:", bm25_results)


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
BM25 Results: ['.\nIf the oil level indication is near the lower\nlimit, add enough oil to raise the level to\nthe upper limit.\nRefilling\n69RM06007\nRemove the oil filler cap and pour oil\nslowly through the filler hole to bring the oil\nlevel to the upper limit on the dipstick. Be\ncareful not to overfill the oil. Too much oil is\nalmost as bad as too little oil. After refilling,\nstart the engine and allow it to idle for\nabout a minute. Stop the engine, wait\nabout 5 minutes and check the oil level\nagain.\nChanging engine oil and filter\nDrain the engine oil while the engine is still\nwarm.\n68KH067a\n1) Remove the oil filler cap.\n2) Place a drain pan under the drain plug.\n3) Using a wrench, remove the drain plug\nand drain the engine oil.\nNOTICE\nFailure to check the oil level regularly\ncould lead to serious engine trouble\ndue to insufficient oil.\nOpen\nClose

## Initialize ChromaDB (Vector Search)

In [13]:
import chromadb
from langchain.embeddings import HuggingFaceEmbeddings

# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path="chroma_db")

# Load sentence transformer for embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create a collection in ChromaDB
collection = chroma_client.get_or_create_collection(name="user_manual")

# Add chunks to ChromaDB
for i, chunk in enumerate(chunks):
    collection.add(
        ids=[str(i)],
        documents=[chunk],
        embeddings=[embedding_model.embed_query(chunk)]
    )

print("Vector search database created!")


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector search database created!


##  Merge & Re-rank Results

In [14]:
from collections import Counter

def hybrid_search(query, bm25_weight=0.5, vector_weight=0.5, top_n=3):
    # Get BM25 results
    bm25_results = bm25_search(query)

    # Get ChromaDB (Vector Search) results
    vector_results = collection.query(
        query_embeddings=[embedding_model.embed_query(query)],  
        n_results=3
    )["documents"]

    # Flatten vector results (fixes the TypeError)
    vector_results = [item for sublist in vector_results for item in sublist]

    # Combine results
    combined_results = bm25_results + vector_results
    ranked_results = Counter(combined_results)

    # Sort by weighted frequency
    final_results = sorted(
        ranked_results.keys(),
        key=lambda doc: (ranked_results[doc] * bm25_weight) + (vector_results.count(doc) * vector_weight),
        reverse=True
    )

    return final_results[:top_n]

# Test Fixed Hybrid Search
query = "How do I change engine oil in WagonR?"
results = hybrid_search(query)
print("Final Hybrid Search Results:", results)




## Retrieve Relevant Text for a Query

In [7]:
query = "How do I check the engine oil level in WagonR?"

# Retrieve top 3 matching chunks
results = collection.query(
    query_embeddings=[embedding_model.embed_query(query)],
    n_results=3
)

# Print retrieved chunks
for i, doc in enumerate(results["documents"][0]):
    print(f"🔹 Chunk {i+1}: {doc}\n")


🔹 Chunk 1: 
14
32
50
68
86
104
(1)
0W-16
0W-20
5W-30
oC
oF
EXAMPLE
EXAMPLE
Upper
Lower
EXAMPLE

6-8
INSPECTION AND MAINTENANCE
It is important to keep the engine oil at the
correct level for proper lubrication of your
vehicle’s engine. Check the oil level with
the vehicle on a level surface. The oil level
indication may be inaccurate if the vehicle
is on a slope. The oil level should be
checked either before starting the engine
or at least 5 minutes after stopping the
engine. 
Pull out the oil dipstick, wipe oil off with a
clean cloth, insert the dipstick all the way
into the engine, and then remove it again.
The oil on the stick should be between the
upper and lower limits shown on the stick.
If the oil level indication is near the lower
limit, add enough oil to raise the level to
the upper limit.
Refilling
69RM06007
Remove the oil filler cap and pour oil
slowly through the filler

🔹 Chunk 2: ” for manual transmission
or Auto Gear Shift oil.
Manual transmission oil /
Auto Gear Shift o

## Generate an Answer using Phi-2

In [18]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load Phi-2 Model & Tokenizer
model_name = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Run Hybrid Search
query = "How do I change the engine oil in WagonR?"
results = hybrid_search(query)  # Returns ranked document chunks

# 📌 Debug: Print Retrieved Context
print("✅ Retrieved Context Chunks:\n", results)

# Chunking the Context
max_chunk_size = 400  # Set a reasonable token limit
context_chunks = []
current_chunk = ""

for doc in results:
    if len(current_chunk) + len(doc) < max_chunk_size:
        current_chunk += "\n" + doc
    else:
        context_chunks.append(current_chunk)
        current_chunk = doc

if current_chunk:
    context_chunks.append(current_chunk)  # Add last chunk

# Generate Responses for Each Chunk
responses = []
for i, chunk in enumerate(context_chunks):
    prompt = f"""You are an expert assistant providing clear and concise instructions. 
Answer the following query in a well-structured and easy-to-follow format.
Query: {query}

Context:
{chunk}

Answer:
"""

    # Generate response
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    output = model.generate(**inputs, max_new_tokens=150, pad_token_id=tokenizer.eos_token_id)

    # Decode and store response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    responses.append(response)

# Combine Responses into Final Answer
final_response = "\n".join(responses)

print("\n🚀 AI Response:\n", final_response)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Retrieved Context Chunks:

🚀 AI Response:
 You are an expert assistant providing clear and concise instructions. 
Answer the following query in a well-structured and easy-to-follow format.
Query: How do I change the engine oil in WagonR?

Context:


Answer:

1. Gather the necessary tools and materials:
- A new engine oil filter
- Engine oil
- A wrench or socket set
- A drain pan
- A funnel

2. Park the WagonR on a level surface and turn off the engine.

3. Locate the oil drain plug underneath the engine. It is usually a bolt or nut that can be removed with a wrench or socket set.

4. Place the drain pan underneath the oil drain plug to catch the old oil.

5. Use the wrench or socket set to remove the oil drain plug and allow the old oil to drain completely.

6. Once the old oil has drained, replace the oil drain plug and
You are an expert assistant providing clear and concise instructions. 
Answer the following query in a well-structured and easy-to-follow format.
Query: How do I cha

In [20]:
!zip -r output.zip /kaggle/working/

  adding: kaggle/working/ (stored 0%)
  adding: kaggle/working/.virtual_documents/ (stored 0%)
  adding: kaggle/working/chroma_db/ (stored 0%)
  adding: kaggle/working/chroma_db/chroma.sqlite3 (deflated 62%)
  adding: kaggle/working/chroma_db/89b20535-419b-4bf7-8008-e4dec88a2a64/ (stored 0%)
  adding: kaggle/working/chroma_db/89b20535-419b-4bf7-8008-e4dec88a2a64/link_lists.bin (stored 0%)
  adding: kaggle/working/chroma_db/89b20535-419b-4bf7-8008-e4dec88a2a64/data_level0.bin (deflated 100%)
  adding: kaggle/working/chroma_db/89b20535-419b-4bf7-8008-e4dec88a2a64/header.bin (deflated 61%)
  adding: kaggle/working/chroma_db/89b20535-419b-4bf7-8008-e4dec88a2a64/length.bin (deflated 99%)


In [21]:
from IPython.display import FileLink
FileLink(r'output.zip')