# RAG pipeline

## Installs and checks

In [1]:
import torch
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
import chromadb
import shutil
import os
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import markdown2
from bs4 import BeautifulSoup

In [2]:
# check PyTorch and CUDA
print("Torch version: ", torch.__version__)
print("CUDA available: ", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU device name: ", torch.cuda.get_device_name(0))

Torch version:  2.9.1+cu130
CUDA available:  True
GPU device name:  NVIDIA GeForce RTX 5070 Ti Laptop GPU


In [3]:
print(os.getcwd())

C:\Users\brews\Projects\Code\GitHub\AI-ML-Assignment5-RAG


In [5]:
# point to a fresh local folder:
# (reset persistent data)
DB_DIR = "./chroma_d83"

if os.path.exists(DB_DIR):
    shutil.rmtree(DB_DIR)
    

In [6]:
# load model and move to GPU if available
model_name = "sentence-transformers/all-mpnet-base-v2"
model = SentenceTransformer(model_name)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(DEVICE)
# print("Device: ", DEVICE)
# print("CUDA available: ", torch.cuda.is_available())
# print("Device: ", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

In [7]:
# check dimension
def embedding_dim_for_model(m):
    v = m.encode(["hello"], convert_to_numpy=True)
    return v.shape[-1]

print("Embedding dim: ", embedding_dim_for_model(model))

Embedding dim:  768


In [8]:
# forward pass sanity test
vec = model.encode(["This is a test"], convert_to_tensor=True, device=DEVICE)
print(vec.shape)

torch.Size([1, 768])


## Initialize Chroma DB

In [9]:
# initialize Chroma v2 PersistentClient
client = chromadb.PersistentClient(path=DB_DIR)
# create/get collection (new API)
collection = client.get_or_create_collection("embed_test")

In [10]:
def rag(question):
    q_emb = model.encode([question], convert_to_numpy=True, device=DEVICE)

    results = collection.query(
        query_embeddings=q_emb,
        n_results=3
    )
    context = "\n".join(results["documents"][0])
    answer = f"Context:\n{context}\n\nAnswer: (model answer here)"
    return answer

## Load and Chunk Data

In [11]:
md_path = "rag_data4.md" # rag_data.md was too redundant

def md_to_plaintext(md):
    # convert markdown to HTML
    html = markdown2.markdown(md)
    # strip HTML tags -> plain text
    soup = BeautifulSoup(html, "html.parser")
    return soup.get_text()
    
with open(md_path, "r", encoding="utf-8") as f: # f is the file object
    markdown_text = f.read() # read the file at md_path and store in this variable


print(markdown_text[:500])
print("Total characters:", len(markdown_text))
print("Total lines:", len(markdown_text.splitlines()))

# Introduction to the Twelve-Year Zodiac Cycle

The twelve-year zodiac cycle is a traditional system used in many East Asian cultures to describe personality traits, compatibility, and recurring patterns in human behavior. Each year is assigned an animal sign, and each animal is connected to specific symbolic qualities, natural elements, and mythological associations. Although the zodiac is ancient, it remains culturally influential today, especially in areas such as holiday customs, family plan
Total characters: 5519
Total lines: 79


In [12]:
plain_text = md_to_plaintext(markdown_text) # convert markdown to plain text
print(plain_text[:500])

Introduction to the Twelve-Year Zodiac Cycle
The twelve-year zodiac cycle is a traditional system used in many East Asian cultures to describe personality traits, compatibility, and recurring patterns in human behavior. Each year is assigned an animal sign, and each animal is connected to specific symbolic qualities, natural elements, and mythological associations. Although the zodiac is ancient, it remains culturally influential today, especially in areas such as holiday customs, family plannin


In [13]:
import tiktoken
# choose tokenzizer: mpnet can use "cl100k_base"
enc = tiktoken.get_encoding("cl100k_base")
# max_tokens = 200 #approximate chunk size
# chunks = []
# for para in plain_text.split("\n\n"): # this chunking logic is splitting mid-sentence
#     tokens = enc.encode(para)
#     for i in range(0, len(tokens), max_tokens):
#         chunk_tokens = tokens[i:i+max_tokens]
#         chunks.append(enc.decode(chunk_tokens))

# print(f"Tokenized into {len(chunks)} chunks") # 7 chunks

In [17]:
import nltk
# nltk.download('punkt_tab')
sentences = nltk.sent_tokenize(plain_text)
chunks = []
current = ""
for sent in sentences:
    if len(enc.encode(current + sent)) < 200:
        current += " " + sent
    else:
        chunks.append(current.strip())
        current = sent

if current:
    chunks.append(current.strip())

## Embed and Store Chunks

In [18]:
# embed the chunks
embeds = model.encode(chunks, convert_to_numpy=True, device=DEVICE)

In [19]:
# add chunks and embeddings to Chroma
collection.add(
    documents=chunks,
    embeddings=embeds.tolist(),
    ids=[f"chunk_{i}" for i in range(len(chunks))],
)

print("Stored chunks in Chroma: ", collection.count())

Stored chunks in Chroma:  6


## Retrieval

In [20]:
def retrieve(query, k=2):
    # embed the query
    q_emb = model.encode([query], convert_to_numpy=True, device=DEVICE)

    # query Chroma using embeddings
    res = collection.query(query_embeddings=q_emb.tolist(), n_results=k)

    # extract the retrieved documents
    retrieved_chunks = res["documents"][0] if "documents" in res else []

    return retrieved_chunks

In [21]:
retrieve("Tell me about Virgo's personality traits.")

['In contemporary settings, the zodiac often appears in:\n\nNew Year celebrations, where the qualities of the upcoming sign set the tone for the year. Compatibility readings, which compare personality traits across signs. Folklore and literature, where animals represent certain virtues or challenges. Daily horoscopes, which adapt ancient symbolism into modern guidance. Understanding the zodiac provides a unique lens into cultural traditions and symbolic interpretations of human behavior.',
 'These natural groupings help explain which relationships tend to be harmonious and which may face challenges. First Trine — Rat, Dragon, Monkey\nThese signs are associated with initiative, intelligence, and strong motivation. Together, they create highly dynamic partnerships built on ambition and mutual inspiration. Second Trine — Ox, Snake, Rooster\nThis trine emphasizes discipline, logic, and precision. The three signs generally communicate well and value consistency in relationships. Third Trine

In [22]:
def build_prompt(query, retrieved_chunks):
    context = "\n\n---\n\n".join(retrieved_chunks)
    # prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}\nAnswer:"
    # return prompt
    return f"""
    You are an expert assistant.

    Synthesize an accurate, coherent answer using ONLY the information below.
    Your job is to COMBINE, SUMMARIZE, and EXPLAIN the relevant points.

    If the documents disagree, explain the conflict.
    If the answer is not present in the documents, say you don’t know.

    CONTEXT DOCUMENTS:
    {context}

    QUESTION:
    {query}

    FINAL ANSWER (well-structured, factual, synthesized):
    """

In [23]:
build_prompt("Tell me about Virgo's personality traits.", retrieve("Virgo traits"))

"\n    You are an expert assistant.\n\n    Synthesize an accurate, coherent answer using ONLY the information below.\n    Your job is to COMBINE, SUMMARIZE, and EXPLAIN the relevant points.\n\n    If the documents disagree, explain the conflict.\n    If the answer is not present in the documents, say you don’t know.\n\n    CONTEXT DOCUMENTS:\n    These natural groupings help explain which relationships tend to be harmonious and which may face challenges. First Trine — Rat, Dragon, Monkey\nThese signs are associated with initiative, intelligence, and strong motivation. Together, they create highly dynamic partnerships built on ambition and mutual inspiration. Second Trine — Ox, Snake, Rooster\nThis trine emphasizes discipline, logic, and precision. The three signs generally communicate well and value consistency in relationships. Third Trine — Tiger, Horse, Dog\nThis group shares ideals of courage, independence, and moral conviction. Their compatibility is rooted in their shared sense o

In [24]:
# llm_model_name = "google/flan-t5-xxl" # if GPU VRAM > 20GB

In [25]:
llm_model_name = "google/flan-t5-xl"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm_model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name).to(DEVICE)

def generate(prompt):
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
    outputs = llm_model.generate(**inputs, max_length=512)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [26]:
k = min(8, collection.count())

def rag(query, k):
    retrieved = retrieve(query, k) # use Chroma retrieval
    prompt = build_prompt(query, retrieved) # combine query + context
    answer = generate(prompt)     # call LLM
    return answer

In [28]:
result = rag("In 20 words or less, what is the zodiac?", 3)
print(result)

These signs are associated with initiative, intelligence, and strong motivation. Together, they create highly dynamic partnerships built on ambition and mutual inspiration.


## Test 3 Cases

In [29]:
# 1 factual question:
# query1 = "Is there such a thing as the Chinese Zodiac?"
query1 = "Is there fireflies in the Zodiac?"
result1 = rag(query1, k=2)
print(result1)

No, there are no fireflies in the Zodiac.


In [30]:
# 2 general knowledge question
query2 = "How many albums has Beyonce made?"
result2 = rag(query2, k=2)
print(result2)

From family planning to holiday customs, the zodiac is used to describe personality traits, compatibility, and recurring patterns in human behavior.


In [32]:
# 3 synthesis question
# query3 = "List the houses of astrology"
query3 = "Describe Taurus's personality and approach to health."
# query3 = "Tell me about Monkey."
# query3 = "What do you know about the sign Dragon?"
# query3 = "Can you make me a list of all the animals in the zodiac?"
# query3 = "When it comes to the Zodiac, what's lucky?"
# query3 = "What are the lucky numbers for Dog? I only want to know about Dog."
result3 = rag(query3, k=5)
print(result3)

Rat is linked to strategy.
