## 1. Setup and Imports
Adding all libraries and files


In [59]:
import os
import pandas as pd
import numpy as np
import chromadb
import umap.umap_ as umap
import matplotlib.pyplot as plt

from dotenv import load_dotenv
from openai import OpenAI
from pypdf import PdfReader
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    SentenceTransformersTokenTextSplitter,
)
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
from sentence_transformers import CrossEncoder
import ollama
import hf_xet


In [60]:
def word_wrap(s, width=80):
    """Wraps text to a specified width."""
    lines = []
    for line in s.splitlines():
        lines.extend(textwrap.wrap(line, width))
    return '\n'.join(lines)

def project_embeddings(embeddings, umap_transform):
    """Projects embeddings using UMAP."""
    umap_embeddings = np.empty((len(embeddings), 2))
    for i, embedding in enumerate(embeddings):
        umap_embeddings[i] = umap_transform.transform([embedding])[0]
    return umap_embeddings

import textwrap

## 2. Adding Env variables 

In [61]:
# Load environment variables
load_dotenv()
#openai_key = os.getenv("OPENAI_API_KEY")
#if not openai_key:
#    raise ValueError("OPENAI_API_KEY not found in environment variables.")
#client = OpenAI(api_key=openai_key)

False

In [62]:
#Initializing Ollama 
client = ollama.Client()
ollama_model = "gemma3:1b"

### Processing PDF file

In [63]:
try:
    reader = PdfReader("Credit_Card_Policy_Doc.pdf")
    pdf_texts = [p.extract_text().strip() for p in reader.pages if p.extract_text()]
    raw_text = "\n\n".join(pdf_texts)
    if not raw_text:
        raise ValueError("No text extracted from PDF.")
    print(f"Successfully loaded {len(pdf_texts)} pages from PDF.")
except FileNotFoundError:
    print("Error: PDF file 'Credit_Card_Policy_Doc.pdf' not found.")
    raw_text = "" # Set to empty or default if file not found
except Exception as e:
    print(f"Error reading PDF: {e}")
    raw_text = "" # Set to empty or default on other errors

Successfully loaded 4 pages from PDF.


## 3. Splitting text into chunks + embedding

In [64]:
# Split text into chunks
character_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " ", ""], chunk_size=1000, chunk_overlap=0
)
character_split_texts = character_splitter.split_text(raw_text)

token_splitter = SentenceTransformersTokenTextSplitter(
    chunk_overlap=0, tokens_per_chunk=256
)
token_split_texts = []
for text in character_split_texts:
    token_split_texts.extend(token_splitter.split_text(text))

In [65]:
print(f"Total text chunks: {len(token_split_texts)}")

Total text chunks: 15


In [66]:
# Setup ChromaDB and Embedding Function
embedding_function = SentenceTransformerEmbeddingFunction()
chroma_client = chromadb.Client() 
collection_name = "qa-collection"
try:
    chroma_client.delete_collection(name=collection_name)
    print(f"Existing collection '{collection_name}' deleted.")
except Exception:
    pass 


Existing collection 'qa-collection' deleted.


In [67]:
#Adding everything together
chroma_collection = chroma_client.create_collection(
    collection_name, embedding_function=embedding_function
)

In [68]:
# Adding chuunks to ChromaDB
if token_split_texts:
    ids = [str(i) for i in range(len(token_split_texts))]
    chroma_collection.add(ids=ids, documents=token_split_texts)
    print(f"Added {chroma_collection.count()} documents to ChromaDB.")
else:
    print("Skipping ChromaDB population as no text chunks were generated.")

Added 15 documents to ChromaDB.


## 4. Functions for RAG

In [69]:
def retrieve_documents(query_texts, n_results=5, collection=chroma_collection):
    """Retrieves documents from ChromaDB for given queries."""
    if collection.count() == 0:
        print("Warning: ChromaDB collection is empty. Cannot retrieve.")
        return {"ids": [[] for _ in query_texts], "documents": [[] for _ in query_texts], "embeddings": [[] for _ in query_texts]}
    return collection.query(
        query_texts=query_texts, n_results=n_results, include=["documents", "embeddings"]
    )

In [70]:
def generate_llm_answer(query, context, system_prompt, model=ollama_model): # Use ollama_model as default
    """Generates an answer using Ollama based on query and context."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Based on the following context:\n\n{context}\n\nAnswer the query: '{query}'"},
    ]
    try:
        # Use the ollama client's chat method
        response = client.chat(
            model=model,
            messages=messages,
        )
        # Extract content from the Ollama response structure
        return response['message']['content']
    except Exception as e:
        print(f"Error calling Ollama: {e}")
        return "Error generating answer from LLM."


In [71]:
def plot_embeddings(
    original_query,
    projected_dataset_embeddings,
    projected_retrieved_embeddings,
    projected_original_query_embedding,
    projected_augmented_query_embedding=None, # Make augmented optional
    filename="embeddings_plot.png"
    ):
    """Plots projected embeddings."""
    plt.figure()
    # [Plotting code as before]...
    plt.scatter(
        projected_dataset_embeddings[:, 0],
        projected_dataset_embeddings[:, 1],
        s=10, color="gray", label="Dataset Embeddings"
    )
    plt.scatter(
        projected_retrieved_embeddings[:, 0],
        projected_retrieved_embeddings[:, 1],
        s=100, facecolors="none", edgecolors="g", label="Retrieved Embeddings"
    )
    plt.scatter(
        projected_original_query_embedding[:, 0],
        projected_original_query_embedding[:, 1],
        s=150, marker="X", color="r", label="Original Query"
    )
    if projected_augmented_query_embedding is not None:
         plt.scatter(
            projected_augmented_query_embedding[:, 0],
            projected_augmented_query_embedding[:, 1],
            s=150, marker="X", color="orange", label="Augmented Query"
        )

    plt.gca().set_aspect("equal", "datalim")
    plt.title(f"Embeddings for: {word_wrap(original_query)}")
    plt.axis("off")
    plt.legend()
    plt.savefig(filename)
    print(f"Saved embedding plot to {filename}")
    plt.show()

## 5. Implementing RAG

In [72]:
original_query = "What is the card limit for Associate Vice Presidents?"
customer_service_prompt = """
You are a knowledgeable customer service agent. You're helpful and very kind as an agent. 
Customers love talking to you because you answer their questions. 
Your users are inquiring about credit card policy details. 
Provide a clear and concise answer based *only* on the provided context. DO NOT reference any other content apart from the document. 
Provide your answer in an easy to understand format and also cite which page and section you found the information from. 
"""
n_results_retrieval = 5
results_df = pd.DataFrame(columns=["Type", "Query", "Context", "Answer"])

In [73]:
# --- Strategy 1: Original Query (No RAG) ---
print("\n--- Running Strategy: Original Query (No RAG) ---")
answer_no_rag = generate_llm_answer(original_query, "No context provided.", customer_service_prompt)
results_df.loc[len(results_df)] = ["Original Query (No RAG)", original_query, "N/A", answer_no_rag]
print("Answer (No RAG):", word_wrap(answer_no_rag))


--- Running Strategy: Original Query (No RAG) ---
Answer (No RAG): I’m sorry, but the context provided doesn’t contain any information about credit
card limits for Associate Vice Presidents.
Therefore, I cannot answer your question.


In [74]:
# --- Strategy 2: Basic RAG ---
print("\n--- Running Strategy: Basic RAG ---")
retrieved_results_basic = retrieve_documents(query_texts=[original_query], n_results=n_results_retrieval)
if retrieved_results_basic["documents"] and retrieved_results_basic["documents"][0]:
    context_basic = "\n\n".join(retrieved_results_basic["documents"][0])
    answer_basic_rag = generate_llm_answer(original_query, context_basic, customer_service_prompt)
    results_df.loc[len(results_df)] = ["Basic RAG", original_query, context_basic, answer_basic_rag]
    print("Answer (Basic RAG):", word_wrap(answer_basic_rag))
else:
    print("Skipping Basic RAG answer generation as no documents were retrieved.")
    results_df.loc[len(results_df)] = ["Basic RAG", original_query, "No documents retrieved", "N/A"]



--- Running Strategy: Basic RAG ---
Answer (Basic RAG): “The CEO has a limit of up to $10,000.”


In [75]:
# --- Strategy 3: Query Expansion (Hypothetical Answer) ---
print("\n--- Running Strategy: Query Expansion (Hypothetical Answer) ---")
hypothetical_answer_prompt = """You are a helpful expert customer support agent.
Provide an example answer to the given question, that might be found in a credit card policy documentation."""

# Using generate_llm_answer for consistency, though the original used a dedicated function
hypothetical_answer = generate_llm_answer(original_query, "", hypothetical_answer_prompt) # Empty context for generation
joint_query_hyp = f"{original_query} {hypothetical_answer}"
print("Joint Query:", word_wrap(joint_query_hyp))

retrieved_results_hyp = retrieve_documents(query_texts=[joint_query_hyp], n_results=n_results_retrieval)
if retrieved_results_hyp["documents"] and retrieved_results_hyp["documents"][0]:
    context_hyp = "\n\n".join(retrieved_results_hyp["documents"][0])
    answer_hyp_rag = generate_llm_answer(original_query, context_hyp, customer_service_prompt) # Use original query for final answer
    results_df.loc[len(results_df)] = ["Query Expansion (Hypothetical)", joint_query_hyp, context_hyp, answer_hyp_rag]
    print("Answer (Hypothetical Expansion):", word_wrap(answer_hyp_rag))
else:
    print("Skipping Hypothetical Expansion answer generation as no documents were retrieved.")
    results_df.loc[len(results_df)] = ["Query Expansion (Hypothetical)", joint_query_hyp, "No documents retrieved", "N/A"]




--- Running Strategy: Query Expansion (Hypothetical Answer) ---
Joint Query: What is the card limit for Associate Vice Presidents? Okay, here’s a helpful and
professional response to the query “What is the card limit for Associate Vice
Presidents?” keeping in mind a credit card policy context:
“Thank you for your inquiry. The card limit for Associate Vice Presidents is
generally set at $[Amount - e.g., $10,000] based on factors such as credit
history, overall financial responsibility, and card usage patterns.  We want to
ensure responsible card usage and maintain the stability of the account.
However, this amount is subject to review based on individual circumstances and
approval within our policy.  I recommend reviewing the full credit card policy
document available on our website at [Link to Policy Document] for a complete
understanding of these guidelines.”
**Explanation of why this response is good:**
* **Acknowledges the Request:**  It starts by thanking the user, which is
polite

In [76]:
# --- Strategy 4: Query Expansion (Multiple Subqueries) ---
print("\n--- Running Strategy: Query Expansion (Multiple Subqueries) ---")
subquery_gen_prompt = """
You are a knowledgeable customer service agent.
Your users are inquiring about credit card policy details.
For the given question, propose up to five *distinct* and *related* questions to help find the information needed.
Provide concise, single-topic questions. Each question should be complete and directly related to the original inquiry.
List each question on a separate line without numbering.
"""
# Using generate_llm_answer again
subqueries_str = generate_llm_answer(original_query, "", subquery_gen_prompt)
subqueries = [q.strip() for q in subqueries_str.split("\n") if q.strip() and not q.strip().startswith("1.")] # Basic cleaning

print("Generated Subqueries:", subqueries)
all_queries_sub = [original_query] + subqueries
retrieved_results_sub = retrieve_documents(query_texts=all_queries_sub, n_results=n_results_retrieval)

# Deduplicate documents
unique_docs_sub = set()
if retrieved_results_sub["documents"]:
    for doc_list in retrieved_results_sub["documents"]:
        unique_docs_sub.update(doc_list)
context_sub = "\n\n".join(list(unique_docs_sub))

if context_sub:
    answer_sub_rag = generate_llm_answer(original_query, context_sub, customer_service_prompt) # Use original query
    results_df.loc[len(results_df)] = ["Query Expansion (Subqueries)", " | ".join(all_queries_sub), context_sub, answer_sub_rag]
    print("Answer (Subquery Expansion):", word_wrap(answer_sub_rag))
else:
    print("Skipping Subquery Expansion answer generation as no documents were retrieved.")
    results_df.loc[len(results_df)] = ["Query Expansion (Subqueries)", " | ".join(all_queries_sub), "No documents retrieved", "N/A"]



--- Running Strategy: Query Expansion (Multiple Subqueries) ---
Generated Subqueries: ['*   What is the maximum dollar amount the card allows for this role?', '*   Are there any restrictions on spending limits associated with this card?', '*   Does the card offer any spending thresholds that trigger a higher limit?', '*   Can you clarify if the card limit is consistent across all departments or roles?', '*   Is there a specific amount exceeding which the card limit would be impacted?']
Answer (Subquery Expansion): “The card limit for an Associate Vice President is up to $10,000.”


In [77]:
# --- Strategy 5: Re-ranking with CrossEncoder ---
print("\n--- Running Strategy: Re-ranking ---")
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

# Option 1: Re-rank initial results (from Basic RAG)
print("Re-ranking initial retrieved documents...")
if retrieved_results_basic["documents"] and retrieved_results_basic["documents"][0]:
    initial_docs = retrieved_results_basic["documents"][0]
    pairs_rerank_initial = [[original_query, doc] for doc in initial_docs]
    scores_initial = cross_encoder.predict(pairs_rerank_initial)
    reranked_indices_initial = np.argsort(scores_initial)[::-1]
    context_reranked_initial = "\n\n".join([initial_docs[i] for i in reranked_indices_initial])
    answer_reranked_initial = generate_llm_answer(original_query, context_reranked_initial, customer_service_prompt)
    results_df.loc[len(results_df)] = ["Re-ranking (Initial)", original_query, context_reranked_initial, answer_reranked_initial]
    print("Answer (Re-ranking Initial):", word_wrap(answer_reranked_initial))
else:
     print("Skipping Initial Re-ranking as no documents were retrieved initially.")
     results_df.loc[len(results_df)] = ["Re-ranking (Initial)", original_query, "No documents retrieved", "N/A"]



--- Running Strategy: Re-ranking ---


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Re-ranking initial retrieved documents...
Answer (Re-ranking Initial): The card limit for Associate Vice Presidents is up to $10,000.


In [78]:
# Option 2: Re-rank deduplicated results from subqueries
print("\nRe-ranking documents retrieved via subqueries...")
if unique_docs_sub:
    pairs_rerank_sub = [[original_query, doc] for doc in unique_docs_sub]
    scores_sub = cross_encoder.predict(pairs_rerank_sub)
    # Select top N unique documents based on score
    top_n_rerank = 5 # Or keep original number retrieved per query
    reranked_indices_sub = np.argsort(scores_sub)[::-1][:top_n_rerank]
    context_reranked_sub = "\n\n".join([list(unique_docs_sub)[i] for i in reranked_indices_sub])
    answer_reranked_sub = generate_llm_answer(original_query, context_reranked_sub, customer_service_prompt)
    results_df.loc[len(results_df)] = ["Re-ranking (Subqueries)", " | ".join(all_queries_sub), context_reranked_sub, answer_reranked_sub]
    print("Answer (Re-ranking Subqueries):", word_wrap(answer_reranked_sub))
else:
     print("Skipping Subquery Re-ranking as no documents were retrieved via subqueries.")
     results_df.loc[len(results_df)] = ["Re-ranking (Subqueries)", " | ".join(all_queries_sub), "No documents retrieved", "N/A"]



Re-ranking documents retrieved via subqueries...
Answer (Re-ranking Subqueries): According to the provided text, the card limit for Associate Vice Presidents is
“approved by an associate vice president or vice president.”


## Results

In [None]:

results_df = results_df.reset_index(drop=True)


print("\n--- Final Comparison ---")
print(results_df[['Type', 'Answer']].to_string()) # Display Type and Answer clearly


results_df.to_csv('results_comparison.csv', index=False)
print("\nSaved final comparison to results_comparison.csv")


results_df


--- Final Comparison ---
                             Type                                                                                                                                                                     Answer
0         Original Query (No RAG)  I’m sorry, but the context provided doesn’t contain any information about credit card limits for Associate Vice Presidents. \n\nTherefore, I cannot answer your question.
1                       Basic RAG                                                                                                                                    “The CEO has a limit of up to $10,000.”
2  Query Expansion (Hypothetical)                                                                         According to the provided text, the limit for an Associate Vice President’s card is up to $10,000.
3    Query Expansion (Subqueries)                                                                                                         “The card limit 

Unnamed: 0,Type,Query,Context,Answer
0,Original Query (No RAG),What is the card limit for Associate Vice Pres...,,"I’m sorry, but the context provided doesn’t co..."
1,Basic RAG,What is the card limit for Associate Vice Pres...,which accrue through use of the credit card wi...,"“The CEO has a limit of up to $10,000.”"
2,Query Expansion (Hypothetical),What is the card limit for Associate Vice Pres...,which accrue through use of the credit card wi...,"According to the provided text, the limit for ..."
3,Query Expansion (Subqueries),What is the card limit for Associate Vice Pres...,eligibility ( authorized employees ) employees...,“The card limit for an Associate Vice Presiden...
4,Re-ranking (Initial),What is the card limit for Associate Vice Pres...,"• once the authorization form is received, a r...",The card limit for Associate Vice Presidents i...
5,Re-ranking (Subqueries),What is the card limit for Associate Vice Pres...,1 corporate credit card policy policy # 7. 36 ...,"According to the provided text, the card limit..."
