# Re-initializing embedding model

In [1]:
import os
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS

vector_store_path = "../vector_store" 

model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {'device': 'cpu'} 
encode_kwargs = {'normalize_embeddings': False}

print(f"Re-initializing embedding model: {model_name} (using {model_kwargs['device']})...")

try:
    embeddings_model = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )
    print("Embedding model re-initialized successfully!")
except Exception as e:
    print(f"Error re-initializing embedding model: {e}")
    print("Please ensure 'sentence-transformers' and 'transformers' are installed and working.")


# Load the FAISS vector store
print(f"\nLoading FAISS vector store from: {vector_store_path}...")

try:
    vector_store = FAISS.load_local(vector_store_path, embeddings_model, allow_dangerous_deserialization=True)
    print("FAISS vector store loaded successfully!")
    print(f"Number of vectors in loaded FAISS index: {vector_store.index.ntotal}")
except Exception as e:
    print(f"Error loading FAISS vector store: {e}")
    print("Please ensure the 'vector_store' directory exists and contains FAISS index files.")
    print("Also ensure the embedding model is correctly re-initialized.")


top_k_chunks = 5 

print(f"\nSetting up retriever to fetch top {top_k_chunks} chunks...")
retriever = vector_store.as_retriever(search_kwargs={"k": top_k_chunks})
print("Retriever set up successfully!")

sample_query = "problems with unauthorized credit card charges"
print(f"\nTesting retriever with sample query: '{sample_query}'")

try:
    retrieved_docs = retriever.invoke(sample_query) 
    print(f"Retrieved {len(retrieved_docs)} documents.")
    print("\nSample of retrieved document content and metadata:")
    for i, doc in enumerate(retrieved_docs[:2]): 
        print(f"--- Document {i+1} ---")
        print(f"Content (first 200 chars): {doc.page_content[:200]}...")
        print(f"Metadata: {doc.metadata}")
except Exception as e:
    print(f"Error testing retriever: {e}")
    print("Ensure the embedding model and vector store are correctly loaded.")

Re-initializing embedding model: sentence-transformers/all-MiniLM-L6-v2 (using cpu)...
Embedding model re-initialized successfully!

Loading FAISS vector store from: ../vector_store...
FAISS vector store loaded successfully!
Number of vectors in loaded FAISS index: 136540

Setting up retriever to fetch top 5 chunks...
Retriever set up successfully!

Testing retriever with sample query: 'problems with unauthorized credit card charges'
Retrieved 5 documents.

Sample of retrieved document content and metadata:
--- Document 1 ---
Content (first 200 chars): unauthorized charges on credit account statement shows xxxx xxxx xxxx xxxx xxxx xxxx...
Metadata: {'complaint_id': 12508103, 'product': 'Credit card', 'original_index': 7724, 'start_index': 0}
--- Document 2 ---
Content (first 200 chars): credit card xxxx xxxx was activated by someone else with unauthorized charges along with open old disputes...
Metadata: {'complaint_id': 8687697, 'product': 'Credit card', 'original_index': 73169, 'star

# Integrate the Large Language Model (LLM)


In [2]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoConfig
import torch

# Define the LLM model name
llm_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 

print(f"\nLoading LLM tokenizer and model: {llm_model_name}...")

try:
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(llm_model_name)

    # Load model configuration to check for 'max_position_embeddings'
    config = AutoConfig.from_pretrained(llm_model_name)
    max_model_length = config.max_position_embeddings if hasattr(config, "max_position_embeddings") else 4096 

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        llm_model_name,
        torch_dtype=torch.float16, 
        device_map="cpu", 
        trust_remote_code=True 
    )
    print("LLM model loaded successfully!")

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512, 
        do_sample=True,
        temperature=0.7,
        top_p=0.95,

        max_length=min(max_model_length, 2048), 
        pad_token_id=tokenizer.eos_token_id, 
    )

    # Initialize LangChain's HuggingFacePipeline LLM
    llm = HuggingFacePipeline(pipeline=pipe)
    print("LangChain LLM initialized successfully!")


except ImportError:
    print("Error: 'transformers' library not found. Please install it: `pip install transformers`")
except Exception as e:
    print(f"An unexpected error occurred during LLM setup: {e}")


Loading LLM tokenizer and model: TinyLlama/TinyLlama-1.1B-Chat-v1.0...


Device set to use cpu


LLM model loaded successfully!
LangChain LLM initialized successfully!


-- Test the LLM with a simple prompt --

In [3]:

test_prompt = "<|user|>\nWhat is the capital of France?<|end|>\n<|assistant|>"
print(f"\nTesting LLM with prompt: '{test_prompt}'")
response = llm.invoke(test_prompt) 

print("\n--- Full LLM Response (for debugging) ---")
print(response) 

print("\n--- Extracted LLM Response ---")

try:
        start_index = response.find("<|assistant|>")
        if start_index != -1:
            generated_text = response[start_index + len("<|assistant|>"):].strip()
            if generated_text:
                print(generated_text.split('\n')[0])
            else:
                print("[No specific answer generated beyond prompt structure]")
        else:
            print("Could not find '<|assistant|>' in the response. Full raw output:")
            print(response) 
except Exception as e:
        print(f"Error during response extraction: {e}")
        print("Full raw output:")
        print(response)


except ImportError:
    print("Error: 'transformers' library not found. Please install it: `pip install transformers`")
except Exception as e:
    print(f"An unexpected error occurred during LLM setup: {e}")



Testing LLM with prompt: '<|user|>
What is the capital of France?<|end|>
<|assistant|>'

--- Full LLM Response (for debugging) ---
<|user|>
What is the capital of France?<|end|>
<|assistant|>
The capital of France is Paris, located in the Île-de-France region.

--- Extracted LLM Response ---
The capital of France is Paris, located in the Île-de-France region.


# --- Create the RAG Chain (RetrievalQA) ---


In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# --- Define Prompt Template ---

template = """<|system|>
You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints.
Use the following retrieved complaint excerpts (Context) to formulate your answer.
If the Context doesn't contain enough information to answer the question,
state that you don't have enough information based on the provided context.
Keep the answer concise and to the point.
<|end|>
<|user|>
Context:
{context}

Question: {question}<|end|>
<|assistant|>
"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# --- Create the RAG Chain (RetrievalQA) ---

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

print("\nRAG Chain (RetrievalQA) created successfully!")




RAG Chain (RetrievalQA) created successfully!


# --- Test the RAG Chain ---

In [None]:
# --- Test the RAG Chain with a sample query ---
sample_rag_query = "What are common complaints about credit cards?"

print(f"\nTesting RAG Chain with query: '{sample_rag_query}'")

try:
    rag_response = qa_chain.invoke({"query": sample_rag_query}) # Use .invoke() for newer LangChain versions
    print("\n--- RAG Chain Result ---")
    print(f"Answer: {rag_response['result']}")

    print("\n--- Retrieved Sources ---")
    for i, doc in enumerate(rag_response['source_documents']):
        print(f"Document {i+1} (Complaint ID: {doc.metadata.get('complaint_id', 'N/A')}, Product: {doc.metadata.get('product', 'N/A')}):")
        # Print first 300 characters of content
        print(f"Content (first 300 chars): {doc.page_content[:300]}...")

except Exception as e:
    print(f"An error occurred during RAG chain invocation: {e}")
    print("Ensure LLM, retriever, and prompt are correctly set up.")


Testing RAG Chain with query: 'What are common complaints about credit cards?'

--- RAG Chain Result ---
Answer: <|system|>
You are a financial analyst assistant for CrediTrust. Your task is to answer questions about customer complaints.
Use the following retrieved complaint excerpts (Context) to formulate your answer.
If the Context doesn't contain enough information to answer the question,
state that you don't have enough information based on the provided context.
Keep the answer concise and to the point.
<|end|>
<|user|>
Context:
is not right and i am wanting to do a formal complaint with these credit card practices can you please assist best xxxx xxxx xxxx

xxxx xxxx xxxx xxxx hide full complaint what product or service is your complaint about product or service credit card type store credit card

understatement i have been informed by xxxx xxxx representatives that they have received numerous complaints from xxxx xxxx business mastercard cardholders regarding the same issue

cons

# Evaluation

In [None]:
evaluation_questions = [
    "What are common complaints about credit cards?",
    "Are there issues related to billing or fees?",
    "What kind of deceptive practices are mentioned?",
    "What problems do consumers face with store credit cards?",
]

print("Evaluation questions defined.")

# Assuming qa_chain and evaluation_questions are defined in previous cells

print("\n--- Running Qualitative Evaluation ---")
evaluation_results = []

for i, query in enumerate(evaluation_questions):
    print(f"\n--- Question {i+1}: {query} ---")
    try:
        rag_response = qa_chain.invoke({"query": query})

        generated_answer = rag_response['result']
        source_documents = rag_response['source_documents']

        print(f"Generated Answer: {generated_answer}")
        print("\nRetrieved Sources (Top 2 for evaluation):")
        
        # Collect sources for the table
        sources_for_table = []
        for j, doc in enumerate(source_documents[:2]): 
            source_info = f"Document {j+1} (ID: {doc.metadata.get('complaint_id', 'N/A')}, Product: {doc.metadata.get('product', 'N/A')})"
            print(f"- {source_info}")
            print(f"  Content (first 150 chars): {doc.page_content[:150]}...")
            sources_for_table.append(source_info) 

        evaluation_results.append({
            "Question": query,
            "Generated Answer": generated_answer,
            "Retrieved Sources": "; ".join(sources_for_table), 
            "Quality Score": "", 
            "Comments/Analysis": "" 
        })

    except Exception as e:
        print(f"An error occurred for query '{query}': {e}")
        evaluation_results.append({
            "Question": query,
            "Generated Answer": f"Error: {e}",
            "Retrieved Sources": "N/A",
            "Quality Score": "0",
            "Comments/Analysis": "System error during generation."
        })

print("\n--- Qualitative Evaluation Run Complete ---")
print("\nBelow is the raw data collected for your evaluation table:")
import json
print(json.dumps(evaluation_results, indent=2)) 