# RAG Financial Chatbot - Interactive Demo

This notebook demonstrates the RAG (Retrieval-Augmented Generation) chatbot system for answering financial questions.

## Overview

1. **Setup**: Load environment and initialize components
2. **Document Ingestion**: Process and embed financial documents (done beforehand by the ingestion script; this notebook queries the existing index)
3. **RAG Pipeline**: Query the system and retrieve answers
4. **Analysis**: Visualize retrieval and performance metrics

## 1. Setup and Imports

In [None]:
# Import required libraries
import sys
import os
from pathlib import Path
import json
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Resolve the parent of the current working directory once; both the import
# path and the .env lookup below are anchored to it.
PROJECT_ROOT = Path.cwd().parent

# Make modules located under that directory importable from this notebook
sys.path.append(str(PROJECT_ROOT))

# Load environment variables from the .env file in that directory
load_dotenv(PROJECT_ROOT / ".env")

print("✓ Imports successful")

## 2. Initialize RAG Components

In [None]:
# Import application modules
from app.core.config import get_settings
from app.core.embeddings import EmbeddingGenerator
from app.services.pinecone_service import PineconeService
from app.rag.retriever import PineconeRetriever
from app.rag.chain import RAGChain

# Fetch the application settings object
settings = get_settings()

# Echo the configuration values the rest of the notebook relies on
print("✓ Settings loaded")
for setting_label, setting_value in (
    ("Embedding Model", settings.embedding_model),
    ("LLM Model", settings.llm_model),
    ("Pinecone Index", settings.pinecone_index_name),
):
    print(f"  - {setting_label}: {setting_value}")

In [None]:
# Build the embedding generator for the configured model and report the
# dimension it produces.
print("Initializing embedding generator...")
embedding_gen = EmbeddingGenerator(settings.embedding_model)
embedding_dim = embedding_gen.get_dimension()
print(f"✓ Embedding dimension: {embedding_dim}")

In [None]:
# Initialize Pinecone service
print("Connecting to Pinecone...")
pinecone_service = PineconeService(
    api_key=settings.pinecone_api_key,
    environment=settings.pinecone_environment,
    index_name=settings.pinecone_index_name
)

# Get index stats.
# `stats` is read again by the summary cell at the end of the notebook, so it
# is bound to an empty dict up front: if the lookup below fails, later cells
# still find the name (and fall back to their defaults) instead of raising
# NameError on a Restart & Run All.
stats = {}
try:
    stats = pinecone_service.get_index_stats()
    print("✓ Connected to Pinecone")
    print(f"  - Total vectors: {stats.get('total_vector_count', 0)}")
    print(f"  - Dimension: {stats.get('dimension', 0)}")
except Exception as e:
    # Deliberately best-effort: report the problem and let the notebook continue
    print(f"⚠ Note: {e}")
    print("  Run the ingestion script first if the index doesn't exist")

In [None]:
# Wire the Pinecone service and the embedding generator into one retriever
print("Initializing retriever...")
retriever_config = {
    "pinecone_service": pinecone_service,
    "embedding_generator": embedding_gen,
    "top_k": settings.top_k,
}
retriever = PineconeRetriever(**retriever_config)
print("✓ Retriever initialized")

In [None]:
# Initialize RAG chain
# Combines the retriever built earlier with the LLM configuration taken from
# `settings` (API key, model name, temperature, max_tokens).
print("Initializing RAG chain...")
rag_chain = RAGChain(
    retriever=retriever,
    openai_api_key=settings.openai_api_key,  # read from settings, not hard-coded in the notebook
    model_name=settings.llm_model,
    temperature=settings.temperature,
    max_tokens=settings.max_tokens
)
print("✓ RAG chain initialized")

## 3. Query the RAG System

In [None]:
# Helper function to display results
def display_result(result, max_chunks=3, preview_chars=150):
    """Pretty-print a RAG response: question, answer, sources and top chunks.

    Args:
        result: Mapping with the keys ``question``, ``answer``, ``sources``
            (iterable of source names) and ``retrieved_docs`` (sequence of
            mappings that each provide ``score``, ``source`` and ``text``).
        max_chunks: Number of leading retrieved chunks to preview (default 3).
        preview_chars: Maximum number of characters shown per chunk
            (default 150).

    Returns:
        None. All output is written to stdout.
    """
    rule = "=" * 80
    retrieved = result['retrieved_docs']

    print("\n" + rule)
    print(f"QUESTION: {result['question']}")
    print(rule)
    print(f"\nANSWER:\n{result['answer']}")
    print(f"\n{'-'*80}")
    # str() keeps the join from raising when a source is not already a string
    print(f"\nSOURCES: {', '.join(str(src) for src in result['sources'])}")
    print(f"\nRETRIEVED DOCUMENTS: {len(retrieved)}")
    print(f"\nTop {max_chunks} Most Relevant Chunks:")
    for i, doc in enumerate(retrieved[:max_chunks], 1):
        text = doc['text']
        # Mark the preview as truncated only when text was actually cut off
        preview = text[:preview_chars] + ("..." if len(text) > preview_chars else "")
        print(f"\n  [{i}] Score: {doc['score']:.3f} | Source: {doc['source']}")
        print(f"      {preview}")
    print("\n" + rule)

### Example Query 1: Revenue Information

In [None]:
# Ask for a single reported figure, passing top_k=5 to the chain, then print
# the formatted result.
question1 = "What was TechCorp's total revenue in Q1 2024?"
result1 = rag_chain.invoke(question1, top_k=5)
display_result(result1)

### Example Query 2: Financial Metrics

In [None]:
# Ask for a metric plus its explanation, passing top_k=5 to the chain, then
# print the formatted result.
question2 = "What is the Net Revenue Retention rate and what does it mean?"
result2 = rag_chain.invoke(question2, top_k=5)
display_result(result2)

### Example Query 3: Strategic Information

In [None]:
# Ask a qualitative (non-numeric) question, passing top_k=5 to the chain, then
# print the formatted result.
question3 = "What are TechCorp's strategic priorities for 2024?"
result3 = rag_chain.invoke(question3, top_k=5)
display_result(result3)

### Example Query 4: Complex Analysis

In [None]:
# Ask a question that spans two reporting periods, passing top_k=5 to the
# chain, then print the formatted result.
question4 = "Compare the gross margin between Q1 2024 and the full year 2023"
result4 = rag_chain.invoke(question4, top_k=5)
display_result(result4)

## 4. Interactive Query Interface

In [None]:
# Convenience wrapper for ad-hoc questions
def ask_question(question, top_k=5):
    """Run one question through the RAG chain, print the result, and return it.

    Args:
        question: Question text forwarded to ``rag_chain.invoke``.
        top_k: Forwarded to ``rag_chain.invoke`` as ``top_k`` (default 5).

    Returns:
        Whatever ``rag_chain.invoke`` returned for the question.
    """
    response = rag_chain.invoke(question, top_k=top_k)
    display_result(response)
    return response

# Try your own questions here!
# Example:
# ask_question("What is the customer acquisition cost?")

## 5. Batch Query Analysis

In [None]:
# Questions used to exercise the pipeline in bulk
test_questions = [
    "What was the total revenue in Q1 2024?",
    "What is the operating margin?",
    "How much did TechCorp invest in R&D?",
    "What are the main risk factors?",
    "What is the customer count?",
]

# Run every question through the chain, keeping one summary record per question
results = []
for question in test_questions:
    print(f"Processing: {question}")
    result = rag_chain.invoke(question, top_k=3)
    retrieved = result['retrieved_docs']
    # Fall back to 0 when nothing was retrieved
    best_score = retrieved[0]['score'] if retrieved else 0
    results.append(dict(
        question=question,
        answer_length=len(result['answer']),
        num_sources=len(result['sources']),
        top_score=best_score,
    ))

# Tabulate the records and print them under a banner
df_results = pd.DataFrame(results)
banner = "=" * 80
print("\n" + banner)
print("BATCH QUERY RESULTS")
print(banner)
print(df_results)

## 6. Retrieval Quality Analysis

In [None]:
# Two side-by-side bar charts summarising the batch run
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
length_ax, score_ax = axes
query_positions = range(len(df_results))

# Left panel: answer length per query
length_ax.bar(query_positions, df_results['answer_length'])
length_ax.set(
    xlabel='Query Index',
    ylabel='Answer Length (characters)',
    title='Response Length Distribution',
)
length_ax.grid(axis='y', alpha=0.3)

# Right panel: score of the top retrieved document per query
score_ax.bar(query_positions, df_results['top_score'], color='green')
score_ax.set(
    xlabel='Query Index',
    ylabel='Similarity Score',
    title='Top Document Relevance Scores',
)
score_ax.grid(axis='y', alpha=0.3)
score_ax.set_ylim([0, 1])

plt.tight_layout()
plt.show()

# Headline averages across all queries
mean_answer_length = df_results['answer_length'].mean()
mean_top_score = df_results['top_score'].mean()
print(f"\nAverage answer length: {mean_answer_length:.0f} characters")
print(f"Average top score: {mean_top_score:.3f}")

## 7. Document Retrieval Frequency

In [None]:
# Tally how often each source document appears across the test questions
source_counts = {}
for question in test_questions:
    result = rag_chain.invoke(question, top_k=3)
    for source in result['sources']:
        source_counts.setdefault(source, 0)
        source_counts[source] += 1

# Horizontal bar chart of the tallies, one bar per source
sources = list(source_counts)
counts = [source_counts[name] for name in sources]

plt.figure(figsize=(10, 6))
plt.barh(sources, counts, color='steelblue')
plt.xlabel('Times Retrieved')
plt.title('Document Retrieval Frequency')
plt.grid(axis='x', alpha=0.3)
plt.tight_layout()
plt.show()

# Text ranking, most frequently retrieved first
print("\nMost frequently retrieved documents:")
ranked_sources = sorted(source_counts.items(), key=lambda pair: pair[1], reverse=True)
for source, count in ranked_sources:
    print(f"  - {source}: {count} times")

## 8. Conversational RAG (With History)

In [None]:
# Simulate a conversation.
# Every completed turn is appended to chat_history as a
# {"question", "answer"} record so that later turns can be asked in context.
chat_history = []

# Turn 1: opening question, asked without any history
q1 = "What was TechCorp's revenue in Q1 2024?"
r1 = rag_chain.invoke(q1)
print(f"Q: {q1}")
print(f"A: {r1['answer'][:200]}...\n")
chat_history.append({"question": q1, "answer": r1['answer']})

# Turn 2 (with context): a follow-up that relies on turn 1
q2 = "How does that compare to the previous year?"
r2 = rag_chain.invoke_with_chat_history(q2, chat_history)
print(f"Q: {q2}")
print(f"A: {r2['answer'][:200]}...\n")
chat_history.append({"question": q2, "answer": r2['answer']})

# Turn 3 (with more context): a follow-up that relies on turns 1 and 2
q3 = "What were the main growth drivers?"
r3 = rag_chain.invoke_with_chat_history(q3, chat_history)
print(f"Q: {q3}")
print(f"A: {r3['answer'][:200]}...")
# Record the final turn as well, so chat_history holds the whole conversation
# and can be passed along if further follow-up questions are asked.
chat_history.append({"question": q3, "answer": r3['answer']})

## 9. Embedding Similarity Exploration

In [None]:
# Test query variations: differently worded questions about the same topic,
# sent straight to the retriever (no LLM call) to compare the top match.
queries = [
    "What was the revenue?",
    "How much money did the company make?",
    "Tell me about sales figures",
]

print("Testing query variations...\n")
for query in queries:
    docs = retriever.retrieve(query, top_k=3)
    print(f"Query: '{query}'")
    if docs:
        top_doc = docs[0]
        print(f"  Top score: {top_doc['score']:.3f}")
        print(f"  Source: {top_doc['source']}")
    else:
        # Nothing came back (e.g. the index has not been populated yet);
        # report it instead of failing with IndexError on docs[0].
        print("  No documents retrieved")
    print()

## 10. Summary and Next Steps

In [None]:
# Assemble the closing report line by line, then print it in order
divider = "=" * 80
summary_lines = [
    "RAG System Summary",
    divider,
    f"✓ Embedding Model: {settings.embedding_model}",
    f"✓ LLM Model: {settings.llm_model}",
    f"✓ Vector Database: Pinecone ({settings.pinecone_index_name})",
    f"✓ Documents Indexed: {stats.get('total_vector_count', 'N/A')}",
    f"✓ Embedding Dimension: {embedding_gen.get_dimension()}",
    "\nNext Steps:",
    "  1. Add your own financial documents to data/sample_docs/",
    "  2. Run ingestion script to update the index",
    "  3. Query with domain-specific questions",
    "  4. Deploy the FastAPI server for production use",
    "  5. Integrate with your frontend application",
    divider,
]
for summary_line in summary_lines:
    print(summary_line)