# RAG System Comparison Analysis

This notebook systematically compares Traditional RAG with Knowledge Graph Enhanced RAG. Both systems answer with `gpt-4o-mini`; GPT-4o acts as the prompt generator and judge for:
1. **Prompt Generation**: Generate differentiating test prompts
2. **Response Analysis**: Compare and evaluate system responses
3. **Performance Assessment**: Analyze strengths/weaknesses of each approach

## Comparison Flow:
1. 🤖 **GPT-4o generates test prompts** for system comparison
2. 👤 **User selects preferred prompt** from generated options
3. 🔄 **Both RAG systems respond** to the selected prompt
4. 📊 **GPT-4o analyzes responses** and provides detailed comparison


## Setup and Initialization


In [1]:
# Basic imports and setup
import asyncio
import os
import openai
from getpass import getpass
import importlib
import sys

# Import RAG components
from aimakerspace.text_utils import TextFileLoader, CharacterTextSplitter
from aimakerspace.vectordatabase import VectorDatabase
from aimakerspace.knowledge_graph import KnowledgeGraphEnhancedVectorDB
from aimakerspace.openai_utils.prompts import UserRolePrompt, SystemRolePrompt
from aimakerspace.openai_utils.chatmodel import ChatOpenAI

# Reload modules to ensure latest versions
# (importlib.reload re-executes the module source, so edits made to the local
# aimakerspace package are picked up without restarting the kernel).
# Each reload is followed by a re-import so the notebook-level names point at
# the freshly reloaded classes rather than the ones imported above.
if 'aimakerspace.vectordatabase' in sys.modules:
    importlib.reload(sys.modules['aimakerspace.vectordatabase'])
    from aimakerspace.vectordatabase import VectorDatabase

# NOTE(review): reloaded after vectordatabase, presumably because knowledge_graph
# builds on VectorDatabase — confirm before reordering these two blocks.
if 'aimakerspace.knowledge_graph' in sys.modules:
    importlib.reload(sys.modules['aimakerspace.knowledge_graph'])
    from aimakerspace.knowledge_graph import KnowledgeGraphEnhancedVectorDB

# Patch asyncio so the asyncio.run() calls in later cells work even though the
# notebook kernel already has an event loop running.
import nest_asyncio
nest_asyncio.apply()


In [2]:
# OpenAI API Key setup
# The key is read without echoing it into the notebook output. Surrounding
# whitespace (easy to pick up when pasting) is stripped so it cannot cause an
# authentication failure later on.
openai.api_key = getpass("OpenAI API Key: ").strip()
if not openai.api_key:
    # Fail here with a clear message instead of inside the first API call.
    raise ValueError("An OpenAI API key is required to run this notebook.")
# Also exported through the environment for components that read the key from there.
os.environ["OPENAI_API_KEY"] = openai.api_key

# Initialize ChatOpenAI for both RAG responses and GPT-4o analysis
chat_openai = ChatOpenAI(model_name="gpt-4o-mini")  # For RAG systems
gpt4o_analyzer = ChatOpenAI(model_name="gpt-4o")    # For prompt generation and analysis


In [None]:
# Load and process documents
# The corpus path is relative to the notebook's working directory.
text_loader = TextFileLoader("data/PMarcaBlogs.txt")
documents = text_loader.load_documents()
# Split with CharacterTextSplitter's default settings; both vector databases
# in the following cells are built from this same list of chunks.
text_splitter = CharacterTextSplitter()
split_documents = text_splitter.split_texts(documents)

# Quick sanity check that loading and splitting produced data.
print(f"📄 Loaded {len(documents)} document(s)")
print(f"✂️ Split into {len(split_documents)} chunks")


In [None]:
# Build Traditional RAG Vector Database
print("🔨 Building Traditional RAG Vector Database...")
traditional_vector_db = VectorDatabase()
# abuild_from_list is driven to completion with asyncio.run(); whatever it
# returns replaces the empty instance created on the previous line
# (presumably the populated database — confirm against VectorDatabase).
# This depends on nest_asyncio.apply() from the setup cell.
traditional_vector_db = asyncio.run(traditional_vector_db.abuild_from_list(split_documents))
print("✅ Traditional RAG ready")


In [None]:
# Build Knowledge Graph Enhanced RAG Vector Database
print("🧠 Building Knowledge Graph Enhanced RAG Vector Database...")
kg_enhanced_db = KnowledgeGraphEnhancedVectorDB()
# Built from the same chunks as the traditional database so the two systems
# differ only in how they index and retrieve.
# NOTE(review): num_categories=4 presumably sets how many categories the chunks
# are grouped into — confirm against KnowledgeGraphEnhancedVectorDB.
kg_enhanced_db = asyncio.run(kg_enhanced_db.build_from_list(split_documents, num_categories=4))
print("✅ Knowledge Graph RAG ready")


In [None]:
# RAG System Templates and Pipelines
# System prompt shared by BOTH pipelines, so differences in their answers come
# from retrieval rather than from instructions. It is also embedded in the
# prompt-generation request later in the notebook.
RAG_SYSTEM_TEMPLATE = """You are a knowledgeable assistant that answers questions based strictly on provided context.

Instructions:
- Only answer questions using information from the provided context
- If the context doesn't contain relevant information, respond with "I don't know"
- Be accurate and cite specific parts of the context when possible
- Keep responses detailed and comprehensive
- Only use the provided context. Do not use external knowledge.
- Only provide answers when you are confident the context supports your response."""

# User prompt with two placeholders, {context} and {user_query}, which the
# pipelines fill in through create_message(user_query=..., context=...).
RAG_USER_TEMPLATE = """Context Information:
{context}

Question: {user_query}

Please provide your answer based solely on the context above."""

# Prompt objects used as module-level globals by both pipeline classes below.
rag_system_prompt = SystemRolePrompt(RAG_SYSTEM_TEMPLATE)
rag_user_prompt = UserRolePrompt(RAG_USER_TEMPLATE)

class TraditionalRAGPipeline:
    """RAG pipeline that retrieves context by plain vector search, then asks the LLM.

    Relies on the notebook-level ``rag_system_prompt`` and ``rag_user_prompt``
    objects to build the chat messages.
    """

    def __init__(self, vector_db: VectorDatabase, llm: ChatOpenAI):
        self.llm = llm
        self.vector_db = vector_db

    def run_pipeline(self, query: str, k: int = 3) -> dict:
        """Retrieve ``k`` chunks for ``query`` and generate an answer from them.

        Args:
            query: The user question; used both for retrieval and in the prompt.
            k: Number of results requested from ``search_by_text``.

        Returns:
            A dict with ``"response"`` (the LLM output), ``"context"`` (the raw
            search results, iterated here as ``(text, score)`` pairs) and
            ``"method"`` (the fixed label ``"Traditional RAG"``).
        """
        hits = self.vector_db.search_by_text(query, k=k)

        # Sources are numbered from 1; the similarity score is not shown to the LLM.
        context_prompt = "\n\n".join(
            f"[Source {rank}]: {chunk}"
            for rank, (chunk, _score) in enumerate(hits, 1)
        )

        messages = [
            rag_system_prompt.create_message(),
            rag_user_prompt.create_message(user_query=query, context=context_prompt),
        ]

        return {
            "response": self.llm.run(messages),
            "context": hits,
            "method": "Traditional RAG",
        }

class KnowledgeGraphRAGPipeline:
    """RAG pipeline whose retrieval step goes through the knowledge-graph-enhanced database.

    Each retrieved chunk is labelled in the LLM context with its category name
    and up to three of its entities, both read from the chunk's metadata.
    Relies on the notebook-level ``rag_system_prompt`` and ``rag_user_prompt``
    objects to build the chat messages.
    """

    def __init__(self, kg_db: KnowledgeGraphEnhancedVectorDB, llm: ChatOpenAI):
        self.llm = llm
        self.kg_db = kg_db

    def run_pipeline(self, query: str, k: int = 3) -> dict:
        """Retrieve context with graph expansion and generate an answer from it.

        Args:
            query: The user question; used both for retrieval and in the prompt.
            k: Value passed as ``k`` to ``search_with_graph_expansion``.

        Returns:
            A dict with ``"response"`` (the LLM output), ``"context"`` (the raw
            results, iterated here as ``(text, score, metadata)`` triples),
            ``"entities_used"`` and ``"categories_used"`` (one entry per
            result, taken from its metadata) and ``"method"`` (the fixed label
            ``"Knowledge Graph RAG"``).
        """
        hits = self.kg_db.search_with_graph_expansion(query, k=k)

        labelled_sources = []
        for rank, (chunk, _score, meta) in enumerate(hits, 1):
            chunk_entities = meta.get('entities', [])
            category_name = meta.get('category_name', 'Unknown')

            # Only the first three entities are surfaced, and the bracketed
            # note is omitted entirely when the chunk has none.
            if chunk_entities:
                entity_note = f" [Key entities: {', '.join(chunk_entities[:3])}]"
            else:
                entity_note = ""
            labelled_sources.append(f"[Source {rank} - {category_name}]{entity_note}: {chunk}")

        context_prompt = "\n\n".join(labelled_sources)

        messages = [
            rag_system_prompt.create_message(),
            rag_user_prompt.create_message(user_query=query, context=context_prompt),
        ]

        return {
            "response": self.llm.run(messages),
            "context": hits,
            "entities_used": [meta.get('entities', []) for _, _, meta in hits],
            "categories_used": [meta.get('category_name', 'Unknown') for _, _, meta in hits],
            "method": "Knowledge Graph RAG",
        }

# Initialize both pipelines
# Both share the same chat_openai model, so answer differences reflect the
# retrieval strategy rather than the generator.
traditional_rag = TraditionalRAGPipeline(traditional_vector_db, chat_openai)
kg_rag = KnowledgeGraphRAGPipeline(kg_enhanced_db, chat_openai)

print("🚀 Both RAG pipelines initialized and ready for comparison!")


## Step 1: Generate Test Prompts with GPT-4o

GPT-4o generates test prompts designed to highlight the differences between traditional RAG and knowledge-graph-enhanced RAG.


In [None]:
# User input for document URL (or description)
# Surrounding whitespace is stripped so that an accidental space or tab counts
# as "no input" and the default below is used instead of a blank description.
document_description = input("Please provide the document URL or description for context (e.g., 'PMarca Blog Archives - startup advice from Marc Andreessen'): ").strip()

if not document_description:
    document_description = "PMarca Blog Archives - startup advice and business insights from Marc Andreessen"

print(f"📄 Using document description: {document_description}")


In [None]:
# Generate test prompts using GPT-4o
# The request embeds the user-supplied document description and the shared RAG
# system prompt, so the generated questions account for the "answer only from
# context" constraint both pipelines operate under. The numbered output format
# requested here is what the selection cell parses afterwards.
prompt_generation_template = f"""Given the provided document, what would be a few example prompts to show the differing capabilities of a knowledge graph augmented rag system vs a traditional rag system?

Document: {document_description}

Please provide 5 different prompts that would highlight the advantages of knowledge graph enhanced RAG, such as:
- Entity relationship understanding
- Cross-concept connections
- Category-based insights
- Complex multi-step reasoning
- Semantic understanding beyond keyword matching

Format your response as:
1. [Prompt 1]
2. [Prompt 2]
3. [Prompt 3]
4. [Prompt 4]
5. [Prompt 5]

Each prompt should be designed to reveal meaningful differences between the two approaches.
Keep in mind that the following system prompt is used for both RAG systems:
{RAG_SYSTEM_TEMPLATE}

We would like to avoid unanswerable prompts.
"""

print("🤖 Generating test prompts with GPT-4o...")
# Sent as a single user message to the GPT-4o analyzer (not the RAG model).
generated_prompts_response = gpt4o_analyzer.run([{"role": "user", "content": prompt_generation_template}])
print("\n📝 Generated Test Prompts:")
print("=" * 60)
print(generated_prompts_response)


## Step 2: User Prompt Selection

Select which generated prompt you'd like to use for the comparison.


In [None]:
# Extract prompts from the response for easier selection
import re

# A list item is a line that starts (without indentation) with "N." followed by
# whitespace and text. The number may be wrapped in markdown markers such as
# "**", "#", ">" or "-". Anchoring at the start of the line keeps numbers that
# appear mid-sentence (e.g. "4.0") from being mistaken for list items.
_PROMPT_ITEM_START = re.compile(r'^(?:[*_#>-]+[ \t]*)?\d+\.[*_]*\s+(?P<text>\S.*)$')


def parse_numbered_prompts(response_text: str) -> list:
    """Return the items of the numbered list found in ``response_text``.

    Text before the first item is ignored. Lines that follow an item belong to
    it (joined with newlines, blank lines dropped) until the next item starts.
    An unindented paragraph that follows a blank line is treated as closing
    commentary: it ends the current item and is discarded, so remarks after
    the list do not leak into the last prompt.

    Args:
        response_text: Raw LLM output expected to contain "1. ...", "2. ..." lines.

    Returns:
        The item texts in order of appearance; an empty list if none are found.
    """
    prompts = []
    current_lines = None   # lines of the item being collected, or None outside an item
    after_blank = False    # True once a blank line has been seen inside the current item

    for line in response_text.splitlines():
        item_start = _PROMPT_ITEM_START.match(line)
        if item_start:
            if current_lines is not None:
                prompts.append("\n".join(current_lines))
            current_lines = [item_start.group("text").rstrip()]
            after_blank = False
        elif current_lines is None:
            # Preamble before the first item, or commentary after a closed one.
            continue
        elif not line.strip():
            after_blank = True
        elif after_blank and not line[0].isspace():
            # Unindented paragraph after a blank line: not part of the item.
            prompts.append("\n".join(current_lines))
            current_lines = None
        else:
            current_lines.append(line.strip())
            after_blank = False

    if current_lines is not None:
        prompts.append("\n".join(current_lines))
    return prompts


# Parse the numbered prompts
prompts_list = parse_numbered_prompts(generated_prompts_response)

if prompts_list:
    print("\n🎯 Available Prompts for Testing:")
    print("=" * 50)
    for i, prompt in enumerate(prompts_list, 1):
        print(f"{i}. {prompt}")
        print("-" * 30)

    # Get user selection; keep asking until a number in range is entered.
    while True:
        try:
            choice = int(input(f"\nSelect a prompt number (1-{len(prompts_list)}): "))
            if 1 <= choice <= len(prompts_list):
                selected_prompt = prompts_list[choice - 1]
                break
            else:
                print(f"Please enter a number between 1 and {len(prompts_list)}")
        except ValueError:
            print("Please enter a valid number")
else:
    # Fallback if parsing fails
    selected_prompt = input("\nPlease copy and paste your preferred prompt from above: ").strip()

print(f"\n✅ Selected Prompt: {selected_prompt}")


## Step 3: Run Both RAG Systems

Execute the selected prompt on both Traditional RAG and Knowledge Graph Enhanced RAG systems.


In [None]:
# Send the same prompt to both pipelines with the same k, so the two results
# differ only in how context was retrieved.
print("🔄 Running both RAG systems with selected prompt...")
print("=" * 70)

# Run Traditional RAG
print("\n🔍 Traditional RAG Processing...")
traditional_result = traditional_rag.run_pipeline(selected_prompt, k=3)
print()

# Run Knowledge Graph RAG
print("🧠 Knowledge Graph RAG Processing...")
kg_result = kg_rag.run_pipeline(selected_prompt, k=3)

print("✅ Both systems completed!")


In [None]:
# Display both responses for comparison
print("\n📊 RAG SYSTEM RESPONSES COMPARISON")
print("=" * 80)

print("\n🔹 TRADITIONAL RAG RESPONSE:")
print("-" * 50)
print(traditional_result['response'])

print("\n\n🔹 KNOWLEDGE GRAPH RAG RESPONSE:")
print("-" * 50)
print(kg_result['response'])

# Show additional context info for KG RAG
if 'categories_used' in kg_result:
    # dict.fromkeys de-duplicates while keeping first-seen order, i.e. the
    # order in which the sources were retrieved. A set would make the printed
    # order (and which entities count as "top") vary from run to run.
    unique_categories = list(dict.fromkeys(kg_result['categories_used']))
    unique_entities = list(dict.fromkeys(
        entity for entities in kg_result['entities_used'] for entity in entities
    ))
    top_entities = unique_entities[:8]
    # Only show an ellipsis when entities were actually left out.
    more_marker = "..." if len(unique_entities) > len(top_entities) else ""

    print(f"\n🏷️ KG RAG Additional Context:")
    print(f"   Categories Used: {', '.join(unique_categories)}")
    print(f"   Unique Entities: {len(unique_entities)} entities")
    print(f"   Top Entities: {', '.join(top_entities)}{more_marker}")


## Step 4: GPT-4o Analysis and Comparison

GPT-4o provides a detailed analysis and comparison of both responses.


In [None]:
# Create comprehensive analysis prompt for GPT-4o
# The judge sees the document description, the selected prompt and both
# answers. Each answer is wrapped in explicit <Start ...>/<End ...> markers so
# the two responses cannot run into each other inside the prompt.
analysis_template = f"""Given the following document used to provide context to our RAG systems, and prompt used to obtain a response, please analyze, compare, and rate the following responses while pointing out any significant differences between them and how the different approaches may have influenced this including potential pros and cons of both.

Document: {document_description}

Prompt: {selected_prompt}

<Start traditional RAG response>
{traditional_result['response']}
<End traditional RAG response>

<Start Knowledge Graph Augmented RAG response>
{kg_result['response']}
<End Knowledge Graph Augmented RAG response>

Please provide a detailed analysis covering:

1. **Response Quality Comparison**: Which response better addresses the prompt and why?
2. **Content Depth**: Compare the depth and comprehensiveness of each response
3. **Accuracy Assessment**: Evaluate the accuracy and relevance of information provided
4. **Approach Differences**: How did each RAG approach influence the response?
5. **Strengths & Weaknesses**: Key advantages and limitations of each approach
6. **Use Case Recommendations**: When would each approach be more suitable?
7. **Overall Rating**: Rate each response (1-10) with justification

Please be specific about how the knowledge graph enhancement (entity extraction, relationship mapping, graph-based community detection clustering) affected the response quality compared to traditional vector similarity search."""

print("🧠 Generating detailed analysis with GPT-4o...")
# Sent as a single user message to the GPT-4o analyzer (not the RAG model).
analysis_response = gpt4o_analyzer.run([{"role": "user", "content": analysis_template}])

print("\n📊 GPT-4o COMPREHENSIVE ANALYSIS")
print("=" * 80)
print(analysis_response)


## Additional Analysis: Context Comparison

Let's also examine the specific contexts each system retrieved to understand the differences in retrieval strategies.


In [None]:
# Print what each system retrieved for the selected prompt. Only the first 200
# characters of every chunk are shown to keep the output readable.
print("\n🔍 CONTEXT RETRIEVAL COMPARISON")
print("=" * 70)

print("\n🔹 TRADITIONAL RAG CONTEXT SOURCES:")
print("-" * 40)
# Traditional results are (text, score) pairs.
for i, (text, score) in enumerate(traditional_result['context'], 1):
    print(f"Source {i} (Similarity Score: {score:.3f}):")
    print(f"   📄 Content: {text[:200]}...")
    print()

print("\n🔹 KNOWLEDGE GRAPH RAG CONTEXT SOURCES:")
print("-" * 40)
# Knowledge-graph results are (text, score, metadata) triples; the metadata
# supplies the category name and entity list shown for each source.
for i, (text, score, metadata) in enumerate(kg_result['context'], 1):
    category = metadata.get('category_name', 'Unknown')
    entities = metadata.get('entities', [])
    print(f"Source {i} (Similarity Score: {score:.3f}):")
    print(f"   📂 Semantic Category: {category}")
    if entities:
        # At most five entities per source are listed.
        print(f"   🏷️  Key Entities: {', '.join(entities[:5])}")
    print(f"   📄 Content: {text[:200]}...")
    print()


In [None]:
# Generate context analysis
# Each retrieved source is summarised on one line (joined with chr(10), i.e. a
# newline) and truncated to 150 characters, so the judge compares what was
# retrieved without receiving the full chunks. Knowledge-graph sources also
# carry their category and up to three entities.
context_analysis_template = f"""Please analyze the context retrieval strategies of these two RAG systems based on the sources they selected:

Query: {selected_prompt}

Traditional RAG Sources (similarity-based):
{chr(10).join([f"Source {i+1} (Score: {score:.3f}): {text[:150]}..." for i, (text, score) in enumerate(traditional_result['context'])])}

Knowledge Graph RAG Sources (entity + similarity-based):
{chr(10).join([f"Source {i+1} (Score: {score:.3f}, Category: {metadata.get('category_name', 'Unknown')}, Entities: {', '.join(metadata.get('entities', [])[:3])}): {text[:150]}..." for i, (text, score, metadata) in enumerate(kg_result['context'])])}

Please compare:
1. How different are the retrieved contexts?
2. Which approach found more relevant information for this specific query?
3. How did entity extraction and graph-based community detection influence the Knowledge Graph RAG's choices?
4. Are there any important perspectives or information that one system missed?

Provide specific insights about the retrieval strategy differences."""

# Sent as a single user message to the GPT-4o analyzer (not the RAG model).
context_analysis = gpt4o_analyzer.run([{"role": "user", "content": context_analysis_template}])

print("\n📊 CONTEXT RETRIEVAL ANALYSIS")
print("=" * 60)
print(context_analysis)


## Summary and Conclusion

This comparison analysis demonstrates the practical differences between traditional vector similarity RAG and knowledge graph enhanced RAG approaches.


In [None]:
# Recap the inputs this comparison was run with, so the output above can be
# matched to the prompt and document description that produced it.
print("\n🎯 COMPARISON SUMMARY")
print("=" * 50)
print(f"Selected Prompt: {selected_prompt}")
print(f"Document Context: {document_description}")
print("\n✅ Analysis Complete!")
print("🔄 To run another comparison, restart from Step 1 with a different prompt")
