# Compare Configurations

This notebook compares multiple RAG configurations side-by-side.

**Usage:**
1. Edit the `CONFIG_FILES` list to select which configurations to compare
2. Modify the `test_queries` list with your test questions
3. Run all cells to see the side-by-side comparison

## Configuration

In [None]:
# ========== CONFIGURATION ==========
# Configuration files to compare; each entry is loaded in the order listed
CONFIG_FILES = [
    "../../configs/test_configs/baseline.yaml",
    "../../configs/test_configs/improved_qa_prompt.yaml",
]

# Questions to run; every question is sent to every configuration above
test_queries = [
    "What is a sliver?",
    "How do I delete a project?",
    "How can I use FABcache?",
    "I want to change my slice name",
    "what does it mean if KANS has 3/4 GPU",
]
# ===================================

## Setup

In [None]:
import sys
sys.path.append('../..')

from utils.testing.yaml_config_loader import load_config
from utils.testing.test_helpers import run_test, print_comparison
import pandas as pd

## Load Configurations

In [None]:
# Load every file named in CONFIG_FILES, keeping the list order
configs = list(map(load_config, CONFIG_FILES))

print(f"Comparing {len(configs)} configurations:\n")
for i, config in enumerate(configs, start=1):
    # Numbered summary: name, LLM model and temperature, rerank depth
    print(f"{i}. {config['name']}")
    print(f"   Model: {config['rag_config']['llm']['model']}")
    print(f"   Temperature: {config['rag_config']['llm']['temperature']}")
    print(f"   Rerank top N: {config['rag_config']['retrieval']['rerank_top_n']} docs")
    print()

## Run Comparison Tests

In [None]:
# Execute each test query against every loaded configuration
all_results = []

for query in test_queries:
    print(f"\nTesting query: {query}")
    print("=" * 80)

    # Results for this query only, in the same order as `configs`
    query_results = []
    for config in configs:
        outcome = run_test(config, query)
        query_results.append(outcome)
        all_results.append(outcome)

    # Show this query's results from all configurations together
    print_comparison(query, query_results)

## Detailed Comparison Table

In [None]:
# Create detailed comparison DataFrame
def _preview(text, limit):
    """Return `text` shortened to `limit` characters for table display.

    Args:
        text: String to shorten. `None` is treated as an empty string so a
            result that stores `None` under a key does not raise `TypeError`.
        limit: Maximum number of characters kept before '...' is appended.

    Returns:
        `text` unchanged when it fits within `limit`; otherwise its first
        `limit` characters followed by '...'.
    """
    text = text or ''
    if len(text) > limit:
        return text[:limit] + '...'
    return text


# One record per (query, config) result; the DataFrame is built once from the list
comparison_data = [
    {
        'Query': _preview(result['question'], 40),
        'Config': result.get('config_name', 'Unknown'),
        'Success': result.get('success', False),
        # `or []` also covers a 'context' key that is present but set to None,
        # which the `.get` default alone would not catch
        'Context Docs': len(result.get('context') or []),
        'Answer Preview': _preview(result.get('answer'), 100),
    }
    for result in all_results
]

df = pd.DataFrame(comparison_data)
display(df)

## Full Answers Comparison

View the full answers side by side for detailed analysis.

In [None]:
# Collect results under their question so all configs can be read together
from collections import defaultdict

results_by_query = defaultdict(list)
for result in all_results:
    results_by_query[result['question']].append(result)

# Emit every complete answer, one question at a time
divider = "-" * 80
for query, results in results_by_query.items():
    print(f"\n{'='*80}")
    print(f"Query: {query}")
    print(f"{'='*80}\n")

    for result in results:
        # Config name as a bracketed heading, then the answer between dividers
        print(f"\n[{result.get('config_name', 'Unknown')}]")
        print(divider)
        print(result.get('answer', 'No answer'))
        print(divider)