In [1]:
from typing import List, Dict, Any
import pandas as pd
import matplotlib.pyplot as plt
from pipeline_evaluator import PipelineEvaluator
from pathlib import Path
from rag.loader.document_loader import get_default_documents
import os
from dotenv import load_dotenv
import openai

# Locate the project's .env file relative to the notebook's working directory.
# NOTE(review): this climbs two levels from the CWD, while the data/results
# paths later in this cell climb only one — confirm which root is intended.
project_root = Path.cwd().parent.parent
env_path = project_root / "src" / "rag" / ".env"

# Load environment variables from the resolved path when that file exists.
# Passing None falls back to python-dotenv's default .env discovery, which is
# what the previous bare `load_dotenv()` call did.
load_dotenv(dotenv_path=env_path if env_path.is_file() else None)

# Gather every credential in one place so they can be validated together.
api_keys = {
    "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
    "HUGGINGFACE_API_KEY": os.getenv("HUGGINGFACE_API_KEY"),
    "COHERE_API_KEY": os.getenv("COHERE_API_KEY")
}

openai.api_key = api_keys["OPENAI_API_KEY"]

# Fail fast with a single message naming every key that is unset or empty.
missing_keys = [key for key, value in api_keys.items() if not value]
if missing_keys:
    raise ValueError(f"Missing required API keys: {', '.join(missing_keys)}")

# Confirm presence only. Notebook outputs are saved and shared with the file,
# so no part of a secret (not even a prefix) is echoed here.
for key_name in api_keys:
    print(f"{key_name} loaded")

def load_test_dataset(csv_path: str) -> List[Dict[str, Any]]:
    """Load the evaluation test set from a CSV file and validate its schema.

    Args:
        csv_path: Path to a CSV file containing at least the columns
            ``user_input``, ``reference`` and ``reference_contexts``.

    Returns:
        One dict per CSV row. Where ``reference_contexts`` was read as a
        string (e.g. ``"['ctx a', 'ctx b']"``), it is replaced by the Python
        literal that string denotes. Non-string values are left untouched.

    Raises:
        ValueError: If a required column is missing, or if a
            ``reference_contexts`` string is not a plain Python literal.
    """
    # Local import keeps this function self-contained; `ast` is stdlib.
    import ast

    df = pd.read_csv(csv_path)

    # Define required columns based on our dataset schema
    required_columns = ['user_input', 'reference', 'reference_contexts']

    # Check if all required columns exist
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns in dataset: {missing_columns}")

    # Convert DataFrame to list of dictionaries
    dataset = df.to_dict('records')

    for row_index, sample in enumerate(dataset):
        raw_contexts = sample['reference_contexts']
        if not isinstance(raw_contexts, str):
            continue
        try:
            # literal_eval only accepts literals (lists, strings, numbers, ...).
            # Unlike eval(), it cannot run code embedded in the CSV.
            sample['reference_contexts'] = ast.literal_eval(raw_contexts)
        except (ValueError, TypeError, SyntaxError) as exc:
            raise ValueError(
                f"Row {row_index}: 'reference_contexts' is not a valid Python "
                f"literal: {raw_contexts!r}"
            ) from exc

    return dataset

def plot_results(results: Dict[str, Any], metric_name: str, output_dir: Path) -> None:
    """Save a bar chart comparing one summary metric across pipeline variants.

    Args:
        results: Mapping of variant name to a result dict; each result dict is
            indexed as ``result["summary"][metric_name]``.
        metric_name: Key to read from every variant's ``"summary"`` dict.
        output_dir: Directory for the PNG; created (with parents) if missing.

    Raises:
        KeyError: If a variant has no ``"summary"`` entry or its summary lacks
            ``metric_name``.

    The chart is written to ``output_dir / f"{metric_name}_comparison.png"``.
    """
    variants = list(results.keys())
    values = [r["summary"][metric_name] for r in results.values()]

    # Create the output directory first so a bad path fails before any drawing.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Explicit figure/axes instead of the implicit pyplot "current figure".
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.bar(variants, values)
        ax.tick_params(axis="x", labelrotation=45)
        ax.set_title(f"{metric_name} Across Pipeline Variants")
        ax.set_xlabel("Pipeline variant")
        ax.set_ylabel(metric_name)
        fig.tight_layout()
        fig.savefig(output_dir / f"{metric_name}_comparison.png")
    finally:
        # This function is called once per metric; always release the figure,
        # even if layout or saving fails, so figures do not pile up.
        plt.close(fig)

# Resolve the data and results folders relative to the parent of the current
# working directory.
base_dir = Path.cwd().parent
data_dir = base_dir.joinpath("data")
output_dir = base_dir.joinpath("results")

# Read the evaluation samples from the test-set CSV.
testset_csv = data_dir.joinpath("ragas_testset.csv")
test_dataset = load_test_dataset(str(testset_csv))

# NOTE(review): these imports sit mid-cell. `os`, `load_dotenv` and `List` are
# already imported at the top of this cell; only `UnstructuredLoader` and
# `Document` are new names here. Consider moving all imports to the top.
import os
from dotenv import load_dotenv
from langchain_unstructured import UnstructuredLoader
from typing import List
from langchain.schema import Document
# Second load_dotenv() call in this cell (the first runs near the top).
load_dotenv()

# documents = get_default_documents()

def load_insurance_docs(file_paths: List[str]) -> List[Document]:
    """
    Read every file in ``file_paths`` with UnstructuredLoader and return the
    loaded documents as one flat list, in input order.

    Args:
        file_paths: Paths of the documents to load.

    Returns:
        All Document objects produced by the loaders, concatenated.
    """
    # One loader per path, each using the "by_title" chunking strategy.
    return [
        doc
        for file_path in file_paths
        for doc in UnstructuredLoader(file_path, chunking_strategy="by_title").load()
    ]

# Paths of the insurance PDFs loaded when no explicit list is supplied.
DEFAULT_DOCS = ["../docs/nrma.pdf", "../docs/allianz.pdf"]

def get_default_documents() -> List[Document]:
    """Load the documents listed in ``DEFAULT_DOCS``."""
    default_documents = load_insurance_docs(DEFAULT_DOCS)
    return default_documents


# Load the source documents handed to the evaluator.
documents = get_default_documents()

evaluator = PipelineEvaluator(
    test_dataset=test_dataset,
    documents=documents,
    api_key=os.getenv("OPENAI_API_KEY")  # Explicitly pass the key
)

# Run evaluations
results = evaluator.run_all_evaluations()

# Plotting and the printed summary live in the next cell, which already
# contains that exact reporting code. Keeping it out of this cell avoids
# saving and printing everything twice on "Run All", and lets the reporting
# be re-run without repeating the evaluation.

OPENAI_API_KEY loaded: [REDACTED]
HUGGINGFACE_API_KEY loaded: [REDACTED]
COHERE_API_KEY loaded: [REDACTED]


  from .autonotebook import tqdm as notebook_tqdm
INFO: pikepdf C++ to Python logger bridge initialized
INFO: Using default tokenizer.


[REDACTED: a full OpenAI API key was printed in this output — rotate the key and clear outputs before sharing]


INFO: HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO: Loading faiss.
INFO: Successfully loaded faiss.
INFO: Failed to load GPU Faiss: name 'GpuIndexIVFFlat' is not defined. Will not load constructor refs for GPU indexes.
  return HuggingFaceHub(
  retrieved_docs = pipeline.retriever.get_relevant_documents(query)
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO: HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 400 Bad Request"


BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for response_format 'JudgeResponse': In context=('properties', 'score'), 'minimum' is not permitted.", 'type': 'invalid_request_error', 'param': 'response_format', 'code': None}}

In [None]:
# Summary metrics to chart; one PNG per metric is written to `output_dir`.
metrics_to_plot = [
    "avg_precision", "avg_recall", "avg_mrr",
    "avg_rouge_l", "avg_bleu",
    "avg_judge_score_normalized",
    "composite_score",
]

for metric in metrics_to_plot:
    plot_results(results, metric, output_dir)

# Text report: every summary value for every variant, to three decimals.
print("\nEvaluation Results Summary:")
for variant in results:
    result = results[variant]
    print(f"\n{variant}:")
    for metric, value in result["summary"].items():
        print(f"  {metric}: {value:.3f}")