# Structured Data Evaluation Harness

This notebook provides an interactive interface for evaluating generated structured data against ground truth data.

## Setup

1. Ensure your data files are ready:
   - `generated_data.xlsx` or `generated_data.csv` - Your generated data
   - `ground_truth.xlsx` or `ground_truth.csv` - Your ground truth data

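2. Configure the paths and column mappings below. The data paths can also be supplied through the `GENERATED_DATA_PATH` and `GROUND_TRUTH_PATH` environment variables (for example in a `.env` file); when they are not set, the `.xlsx` file names above are used, so point the path at the `.csv` file explicitly if that is the format you have.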

In [None]:
import json
import os
from pathlib import Path

import numpy as np
import pandas as pd
from dotenv import load_dotenv

from eval_harness.evaluator import StructuredDataEvaluator

# Load environment variables: python-dotenv's load_dotenv() reads a `.env` file
# when one is found, so the os.getenv() lookups in the configuration below can
# pick up values defined there.
load_dotenv()

# --- Configuration: edit the values in this section -------------------------
# Input file locations. An environment variable of the same name takes
# precedence; otherwise the default file name given here is used.
GENERATED_DATA_PATH = os.environ.get("GENERATED_DATA_PATH", "generated_data.xlsx")
GROUND_TRUTH_PATH = os.environ.get("GROUND_TRUTH_PATH", "ground_truth.xlsx")

# Optional column mapping, for when column names differ between the two files.
# Leave as None, or set to a dict such as:
#   {"generated_col1": "ground_truth_col1", "generated_col2": "ground_truth_col2"}
COLUMN_MAPPING = None

# Match strategy handed to the evaluator: "index", "all_pairs", or "truncate".
MATCH_STRATEGY = "index"

## Initialize Evaluator

In [None]:
# Build the evaluator from the configuration values defined in the setup cell.
evaluator = StructuredDataEvaluator(
    match_strategy=MATCH_STRATEGY,
    column_mapping=COLUMN_MAPPING,
    ground_truth_path=GROUND_TRUTH_PATH,
    generated_data_path=GENERATED_DATA_PATH,
)

# Report what the evaluator exposes: row counts of both frames, then the
# columns it is going to evaluate.
generated_row_count = len(evaluator.generated_df)
print(f"Loaded {generated_row_count} rows from generated data")

ground_truth_row_count = len(evaluator.ground_truth_df)
print(f"Loaded {ground_truth_row_count} rows from ground truth data")

evaluated_columns = evaluator.column_names
print(f"Evaluating {len(evaluated_columns)} columns: {evaluated_columns}")

## Preview Data

In [None]:
# Show the first rows of each frame under its banner: generated data first,
# then ground truth. Attributes are looked up lazily, one per iteration.
for preview_banner, frame_attr in (
    ("\n=== Generated Data Preview ===", "generated_df"),
    ("\n=== Ground Truth Data Preview ===", "ground_truth_df"),
):
    print(preview_banner)
    display(getattr(evaluator, frame_attr).head())

## Evaluate All Columns

In [None]:
# Evaluate all columns with all metrics (no `metrics` argument is passed here).
# `results` is kept as a notebook-level name: the "Detailed Analysis" cell
# further down dumps it as JSON.
results = evaluator.evaluate_all_columns()

# Display summary report.
# `summary_df` is reused by the "Export Results" cell, which writes it to CSV.
summary_df = evaluator.get_summary_report(results)
display(summary_df)

## Evaluate Specific Column

In [None]:
def _format_stat(value):
    """Return ``value`` rendered with four decimals, or ``str(value)`` as a fallback.

    The ``.4f`` format spec raises TypeError (e.g. for ``None`` or a list) or
    ValueError (e.g. for a ``str``) on values that do not support float
    formatting. Falling back to ``str`` keeps one such statistic from
    aborting the whole report.
    """
    try:
        return f"{value:.4f}"
    except (TypeError, ValueError):
        return str(value)


# Evaluate a specific column
column_name = evaluator.column_names[0]  # Change to your column name

column_results = evaluator.evaluate_column(column_name)

print(f"\n=== Results for column: {column_name} ===")
print(f"Number of comparisons: {column_results['num_comparisons']}")
print("\nMetrics:")

# Entries of column_results that are bookkeeping rather than metrics.
non_metric_keys = {"column_name", "num_comparisons"}

for metric_name, metric_stats in column_results.items():
    if metric_name in non_metric_keys:
        continue
    if isinstance(metric_stats, dict):
        # A dict holds several named statistics for the metric: one line each.
        print(f"\n{metric_name}:")
        for stat_name, stat_value in metric_stats.items():
            print(f"  {stat_name}: {_format_stat(stat_value)}")
    else:
        # Anything else is printed as a single value.
        print(f"{metric_name}: {metric_stats}")

## Evaluate with Specific Metrics

In [None]:
# Restrict the evaluation to a chosen subset of metrics (edit the list to taste).
specific_metrics = [
    "cosine_similarity",
    "edit_distance",
    "jaccard_similarity",
]

# Same evaluate -> summarise -> display flow as the all-metrics run, but
# with the `metrics` argument supplied.
results_specific = evaluator.evaluate_all_columns(metrics=specific_metrics)
summary_specific = evaluator.get_summary_report(results_specific)
display(summary_specific)

## Detailed Analysis

In [None]:
# Get detailed results: dump the full `results` structure produced by the
# "Evaluate All Columns" cell as indented JSON.
# (`json` is imported in the notebook's import cell rather than here, so all
# dependencies are visible in one place.)
print("\n=== Detailed Results ===")
# default=str stringifies any value the JSON encoder cannot serialise natively.
print(json.dumps(results, indent=2, default=str))

## Export Results

In [None]:
# Export summary to CSV. The destination follows the same pattern as the input
# paths in the configuration cell: the EVALUATION_OUTPUT_PATH environment
# variable wins when set, otherwise the default file name is used.
output_path = os.getenv("EVALUATION_OUTPUT_PATH", "evaluation_results.csv")
summary_df.to_csv(output_path, index=False)  # index=False: leave the DataFrame index out of the file
print(f"Results exported to {output_path}")