In [None]:
import sys
# Put the parent directory on the module search path so the `core` import
# below can resolve (assumes `core` lives one level above this notebook —
# TODO confirm). The path is relative to the kernel's working directory.
sys.path.append('..')

# Project helpers used by the cells below: CSV loading, result saving,
# metric calculation and section printing.
from core import load_csv_data, save_results, calculate_metrics, print_section
import os
import pandas as pd

# Mark the start of the evaluation module (presumably prints a section
# banner; the exact format is defined by core.print_section).
print_section("SYSTEM EVALUATION")

## Create Sample Train/Test Data

For demonstration, create small sample `train.csv` and `test.csv` files under `../data/`.

In [None]:
# Make sure the output directory exists before writing: DataFrame.to_csv does
# not create missing parent directories and raises OSError on a fresh checkout.
os.makedirs('../data', exist_ok=True)

# Sample training set: three labelled backstories, all referring to the same
# sample novel file. 'label' holds the 0/1 ground-truth value per backstory.
train_data = {
    'story_id': ['train_001', 'train_002', 'train_003'],
    'novel_file': ['evermoor_sample.txt', 'evermoor_sample.txt', 'evermoor_sample.txt'],
    'backstory': [
        'Elizabeth lived in Paris before coming to Evermoor.',
        'Thomas Blackwood arrived at Evermoor in 1842.',
        'Elizabeth discovered secrets about her grandmother in 1820.'
    ],
    'label': [1, 0, 1]
}

train_df = pd.DataFrame(train_data)
# index=False keeps the pandas row index out of the CSV.
train_df.to_csv('../data/train.csv', index=False)

# Sample test set: same columns as the training set, minus 'label'.
test_data = {
    'story_id': ['test_001', 'test_002'],
    'novel_file': ['evermoor_sample.txt', 'evermoor_sample.txt'],
    'backstory': [
        'Lord Edmund invited Elizabeth to solve a mystery.',
        'Elizabeth met Thomas in London before 1847.'
    ]
}

test_df = pd.DataFrame(test_data)
test_df.to_csv('../data/test.csv', index=False)

print("✓ Created sample train/test files")

## Evaluate on Training Set

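In real usage, this step would be used to calibrate decision thresholds. In this demo, the cell only loads the training samples and prints them for inspection.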

In [None]:
# Read the training CSV through the project loader; each sample is indexed
# by column name below ('story_id', 'backstory', 'label').
train_samples = load_csv_data('../data/train.csv')

# Header: sample count followed by a 60-character rule.
print(f"\nTraining samples: {len(train_samples)}", "=" * 60, sep="\n")

# One four-line record per sample (ID, backstory preview, label, blank line).
for sample in train_samples:
    # Only the first 60 characters of the backstory are shown; the "..."
    # suffix is appended unconditionally.
    preview = sample['backstory'][:60]
    print(
        f"ID: {sample['story_id']}",
        f"  Backstory: {preview}...",
        f"  Label: {sample['label']}",
        "",
        sep="\n",
    )

## Performance Metrics

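Calculate accuracy, precision, recall, and F1 on the training data. The predictions in this demo are random placeholders, so the resulting values are illustrative only.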

In [None]:
# Placeholder evaluation: in real usage the predictions would come from the
# full pipeline; for the demo they are random 0/1 guesses.
import pickle
import random

# Fixed seed so the placeholder predictions are the same on every fresh run.
random.seed(42)

# One random guess per training sample, paired with the integer ground truth.
train_predictions = [random.choice([0, 1]) for _unused in train_samples]
train_labels = [int(row['label']) for row in train_samples]

# Score the guesses against the labels with the project metric helper.
metrics = calculate_metrics(train_predictions, train_labels)

# Assemble the report (title, 60-character rule, one line per metric with
# three decimals) and emit it in a single print.
report_lines = ["\nTraining Set Metrics:", "=" * 60]
report_lines.extend(
    f"{metric.capitalize()}: {value:.3f}" for metric, value in metrics.items()
)
print("\n".join(report_lines))

## Generate Test Predictions

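Run the pipeline on the test set. In this demo the pipeline is simulated: each prediction, confidence, and score is drawn at random.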

In [None]:
def _simulate_prediction(story_id):
    """Build one placeholder result record for the given story id.

    Stands in for the full pipeline (which would run per sample in
    production). Values come from the module-level ``random`` generator in
    this order: prediction (0 or 1), confidence in [0.6, 0.95], then the
    score embedded in the rationale in [0.3, 0.7]. They therefore depend on
    the ``random.seed(42)`` call made in the training-metrics cell.
    """
    return {
        'story_id': story_id,
        'prediction': random.choice([0, 1]),
        'confidence': random.uniform(0.6, 0.95),
        'rationale': f"Based on analysis of novel and backstory. Score: {random.uniform(0.3, 0.7):.3f}",
    }


# Read the unlabelled test CSV through the project loader.
test_samples = load_csv_data('../data/test.csv')

print(f"\nTest samples: {len(test_samples)}")

# One simulated result record per test sample, in file order.
test_results = [_simulate_prediction(sample['story_id']) for sample in test_samples]

print(f"✓ Generated {len(test_results)} predictions")

## Save Results

In [None]:
# Make sure the output directory exists before saving. Whether save_results
# creates missing directories is not visible from this notebook, so create it
# here; exist_ok=True makes this a no-op when the directory is already there.
os.makedirs('../results', exist_ok=True)

# Persist the simulated test predictions through the project helper.
save_results(test_results, '../results/results.csv')

print("\n✓ Results saved to results.csv")
print("\nSample predictions:")
print("=" * 60)

# Tabular view of the records just saved (one row per test sample).
results_df = pd.DataFrame(test_results)
print(results_df)

In [None]:
# Closing banner plus next steps for running on real data. The leading "\n"
# on the first two entries produces the blank lines between sections.
closing_lines = [
    "\n✓ Module 8 Complete: Evaluation finished!",
    "\nTo use this system on real data:",
    "1. Place novels in data/novels/",
    "2. Update train.csv and test.csv",
    "3. Run run_pipeline.ipynb",
]
print("\n".join(closing_lines))