# Advanced Evaluation with skdr-eval

This notebook demonstrates advanced usage patterns for offline policy evaluation using skdr-eval.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
import skdr_eval

## 1. Generate Synthetic Data with Custom Parameters

In [None]:
# Build the synthetic logs used by the rest of the notebook. The settings are
# gathered in one dict so the dataset size, operator count and seed are easy
# to spot and tweak.
synth_config = {"n": 10000, "n_ops": 8, "seed": 42}
logs, ops_all, true_q = skdr_eval.make_synth_logs(**synth_config)

# Quick summary of what was generated.
# NOTE(review): assumes `logs` exposes a 'timestamp' column — confirm against
# the installed skdr-eval version.
print(f"Dataset size: {len(logs)}")
print(f"Operators: {list(ops_all)}")
print(f"Time range: {logs['timestamp'].min()} to {logs['timestamp'].max()}")

## 2. Compare Multiple Model Types

In [None]:
# Candidate regressors to compare, keyed by a display name. Every stochastic
# estimator is seeded so repeated runs give the same fits.
models = {
    "RandomForest": RandomForestRegressor(
        n_estimators=200,
        max_depth=15,
        random_state=42,
    ),
    "HistGradientBoosting": HistGradientBoostingRegressor(
        max_iter=200,
        max_depth=8,
        random_state=42,
    ),
    "LinearRegression": LinearRegression(),
}

# Evaluation settings, kept separate from the data/model arguments.
# Five splits are used here to get more robust estimates.
eval_options = dict(
    fit_models=True,
    n_splits=5,
    outcome_estimator="hgb",
    random_state=42,
    policy_train="pre_split",
    policy_train_frac=0.8,
)

# `report` is the summary table plotted later in the notebook;
# `detailed_results` is kept alongside it for further inspection.
report, detailed_results = skdr_eval.evaluate_sklearn_models(
    logs=logs, models=models, **eval_options
)

print("\nEvaluation Results:")
print(report)

## 3. Visualize Results

In [None]:
# One figure, two panels: grouped DR/SNDR bars on the left and DR estimates
# with their interval bounds on the right.
# NOTE(review): assumes `report` has 'model', 'dr_value', 'sndr_value',
# 'dr_ci_lower' and 'dr_ci_upper' columns — confirm against the installed
# skdr-eval version.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# --- Left panel: DR vs SNDR, one pair of bars per model -------------------
models_list = report['model'].tolist()
dr_values = report['dr_value'].tolist()
sndr_values = report['sndr_value'].tolist()

x = np.arange(len(models_list))
width = 0.35

# Shift each series half a bar-width either side of the tick so the pair
# sits centred on its model label.
for shift, heights, series_label in (
    (-width / 2, dr_values, 'DR'),
    (width / 2, sndr_values, 'SNDR'),
):
    ax1.bar(x + shift, heights, width, label=series_label, alpha=0.8)

ax1.set(xlabel='Models', ylabel='Estimated Value', title='DR vs SNDR Estimates')
ax1.set_xticks(x)
ax1.set_xticklabels(models_list, rotation=45)
ax1.legend()
ax1.grid(True, alpha=0.3)

# --- Right panel: DR point estimates with asymmetric error bars -----------
dr_ci_lower = report['dr_ci_lower'].tolist()
dr_ci_upper = report['dr_ci_upper'].tolist()

# errorbar takes yerr as [distance below, distance above], not as the raw
# interval bounds, so convert the bounds to offsets from the estimate.
dr_center = np.array(dr_values)
dr_errors = [
    dr_center - np.array(dr_ci_lower),
    np.array(dr_ci_upper) - dr_center,
]

ax2.errorbar(models_list, dr_values, yerr=dr_errors, fmt='o', capsize=5, capthick=2)
ax2.set(
    xlabel='Models',
    ylabel='DR Estimate',
    title='DR Estimates with 95% Confidence Intervals',
)
ax2.tick_params(axis='x', rotation=45)
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()