# Regression Metrics with Confidence Intervals

This notebook demonstrates how to use the regression metrics with confidence intervals.

## Setup and Imports

In [None]:
import os
import sys
import time

import numpy as np
import pandas as pd

# Add the confidenceinterval package to Python path
sys.path.insert(0, 'confidenceinterval')

# Unified interface: one evaluator class plus a convenience function
from confidenceinterval import MetricEvaluator, evaluate_regression

# Per-metric functions; the cells in sections 2-4 and 6 call these directly,
# so they must be imported for the notebook to run on a fresh kernel.
from confidenceinterval.regression_metrics import mae, mse, rmse, r2_score, mape

# Confirm that the imports succeeded.
print("✅ New Unified Interface Loaded!")
print("Available task types: 'regression', 'classification'")

# Build a regression evaluator and list the CI methods and metrics it reports.
evaluator = MetricEvaluator(task_type='regression', method='bootstrap_bca')
print(f"Available regression methods: {evaluator.available_methods()}")
print(f"Available regression metrics: {evaluator.available_metrics()}")

Available methods: ['bootstrap_bca', 'bootstrap_percentile', 'bootstrap_basic', 'jackknife']


## 1. Create Sample Data

Let's create some sample regression data to work with:

In [4]:
# Seed NumPy's global RNG. The data below is hard-coded, so the seed only
# matters for later code that draws from the global RNG
# (presumably the bootstrap resampling - TODO confirm).
np.random.seed(42)

# Expected number of observations in each list below
n_samples = 30

# Create true values (e.g., house prices)
y_true = [150, 200, 180, 220, 175, 250, 190, 210, 165, 240,
          185, 195, 225, 170, 205, 230, 160, 215, 245, 180,
          200, 235, 175, 190, 220, 165, 210, 255, 185, 195]

# Create predictions with some error (simulating model predictions)
# NOTE(review): 24450 (index 12, true value 225) is two orders of magnitude
# away from every other prediction, which all sit within 5 of their target.
# It dominates every metric computed from this data - confirm whether it is a
# deliberate outlier or a data-entry typo.
y_pred = [145, 205, 185, 215, 180, 245, 195, 205, 170, 235,
          190, 200, 24450, 175, 200, 225, 165, 210, 250, 185,
          195, 240, 170, 185, 225, 160, 215, 250, 180, 190]

# Guard against a hand-editing slip leaving the two lists misaligned.
assert len(y_true) == len(y_pred) == n_samples, "y_true / y_pred length mismatch"

print(f"Sample size: {len(y_true)}")
print(f"True values range: {min(y_true)} to {max(y_true)}")
print(f"Predicted values range: {min(y_pred)} to {max(y_pred)}")

Sample size: 30
True values range: 150 to 255
Predicted values range: 145 to 24450


## 2. Basic Metrics (without confidence intervals)

In [5]:
# Point estimates only: with compute_ci=False each metric call is used here as
# a single number (formatted directly below), not a (value, interval) pair.
mae_basic = mae(y_true, y_pred, compute_ci=False)
mse_basic = mse(y_true, y_pred, compute_ci=False)
rmse_basic = rmse(y_true, y_pred, compute_ci=False)
r2_basic = r2_score(y_true, y_pred, compute_ci=False)
mape_basic = mape(y_true, y_pred, compute_ci=False)

print("Basic Metrics (no confidence intervals):")
print(f"MAE:  {mae_basic:.4f}")
print(f"MSE:  {mse_basic:.4f}")
print(f"RMSE: {rmse_basic:.4f}")
print(f"R²:   {r2_basic:.4f}")
print(f"MAPE: {mape_basic:.4f}%")

Basic Metrics (no confidence intervals):
MAE:  812.3333
MSE:  19561711.6667
RMSE: 4422.8624
R²:   -25146.1797
MAPE: 361.3551%


## 3. Metrics with Confidence Intervals

### Using Jackknife Method (Fast)

In [6]:
# Using jackknife method (fast, good for smaller datasets)
print("Jackknife Method (95% confidence intervals):")

# Shared settings for every metric call in this cell
jackknife_kwargs = dict(method='jackknife', confidence_level=0.95)

# Each call yields (point estimate, interval); the interval is indexed as
# [0] = lower bound, [1] = upper bound when printed below.
mae_val, mae_ci = mae(y_true, y_pred, **jackknife_kwargs)
mse_val, mse_ci = mse(y_true, y_pred, **jackknife_kwargs)
rmse_val, rmse_ci = rmse(y_true, y_pred, **jackknife_kwargs)
r2_val, r2_ci = r2_score(y_true, y_pred, **jackknife_kwargs)
mape_val, mape_ci = mape(y_true, y_pred, **jackknife_kwargs)

# NOTE(review): in the recorded run the lower bounds for MAE/MSE/RMSE/MAPE are
# negative although these metrics cannot be; the interval appears not to be
# clipped at zero - confirm this is intended.
print(f"MAE:  {mae_val:.4f}  [{mae_ci[0]:.4f}, {mae_ci[1]:.4f}]")
print(f"MSE:  {mse_val:.4f}  [{mse_ci[0]:.4f}, {mse_ci[1]:.4f}]")
print(f"RMSE: {rmse_val:.4f}  [{rmse_ci[0]:.4f}, {rmse_ci[1]:.4f}]")
print(f"R²:   {r2_val:.4f}  [{r2_ci[0]:.4f}, {r2_ci[1]:.4f}]")
print(f"MAPE: {mape_val:.4f}% [{mape_ci[0]:.4f}%, {mape_ci[1]:.4f}%]")

Jackknife Method (95% confidence intervals):
MAE:  812.3333  [-838.8487, 2463.5154]
MSE:  19561711.6667  [-20446429.7541, 59569853.0874]
RMSE: 4422.8624  [-2309.9771, 15457.7093]
R²:   -25146.1797  [-75759.5914, 29899.4711]
MAPE: 361.3551% [-372.4812%, 1095.1913%]


### Using Bootstrap Method (More Robust)

In [7]:
# Using bootstrap method (more robust, slower)
print("Bootstrap BCA Method (95% confidence intervals):")

# Shared settings for every metric call in this cell
bca_kwargs = dict(method='bootstrap_bca', confidence_level=0.95, n_resamples=2000)

# Each call yields (point estimate, interval); the interval is indexed as
# [0] = lower bound, [1] = upper bound when printed below.
mae_val, mae_ci = mae(y_true, y_pred, **bca_kwargs)
mse_val, mse_ci = mse(y_true, y_pred, **bca_kwargs)
rmse_val, rmse_ci = rmse(y_true, y_pred, **bca_kwargs)
r2_val, r2_ci = r2_score(y_true, y_pred, **bca_kwargs)
mape_val, mape_ci = mape(y_true, y_pred, **bca_kwargs)

print(f"MAE:  {mae_val:.4f}  [{mae_ci[0]:.4f}, {mae_ci[1]:.4f}]")
print(f"MSE:  {mse_val:.4f}  [{mse_ci[0]:.4f}, {mse_ci[1]:.4f}]")
print(f"RMSE: {rmse_val:.4f}  [{rmse_ci[0]:.4f}, {rmse_ci[1]:.4f}]")
print(f"R²:   {r2_val:.4f}  [{r2_ci[0]:.4f}, {r2_ci[1]:.4f}]")
print(f"MAPE: {mape_val:.4f}% [{mape_ci[0]:.4f}%, {mape_ci[1]:.4f}%]")

Bootstrap BCA Method (95% confidence intervals):
MAE:  812.3333  [5.0000, 4041.6667]
MSE:  19561711.6667  [25.0000, 97808458.3333]
RMSE: 4422.8624  [5.0000, 9889.8159]
R²:   -25146.1797  [-142522.0969, 0.9716]
MAPE: 361.3551% [2.5142%, 1837.8898%]


## 4. Different Confidence Levels

In [8]:
# Show how the jackknife interval for MAE changes with the confidence level.
confidence_levels = [0.90, 0.95, 0.99]

print("MAE with different confidence levels (jackknife):")
for conf_level in confidence_levels:
    mae_val, mae_ci = mae(y_true, y_pred, method='jackknife', confidence_level=conf_level)
    # Pull the bounds out once so the width and the report share them.
    lower, upper = mae_ci[0], mae_ci[1]
    width = upper - lower
    percent = int(conf_level*100)
    print(f"   {percent}% CI: {mae_val:.4f} [{lower:.4f}, {upper:.4f}] (width: {width:.4f})")

MAE with different confidence levels (jackknife):
   90% CI: 812.3333 [-559.4286, 2184.0952] (width: 2743.5238)
   95% CI: 812.3333 [-838.8487, 2463.5154] (width: 3302.3641)
   99% CI: 812.3333 [-1412.9889, 3037.6556] (width: 4450.6444)


## 5. NEW UNIFIED INTERFACE 🎯

The new `MetricEvaluator` class provides a single interface for all metrics:

In [None]:
# Walk through the MetricEvaluator API using the data defined earlier.
print("=== UNIFIED INTERFACE EXAMPLES ===")

# Example 1: evaluator built from the task type alone
reg_eval = MetricEvaluator(task_type='regression')  # default: bootstrap_bca, 95% CI
mae_score, mae_ci = reg_eval.evaluate('mae', y_true, y_pred)
lower, upper = mae_ci[0], mae_ci[1]
print(f"1. Default MAE: {mae_score:.4f} [{lower:.4f}, {upper:.4f}]")

# Example 2: same metric, jackknife intervals
reg_eval_jack = MetricEvaluator(task_type='regression', method='jackknife')
mae_score, mae_ci = reg_eval_jack.evaluate('mae', y_true, y_pred)
lower, upper = mae_ci[0], mae_ci[1]
print(f"2. Jackknife MAE: {mae_score:.4f} [{lower:.4f}, {upper:.4f}]")

# Example 3: same metric, 90% confidence level
reg_eval_90 = MetricEvaluator(task_type='regression', confidence_level=0.90)
mae_score, mae_ci = reg_eval_90.evaluate('mae', y_true, y_pred)
lower, upper = mae_ci[0], mae_ci[1]
print(f"3. 90% CI MAE: {mae_score:.4f} [{lower:.4f}, {upper:.4f}]")

# Example 4: every metric from a single call; each value unpacks as (score, interval)
print("\n4. All metrics with one call:")
all_results = reg_eval.evaluate_all(y_true, y_pred)
for metric, (score, ci) in all_results.items():
    print(f"   {metric}: {score:.4f} [{ci[0]:.4f}, {ci[1]:.4f}]")

### Convenience Functions

For even simpler usage, you can use the convenience functions:

In [None]:
# One-line evaluations with the evaluate_regression convenience function.
# Each call yields (point estimate, interval), with the interval indexed as
# [0] = lower bound, [1] = upper bound.
mae_val, mae_ci = evaluate_regression('mae', y_true, y_pred)
print(f"Quick MAE: {mae_val:.4f} [{mae_ci[0]:.4f}, {mae_ci[1]:.4f}]")

# The CI method can be overridden per call
rmse_val, rmse_ci = evaluate_regression('rmse', y_true, y_pred, method='jackknife')
print(f"Quick RMSE (jackknife): {rmse_val:.4f} [{rmse_ci[0]:.4f}, {rmse_ci[1]:.4f}]")

# ...and so can the confidence level
r2_val, r2_ci = evaluate_regression('r2', y_true, y_pred, confidence_level=0.99)
print(f"Quick R² (99% CI): {r2_val:.4f} [{r2_ci[0]:.4f}, {r2_ci[1]:.4f}]")

print("\n✅ UNIFIED INTERFACE SUMMARY:")
print("1. Import only: from confidenceinterval import MetricEvaluator")
print("2. Choose task: MetricEvaluator(task_type='regression' or 'classification')")
print("3. Evaluate: evaluator.evaluate('metric_name', y_true, y_pred)")
print("4. Or use: evaluate_regression('metric_name', y_true, y_pred)")

## 6. Method Comparison

In [10]:
# Compare the interval and wall-clock cost of each CI method for MAE.
methods = ['jackknife', 'bootstrap_bca', 'bootstrap_percentile']

# Pad every method name to the longest one so the columns line up; a fixed
# width of 18 is too narrow for 'bootstrap_percentile'.
name_width = max(len(name) for name in methods)

print("Method Comparison (MAE with 95% CI):")
print("-" * 60)

for method in methods:
    # Only the bootstrap variants are given a resample count.
    extra_kwargs = {} if method == 'jackknife' else {'n_resamples': 1000}

    # perf_counter is monotonic and high-resolution, which suits sub-second
    # timings better than the wall-clock time.time().
    start_time = time.perf_counter()
    mae_val, mae_ci = mae(y_true, y_pred, method=method, **extra_kwargs)
    elapsed_time = time.perf_counter() - start_time

    print(f"{method:<{name_width}s}: {mae_val:.4f} [{mae_ci[0]:.4f}, {mae_ci[1]:.4f}] ({elapsed_time:.4f}s)")

Method Comparison (MAE with 95% CI):
------------------------------------------------------------
jackknife         : 812.3333 [-838.8487, 2463.5154] (0.0017s)
bootstrap_bca     : 812.3333 [5.0000, 3563.8662] (0.0589s)
bootstrap_percentile: 812.3333 [5.0000, 2427.0000] (0.0325s)
