# Simple Test Notebook

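This notebook is a quick smoke test of the X-learner hyperparameter tuning framework. It (1) generates a simulated dataset, (2) fits an X-learner with default parameters, (3) tunes it with grid search, and (4) runs a small repeated experiment.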

In [1]:
import sys
import os
# Append the parent of the current working directory to sys.path so the
# `src.*` imports below can be resolved.
# NOTE(review): this assumes the kernel's working directory sits directly
# under the project root — confirm before running from elsewhere.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# Project-local modules (found through the sys.path entry added above).
from src.dgp import SimulatedDataset, simulate_dataset
from src.xlearner import XlearnerWrapper
from src.tuning import grid_search, random_search
from src.metrics_helpers import pehe
from src.experiment import run_experiment

  from .autonotebook import tqdm as notebook_tqdm


## 1. Generate Simulated Dataset

In [6]:
# Simulate the dataset used by the following cells (N=1000, d=15, alpha=0.1, seed=42).
dgp = SimulatedDataset(N=1000, d=15, alpha=0.1, seed=42)

# Report the dataset's size, treated share, and true treatment-effect statistics
# as a single multi-line message.
print(
    "\n".join(
        [
            "Dataset Generated:",
            f"  Sample size: {dgp.N}",
            f"  Number of features: {dgp.d}",
            f"  Confounding strength (alpha): {dgp.alpha}",
            f"  Treatment distribution: {np.mean(dgp.W):.2%} treated",
            f"  True average treatment effect: {np.mean(dgp.tau):.3f}",
            f"  True treatment effect std: {np.std(dgp.tau):.3f}",
        ]
    )
)

Dataset Generated:
  Sample size: 1000
  Number of features: 15
  Confounding strength (alpha): 0.1
  Treatment distribution: 43.10% treated
  True average treatment effect: 0.527
  True treatment effect std: 0.784


## 2. Fit X-Learner with Default Parameters

In [7]:
# Build the X-learner with 50-tree random forests for both the `models`
# argument and the propensity model, without any tuning.
wrapper = XlearnerWrapper(
    models=RandomForestRegressor(n_estimators=50, random_state=0),
    propensity_model=RandomForestClassifier(n_estimators=50, random_state=0),
)

# Fit on the full simulated dataset, then predict the CATE for the same rows.
# NOTE: prediction reuses the X that was used for fitting, so the PEHE below
# is measured on the fitting data against the simulated true effects.
wrapper.fit(dgp.X, dgp.Y, W=dgp.W)
tau_pred = wrapper.predict(dgp.X)
pehe_score = pehe(dgp.tau, tau_pred)

# Emit the summary as one message (the leading newline gives a blank line first).
print(
    "\n".join(
        [
            "\nX-Learner Results (Default Parameters):",
            f"  Predicted average treatment effect: {np.mean(tau_pred):.3f}",
            f"  Predicted treatment effect std: {np.std(tau_pred):.3f}",
            f"  PEHE (lower is better): {pehe_score:.3f}",
        ]
    )
)


X-Learner Results (Default Parameters):
  Predicted average treatment effect: 0.535
  Predicted treatment effect std: 0.701
  PEHE (lower is better): 0.168




## 3. Test Grid Search Tuning

In [8]:
# Untuned estimator handed to the search; the propensity model is fixed at 50 trees.
base_estimator = XlearnerWrapper(
    models=RandomForestRegressor(random_state=0),
    propensity_model=RandomForestClassifier(n_estimators=50, random_state=0),
)

# Candidate values for the `models__*` parameters (3 x 3 grid).
param_grid = {
    "models__n_estimators": [20, 50, 80],
    "models__max_depth": [3, 5, 8],
}

# Search the grid with cv=2 on the full simulated dataset.
print("\nRunning Grid Search...")
best_estimator, best_params, best_score = grid_search(
    estimator=base_estimator,
    param_grid=param_grid,
    X=dgp.X,
    Y=dgp.Y,
    W=dgp.W,
    cv=2,
    verbose=False
)

# Score the selected estimator against the true effects. The improvement is
# relative to `pehe_score` from the default-parameter cell, so that cell must
# have been run first.
tau_pred_tuned = best_estimator.predict(dgp.X)
pehe_tuned = pehe(dgp.tau, tau_pred_tuned)

print(
    "\n".join(
        [
            "\nGrid Search Results:",
            f"  Best parameters: {best_params}",
            f"  Best CV score (MSE): {best_score:.3f}",
            f"  PEHE with tuned model: {pehe_tuned:.3f}",
            f"  PEHE improvement: {pehe_score - pehe_tuned:.3f}",
        ]
    )
)


Running Grid Search...

Grid Search Results:
  Best parameters: {'models__n_estimators': 80, 'models__max_depth': 8}
  Best CV score (MSE): 1.344
  PEHE with tuned model: 0.146
  PEHE improvement: 0.022




## 4. Run Small Experiment

In [9]:
# Parameters passed through to the dataset simulator for every repetition.
dgp_params = {"N": 100, "d": 10, "alpha": 0.5}

# One learner configuration: random-forest models plus a 50-tree propensity forest.
learners = [
    {
        "name": "x_rf",
        "models": RandomForestRegressor(random_state=0),
        "propensity_model": RandomForestClassifier(n_estimators=50, random_state=0),
    }
]

# One tuner configuration: grid search over the number of trees, with cv=3.
tuners = [
    {
        "name": "grid",
        "fn": grid_search,
        "param_space": {"models__n_estimators": [20, 50, 80, 100]},
        "kwargs": {"cv": 3, "verbose": False}
    }
]

# Run every learner/tuner combination for R=3 repetitions.
print("\nRunning Experiment (R=3 repetitions)...")
summary, raw = run_experiment(
    learners=learners,
    tuners=tuners,
    R=3,
    simulate_dataset_fn=simulate_dataset,
    dgp_params=dgp_params,
    base_seed=42,
    cv_plug=3
)

# Print one report per summary entry; each report starts with a blank line.
print("\nExperiment Results:")
for result in summary:
    print(
        "\n".join(
            [
                f"\nLearner: {result['learner']}, Tuner: {result['tuner']}",
                f"  PEHE Mean: {result['pehe_mean']:.3f}",
                f"  PEHE Variance: {result['pehe_var']:.3f}",
                f"  PEHE Plug-in Mean: {result['pehe_plug_mean']:.3f}",
                f"  PEHE Plug-in Variance: {result['pehe_plug_var']:.3f}",
            ]
        )
    )


Running Experiment (R=3 repetitions)...





Experiment Results:

Learner: x_rf, Tuner: grid
  PEHE Mean: 0.344
  PEHE Variance: 0.030
  PEHE Plug-in Mean: 0.097
  PEHE Plug-in Variance: 0.002


