In [23]:
from causalsim import *
import metrics
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
import numpy as np 
import pandas as pd

In [60]:
# Simulation settings that get substituted into the data-generating call.
n = 1000   # number of individuals
p = 1      # number of covariates
beta = 1   # beta_1 value for the treatment effect
sigma = 1  # sigma value for the noise term

# Learner label -> expression string. run_experiment() passes each string to
# eval() with the freshly simulated dataset bound to the local name `data`.
learners = dict(
    LR="Causal_LR(data)",
    X="Causal_XLearner(data, LinearRegression())",
    DR="Causal_DRLearner(data)",
    CF="Causal_CausalForest(data)",
    T="Causal_TLearner(data, LinearRegression())",
    S="Causal_SLearner(data, LinearRegression())",
)

# Expression string that run_experiment() evaluates once per simulation round
# to build a new dataset.
data_str = "simulation_simple(n = {}, p = {}, beta = {}, sigma = {})".format(
    n, p, beta, sigma
)

In [61]:
# Run ten simulation rounds for every configured learner.
# NOTE(review): run_experiment is defined in a later cell of this notebook
# (with a lower execution count); on a fresh kernel that cell has to run first.
res = run_experiment(learners, data_str, num_sim=10)

In [62]:
# Show the per-run metric lists collected under the "S" learner label.
res["S"]

{'mse': [0.9661432784772463,
  0.9820542021895405,
  0.9445336997055479,
  0.9838076703743152,
  1.1029116981302378,
  0.9540228827447459,
  1.032068497486038,
  0.9928098432877719,
  1.0391974702831157,
  1.0238103496924924],
 'bias': [0.10256964808595934,
  -0.07784512559322006,
  -0.049586727150639935,
  0.00502976982163876,
  0.0530173858123709,
  -0.03264071264273877,
  -0.046742590313595414,
  -0.0013916899778879089,
  -0.04776679511904577,
  0.10486593102180243],
 'r2': [-0.011009085703599597,
  -0.006208912632883701,
  -0.0026100298647608877,
  -2.5715631001288486e-05,
  -0.002555077732897537,
  -0.0011180101517032082,
  -0.0021214724560927323,
  -1.950831557406829e-06,
  -0.0022004358580471894,
  -0.010857738012841622]}

In [44]:
def run_experiment(learners, data_str, num_sim, max_abs_mse=100):
    """Run repeated simulations and collect evaluation metrics per learner.

    On each of the ``num_sim`` rounds a dataset is built by evaluating
    ``data_str``. Every learner expression is then evaluated against that
    dataset and the result is scored with ``metrics.evaluate``.

    Parameters
    ----------
    learners : dict
        Maps a learner label to a Python expression string. Each string is
        passed to ``eval`` inside this function, so it can refer to the local
        variable ``data`` (the dataset of the current round) as well as to any
        module-level name. The value of the expression is handed to
        ``metrics.evaluate`` as the estimate.
    data_str : str
        Python expression string, evaluated once per round, that produces the
        dataset. The result must support ``data['tau']``.
    num_sim : int
        Number of simulation rounds.
    max_abs_mse : float, optional
        A run is discarded for a learner when the first value returned by
        ``metrics.evaluate`` (stored under ``'mse'``) is NaN, infinite, or
        larger in magnitude than this bound. Defaults to 100.

    Returns
    -------
    dict
        ``{label: {'mse': [...], 'bias': [...], 'r2': [...]}}`` holding, in
        order, the first, second and third value returned by
        ``metrics.evaluate`` for every run that was kept. Because discarded
        runs are skipped per learner, the lists of different learners can
        have different lengths.

    Notes
    -----
    ``data_str`` and the learner expressions are executed with ``eval``; only
    pass strings written by yourself, never text from an untrusted source.
    """
    metrics_result = {
        learner: {'mse': [], 'bias': [], 'r2': []} for learner in learners
    }

    for i in range(num_sim):
        # The names `data` and `tau` must stay as they are: the learner
        # expressions are eval'd in this function's scope and look up `data`.
        data = eval(data_str)
        tau = np.array(data['tau'])

        for learner, learner_expr in learners.items():
            tau_hat = eval(learner_expr)
            metric_i = metrics.evaluate(tau, tau_hat)
            mse = metric_i[0]

            # Drop diverged runs. The explicit finiteness test matters: NaN
            # compares False against any bound, so a plain range check would
            # let it through and poison later means / standard deviations.
            if not np.isfinite(mse) or abs(mse) > max_abs_mse:
                continue

            scores = metrics_result[learner]
            scores['mse'].append(mse)
            scores['bias'].append(metric_i[1])
            scores['r2'].append(metric_i[2])

    return metrics_result

In [3]:
def plot_bars(metrics_lr, metrics_x, metrics_dr, metrics_cf, exp_string):
    """Plot MSE, bias and R2 summaries for four learners in one figure.

    Each ``metrics_*`` argument is a mapping with the keys ``'mse'``,
    ``'bias'`` and ``'r2'``, each holding the per-run values of one learner
    (in the order LR, X, DR, CF). For every metric the per-learner mean and
    standard deviation are computed and handed to ``metrics.bar_plot`` together
    with one of three side-by-side axes. ``exp_string`` is appended to each
    panel title. The figure is shown; nothing is returned.
    """
    # NOTE(review): `plt` is not imported in the visible import cell --
    # presumably it comes in through `from causalsim import *`; confirm.
    per_learner = (metrics_lr, metrics_x, metrics_dr, metrics_cf)
    # (key in the metrics mappings, label used for the y-axis and the title)
    panels = (('mse', 'MSE'), ('bias', 'Bias'), ('r2', 'R2'))

    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    for panel_idx, (key, label) in enumerate(panels):
        means = [np.mean(learner_metrics[key]) for learner_metrics in per_learner]
        errors = [np.std(learner_metrics[key]) for learner_metrics in per_learner]
        metrics.bar_plot(
            axes[panel_idx],
            means,
            errors,
            xnames=['LR', 'X', 'DR', 'CF'],
            ylabel=label,
            title=f'{label} {exp_string}',
        )

    plt.tight_layout()

    # Show the plot
    plt.show()