In [103]:
from sklearn import linear_model
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [46]:
def compute_pehe_ate(t, y_f, y_cf, y_pred_f, y_pred_cf):
    y_treated_true = t * y_f + (1 - t) * y_cf
    y_control_true = t * y_cf + (1 - t) * y_f

    y_treated_pred = t * y_pred_f + (1 - t) * y_pred_cf
    y_control_pred = t * y_pred_cf + (1 - t) * y_pred_f

    pehe = np.mean(np.square((y_treated_pred - y_control_pred) - (y_treated_true - y_control_true)))
    ate = np.mean(np.abs((y_treated_pred - y_control_pred) - (y_treated_true - y_control_true)))

    return pehe, ate

## Linear regression

In [106]:
def generate_mimic_result_linear_regression(data_path):
    gamma = [0.1, 0.3, 0.5, 0.7]
    for g in gamma:
        final = []
        for i in range(1, 11):
            tmp = pd.read_csv(data_path.format(g, i))
            final.append(tmp)
        res_df = pd.concat(final)
        res_df['y'] = res_df['0'] * res_df['1'] + (1 - res_df['0']) * res_df['2']
        X, y = res_df.drop(columns=['y', '1', '2']), res_df['y']
        reg = linear_model.LinearRegression().fit(X, y)
        X_test =  res_df.drop(columns=['y', '1', '2'])
        res_df['pred'] = reg.predict(X_test)
        pehe, ate = compute_pehe_ate(res_df['0'], res_df['1'], res_df['2'], res_df['pred'], res_df['pred'])
        mse = mean_squared_error(res_df['y'], res_df['pred'])
        print('gamma: {}\tPEHE: {:.4f}\tATE: {:.4f}\tRMSE: {:.4f}\n'.format(g, np.sqrt(pehe), ate, np.sqrt(mse)))
                    

In [107]:
data_path = 'data/data_synthetic/data_baseline_mimic_mean_syn_{}/{}.csv'
generate_mimic_result_linear_regression(data_path)

gamma: 0.1	PEHE: 0.6270	ATE: 0.5970	RMSE: 0.1050

gamma: 0.3	PEHE: 0.5671	ATE: 0.5095	RMSE: 0.1313

gamma: 0.5	PEHE: 0.5422	ATE: 0.4764	RMSE: 0.1633

gamma: 0.7	PEHE: 0.2196	ATE: 0.1835	RMSE: 0.1508



In [112]:
full_syn_data_path = 'data/data_synthetic/data_baseline_syn_{}/{}.csv'
generate_mimic_result_linear_regression(full_syn_data_path)

gamma: 0.1	PEHE: 0.4104	ATE: 0.2639	RMSE: 0.1460

gamma: 0.3	PEHE: 0.4691	ATE: 0.2929	RMSE: 0.1326

gamma: 0.5	PEHE: 0.7430	ATE: 0.4211	RMSE: 0.1854

gamma: 0.7	PEHE: 0.3420	ATE: 0.2289	RMSE: 0.1081



### KNN

In [93]:
from sklearn.neighbors import KNeighborsRegressor

In [95]:
final = []
for i in range(1, 11):
    tmp = pd.read_csv(data_path.format(0.1, i))
    final.append(tmp)
res_df = pd.concat(final)
res_df['y'] = res_df['0'] * res_df['1'] + (1 - res_df['0']) * res_df['2']

In [96]:
res_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,34,35,36,37,38,39,40,41,42,y
0,0.0,0.128649,0.504753,-0.000559,0.000749,1.679069,-0.002774,0.000382,-1.504222,2.641652,...,0.0,0.0,0.0,0.0,-0.049326,-0.753471,0.149936,0.000000,1.0,0.504753
1,0.0,0.065864,0.470731,-0.000559,0.000749,-0.996847,-0.002774,0.000382,2.697070,0.766558,...,0.0,0.0,0.0,0.0,-0.049326,-0.753471,-0.191394,0.000000,1.0,0.470731
2,0.0,-0.049369,0.444563,-0.000559,0.000749,-0.116507,-0.002774,0.000382,-0.860122,-1.204693,...,1.0,0.0,0.0,0.0,-0.049326,1.527492,1.190992,32.154922,1.0,0.444563
3,0.0,-0.056160,0.413261,-0.000559,0.000749,2.537730,-0.002774,0.000382,-1.128497,-0.531581,...,0.0,0.0,0.0,0.0,-0.049326,-0.753471,0.206824,0.000000,1.0,0.413261
4,0.0,0.050287,0.624826,-1.000780,0.000749,-0.016048,-0.749048,-0.926426,0.127987,-4.522170,...,0.0,0.0,0.0,0.0,-0.049326,-0.753471,-0.504279,0.000000,1.0,0.624826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9059,1.0,-0.596710,0.087873,-0.000559,0.000749,0.261536,-0.002774,0.000382,-0.804007,0.003184,...,0.0,0.0,0.0,0.0,-0.049326,1.404197,-0.137350,21.162287,1.0,-0.596710
9060,0.0,0.232030,0.462088,-0.000559,0.000749,0.513213,-0.002774,0.000382,0.534942,0.525682,...,0.0,0.0,0.0,0.0,-0.049326,1.342549,0.519710,30.168745,0.0,0.462088
9061,0.0,-0.102404,0.614440,-0.000559,0.000749,-0.400040,-0.002774,0.000382,1.915732,0.526163,...,0.0,0.0,0.0,0.0,-0.049326,1.496668,0.519710,26.177572,1.0,0.614440
9062,0.0,0.015564,0.660908,-0.000559,0.000749,0.280042,-0.002774,0.000382,0.152385,0.261727,...,0.0,0.0,0.0,0.0,-0.049326,-0.753471,-0.037795,0.000000,1.0,0.660908


In [97]:
def generate_mimic_result_knn(data_path):
    gamma = [0.1, 0.3, 0.5, 0.7]
    for g in gamma:
        final = []
        for i in range(1, 11):
            tmp = pd.read_csv(data_path.format(g, i))
            final.append(tmp)
        res_df = pd.concat(final)
        res_df['y'] = res_df['0'] * res_df['1'] + (1 - res_df['0']) * res_df['2']
        X, y = res_df.drop(columns=['y', '1', '2']), res_df['y']
        reg = KNeighborsRegressor(n_neighbors=2).fit(X, y)
        X_test =  res_df.drop(columns=['y', '1', '2'])
        res_df['pred'] = reg.predict(X_test)
        pehe, ate = compute_pehe_ate(res_df['0'], res_df['1'], res_df['2'], res_df['pred'], res_df['pred'])
        print('gamma: {}\tPEHE: {:.4f}\tATE: {:.4f}\n'.format(g, np.sqrt(pehe), ate))

In [98]:
data_path = 'data/data_synthetic/data_baseline_mimic_mean_syn_{}/{}.csv'
generate_mimic_result_linear_regression(data_path)

gamma: 0.1	PEHE: 0.6270	ATE: 0.5970

gamma: 0.3	PEHE: 0.5671	ATE: 0.5095

gamma: 0.5	PEHE: 0.5422	ATE: 0.4764

gamma: 0.7	PEHE: 0.2196	ATE: 0.1835



In [102]:
full_syn_data_path = 'data/data_synthetic/data_baseline_syn_{}/{}.csv'
generate_mimic_result_linear_regression(full_syn_data_path)

gamma: 0.1	PEHE: 0.4104	ATE: 0.2639

gamma: 0.3	PEHE: 0.4691	ATE: 0.2929

gamma: 0.5	PEHE: 0.7430	ATE: 0.4211

gamma: 0.7	PEHE: 0.3420	ATE: 0.2289

