In [15]:
%reload_ext autoreload
%autoreload 2

import sys
import os

# expose the parent of the current working directory on the import path
path_to_project = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.insert(1, path_to_project)

In [16]:
import itertools
import pandas as pd
from sklearn.preprocessing import StandardScaler
from src.data_dict import *
from src.utils import one_hot
from src.directory import data_dir, NHANES_preprocessed_filename
from src.estimators import aipw_estimator, unadjusted_DM_estimator, ipw_estimator, t_learner, s_learner, x_learner

In [17]:
# read the preprocessed NHANES csv, using its SEQN column as the row index
NHANES_preprocessed_filepath = os.path.join(data_dir, NHANES_preprocessed_filename)
df = pd.read_csv(
    filepath_or_buffer=NHANES_preprocessed_filepath,
    index_col='SEQN',
)

In [None]:
# variable roles used throughout the analysis
z_col = light_col                                   # light-exposure column
t_col = sleep_deprivation_col                       # treatment column
y_cols = [htn_col, diastolic_col, systolic_col]     # outcome columns

# columns that must be left untouched by scaling / one-hot encoding:
# every outcome, the treatment and the light column (when present in df)
all_cols = df.columns
_role_cols = {*y_cols, t_col, z_col}
untransformed_cols = [col for col in all_cols if col in _role_cols]

In [None]:
# prune df: keep complete cases at or above the age cutoff.
# Built as one non-inplace chain ending in .copy() so the column assignments
# below write into an independent frame rather than into the result of a
# filter (the pattern that triggers pandas' SettingWithCopyWarning).
df = df.dropna(how='any').query(f'age >= {age_cutoff}').copy()

## transform df
# apply scalers to every numerical column that is not an outcome/treatment/light column
# (sorted: a bare list(set(...)) has a different order from run to run)
scaler = StandardScaler()
numerical_transformation_cols = sorted(set(numerical_cols) - set(untransformed_cols))
df[numerical_transformation_cols] = scaler.fit_transform(df[numerical_transformation_cols])

# make z col binary (indicator of recommended max lux value during sleep)
light_cutoff = 1 # nightly minute-mean summed light exposure
# 1 when exposure <= cutoff, 0 otherwise. Done as a single comparison: a
# two-pass "set to 1, then set everything > cutoff to 0" only works while the
# cutoff is >= 1, because the freshly written 1s are re-tested in the second pass.
df[z_col] = (df[z_col] <= light_cutoff).astype(int)

# one-hot encode multiclass categoricals (more than two distinct values)
multiclass_cols = df[categorical_cols].columns[df[categorical_cols].nunique() > 2].tolist()
categorical_transformation_cols = sorted(set(multiclass_cols) - set(untransformed_cols))
df = one_hot(df, categorical_transformation_cols)
# drop the literal text '.0' from column names. regex=False is explicit because
# the default differs between pandas versions, and as a regex '.0' would match
# ANY character followed by 0 (e.g. the '10' in 'x_10').
df.columns = df.columns.str.replace('.0', '', regex=False)

# get df as float
df = df.astype(float)

# get covariates: everything that is not an outcome, the treatment or the light
# column, kept in df column order so the covariate order is reproducible
_non_covariate_cols = {*y_cols, t_col, z_col}
x_cols = [col for col in df.columns if col not in _non_covariate_cols]

# update lists of variable type
all_cols = df.columns
# NOTE: this rebinds categorical_cols to the post-encoding column names (prefix
# match against the previous list), so the cell is not safe to re-run without
# re-running the import and load cells first.
categorical_cols = [x for x in df.columns if any([x.startswith(y) for y in categorical_cols])]

In [20]:
# Empty results table: one row per (estimator, treatment-level pair, outcome).
estimators = [aipw_estimator, unadjusted_DM_estimator, ipw_estimator, t_learner, s_learner, x_learner]
estimator_names = [estimator.__name__ for estimator in estimators]

# Treatment levels as integer-valued strings, in order of first appearance in df.
# NOTE(review): pair labels therefore depend on row order; confirm they match
# the keys returned by the estimators.
_treatment_levels = df[t_col].unique().astype(int).astype(str)
# each 2-combination (a, b) is labelled "b vs a"
pairs = [f'{second} vs {first}' for first, second in itertools.combinations(_treatment_levels, 2)]

index = pd.MultiIndex.from_product(
    [estimator_names, pairs, y_cols],
    names=['estimator', 'pair', 'outcome'],
)
tau_results = pd.DataFrame(index=index, columns=['tau'])

In [21]:
# Run every estimator on every outcome and store the estimated effect (tau)
# of each treatment-level pair in tau_results.
for tau_estimator, outcome in itertools.product(estimators, y_cols):
    estimator_name = tau_estimator.__name__
    print(f'Estimating effect of {t_col} on {outcome} using {estimator_name}...')
    # the hypertension column is the only outcome treated as non-continuous;
    # compare against htn_col (the name y_cols was built from) instead of
    # repeating its value as a string literal
    continuous_outcome = outcome != htn_col
    results = tau_estimator(df,
                            treatment_var=t_col,
                            outcome_var=outcome,
                            covariates=x_cols,
                            continuous_outcome=continuous_outcome)

    # results is keyed by pair label; each value holds (at least) a 'tau' entry
    for pair, pair_result in results.items():
        tau = pair_result['tau']
        # address the 'tau' column explicitly rather than assigning to the whole row
        tau_results.loc[(estimator_name, pair, outcome), 'tau'] = tau

Estimating effect of sleep_deprivation on HTN using aipw_estimator...
	S-learner score: 0.7929028504944735
Estimating effect of sleep_deprivation on DBP using aipw_estimator...
	S-learner score: 0.2681988480528994
Estimating effect of sleep_deprivation on SBP using aipw_estimator...
	S-learner score: 0.3735066684223861
Estimating effect of sleep_deprivation on HTN using unadjusted_DM_estimator...
Estimating effect of sleep_deprivation on DBP using unadjusted_DM_estimator...
Estimating effect of sleep_deprivation on SBP using unadjusted_DM_estimator...
Estimating effect of sleep_deprivation on HTN using ipw_estimator...
Estimating effect of sleep_deprivation on DBP using ipw_estimator...
Estimating effect of sleep_deprivation on SBP using ipw_estimator...
Estimating effect of sleep_deprivation on HTN using t_learner...
Estimating effect of sleep_deprivation on DBP using t_learner...
Estimating effect of sleep_deprivation on SBP using t_learner...
Estimating effect of sleep_deprivation o

In [22]:
# show the filled results table (one 'tau' value per estimator / pair / outcome row)
tau_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tau
estimator,pair,outcome,Unnamed: 3_level_1
aipw_estimator,1 vs 0,HTN,-0.009871
aipw_estimator,1 vs 0,DBP,-0.669594
aipw_estimator,1 vs 0,SBP,-0.678402
aipw_estimator,2 vs 0,HTN,-0.005574
aipw_estimator,2 vs 0,DBP,-0.772072
aipw_estimator,2 vs 0,SBP,-0.355259
aipw_estimator,2 vs 1,HTN,0.004297
aipw_estimator,2 vs 1,DBP,-0.102477
aipw_estimator,2 vs 1,SBP,0.323143
unadjusted_DM_estimator,1 vs 0,HTN,-0.051161
