In [2]:
import pandas as pd
from CTL.causal_tree_learn import CausalTree
from sklearn.model_selection import train_test_split
import numpy as np

In [3]:
# Read the space-separated asthma file; no column is used as the row index.
asthma = pd.read_csv('data/asthma.txt', sep=' ', index_col=None)

# Overwrite the column labels with descriptive names (one per column, in file order).
asthma.columns = [
    'physician',
    'age',
    'sex',
    'education',
    'insurance',
    'drug coverage',
    'severity',
    'comorbidity',
    'physical comorbidity',
    'mental comorbidity',
    'satisfaction',
]

In [4]:
# Show the loaded frame as the cell output; the rendered repr elides the middle rows.
asthma

Unnamed: 0,physician,age,sex,education,insurance,drug coverage,severity,comorbidity,physical comorbidity,mental comorbidity,satisfaction
0,1,36,0,5,1,1,2,4,49.97533,49.57280,1
1,1,37,1,6,2,1,3,3,40.93979,56.11393,1
2,1,43,1,5,1,1,4,3,50.32495,57.11961,1
3,1,39,1,6,1,1,4,1,49.34402,49.65159,1
4,1,46,1,1,2,1,4,0,37.37425,52.95411,0
...,...,...,...,...,...,...,...,...,...,...,...
271,2,40,1,4,1,1,3,2,54.41926,48.36971,0
272,2,22,0,4,2,0,3,0,37.72739,49.10201,0
273,2,48,1,4,1,1,3,2,41.22965,42.62088,1
274,2,32,0,5,2,1,3,1,39.91915,51.36588,0


In [5]:
# Distinct values of the first column ('physician'), which later serves as the treatment label.
asthma['physician'].unique()

array([1, 2], dtype=int64)

In [6]:
# Covariate matrix: every column except the outcome and the physician label.
x = asthma.drop(columns=['satisfaction', 'physician']).to_numpy()

# Outcome vector and raw treatment label, taken straight from the frame.
y = asthma['satisfaction'].to_numpy()
treatment = asthma['physician'].to_numpy()

In [7]:
# Feature labels in the same column order as `x`; used to label the tree plots.
columns = asthma.drop(['satisfaction', 'physician'], axis=1).columns

# Recode the outcome so that 0 becomes -1 (other values are left as they are).
# A fresh array is built instead of assigning through a boolean mask: the array
# returned by `.values` can share memory with `asthma`, so in-place writes would
# silently alter the frame shown earlier (and fail outright when pandas hands
# back a read-only array under Copy-on-Write).
satisfaction_raw = asthma['satisfaction'].values
y = np.where(satisfaction_raw == 0, -1, satisfaction_raw)

# Recode the physician label into a binary treatment flag: 1 -> 0 (control),
# 2 -> 1 (treated); any other value is passed through unchanged.
# Deriving it from the untouched frame keeps this cell idempotent: with the old
# in-place version, a second run mapped the freshly written 1s back to 0.
physician_raw = asthma['physician'].values
treatment = np.where(physician_raw == 1, 0,
                     np.where(physician_raw == 2, 1, physician_raw))

In [8]:
# Fix NumPy's global random state so later steps that draw from it are repeatable.
np.random.seed(0)

# 60/40 train/test partition of features, outcomes and treatment flags,
# with the split itself pinned by its own random_state.
(x_train, x_test,
 y_train, y_test,
 treat_train, treat_test) = train_test_split(
    x, y, treatment,
    test_size=0.4,
    random_state=42,
)
         

In [9]:
# regular CTL
# NOTE(review): `magnitude=False` is a CausalTree option whose exact effect is
# not visible in this notebook — confirm against the CTL library documentation.
ctl = CausalTree(magnitude=False)
# Fit on the training split: features, outcomes and treatment flags.
ctl.fit(x_train, y_train, treat_train)
# Prune the fitted tree before using it for prediction.
ctl.prune()
# Predictions for the held-out test features (presumably per-sample effect
# estimates — TODO confirm the return format).
ctl_predict = ctl.predict(x_test)

In [10]:
# honest CTL (CT-HL)
# Same fit -> prune -> predict sequence as the other trees, with `honest=True`.
cthl = CausalTree(honest=True)
# Fit on the training split: features, outcomes and treatment flags.
cthl.fit(x_train, y_train, treat_train)
# Prune the fitted tree before using it for prediction.
cthl.prune()
# Predictions for the held-out test features.
cthl_predict = cthl.predict(x_test)

In [11]:
# val honest CTL (CT-HV)
# Same fit -> prune -> predict sequence as the other trees, with `val_honest=True`.
cthv = CausalTree(val_honest=True)
# Fit on the training split: features, outcomes and treatment flags.
cthv.fit(x_train, y_train, treat_train)
# Prune the fitted tree before using it for prediction.
cthv.prune()
# Predictions for the held-out test features.
cthv_predict = cthv.predict(x_test)

# adaptive CT (Athey and Imbens, PNAS 2016)
# Configured with weight=0.0 and split_size=0.0, then fit, pruned and evaluated
# on the same train/test split as the other trees.
ct_adaptive = CausalTree(weight=0.0, split_size=0.0)
ct_adaptive.fit(x_train, y_train, treat_train)
ct_adaptive.prune()
# Predict with the adaptive tree itself. This previously called `cthv.predict`,
# so `ct_adaptive_predict` was just a copy of the CT-HV predictions.
ct_adaptive_predict = ct_adaptive.predict(x_test)

In [12]:
# honest CT (Athey and Imbens, PNAS 2016)
# Same settings as the adaptive CT (weight=0.0, split_size=0.0) plus `honest=True`.
ct_honest = CausalTree(honest=True, weight=0.0, split_size=0.0)
# Fit on the training split: features, outcomes and treatment flags.
ct_honest.fit(x_train, y_train, treat_train)
# Prune the fitted tree before using it for prediction.
ct_honest.prune()
# Predictions for the held-out test features.
ct_honest_predict = ct_honest.predict(x_test)

# Render every fitted tree to its own output file, labelling splits with the
# feature names and showing the effect at each node.
for fitted_tree, out_name in (
    (ct_adaptive, "bin_tree_adaptive"),
    (ct_honest, "bin_tree_honest"),
    (ctl, "bin_tree"),
    (cthl, "bin_tree_honest_learn"),
    (cthv, "bin_tree_honest_validation"),
):
    fitted_tree.plot_tree(features=columns, filename=out_name, show_effect=True)