# Multiple treatment uplift modeling experiments

In [1]:
import os, pandas as pd, numpy as np, matplotlib as mpl, matplotlib.pyplot as plt, seaborn as sns, missingno as msno, statsmodels.api as sm
from causalml.inference.tree import UpliftRandomForestClassifier
from causalml.metrics import plot_gain
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split

In [2]:
# File names of the five cross-validation folds (read from the working directory).
train_list = [
    "Train_Fold1.csv",
    "Train_Fold2.csv",
    "Train_Fold3.csv",
    "Train_Fold4.csv",
    "Train_Fold5.csv",
]
test_list = [
    "Test_Fold1.csv",
    "Test_Fold2.csv",
    "Test_Fold3.csv",
    "Test_Fold4.csv",
    "Test_Fold5.csv",
]

# One DataFrame per fold, kept in the same order as the file lists above.
train_folds = [pd.read_csv(csv_name, sep=',') for csv_name in train_list]
test_folds = [pd.read_csv(csv_name, sep=',') for csv_name in test_list]

### Model elements

In [None]:
# Hillstrom
# Column roles for this dataset. These names are shared with the other dataset
# cells, so whichever dataset cell ran last determines the configuration.
treatment = "treat"
target = "visit"
control = "CONTROL"

# Columns that must not be used as features: three fixed columns plus the
# treatment and outcome columns themselves.
remove_others = ["TREATMENT1", "TREATMENT2", "labels", treatment, target]

# Every remaining column of the first training fold is a predictor.
predictors = [col for col in train_folds[0].columns if col not in remove_others]

In [None]:
# Gerber
# Column roles for this dataset. These names are shared with the other dataset
# cells, so whichever dataset cell ran last determines the configuration.
treatment = "treat"
target = "response"
control = "control"
predictors = ["sex","yob","g2000","g2002","p2000","p2002","p2004","hh_size","p2004_mean","g2004_mean"]

# Raw treatment labels -> labels used by the model.
treatment_labels = {
    'trt_ Civic Duty': 'control',
    'trt_ Self': 'Treatment1',
    'trt_ Neighbors': 'Treatment2',
}
# Series.map turns every value missing from the dict into NaN, so re-running
# this cell on already-recoded folds would wipe the treatment column. Identity
# entries for the recoded labels make the recode idempotent, while labels that
# are unknown to the mapping still become NaN exactly as before.
treatment_labels.update({label: label for label in list(treatment_labels.values())})

# The folds are recoded in place (the treatment column is overwritten).
for folds in (train_folds, test_folds):
    for fold_df in folds:
        fold_df[treatment] = fold_df[treatment].map(treatment_labels)

In [None]:
# Bank
# Alternative configuration, currently disabled:
#treatment = "treat"
#control = "control"
#target = "y"
#predictors = ["age","balance","loan","day","duration","campaign","jobretired","admin","jobservices","maritalmarried","maritalsingle","maritaldivorced","educationsecondary"]

# Active configuration: the 'contact' column is the treatment and "unknown" is
# the control label.
treatment, target, control = 'contact', 'y', "unknown"

# Columns excluded from the feature set: the treatment and outcome columns plus
# the three contact_* columns.
no_predictors = [
    treatment,
    target,
    'contact_unknown',
    'contact_cellular',
    'contact_telephone',
]

# Every remaining column of the first training fold is a predictor.
predictors = [col for col in train_folds[0].columns if col not in no_predictors]

In [None]:
# Bladder
# Column roles for this dataset (treatment column, control label, outcome column).
treatment, control, target = "treat", "trt_placebo", "response"

# Feature columns passed to the uplift model.
predictors = [
    "number",
    "size",
    "start",
    "stop",
    "rtumor",
    "rsize",
    "status",
    "enum",
]

In [None]:
# Colon1
# Column roles for this dataset (treatment column, control label, outcome column).
treatment, control, target = "treat", "trt_Obs", "response"

# Feature columns passed to the uplift model; this variant includes both
# "time.x" and "time.y".
predictors = [
    "time.x",
    "time.y",
    "sex",
    "age",
    "obstruct",
    "perfor",
    "adhere",
    "nodes",
    "status",
    "differ",
    "extent",
    "surg",
    "node4",
]

In [None]:
# Colon2
# Column roles for this dataset (treatment column, control label, outcome column).
treatment, control, target = "treat", "trt_Obs", "response"

# Feature columns passed to the uplift model; this variant uses "time.x" only.
predictors = [
    "time.x",
    "sex",
    "age",
    "obstruct",
    "perfor",
    "adhere",
    "nodes",
    "status",
    "differ",
    "extent",
    "surg",
    "node4",
]

In [None]:
# AOD
# Column roles for this dataset (treatment column, control label, outcome column).
treatment, control, target = "treat", "community", "suf12"

# Feature columns passed to the uplift model.
predictors = [
    "illact",
    "crimjust",
    "subprob",
    "subdep",
    "white",
]

In [3]:
# Turnover
# Column roles for this dataset (treatment column, control label, outcome column).
treatment, control, target = "treat", "control", "Volturn"

# Feature columns passed to the uplift model.
predictors = [
    "firmSmall",
    "firmMedium",
    "firmLarge",
    "gender",
    "married",
    "cohabitation",
    "divorced",
    "single",
    "NumberChildren",
    "belgian",
    "Age",
    "Seniority",
    "WorkHomeDistance",
    "consultancy",
    "research",
    "WorkPercentage",
    "SalaryFixed",
    "SalaryOvertime",
    "MealVouchers",
    "Sickness",
    "training",
    "Partner",
]

### Multiple treatment uplift models

In [4]:
# Per-fold containers: the fitted forests, their raw predictions on the test
# fold, and the same predictions wrapped in a DataFrame.
uplift_model = []
y_pred = []
result = []

# Number of features tried per split: rounded square root of the predictor count.
n_split_features = int(round(np.sqrt(len(predictors))))

for fold_idx in range(len(train_list)):
    train_fold = train_folds[fold_idx]

    forest = UpliftRandomForestClassifier(
        n_estimators=10,
        evaluationFunction='CTS',
        random_state=100,
        control_name=control,
        max_features=n_split_features,
    )
    uplift_model.append(forest)
    forest.fit(
        train_fold[predictors].values,
        treatment=train_fold[treatment].values,
        y=train_fold[target].values,
    )

    # Predict on the matching test fold and label the columns with the
    # model's classes_.
    fold_pred = forest.predict(test_folds[fold_idx][predictors].values)
    y_pred.append(fold_pred)
    result.append(pd.DataFrame(fold_pred, columns=forest.classes_))

In [11]:
# Persist the list of per-fold prediction frames to disk.
filename = "CTS_rf_results.sav"
joblib.dump(value=result, filename=filename)

['CTS_rf_results.sav']

In [None]:
# Evaluate the first fold only. `result` is a list with one prediction frame
# per fold, so the frame has to be picked explicitly; comparing or indexing the
# list itself raises.
fold_result = result[0]
fold_test = test_folds[0]

# Recommended arm per row: the control when every predicted value is negative,
# otherwise the column with the largest predicted value. The configured
# `control` label is used so this works for every dataset configuration.
best_treatment = np.where(
    (fold_result < 0).all(axis=1),
    control,
    fold_result.idxmax(axis=1),
)

# Flags for rows whose actual assignment matches the recommendation, and for
# rows that were actually in the control group.
actual_is_best = np.where(fold_test[treatment] == best_treatment, 1, 0)
actual_is_control = np.where(fold_test[treatment] == control, 1, 0)

# Keep the rows that either received the recommended arm or were controls.
synthetic = (actual_is_best == 1) | (actual_is_control == 1)
synth = fold_result[synthetic]

In [None]:
# Build the frame handed to plot_gain for the first fold.
# - The outcome column is named after `target`, because plot_gain is called
#   with outcome_col=target; a hard-coded name only works for one dataset.
# - `uplift_model` is a list of fitted models, so the first fold's model is
#   picked explicitly before reading classes_.
auuc_columns = {
    "is_treated": 1 - actual_is_control[synthetic],
    target: test_folds[0].loc[synthetic, target].values,
    "uplift_tree": synth.max(axis=1),
}
auuc_metrics = (
    synth
    .assign(**auuc_columns)
    # Drop the per-treatment prediction columns, leaving only the three above.
    .drop(columns=list(uplift_model[0].classes_))
)

In [None]:
# Gain plot for the evaluation frame: `target` is the outcome column and
# 'is_treated' is the treatment indicator.
plot_gain(
    auuc_metrics,
    outcome_col=target,
    treatment_col='is_treated',
)

In [3]:
# Directory holding the saved results for the Bank experiment.
results_dir = "/Users/diegoolaya/OneDrive - Vrije Universiteit Brussel/Research/Mulit_uplift_python/Bank"

# NOTE(review): joblib.load unpickles the file, so only load files you trust.
results = joblib.load(results_dir + "/ed_rf_results.sav")

# Write the five per-fold result frames to res_F1.csv ... res_F5.csv.
for fold_no in range(1, 6):
    results[fold_no - 1].to_csv(results_dir + "/res_F{}.csv".format(fold_no))

In [None]:
# Display the list of per-fold prediction DataFrames built by the model-fitting loop.
result