In [8]:
import numpy as np
import pandas as pd
import seaborn as sns

from statistics import mean
from random import sample

from sklearn import preprocessing
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

from pyuplift.variable_selection import *
from pyuplift.transformation import *
from pyuplift.datasets import load_linear
from pyuplift.model_selection import treatment_cross_val_score

import warnings
# Silence every warning category for the rest of the session so cell output stays compact.
# NOTE(review): this blanket filter also hides deprecation warnings raised by the
# libraries imported above -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# plotly
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot
 
import cufflinks
# Put cufflinks into offline mode and set its plotting defaults ('pearl' theme, offline rendering).
# NOTE(review): set_config_file presumably persists these values to cufflinks' config file,
# and world_readable presumably only matters for plots published online -- confirm both.
cufflinks.go_offline()
cufflinks.set_config_file(world_readable=True, theme='pearl', offline=True)

In [3]:
# Build the working dataset with pyuplift's load_linear generator.
# NOTE(review): 100000 is presumably the number of rows to generate -- confirm against the pyuplift docs.
df = load_linear(100000)

In [4]:
# Preview the first rows to check the generated columns.
df.head()

Unnamed: 0,x1,x2,x3,t,y
0,-46.820879,-4.793302,58.531404,1,0.0
1,-82.282485,1.525968,-95.607308,1,61.684505
2,-6.53801,7.961892,79.099086,1,0.0
3,-71.336192,-13.656977,-105.250311,1,0.0
4,90.635089,-10.069767,45.317666,1,145.685205


In [5]:
# Seed shared by every random forest. Without it each run grows different trees,
# so the reported uplift scores drift between runs even when the CV seeds are fixed.
MODEL_RANDOM_STATE = 777


def _linear_regressor():
    """Return a fresh, unfitted LinearRegression (n_jobs=-1)."""
    return LinearRegression(n_jobs=-1)


def _logistic_classifier():
    """Return a fresh, unfitted LogisticRegression (n_jobs=-1)."""
    return LogisticRegression(n_jobs=-1)


def _forest_regressor():
    """Return a fresh, unfitted, seeded RandomForestRegressor (n_jobs=-1)."""
    return RandomForestRegressor(n_jobs=-1, random_state=MODEL_RANDOM_STATE)


def _forest_classifier():
    """Return a fresh, unfitted, seeded RandomForestClassifier (n_jobs=-1)."""
    return RandomForestClassifier(n_jobs=-1, random_state=MODEL_RANDOM_STATE)


# Benchmark registry: display name -> uplift approach wrapping its own estimator instance(s).
# Every entry calls a factory so that no estimator object is shared between approaches.
# The keys are used verbatim as labels downstream, so they are kept exactly as they were.
models = {
    # Approaches wrapped around regressors.
    'Econometric (Linear regression)': Econometric(_linear_regressor()),
    'Econometric (Random forest)': Econometric(_forest_regressor()),

    'Two Model (Linear regression)': TwoModel(_linear_regressor(), _linear_regressor()),
    'Two Model (Random forest)': TwoModel(_forest_regressor(), _forest_regressor()),

    'Dummy (Linear regression)': Dummy(_linear_regressor()),
    'Dummy (Random forest)': Dummy(_forest_regressor()),

    # Approaches wrapped around classifiers.
    'Lai (Logistic Regression)': Lai(_logistic_classifier()),
    'Lai (Random forest)': Lai(_forest_classifier()),

    'WeightedLai (Logistic regression)': WeightedLai(_logistic_classifier()),
    'WeightedLai (Random forest)': WeightedLai(_forest_classifier()),

    'Kane (Logistic regression)': Kane(_logistic_classifier()),
    'Kane (Random forest)': Kane(_forest_classifier()),

    'GeneralizedKane (Logistic regression)': GeneralizedKane(_logistic_classifier()),
    'GeneralizedKane (Random forest)': GeneralizedKane(_forest_classifier()),

    'Reflective (Logistic regression)': Reflective(_logistic_classifier()),
    'Reflective (Random forest)': Reflective(_forest_classifier()),

    'Pessimistic (Logistic regression)': Pessimistic(_logistic_classifier()),
    'Pessimistic (Random forest)': Pessimistic(_forest_classifier()),

    'Jaskowski (Logistic regression)': Jaskowski(_logistic_classifier()),
    'Jaskowski (Random forest)': Jaskowski(_forest_classifier()),
}

In [6]:
# Split the frame into plain arrays for the benchmark:
# X holds every column except 't' and 'y'; y and t are those two columns on their own.
X = df.drop(columns=['t', 'y']).values
y, t = df['y'].values, df['t'].values

In [10]:
# Evaluation settings handed to treatment_cross_val_score by the benchmark loop.
seeds = [111, 222, 333, 444, 555]
# NOTE(review): equals len(seeds); presumably one seed per evaluation round -- confirm.
cv = 5

In [None]:
%%time
model_names, model_scores = [], []
for model_name in models:
    print(model_name)
    scores = treatment_cross_val_score(X, y, t, models[model_name], cv, seeds=seeds)
    for score in scores:
        model_names.append(model_name)
        model_scores.append(score)
df_stats = pd.DataFrame(data={'Model': model_names, 'Uplift': model_scores})

In [11]:
# Reshape to one column per model, then draw one box per model from the
# non-missing scores in its column.
uplift_by_model = df_stats.pivot(columns="Model", values="Uplift")
uplift_by_model.iplot(kind="box", yTitle="Uplift")

In [None]:
## RMSE