In [None]:
from causalml.dataset import synthetic_data
import pandas as pd
import numpy as np

In [None]:
# synthetic_data is called without any seed argument, so fix NumPy's global
# RNG first; otherwise every Restart & Run All produces a different dataset.
# NOTE(review): relies on causalml's simulators drawing from np.random — confirm.
np.random.seed(42)

# Simulated data with known ground truth. Per the causalml docs the six
# return values are: outcome, covariates, treatment flag, true individual
# treatment effect, expected outcome and propensity score.
y, X, treatment, tau, b, e = synthetic_data(mode=2, n=1000, p=5, sigma=1.0)

# Derive the column names X1..Xp from the actual width of X so that changing
# `p` above does not require editing a hard-coded list.
feature_names = [f"X{i}" for i in range(1, X.shape[1] + 1)]

data = pd.concat(
    [
        pd.DataFrame({"y": y, "treatment": treatment}),
        pd.DataFrame(X, columns=feature_names),
    ],
    axis=1,
)

# Preview only the first rows instead of dumping the whole frame.
data.head()

In [None]:
from src.sample_sizing import sample_t_test

# Print the helper's docstring so its documented usage is visible inline.
print(sample_t_test.__doc__)

In [None]:
from src.sample_sizing import sample_binary

# Print the helper's docstring so its documented usage is visible inline.
print(sample_binary.__doc__)

In [None]:
from causalml.inference.meta import BaseTRegressor

from xgboost import XGBRegressor

from causalml.inference.meta import XGBTRegressor

In [None]:
# Generic T-learner wrapper around an XGBRegressor with a fixed random_state.
xgb_tlearner = BaseTRegressor(learner=XGBRegressor(random_state=42))

# Fit on the full simulated dataset (no train/test split is applied in this cell).
xgb_tlearner.fit(X=X, y=y, treatment=treatment)

In [None]:
# Display the fitted learner's repr.
xgb_tlearner

In [None]:
# Shape of the array returned by predict for the same X the learner was fitted on.
xgb_tlearner.predict(X=X).shape

In [None]:
# With return_components=True, predict returns three values, unpacked here.
# NOTE(review): presumably the uplift estimate plus the control and treatment
# outcome predictions, in that order — confirm against the causalml docs.
uplift, outcome_c, outcome_t = xgb_tlearner.predict(X=X, return_components=True)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from causalml.metrics import plot

# Hold out 20% of the rows. All five arrays go through a single
# train_test_split call, so they are split consistently with one another.
(
    y_train, y_test,
    X_train, X_test,
    treatment_train, treatment_test,
    e_train, e_test,
    tau_train, tau_test,
) = train_test_split(y, X, treatment, e, tau, test_size=0.2, random_state=42)

# Fit the XGBoost T-learner on the training portion only.
learner_t = XGBTRegressor()
learner_t.fit(X=X_train, treatment=treatment_train, y=y_train)

# Uplift predictions for the held-out rows.
t_pred = learner_t.predict(X=X_test)

# Evaluation frame: observed outcome, treatment flag, model score
# (flattened to 1-D) and the true simulated effect.
df = pd.DataFrame(
    {
        "y": y_test,
        "w": treatment_test,
        "T-Learner": t_pred.reshape(-1),
        "Actual": tau_test,
    }
)

In [None]:
## Plot the Qini curve (kind='qini') using the observed outcome `y` and treatment flag `w`.
## No `treatment_effect_col` is passed here, so the true effect is not used as ground truth.
## NOTE(review): the 'Actual' column is then presumably scored like any other model column — confirm against the causalml docs.
plot(df,kind='qini', outcome_col='y', treatment_col='w',figsize=(10, 3.3))

In [None]:
from causalml.metrics import auuc_score, qini_score
# Summary scores on the evaluation frame; no column arguments are passed,
# so both functions fall back to their default column names.
print('AUUC:\n',auuc_score(df))

print('\nQINI Score\n',qini_score(df))

In [None]:
# Rebuild the evaluation frame with the true effect stored twice.
# NOTE(review): 'Actual' is passed below as `treatment_effect_col`; the
# 'Actual_show' copy presumably keeps the true effect visible as its own
# curve — confirm against the causalml docs.
df = pd.DataFrame(
    {
        "y": y_test,
        "w": treatment_test,
        "T-Learner": t_pred.reshape(-1),
        "Actual": tau_test,
        "Actual_show": tau_test,
    }
)

# Qini curve, this time supplying the true effect column.
plot(
    df,
    kind="qini",
    outcome_col="y",
    treatment_col="w",
    figsize=(10, 3.3),
    treatment_effect_col="Actual",
)

In [None]:
# Same summary scores as before, now passing 'Actual' as the true treatment effect column.
print('AUUC:\n',auuc_score(df, treatment_effect_col='Actual'))

print('\nQINI Score\n',qini_score(df, treatment_effect_col='Actual'))

In [None]:
from src.uplift_modelling import evaluate_treatment_control_split
from sklearn.ensemble import RandomForestClassifier

# Run the project helper on the covariates and treatment vector with a 100-tree random forest.
# NOTE(review): presumably checks how well X predicts treatment assignment — confirm in src.uplift_modelling.
evaluate_treatment_control_split(X, treatment, model=RandomForestClassifier(n_estimators=100))

In [None]:
from sklearn.metrics import r2_score
from src.uplift_modelling import evaluate_treatment_control_outcome

In [None]:
# Score the fitted T-learner on the held-out rows with R^2 as the metric.
evaluate_treatment_control_outcome(
    model=learner_t,
    x_data=X_test,
    y_data=y_test,
    treatment=treatment_test,
    metric_func=r2_score,
)

In [None]:
from src.uplift_modelling import run_cross_validate

# 5-split cross-validation of a fresh (unfitted) T-learner on the full
# dataset, scored with AUUC; the seed is fixed for repeatability.
run_cross_validate(
    model=BaseTRegressor(learner=XGBRegressor(random_state=42)),
    metric_func=auuc_score,
    x_data=X,
    y_data=y,
    treatment=treatment,
    n_splits=5,
    rnd_seed=42,
)


In [None]:
import shap
from sklearn.ensemble import RandomForestRegressor
# Raw SHAP values for the uplift predicted on the held-out rows.
shap_values = learner_t.get_shap_values(X=X_test,
                                        tau=learner_t.predict(X_test),
                                        # Optionally specify the exact model to use as the additional one.
                                        # NOTE(review): presumably this model is fitted on (X, tau) and then explained — confirm.
                                        model_tau_feature = RandomForestRegressor(n_estimators=100))
shap_values

In [None]:
# SHAP importance plot for the uplift predicted on the held-out rows.
learner_t.plot_shap_values(X=X_test, tau=learner_t.predict(X_test))

In [None]:
# SHAP dependence plot for the feature at index 1. interaction_idx=2 is passed (not None),
# so the feature at index 2 is presumably used for the interaction color coding — confirm in the causalml docs.
learner_t.plot_shap_dependence(treatment_group=1, # treatment_group is 1 because
                               # that is the coding used for the treatment value in the input `treatment` vector
                               # for uplift modelling
                              feature_idx=1,
                              X=X_test,
                              tau=learner_t.predict(X_test),
                              interaction_idx=2)

In [None]:
from src.uplift_modelling import get_shap_input

# Build an explainer and SHAP values via the project helper, using a 100-tree random forest as the uplift learner.
# NOTE: this rebinds `shap_values`, replacing the result of the earlier get_shap_values cell.
explainer, shap_values = get_shap_input(learner_t, x_data=X_test, uplift_learner=RandomForestRegressor(n_estimators=100))

In [None]:
# Initialise SHAP's JavaScript support so the force plots can render in the notebook.
shap.initjs()

# Force plot for a single observation: row 0 of the SHAP values and of X_test.
shap.force_plot(explainer.expected_value, shap_values[0,:], X_test[0,:])

In [None]:
# Force plot over the whole held-out set: the full SHAP value array and all of X_test are passed.
shap.force_plot(explainer.expected_value, shap_values, X_test)

In [None]:
# Feature importances for the uplift predicted on the held-out rows; no importance model or method is specified, so defaults apply.
learner_t.get_importance(X=X_test, tau=learner_t.predict(X_test))

In [None]:
# Importance plot for the uplift predicted on the held-out rows, with an
# explicit 100-tree random forest passed as `model_tau_feature`.
learner_t.plot_importance(
    X=X_test,
    tau=learner_t.predict(X_test),
    model_tau_feature=RandomForestRegressor(n_estimators=100),
)

In [None]:
from causalml.dataset import make_uplift_classification
from causalml.inference.tree import UpliftTreeClassifier, UpliftRandomForestClassifier
from causalml.inference.tree import uplift_tree_string, uplift_tree_plot
from IPython.display import Image

# Simulated classification data for uplift trees; `x_names` is the list of
# feature column names returned alongside the frame.
# NOTE: this rebinds `df`, replacing the evaluation frame built in earlier cells.
df, x_names = make_uplift_classification()

uplift_model = UpliftTreeClassifier(max_depth = 4,
                                    min_samples_leaf = 200,
                                    min_samples_treatment = 50,
                                    n_reg = 100,
                                    evaluationFunction='KL',
                                    control_name='control')

# Select the feature matrix by name rather than by a hard-coded positional
# slice, so the fitted columns always match the `x_names` handed to the
# tree plot below, even if the generator's column layout changes.
uplift_model.fit(df[x_names].values,
                 treatment=df['treatment_group_key'].values,
                 y=df['conversion'].values)

# Graph object of the fitted tree, with nodes labelled by feature name.
graph = uplift_tree_plot(uplift_model.fitted_uplift_tree, x_names)

In [None]:
# Render the tree graph to PNG bytes and display it inline.
Image(graph.create_png())