In [1]:
import numpy as np
import optuna
import pandas as pd
import xgboost as xgb

from xgboost import XGBClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report,
    confusion_matrix,
    make_scorer,
)
from sklearn.model_selection import cross_val_score, train_test_split

In [2]:
# read the training and the test CSV files into DataFrames
train_data = pd.read_csv(filepath_or_buffer="../data/training.csv")
test_data = pd.read_csv(filepath_or_buffer="../data/Test.csv")
# preview the first rows of the training data
train_data.head()

Unnamed: 0,ID,Promotion,purchase,V1,V2,V3,V4,V5,V6,V7
0,1,No,0,2,30.443518,-1.165083,1,1,3,2
1,3,No,0,3,32.15935,-0.645617,2,3,2,2
2,4,No,0,2,30.431659,0.133583,1,1,4,2
3,5,No,0,0,26.588914,-0.212728,2,1,4,2
4,8,Yes,0,3,28.044331,-0.385883,1,1,2,2


In [3]:
# g1 (Group A): rows whose Promotion is "No"; g2 (Group B): rows whose Promotion is "Yes"
g1 = train_data.loc[train_data["Promotion"] == "No"]
g2 = train_data.loc[train_data["Promotion"] == "Yes"]

In [4]:
# features (V1-V7) and target (purchase) come from group 2 only;
# 20% of the rows are held out, with a fixed seed so the split is repeatable
X = g2.loc[:, ["V1", "V2", "V3", "V4", "V5", "V6", "V7"]]
y = g2.loc[:, "purchase"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [5]:
# promotion strategy
def promotion_strategy(df, model):
    """Turn a model's purchase predictions into "Yes"/"No" promotion decisions.

    Args:
        df (pd.DataFrame): the rows to decide on; passed unchanged to
            ``model.predict``. The notebook calls this with columns V1 - V7.
        model: any object exposing ``predict(df)``, e.g. a tuned classifier.

    Returns:
        np.array: an array holding "Yes" wherever the prediction equals 1 and
        "No" everywhere else, one entry per prediction.

    Example:
        >>> promotion_strategy(df, model)
        array(['Yes', 'Yes', 'No'])

        Input: df
            V1	V2	  V3	V4	V5	V6	V7
            2	30	-1.1	1	1	3	2
            3	32	-0.6	2	3	2	2
            2	30	0.13	1	1	4	2

        Output:
            array(['Yes', 'Yes', 'No']) means the first two users would receive
            the promotion and the last one would not.
    """
    predictions = model.predict(df)
    # a predicted 1 becomes "Yes"; every other value becomes "No"
    return np.where(predictions == 1, "Yes", "No")


# from test_results.py
def score(df, promo_pred_col="Promotion"):
    """Compute the (irr, nir) pair for a frame of promotion flags and purchases.

    Args:
        df (pd.DataFrame): must contain ``promo_pred_col`` with "Yes"/"No"
            values and a numeric ``purchase`` column.
        promo_pred_col (str, optional): name of the promotion flag column.
            Defaults to "Promotion".

    Returns:
        tuple: ``(irr, nir)`` where
            irr = purchases_yes / n_yes - purchases_no / n_no
            nir = 10 * purchases_yes - 0.15 * n_yes - 10 * purchases_no
    """
    is_treated = df[promo_pred_col] == "Yes"
    is_control = df[promo_pred_col] == "No"

    treated_count = len(df.loc[is_treated, :])
    control_count = len(df.loc[is_control, :])
    treated_purchases = df.loc[is_treated, "purchase"].sum()
    control_purchases = df.loc[is_control, "purchase"].sum()

    irr = treated_purchases / treated_count - control_purchases / control_count
    nir = 10 * treated_purchases - 0.15 * treated_count - 10 * control_purchases
    return (irr, nir)


# from test_results.py, I made a slight modification for this notebook
def test_results(promotion_strategy, model):
    """Evaluate a promotion strategy on the rows of ``../data/Test.csv``.

    The file is read on every call. The V1 - V7 columns are handed to
    ``promotion_strategy`` together with ``model``; only the rows it marks
    "Yes" are kept and passed to ``score``.

    Args:
        promotion_strategy (callable): called as ``promotion_strategy(df, model)``
            and expected to return an array of "Yes"/"No" values.
        model: forwarded unchanged to ``promotion_strategy``.

    Returns:
        tuple: ``(irr, nir)`` as returned by ``score``.
    """
    test_frame = pd.read_csv("../data/Test.csv")
    decisions = promotion_strategy(
        test_frame[["V1", "V2", "V3", "V4", "V5", "V6", "V7"]], model
    )
    # positional indices of the rows the strategy wants to promote
    selected_rows = np.where(decisions == "Yes")
    return score(test_frame.iloc[selected_rows])

In [6]:
# define Optuna model tuning function
def objective(trial, scale_pos_weight, scoring):
    """Optuna objective: mean 5-fold cross-validated score of an XGBClassifier.

    Samples one hyperparameter configuration from ``trial``, builds an
    ``XGBClassifier`` with it and evaluates it with ``cross_val_score`` on the
    notebook-level ``X_train`` / ``y_train``.

    Args:
        trial (optuna.trial.Trial): trial used to sample the hyperparameters.
        scale_pos_weight (float): forwarded to ``XGBClassifier`` to balance the
            imbalanced dataset.
        scoring (str or callable): forwarded to ``cross_val_score``.

    Returns:
        float: mean of the five fold scores.
    """
    params = {
        "verbosity": 0,
        "max_depth": trial.suggest_int("max_depth", 1, 7),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1),
        # ``step`` is passed by keyword: Optuna deprecated passing it positionally
        "n_estimators": trial.suggest_int("n_estimators", 50, 300, step=10),
        "gamma": trial.suggest_float("gamma", 0, 1),
        "subsample": trial.suggest_float("subsample", 0.5, 1),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-8, 10.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 10.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 300),
    }

    # scale_pos_weight is for balancing imbalanced dataset
    model = XGBClassifier(
        scale_pos_weight=scale_pos_weight, n_jobs=-1, random_state=42, **params
    )
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring=scoring)

    return scores.mean()

In [7]:
# define model tuning function for testing different scoring methods
def model_tuning(scoring, n_trials=2):
    """Tune an XGBClassifier for one scoring method and record the outcome.

    Runs an Optuna study that maximizes ``objective``, refits a classifier
    with the best parameters on ``X_train`` / ``y_train``, evaluates it with
    ``test_results`` and appends one entry to every list of the notebook-level
    ``results`` dict.

    Args:
        scoring (str or make_scorer obj): scoring method. Run help(cross_val_score) for more info.
        n_trials (int, optional): number of Optuna trials. Defaults to 2.
    """
    # the training labels are imbalanced, so weight positives by the
    # negative-to-positive ratio
    n_negative = int((y_train == 0).sum())
    n_positive = int((y_train == 1).sum())
    scale_pos_weight = n_negative / n_positive

    def run_trial(trial):
        return objective(trial, scale_pos_weight, scoring)

    # tune model
    study = optuna.create_study(direction="maximize")
    study.optimize(run_trial, n_trials=n_trials)

    # refit on the training split with the best parameters found
    best_params = study.best_params
    tuned_xgb_clf = XGBClassifier(
        scale_pos_weight=scale_pos_weight, random_state=42, **best_params
    )
    tuned_xgb_clf.fit(X_train, y_train)

    # count how many rows of the Test dataset would receive a promotion
    features = test_data[["V1", "V2", "V3", "V4", "V5", "V6", "V7"]]
    promotions = promotion_strategy(features, tuned_xgb_clf)
    n_promotions = int(np.count_nonzero(promotions == "Yes"))

    # model evaluation by test_results
    irr, nir = test_results(promotion_strategy, tuned_xgb_clf)

    # one new entry per column of the results dict
    outcome = {
        "scoring": scoring,
        "score": study.best_value,
        "irr": irr,
        "nir": nir,
        "n_promotions": n_promotions,
        "n_trials": n_trials,
        "best_params": best_params,
    }
    for column, value in outcome.items():
        results[column].append(value)

In [8]:
# define treat score, see https://en.wikipedia.org/wiki/Confusion_matrix
def treat_score(y_true, y_pred):
    """Return TP / (TP + FN + FP) for binary 0/1 labels.

    Args:
        y_true: ground-truth labels (0 or 1).
        y_pred: predicted labels (0 or 1).

    Returns:
        float: the ratio, or 0.0 when there are neither actual nor predicted
        positives (the denominator would be zero).
    """
    # labels=[0, 1] forces a 2x2 matrix even when only one class occurs in
    # both arrays; without it the four-way unpacking raises ValueError
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    denominator = tp + fn + fp
    if denominator == 0:
        # avoid a NaN score, which would make the tuning trial unusable
        return 0.0
    return tp / denominator


# define irr score
def irr_score(y_true, y_pred):
    """Return TP / (TP + FP) - FN / (FN + TN) for binary 0/1 labels.

    The first term is the share of positives among predicted positives, the
    second the share of positives among predicted negatives.

    Args:
        y_true: ground-truth labels (0 or 1).
        y_pred: predicted labels (0 or 1).

    Returns:
        float: difference of the two ratios. A ratio whose denominator is zero
        (no predicted positives, or no predicted negatives) counts as 0.0.
    """
    # labels=[0, 1] forces a 2x2 matrix even when only one class occurs in
    # both arrays; without it the four-way unpacking raises ValueError
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    n_pred_positive = tp + fp
    n_pred_negative = fn + tn
    # guard both divisions so a one-sided prediction does not yield NaN
    positive_rate = tp / n_pred_positive if n_pred_positive else 0.0
    negative_rate = fn / n_pred_negative if n_pred_negative else 0.0
    return positive_rate - negative_rate


# define nir score
def nir_score(y_true, y_pred):
    """Return 10 * TP - 0.15 * (TP + FP) - 10 * FN for binary 0/1 labels.

    Args:
        y_true: ground-truth labels (0 or 1).
        y_pred: predicted labels (0 or 1).

    Returns:
        float: the value of the formula above.
    """
    # labels=[0, 1] forces a 2x2 matrix even when only one class occurs in
    # both arrays; without it the four-way unpacking raises ValueError
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return (10 * tp - 0.15 * (tp + fp)) - 10 * fn


# combine irr score and nir score together
def irr_nir_score(y_true, y_pred):
    """Return the product of ``irr_score`` and ``nir_score`` for the same inputs.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.

    Returns:
        float: ``irr_score(y_true, y_pred) * nir_score(y_true, y_pred)``.
    """
    return irr_score(y_true, y_pred) * nir_score(y_true, y_pred)

In [9]:
# scoring methods to compare: built-in scikit-learn names followed by the
# custom metrics wrapped with make_scorer
scorers = ["accuracy", "roc_auc", "recall", "precision", "f1"] + [
    make_scorer(metric)
    for metric in (treat_score, irr_score, nir_score, irr_nir_score)
]

# results collects one list per reported column; model_tuning appends to each
results = {
    column: []
    for column in (
        "scoring",
        "score",
        "irr",
        "nir",
        "n_promotions",
        "n_trials",
        "best_params",
    )
}

# run one 500-trial tuning study per scoring method
for scorer in scorers:
    model_tuning(scorer, n_trials=500)

[32m[I 2023-04-17 10:35:42,764][0m A new study created in memory with name: no-name-45b9deae-0abc-4ff7-a445-0f4d5b2c2f5e[0m
[32m[I 2023-04-17 10:35:47,400][0m Trial 0 finished with value: 0.6656929426098537 and parameters: {'max_depth': 5, 'learning_rate': 0.0158451926990131, 'n_estimators': 250, 'gamma': 0.9128462077446471, 'subsample': 0.6931884178008995, 'colsample_bytree': 0.5944712441498081, 'reg_alpha': 8.270227584682699, 'reg_lambda': 6.887650915884723, 'min_child_weight': 89}. Best is trial 0 with value: 0.6656929426098537.[0m
[32m[I 2023-04-17 10:35:49,857][0m Trial 1 finished with value: 0.723407659718053 and parameters: {'max_depth': 6, 'learning_rate': 0.04142093305330655, 'n_estimators': 110, 'gamma': 0.07799417248743079, 'subsample': 0.9724431812706014, 'colsample_bytree': 0.6983609991903211, 'reg_alpha': 0.019932033541859667, 'reg_lambda': 6.143213873494366, 'min_child_weight': 14}. Best is trial 1 with value: 0.723407659718053.[0m
[32m[I 2023-04-17 10:35:52,84

In [10]:
# one row per scoring method, one column per key of the results dict
results_df = pd.DataFrame.from_dict(results)
results_df

Unnamed: 0,scoring,score,irr,nir,n_promotions,n_trials,best_params
0,accuracy,0.952672,0.014332,-5.9,1810,500,"{'max_depth': 7, 'learning_rate': 0.0985469859..."
1,roc_auc,0.65664,0.020359,469.05,17835,500,"{'max_depth': 4, 'learning_rate': 0.0131945820..."
2,recall,0.855563,0.014231,-107.9,28355,500,"{'max_depth': 1, 'learning_rate': 0.0128012149..."
3,precision,0.029975,0.021447,243.45,7555,500,"{'max_depth': 7, 'learning_rate': 0.0671030022..."
4,f1,0.052957,0.020263,438.4,17263,500,"{'max_depth': 4, 'learning_rate': 0.0104123945..."
5,make_scorer(treat_score),0.026832,0.020738,488.2,17313,500,"{'max_depth': 4, 'learning_rate': 0.0119184497..."
6,make_scorer(irr_score),0.018176,0.02044,518.9,19463,500,"{'max_depth': 2, 'learning_rate': 0.0214920286..."
7,make_scorer(nir_score),109.44,0.014231,-107.9,28355,500,"{'max_depth': 1, 'learning_rate': 0.0348369824..."
8,make_scorer(irr_nir_score),1.885476,0.01454,-62.55,28006,500,"{'max_depth': 1, 'learning_rate': 0.0267492934..."


In [11]:
# write the tuning summary to a CSV file, leaving out the index column
results_df.to_csv(
    path_or_buf="../reports/baselines/metric_baseline.csv",
    index=False,
)