In [8]:
import numpy as np
import pandas as pd
from scipy.special import expit

def simulate_free_delivery_subscription_extended(n_samples=5000, seed=42, treatment_effect=1.0):
    """Simulate a randomized free-delivery experiment with a binary subscription outcome.

    Customer covariates and a 50/50 treatment flag are drawn independently of
    each other; the subscription outcome is then sampled from a logistic model
    whose log-odds are linear in the covariates plus a constant additive
    treatment effect.

    Parameters
    ----------
    n_samples : int, default 5000
        Number of simulated customers (rows).
    seed : int or None, default 42
        Seed for the private random generator. The same seed yields the same
        frame as the legacy ``np.random.seed(seed)`` + global-draw approach.
    treatment_effect : float, default 1.0
        Additive shift applied to the log-odds of subscribing for treated rows.

    Returns
    -------
    pandas.DataFrame
        One row per customer with columns ``age`` (integers in [18, 70)),
        ``purchase_freq`` (Poisson, mean 2), ``avg_spend`` (Gamma(shape=2,
        scale=50), rounded to 2 decimals), ``gender`` (0/1), ``region``
        ('Urban' / 'Suburban' / 'Rural'), ``treatment`` (0/1) and
        ``subscription`` (0/1).
    """
    # A private RandomState reproduces exactly the stream that
    # np.random.seed(seed) + global np.random.* calls would give, but leaves
    # the process-wide generator untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)

    # NOTE: the order of the draws below determines the generated data;
    # reordering them changes every downstream value for a given seed.
    age = rng.randint(18, 70, size=n_samples)
    purchase_freq = rng.poisson(lam=2, size=n_samples)
    avg_spend = np.round(rng.gamma(shape=2.0, scale=50.0, size=n_samples), 2)
    gender = rng.binomial(1, p=0.5, size=n_samples)
    regions = rng.choice(['Urban', 'Suburban', 'Rural'], size=n_samples, p=[0.5, 0.3, 0.2])
    # Treatment is assigned by a fair coin flip, independent of all covariates.
    treatment = rng.binomial(1, 0.5, size=n_samples)

    # Per-region additive contribution to the log-odds ('Rural' is the reference level).
    region_effect_map = {'Urban': 0.3, 'Suburban': 0.2, 'Rural': 0.0}
    region_effect = np.array([region_effect_map[r] for r in regions], dtype=float)

    log_odds_baseline = (-4.0 - 0.02 * age + 0.5 * purchase_freq + 0.01 * avg_spend + 0.1 * gender + region_effect)
    log_odds = log_odds_baseline + treatment * treatment_effect
    prob_subscription = expit(log_odds)
    subscription = rng.binomial(1, prob_subscription)

    return pd.DataFrame({
        'age': age,
        'purchase_freq': purchase_freq,
        'avg_spend': avg_spend,
        'gender': gender,
        'region': regions,
        'treatment': treatment,
        'subscription': subscription
    })


In [9]:
# Build the synthetic experiment data (5000 rows, seed 42) and preview the first rows.
df = simulate_free_delivery_subscription_extended(n_samples=5000, seed=42)
df.head(5)

Unnamed: 0,age,purchase_freq,avg_spend,gender,region,treatment,subscription
0,56,3,48.02,1,Suburban,0,0
1,69,2,23.69,1,Rural,1,0
2,46,0,32.95,0,Urban,0,0
3,32,1,97.25,0,Suburban,1,0
4,60,3,53.49,0,Urban,0,0


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Separate outcome and treatment flag from the covariates: the uplift models
# below receive the treatment vector as its own argument, not as a feature.
y = df['subscription']
T = df['treatment']
X = df.drop(columns=['subscription', 'treatment'])

# 80/20 hold-out split; one call keeps X, T and y row-aligned.
X_train, X_test, T_train, T_test, y_train, y_test = train_test_split(
    X, T, y,
    test_size=0.2,
    random_state=42,
)

# One-hot encode 'region'. The encoder is fitted on the training rows only,
# then applied unchanged to the test rows.
encoder = OneHotEncoder(drop='first', sparse_output=False, handle_unknown='ignore')
encoder.fit(X_train[['region']])
X_train_encoded = encoder.transform(X_train[['region']])
X_test_encoded = encoder.transform(X_test[['region']])

# Wrap the encoded arrays in frames that reuse the original row index so they
# line up with the remaining columns when recombined.
encoded_cols = encoder.get_feature_names_out(['region'])
X_train_encoded_df = pd.DataFrame(X_train_encoded, index=X_train.index, columns=encoded_cols)
X_test_encoded_df = pd.DataFrame(X_test_encoded, index=X_test.index, columns=encoded_cols)

# Replace the raw 'region' column with its encoded counterparts.
X_train_final = X_train.drop(columns='region').join(X_train_encoded_df)
X_test_final = X_test.drop(columns='region').join(X_test_encoded_df)

print(X_train_final.columns)


Index(['age', 'purchase_freq', 'avg_spend', 'gender', 'region_Suburban',
       'region_Urban'],
      dtype='object')


In [11]:
from sklift.models import SoloModel, TwoModels
from sklearn.ensemble import GradientBoostingClassifier
from sklift.metrics import qini_curve, qini_auc_score
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# S-learner: a single gradient-boosting classifier wrapped by sklift's
# SoloModel, using its 'treatment_interaction' method.
s_learner = SoloModel(
    method='treatment_interaction',
    estimator=GradientBoostingClassifier(random_state=42, n_estimators=100),
)

# Fit on the training split, then score the held-out rows.
s_learner.fit(X=X_train_final, y=y_train, treatment=T_train)
uplift_preds_s = s_learner.predict(X=X_test_final)

In [None]:
# Qini curve points and the scalar Qini AUC for the S-learner on the test split.
qini_x_s, qini_y_s = qini_curve(y_test, uplift_preds_s, T_test)
qini_auc_s = qini_auc_score(y_test, uplift_preds_s, T_test)

# qini_curve reports x as the *number* of targeted samples; rescale to a
# fraction so the axis really shows "Proportion of Population".
qini_frac_s = qini_x_s / qini_x_s[-1]

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(qini_frac_s, qini_y_s, label='S-Learner Qini Curve', linewidth=2)
# Random targeting is the straight chord from the origin to the curve's end
# point. Curve points sit at distinct-score thresholds and are not evenly
# spaced in x, so the baseline must be computed from x, not from point index.
ax.plot(qini_frac_s, qini_frac_s * qini_y_s[-1], 'r--', label='Random Targeting')
ax.text(0.95, 0.05, f'Qini AUC = {qini_auc_s:.4f}', transform=ax.transAxes,
        fontsize=12, bbox=dict(facecolor='white', alpha=0.7), horizontalalignment='right')
ax.set_xlabel("Proportion of Population")
ax.set_ylabel("Cumulative Incremental Outcome")
ax.set_title("Qini Curve - S-Learner")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# T-learner: sklift's TwoModels wraps two separately configured
# gradient-boosting classifiers, one passed as the treatment-arm estimator
# and one as the control-arm estimator.
t_learner = TwoModels(
    estimator_ctrl=GradientBoostingClassifier(random_state=42, n_estimators=100),
    estimator_trmnt=GradientBoostingClassifier(random_state=42, n_estimators=100),
)

# Fit on the training split, then score the held-out rows.
t_learner.fit(X=X_train_final, y=y_train, treatment=T_train)
uplift_preds_t = t_learner.predict(X=X_test_final)

In [None]:
# Qini curve points and the scalar Qini AUC for the T-learner on the test split.
qini_x_t, qini_y_t = qini_curve(y_test, uplift_preds_t, T_test)
qini_auc_t = qini_auc_score(y_test, uplift_preds_t, T_test)

# qini_curve reports x as the *number* of targeted samples; rescale to a
# fraction so the axis really shows "Proportion of Population".
qini_frac_t = qini_x_t / qini_x_t[-1]

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(qini_frac_t, qini_y_t, label='T-Learner Qini Curve', linewidth=2)
# Random targeting is the straight chord from the origin to the curve's end
# point. Curve points sit at distinct-score thresholds and are not evenly
# spaced in x, so the baseline must be computed from x, not from point index.
ax.plot(qini_frac_t, qini_frac_t * qini_y_t[-1], 'r--', label='Random Targeting')
ax.text(0.95, 0.05, f'Qini AUC = {qini_auc_t:.4f}', transform=ax.transAxes,
        fontsize=12, bbox=dict(facecolor='white', alpha=0.7), horizontalalignment='right')
ax.set_xlabel("Proportion of Population")
ax.set_ylabel("Cumulative Incremental Outcome")
ax.set_title("Qini Curve - T-Learner")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Report both Qini AUC scores together, one per line.
print(
    f"S-Learner Qini AUC: {qini_auc_s:.4f}",
    f"T-Learner Qini AUC: {qini_auc_t:.4f}",
    sep="\n",
)