In [None]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import average_precision_score, roc_auc_score
from sklift.models import ClassTransformation

In [None]:

# --- Feature engineering for transactions + demographics ---

def create_features(df):
    """Add customer-level transaction and demographic features to a transaction table.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per transaction. Must contain ``customer_id``,
        ``transaction_id``, ``amount`` and a datetime-typed
        ``transaction_date``. The optional columns ``age``, ``income`` and
        ``interest_category`` are used when present.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` (the input is not modified) with the same rows plus:
        ``txns_per_day``, ``avg_txn_amount``, ``txn_amount_std``,
        ``days_since_last_txn``, ``high_value_txn_ratio`` and, when their
        source column exists, ``is_senior`` / ``log_income``.
        ``interest_category`` is cast to ``category`` dtype. Missing values
        are replaced by 0 in non-categorical columns and by the level
        ``'missing'`` in categorical columns.
    """
    df = df.copy()

    # Compute every per-customer aggregate up front from a single groupby;
    # transform() broadcasts each aggregate back onto that customer's rows.
    per_customer = df.groupby('customer_id')
    txn_count = per_customer['transaction_id'].transform('count')
    # +1 so a customer whose transactions all fall on one day spans 1 day, not 0.
    active_days = per_customer['transaction_date'].transform(lambda x: (x.max() - x.min()).days + 1)
    mean_amount = per_customer['amount'].transform('mean')
    std_amount = per_customer['amount'].transform('std')
    last_txn_date = per_customer['transaction_date'].transform('max')

    # Transaction frequency features
    df['txns_per_day'] = txn_count / active_days

    # Monetary features
    df['avg_txn_amount'] = mean_amount
    # Sample std is NaN for customers with a single transaction; filled with 0 below.
    df['txn_amount_std'] = std_amount

    # Recency features: days between the newest date in the data and each
    # customer's own most recent transaction (previously measured per row,
    # which gave the age of every transaction rather than customer recency).
    df['days_since_last_txn'] = (df['transaction_date'].max() - last_txn_date).dt.days

    # Ratio features: share of a customer's transactions above the global
    # 90th percentile of amount.
    is_high_value = (df['amount'] > df['amount'].quantile(0.9)).astype(int)
    df['high_value_txn_ratio'] = is_high_value.groupby(df['customer_id']).transform('mean')

    # Example demographic & behavioral features
    if 'age' in df.columns:
        df['is_senior'] = (df['age'] > 60).astype(int)
    if 'income' in df.columns:
        df['log_income'] = np.log1p(df['income'])
    if 'interest_category' in df.columns:
        df['interest_category'] = df['interest_category'].astype('category')

    # A blanket fillna(0) raises on categorical columns because 0 is not one
    # of their categories, so categorical columns are handled separately.
    categorical_cols = [
        col for col, dtype in df.dtypes.items()
        if isinstance(dtype, pd.CategoricalDtype)
    ]
    other_cols = [col for col in df.columns if col not in categorical_cols]
    df = df.fillna({col: 0 for col in other_cols})

    for col in categorical_cols:
        if df[col].isna().any():
            # Register the placeholder level before filling with it.
            if 'missing' not in df[col].cat.categories:
                df[col] = df[col].cat.add_categories('missing')
            df[col] = df[col].fillna('missing')

    return df

In [None]:
# Goal: estimate, per customer, the uplift of a policy change, i.e. the change in conversion probability caused by exposure to it

# --- Example uplift modeling pipeline ---
# y = binary outcome (e.g., conversion)
# treatment = whether customer was exposed to a policy or control group
# X = customer features (transaction + demographics)

# NOTE(review): X, y and treatment are not defined in the cells shown here;
# they must already exist in the kernel before this cell runs.
X = create_features(X)

# Columns CatBoost has to treat as categorical (object or pandas category dtype).
categoricals = [c for c in X.columns if X[c].dtype=='object' or str(X[c].dtype).startswith('category')]

# NOTE(review): create_features does not drop customer_id, transaction_id or
# transaction_date, so they reach the model as features - confirm this is intended.

# CatBoost inside uplift framework
cat_model = CatBoostClassifier(
    iterations=2000,
    depth=6,
    learning_rate=0.05,
    loss_function='Logloss',
    eval_metric='AUC',
    random_seed=42,
    verbose=False,
    # Previously `categoricals` was computed but never handed to the model,
    # so string/category columns would be parsed as numeric features.
    cat_features=categoricals,
)

# Class-transformation approach: a single classifier is fit on a target
# derived from outcome and treatment, and its output is turned into uplift.
uplift_model = ClassTransformation(estimator=cat_model)
uplift_model.fit(X, y, treatment)

# In-sample scores: these are predictions for the same rows used for fitting.
uplift_preds = uplift_model.predict(X)

print("Uplift predictions:", uplift_preds[:10])



In [None]:

# --- Evaluation ---
# Use uplift metrics: Qini coefficient, uplift curve
# Use cross-validation and compare to baseline classifiers

# --- Production strategy ---
# 1. Continuously update transaction + behavior data for each customer.
# 2. Retrain uplift model monthly/quarterly or when new policy tested.
# 3. Track effectiveness with Qini curves in production.
# 4. Use MLflow to log experiments.
# 5. Deploy via FastAPI/Flask to assign best policy per customer.