In [358]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import catboost

#### Marketing Promotion Campaign Uplift Modelling

- **recency** - months since last purchase
- **history** - value of the historical purchases
- **used_discount** - indicates if the customer used a discount before
- **used_bogo** - indicates if the customer used a Buy One Get One offer before
- **zip_code** - class of the zip code as Suburban/Urban/Rural (note: the data spells the first value `Surburban`)
- **is_referral** - indicates if the customer was acquired from a referral channel
- **channel** - channels that the customer uses: Phone/Web/Multichannel
- **offer** - the offer sent to the customer: Discount/Buy One Get One/No Offer
- **conversion** - customer conversion (buy or not)

In [359]:
# Load the campaign dataset; 'data.csv' is a relative path, so it is resolved
# against the current working directory of the kernel.
df_data = pd.read_csv('data.csv')

In [360]:
df_data

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
3,9,675.83,1,0,Rural,1,Web,Discount,0
4,2,45.34,1,0,Urban,0,Web,Buy One Get One,0
...,...,...,...,...,...,...,...,...,...
63995,10,105.54,1,0,Urban,0,Web,Discount,0
63996,5,38.91,0,1,Urban,1,Phone,Discount,0
63997,6,29.99,1,0,Urban,1,Phone,Discount,0
63998,1,552.94,1,0,Surburban,1,Multichannel,Buy One Get One,0


### 2). Prepare the treatment and target columns

In [361]:
# Use the column names expected by the rest of the notebook:
# the offer sent becomes the treatment flag, the conversion becomes the target.
column_aliases = {'offer': 'treatment', 'conversion': 'target'}
df_data = df_data.rename(columns=column_aliases)

In [362]:
# Binarise the offer: 'No Offer' is the control group (0); any other offer
# (Discount / Buy One Get One) counts as treated (1).
#
# The flag is computed from a boolean mask instead of writing integers into the
# string column through .loc: that leaves a mixed str/int object column behind
# and is rejected when the column uses pandas' dedicated string dtype.
# `0` is listed next to 'No Offer' so that re-running this cell on an already
# binarised column leaves it unchanged.
is_control = df_data['treatment'].isin(['No Offer', 0])
df_data['treatment'] = (~is_control).astype(int)

In [363]:
# Make sure the treatment flag is stored with an integer dtype
df_data = df_data.astype({'treatment': int})

In [364]:
df_data['treatment'].dtype

dtype('int32')

In [365]:
# Separate the label from the features; `treatment` deliberately stays in X
# for now so that it is split together with the rows it belongs to.
X, y = df_data.drop(columns=['target']), df_data['target']

### 3). Train/test split

In [366]:
# 70/30 shuffled hold-out split, stratified on the target and seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    stratify=y,
    random_state=15,
)

In [367]:
X_train.iloc[:5]

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,treatment
23036,1,526.72,1,0,Urban,1,Web,1
53943,9,670.23,0,1,Surburban,1,Web,1
16392,5,118.7,0,1,Rural,1,Phone,1
60643,2,97.11,1,0,Urban,0,Phone,0
8809,1,1139.94,1,1,Urban,1,Phone,1


In [368]:
# Keep the treatment flag of each split as its own Series
treat_train, treat_test = X_train['treatment'], X_test['treatment']

In [369]:
# The treatment flag is handed to the uplift models as a separate argument, so
# it must not remain among the features.
# errors='ignore' keeps this cell re-runnable: on a second execution the column
# is already gone and a plain drop would raise KeyError.
X_train = X_train.drop(columns='treatment', errors='ignore')
X_test = X_test.drop(columns='treatment', errors='ignore')

### 4). Fit and score the uplift models

In [370]:
# Accumulator for the comparison table: one list per column, one entry per model.
# A comprehension is used so that every column gets its own independent list.
models_results = {
    column: []
    for column in (
        'approach',
        'uplift@10%',
        'uplift@20%',
        'uplift@30%',
        'uplift@50%',
    )
}

**Solo model**

In [371]:
from sklift.models import SoloModel
from catboost import CatBoostClassifier
from sklift.metrics import uplift_at_k
from sklift.viz import plot_uplift_curve

In [372]:
# String-valued columns (e.g. 'Urban', 'Web' in the data preview) that are
# passed to CatBoost through its `cat_features` argument.
cat_features = ['zip_code', 'channel']

In [373]:
# Single-model approach: one CatBoost classifier wrapped by sklift's SoloModel
sm = SoloModel(
    estimator=CatBoostClassifier(
        iterations=100,
        random_state=15,
        silent=True,
        cat_features=cat_features,
    ),
)

In [374]:
# Train on the training split; the treatment flag is passed separately from the features
sm.fit(X_train, y_train, treatment=treat_train)

In [375]:
# SoloModel predictions for the test rows; later passed to score() as the
# `uplift` argument of uplift_at_k.
uplift_sm = sm.predict(X_test)

In [376]:
def score(model_pred, k):
    """Return uplift@k for a set of predictions on the held-out test split.

    The true outcomes and treatment flags are read from the notebook-level
    variables ``y_test`` and ``treat_test``, so this helper is only meaningful
    for predictions made on ``X_test``.

    Parameters
    ----------
    model_pred : predicted uplift values, forwarded as ``uplift``.
    k : cutoff forwarded unchanged to ``uplift_at_k`` (the notebook passes
        fractions such as 0.1 for the top 10%).

    Returns
    -------
    Whatever ``sklift.metrics.uplift_at_k`` returns for the ``'by_group'``
    strategy.
    """
    return uplift_at_k(
        y_true=y_test,
        uplift=model_pred,
        treatment=treat_test,
        strategy='by_group',
        k=k,
    )

In [377]:
# Add the SoloModel row to the comparison table: its label plus uplift@k per cutoff
models_results['approach'].append('SoloModel')
for top_share in (0.1, 0.2, 0.3, 0.5):
    # f'{0.1:.0%}' renders as '10%', which matches the 'uplift@10%' key
    models_results[f'uplift@{top_share:.0%}'].append(score(uplift_sm, top_share))

**Class transformation**

In [378]:
from sklift.models import ClassTransformation

In [379]:
# Class-transformation approach: one CatBoost classifier wrapped by sklift's ClassTransformation
ct = ClassTransformation(
    estimator=CatBoostClassifier(
        iterations=100,
        random_state=15,
        silent=True,
        cat_features=cat_features,
    ),
)

In [380]:
# Train on the training split, then keep the predictions for the test rows
ct.fit(X_train, y_train, treatment=treat_train)
uplift_ct = ct.predict(X_test)

In [381]:
# Add the ClassTransformation row to the comparison table: its label plus uplift@k per cutoff
models_results['approach'].append('ClassTransformation')
for top_share in (0.1, 0.2, 0.3, 0.5):
    # f'{0.1:.0%}' renders as '10%', which matches the 'uplift@10%' key
    models_results[f'uplift@{top_share:.0%}'].append(score(uplift_ct, top_share))

In [382]:
models_results

{'approach': ['SoloModel', 'ClassTransformation'],
 'uplift@10%': [0.07679603431914267, 0.06745877824047308],
 'uplift@20%': [0.07077198822173858, 0.05830300370052002],
 'uplift@30%': [0.06875946682618209, 0.05473562783056579],
 'uplift@50%': [0.0641619235553319, 0.05762591369804701]}

**Two independent models**

In [383]:
from sklift.models import TwoModels

In [384]:
# Two-model approach: separate, identically configured CatBoost classifiers
# for the treatment group and the control group.
tm = TwoModels(
    estimator_trmnt=CatBoostClassifier(
        iterations=100,
        random_state=15,
        silent=True,
        cat_features=cat_features,
    ),
    estimator_ctrl=CatBoostClassifier(
        iterations=100,
        random_state=15,
        silent=True,
        cat_features=cat_features,
    ),
)

In [385]:
# Train on the training split and rebind `tm` to the value returned by fit()
tm = tm.fit(X_train, y_train, treatment=treat_train)

In [386]:
# TwoModels predictions for the test rows.
# NOTE(review): `uplitf_tm` looks like a typo for `uplift_tm` (compare `uplift_sm`
# and `uplift_ct`); the name is left unchanged because the next cell reads it.
uplitf_tm = tm.predict(X_test)

In [387]:
# Add the TwoModels row to the comparison table: its label plus uplift@k per cutoff
models_results['approach'].append('TwoModels')
for top_share in (0.1, 0.2, 0.3, 0.5):
    # f'{0.1:.0%}' renders as '10%', which matches the 'uplift@10%' key
    models_results[f'uplift@{top_share:.0%}'].append(score(uplitf_tm, top_share))

### 5). Compare the approaches

In [388]:
pd.DataFrame(data=models_results)

Unnamed: 0,approach,uplift@10%,uplift@20%,uplift@30%,uplift@50%
0,SoloModel,0.076796,0.070772,0.068759,0.064162
1,ClassTransformation,0.067459,0.058303,0.054736,0.057626
2,TwoModels,0.051746,0.069969,0.068971,0.064731
