In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('data.csv')
df.rename(columns={'conversion' : 'target', 'offer': 'treatment'}, inplace=True)
df.head(3)

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,treatment,target
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0


In [3]:
df['target'].value_counts()

0    54606
1     9394
Name: target, dtype: int64

#### Посмотрим на распределение признаков

In [4]:
df['treatment'].value_counts()

Buy One Get One    21387
Discount           21307
No Offer           21306
Name: treatment, dtype: int64

#### Приведем treatment к бинарному виду (1 - есть предложение, 0 - нет)

In [5]:
df['treatment'] = df['treatment'].map({'Buy One Get One': 1, 'Discount' : 1, 'No Offer' : 0})

In [6]:
df.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,treatment,target
0,10,142.44,1,0,Surburban,0,Phone,1,0
1,6,329.08,1,1,Rural,1,Web,0,0
2,7,180.65,0,1,Surburban,1,Web,1,0
3,9,675.83,1,0,Rural,1,Web,1,0
4,2,45.34,1,0,Urban,0,Web,1,0


In [7]:
indices_train = df.index
indices_learn, indices_valid = train_test_split(df.index, test_size=0.3, random_state=123)

In [8]:
X_train = df.loc[indices_learn, :]
y_train = df.loc[indices_learn, 'target']
treat_train = df.loc[indices_learn, 'treatment']

X_val = df.loc[indices_valid, :]
y_val = df.loc[indices_valid, 'target']
treat_val =  df.loc[indices_valid, 'treatment']

#X_test = df.loc[indices_test, :]

cat_features = ['zip_code', 'channel']

models_results = {
    'approach': [],
    'uplift@10%': [],
    'uplift@20%': []
}

#### Посчитаем uplift, используя одну модель с признаком коммуникации

In [9]:
# Инструкция по установке пакета: https://github.com/maks-sh/scikit-uplift
# Ссылка на документацию: https://scikit-uplift.readthedocs.io/en/latest/
from sklift.metrics import uplift_at_k
from sklift.viz import plot_uplift_preds
from sklift.models import SoloModel

# sklift поддерживает любые модели, 
# которые удовлетворяют соглашениями scikit-learn
# Для примера воспользуемся catboost
from catboost import CatBoostClassifier


sm = SoloModel(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))
sm = sm.fit(X_train, y_train, treat_train, estimator_fit_params={'cat_features': cat_features})

uplift_sm = sm.predict(X_val)
models_results['approach'].append('SoloModel')

for i in range(1,3):
    sm_score = uplift_at_k(y_true=y_val, uplift=uplift_sm, treatment=treat_val, strategy='by_group', k=0.1 * i)
    models_results[f'uplift@{i}0%'].append(sm_score)
    print(f'uplift@30%: {sm_score:.4f}')

uplift@30%: 0.0000
uplift@30%: 0.3124


#### Посчитаем uplift с трансформацией классов

In [10]:
from sklift.models import ClassTransformation

ct = ClassTransformation(CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True))
ct = ct.fit(X_train, y_train, treat_train, estimator_fit_params={'cat_features': cat_features})

uplift_ct = ct.predict(X_val)

models_results['approach'].append('ClassTransformation')
for i in range(1,3):
    ct_score = uplift_at_k(y_true=y_val, uplift=uplift_ct, treatment=treat_val, strategy='by_group', k=0.1*i)
    models_results[f'uplift@{i}0%'].append(ct_score)
    print(f'uplift@30%: {ct_score:.4f}')

  ct = ct.fit(X_train, y_train, treat_train, estimator_fit_params={'cat_features': cat_features})


uplift@30%: 1.0000
uplift@30%: 0.8390


#### Посчитаем uplift, используя две независимые модели

In [11]:
from sklift.models import TwoModels

tm = TwoModels(
    estimator_trmnt=CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True), 
    estimator_ctrl=CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True), 
    method='vanilla'
)

tm = tm.fit(
    X_train, y_train, treat_train,
    estimator_trmnt_fit_params={'cat_features': cat_features}, 
    estimator_ctrl_fit_params={'cat_features': cat_features}
)

uplift_tm = tm.predict(X_val)
models_results['approach'].append('TwoModels')

for i in range(1,3):
    tm_score = uplift_at_k(y_true=y_val, uplift=uplift_tm, treatment=treat_val, strategy='by_group', k=0.1*i)
    models_results[f'uplift@{i}0%'].append(tm_score)
    print(f'uplift@30%: {tm_score:.4f}')

uplift@30%: 0.2444
uplift@30%: 0.1791


In [12]:
models_results

{'approach': ['SoloModel', 'ClassTransformation', 'TwoModels'],
 'uplift@10%': [0.0, 1.0, 0.2443547204847537],
 'uplift@20%': [0.31243710922235246, 0.8389996092223525, 0.17913919011332552]}