### Uplift modeling
----
Для продвижения различных продуктов используются различные подходы:
 - look-alike модель
Наш стандартный подход. Зная, что целевое действие совершается некоторыми клиентами и не совершается другими клиентами, пытаемся построить модель бинарной классификации.
---
 - Response модель.
Модель, оценивающая вероятность того, что клиент совершит действие, если с ним прокоммуницировать. Лучше всего подходит в случае, если продукт берут только при условии коммуникации.
---
 - Uplift модель.
Данный тип модели пытается оценить чистый эффект от коммуникации, выделяя клиентов, которые совершат действие только при наличии коммуникации.

In [31]:
import numpy as np
import pandas as pd

In [25]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.preprocessing import LabelEncoder

In [14]:
# Load the dataset (customer features, the `offer` column and the `conversion`
# target); the path is resolved relative to the current working directory.
data = pd.read_csv('data.csv')

In [15]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
3,9,675.83,1,0,Rural,1,Web,Discount,0
4,2,45.34,1,0,Urban,0,Web,Buy One Get One,0


In [16]:
# Collapse every concrete offer type into the single label 'Offer', so the
# column becomes binary: 'Offer' (communicated) vs 'No Offer' (control).
data.loc[data['offer'].ne('No Offer'), 'offer'] = 'Offer'

In [17]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Offer,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Offer,0
3,9,675.83,1,0,Rural,1,Web,Offer,0
4,2,45.34,1,0,Urban,0,Web,Offer,0


In [18]:
# A single encoder instance; it is re-fitted for each categorical column in
# the encoding loop that follows.
le = LabelEncoder()

In [19]:
# Replace every categorical column with integer codes, in place.
# LabelEncoder numbers the classes in sorted label order, so for `offer`
# 'No Offer' becomes 0 and 'Offer' becomes 1 -- the uplift learners rely on
# exactly this 0/1 coding of the treatment flag.
# The encoder is re-fitted per column, so after the loop `le` only remembers
# the mapping of the last column.
cat_features = ['zip_code', 'channel', 'offer']
for column in cat_features:
    data[column] = le.fit_transform(data[column])

In [21]:
# NOTE(review): the bare name `offer` is not referenced by any other cell
# visible in this notebook (only the `offer` column is) -- this looks like a
# leftover; confirm before removing.
offer = 1

In [22]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,1,0,1,1,0
1,6,329.08,1,1,0,1,2,0,0
2,7,180.65,0,1,1,1,2,1,0
3,9,675.83,1,0,0,1,2,1,0
4,2,45.34,1,0,2,0,2,1,0


In [56]:
# Target is the `conversion` column; the features are all remaining columns
# (including the `offer` treatment flag).
y = data['conversion']
X = data.drop(columns='conversion')

In [57]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

### Single model approach, или же S-learner

In [58]:
# Base estimator configuration for the S-learner defined below: a decision
# tree whose depth is capped at 20.
base_model = DecisionTreeClassifier(max_depth=20)

In [66]:
class SLearner:
    """Single-model (S-learner) uplift estimator.

    One classifier is trained on all rows, with the communication flag used
    as an ordinary feature. The uplift of a row is the predicted
    positive-class probability with the flag forced to 1 minus the same
    probability with the flag forced to 0.
    """

    def __init__(self, communication_field='offer', model=None):
        """Create the learner.

        Args:
            communication_field: Name of the column of ``X`` that holds the
                treatment flag (1 = communicated, 0 = not communicated).
            model: Optional unfitted estimator exposing ``fit`` and
                ``predict_proba``. When omitted, an unfitted clone of the
                module-level ``base_model`` is used.
        """
        if model is None:
            # Clone rather than alias the global: otherwise every SLearner
            # would share one estimator and fitting a learner would silently
            # mutate ``base_model`` itself.
            from sklearn.base import clone

            model = clone(base_model)
        self.model = model
        self.com_field = communication_field

    def fit(self, X, y):
        """Fit the underlying model on all rows; the flag column stays in ``X``."""
        self.model.fit(X, y)

    def _proba_with_flag(self, X, flag):
        """Return positive-class probabilities with the flag column set to ``flag``."""
        forced = X.copy()  # never modify the caller's frame
        forced[self.com_field] = flag
        # Column 1 of predict_proba is taken as the positive class
        # (assumes a binary target -- TODO confirm for other targets).
        return self.model.predict_proba(forced)[:, 1]

    def predict(self, X):
        """Return the estimated uplift for every row of ``X``.

        Returns:
            Array of ``P(y=1 | flag=1) - P(y=1 | flag=0)``, one value per row.
        """
        pred_with_c = self._proba_with_flag(X, 1)
        pred_without_c = self._proba_with_flag(X, 0)
        return pred_with_c - pred_without_c

In [78]:
# Fit the S-learner on the training split and estimate the per-row uplift
# for the held-out rows.
slearner = SLearner()
slearner.fit(X_train, y_train)
pred_slearner = slearner.predict(X_test)

### Two model approach, или же T-learner

In [81]:
class TLearner:
    """Two-model (T-learner) uplift estimator.

    One classifier is trained on the rows whose communication flag is 1 and
    another on the rows whose flag is 0; the flag column itself is removed
    from the features. The uplift of a row is the difference between the
    positive-class probabilities predicted by the two models.
    """

    def __init__(self, communication_field='offer', model_treatment=None,
                 model_control=None):
        """Create the learner.

        Args:
            communication_field: Name of the column of ``X`` that holds the
                treatment flag (1 = communicated, 0 = not communicated).
            model_treatment: Optional unfitted estimator (``fit`` /
                ``predict_proba``) for the communicated rows. Defaults to
                ``DecisionTreeClassifier(max_depth=20)``.
            model_control: Optional unfitted estimator for the rows without
                communication. Same default as ``model_treatment``.
        """
        if model_treatment is None:
            model_treatment = DecisionTreeClassifier(max_depth=20)
        if model_control is None:
            model_control = DecisionTreeClassifier(max_depth=20)
        self.model_treatment = model_treatment
        self.model_control = model_control
        self.com_field = communication_field

    def fit(self, X, y):
        """Fit the treatment model on flag == 1 rows and the control model on flag == 0 rows.

        Rows whose flag is neither 0 nor 1 are used by neither model.
        """
        flag = X[self.com_field]
        features = X.drop(columns=self.com_field)
        treated = flag == 1
        control = flag == 0
        self.model_treatment.fit(features[treated], y[treated])
        self.model_control.fit(features[control], y[control])

    def predict(self, X):
        """Return the estimated uplift for every row of ``X``.

        The flag column of ``X`` is ignored: both models score every row.

        Returns:
            Array of treatment-model minus control-model positive-class
            probabilities, one value per row.
        """
        # Both models see the same feature set, so drop the flag only once.
        features = X.drop(columns=self.com_field)
        # Column 1 of predict_proba is taken as the positive class
        # (assumes both groups contained both classes -- TODO confirm).
        pred_with_c = self.model_treatment.predict_proba(features)[:, 1]
        pred_without_c = self.model_control.predict_proba(features)[:, 1]
        return pred_with_c - pred_without_c

In [82]:
# Fit the T-learner on the training split and estimate the per-row uplift
# for the held-out rows.
tlearner = TLearner()
tlearner.fit(X_train, y_train)
pred_tlearner = tlearner.predict(X_test)

In [85]:
pred_tlearner.max()

1.0

In [75]:
X_train[X_train.offer==1]

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer
44233,10,29.99,1,0,2,0,2,1
56296,8,292.30,1,1,2,1,2,1
23444,8,49.05,1,0,2,0,1,1
48936,12,324.32,1,0,2,0,2,1
57313,6,29.99,1,0,1,0,1,1
...,...,...,...,...,...,...,...,...
54343,10,29.99,0,1,1,0,2,1
62570,3,276.21,0,1,2,0,1,1
38158,4,43.49,0,1,2,0,1,1
860,8,492.53,1,1,0,1,1,1


NameError: name 'copy' is not defined

In [50]:
# NOTE(review): `pred` is not assigned in any cell visible in this notebook
# (the learners store their results in pred_slearner / pred_tlearner); this
# looks like a stale cell from an earlier session -- confirm.
pred.min()

1.0

In [42]:
X_train.copy()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer
44233,10,29.99,1,0,2,0,2,1
58749,9,60.25,0,1,2,0,1,0
56296,8,292.30,1,1,2,1,2,1
23444,8,49.05,1,0,2,0,1,1
48936,12,324.32,1,0,2,0,2,1
...,...,...,...,...,...,...,...,...
62570,3,276.21,0,1,2,0,1,1
38158,4,43.49,0,1,2,0,1,1
860,8,492.53,1,1,0,1,1,1
15795,5,72.28,1,0,2,1,2,1


In [20]:
data

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,1,0,1,1,0
1,6,329.08,1,1,0,1,2,0,0
2,7,180.65,0,1,1,1,2,1,0
3,9,675.83,1,0,0,1,2,1,0
4,2,45.34,1,0,2,0,2,1,0
...,...,...,...,...,...,...,...,...,...
63995,10,105.54,1,0,2,0,2,1,0
63996,5,38.91,0,1,2,1,1,1,0
63997,6,29.99,1,0,2,1,1,1,0
63998,1,552.94,1,0,1,1,0,1,0


In [12]:
data

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,1,0,1,0,0
1,6,329.08,1,1,0,1,2,2,0
2,7,180.65,0,1,1,1,2,0,0
3,9,675.83,1,0,0,1,2,1,0
4,2,45.34,1,0,2,0,2,0,0
...,...,...,...,...,...,...,...,...,...
63995,10,105.54,1,0,2,0,2,1,0
63996,5,38.91,0,1,2,1,1,1,0
63997,6,29.99,1,0,2,1,1,1,0
63998,1,552.94,1,0,1,1,0,0,0


In [6]:
# NOTE(review): these filters compare `offer` with the raw string 'No Offer',
# so they only split the data correctly while the column still holds the
# original strings, i.e. before the label-encoding cell has run.
# data_treatment / data_control are not used by any other cell visible here.
data_treatment = data[data.offer != 'No Offer']
data_control = data[data.offer == 'No Offer']

In [10]:
data

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
3,9,675.83,1,0,Rural,1,Web,Discount,0
4,2,45.34,1,0,Urban,0,Web,Buy One Get One,0
...,...,...,...,...,...,...,...,...,...
63995,10,105.54,1,0,Urban,0,Web,Discount,0
63996,5,38.91,0,1,Urban,1,Phone,Discount,0
63997,6,29.99,1,0,Urban,1,Phone,Discount,0
63998,1,552.94,1,0,Surburban,1,Multichannel,Buy One Get One,0


In [5]:
data[data.offer == 'No Offer']

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
1,6,329.08,1,1,Rural,1,Web,No Offer,0
14,4,241.42,0,1,Rural,1,Multichannel,No Offer,0
15,3,58.13,1,0,Urban,1,Web,No Offer,1
20,9,29.99,0,1,Surburban,1,Phone,No Offer,0
23,2,29.99,0,1,Urban,1,Phone,No Offer,0
...,...,...,...,...,...,...,...,...,...
63980,3,487.10,0,1,Surburban,1,Phone,No Offer,0
63981,4,125.53,0,1,Rural,1,Phone,No Offer,0
63983,2,83.03,0,1,Urban,0,Phone,No Offer,0
63987,1,79.70,1,0,Surburban,1,Web,No Offer,0
