### Uplift modeling
----
Для продвижения различных продуктов используются различные подходы:
 - look-alike модель
Наш стандартный подход. Зная, что целевое действие совершается некоторыми клиентами и не совершается другими клиентами, пытаемся построить модель бинарной классификации.
---
 - Response модель.
Модель оценивающая вероятность того, что клиент совершит действие, если с ним прокоммуницировать. Лучше всего подходит в случае, если продукт берут только при условии коммуникации.
---
 - Uplift модель.
Данный тип модели пытается оценить чистый эффект от коммуникации, пытаясь выделить клиентов, которые совершат действие только при  наличии коммуникации.

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.preprocessing import LabelEncoder

In [4]:
data = pd.read_csv('data.csv')

In [5]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
3,9,675.83,1,0,Rural,1,Web,Discount,0
4,2,45.34,1,0,Urban,0,Web,Buy One Get One,0


In [6]:
data.loc[data.offer!='No Offer', 'offer'] = 'Offer'

In [7]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Offer,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Offer,0
3,9,675.83,1,0,Rural,1,Web,Offer,0
4,2,45.34,1,0,Urban,0,Web,Offer,0


In [8]:
le = LabelEncoder()

In [9]:
cat_features = ['zip_code', 'channel', 'offer']
for column in cat_features:
    le.fit(data[column])
    data[column] = le.transform(data[column])

In [10]:
offer = 1

In [11]:
data.head()

Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,1,0,1,1,0
1,6,329.08,1,1,0,1,2,0,0
2,7,180.65,0,1,1,1,2,1,0
3,9,675.83,1,0,0,1,2,1,0
4,2,45.34,1,0,2,0,2,1,0


In [12]:
X, y = data.drop(columns='conversion'), data.conversion

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

### Single model aproach Или же S-learner

In [14]:
base_model = DecisionTreeClassifier(max_depth=20)

In [15]:
class SLearner:
    def __init__(self, communication_field='offer'):
        self.model = base_model
        self.com_field = communication_field
    def fit(self, X, y):
        self.model.fit(X, y)
    def predict(self, X):
        X_with_communication = X.copy()
        X_without_communication = X.copy()
        X_with_communication[self.com_field] = 1
        X_without_communication[self.com_field] = 0
        pred_with_c = self.model.predict_proba(X_with_communication)[:, 1]
        pred_without_c = self.model.predict_proba(X_without_communication)[:, 1]

        return pred_with_c-pred_without_c

In [16]:
slearner = SLearner()
slearner.fit(X_train, y_train)
pred_slearner = slearner.predict(X_test)

### Two model aproach или же T-leaner

In [17]:
class TLearner:
    def __init__(self, communication_field='offer'):
        self.model_treatment =  DecisionTreeClassifier(max_depth=20)
        self.model_control =  DecisionTreeClassifier(max_depth=20)
        self.com_field = communication_field

    def fit(self, X, y):
        X_with_communication = X[X[self.com_field]==1].drop(columns=self.com_field)
        y_with_communication = y[X[self.com_field]==1]
        X_without_communication = X[X[self.com_field]==0].drop(columns=self.com_field)
        y_without_communication = y[X[self.com_field]==0]
        self.model_treatment.fit(X_with_communication, y_with_communication)
        self.model_control.fit(X_without_communication, y_without_communication)
    def predict(self, X):
        X_with_communication = X.drop(columns=self.com_field)
        X_without_communication = X.drop(columns=self.com_field)
        pred_with_c = self.model_treatment.predict_proba(X_with_communication)[:, 1]
        pred_without_c = self.model_control.predict_proba(X_without_communication)[:, 1]

        return pred_with_c-pred_without_c

In [18]:
tlearner = TLearner()
tlearner.fit(X_train, y_train)
pred_tlearner = tlearner.predict(X_test)

In [19]:
pred_tlearner.max()

1.0

### Две созависимые модели или X-learner

In [20]:
class XLearner:
    def __init__(self, communication_field='offer'):
        self.model_treatment =  DecisionTreeClassifier(max_depth=20)
        self.model_control =  DecisionTreeClassifier(max_depth=20)

        self.final_model_treatment =  DecisionTreeRegressor(max_depth=20)
        self.final_model_control =  DecisionTreeRegressor(max_depth=20)

        self.com_field = communication_field


    def fit(self, X, y):
        X_with_communication = X[X[self.com_field]==1].drop(columns=self.com_field)
        y_with_communication = y[X[self.com_field]==1]
        X_without_communication = X[X[self.com_field]==0].drop(columns=self.com_field)
        y_without_communication = y[X[self.com_field]==0]
        self.model_treatment.fit(X_with_communication, y_with_communication)
        self.model_control.fit(X_without_communication, y_without_communication)

        Yc = self.model_control.predict_proba(X_with_communication)[:, 1]
        Yt = self.model_treatment.predict_proba(X_without_communication)[:, 1]

        ### Вменяемый эффект от коммуникации


        Dt = y_with_communication - Yc
        Dc = Yt - y_without_communication

        self.final_model_treatment.fit(X_with_communication, Dt)
        self.final_model_control.fit(X_without_communication, Dc)

    def predict(self, X, g=0.5):
        pred_treatment = self.final_model_treatment.predict(X.drop(columns=self.com_field))
        pred_control = self.final_model_control.predict(X.drop(columns=self.com_field))
        return g*pred_control + (1-g)*pred_treatment


In [40]:
xlearner = XLearner()
xlearner.fit(X_train, y_train)
pred_xlearner = xlearner.predict(X_test)

### Трансформация таргета

In [22]:
train_Z = np.logical_xor(X_train.offer,y_train).astype(int)

In [23]:
tt_model = DecisionTreeClassifier(max_depth=20)

In [24]:
tt_model.fit(X_train.drop(columns='offer'), train_Z)

DecisionTreeClassifier(max_depth=20)

In [25]:
pred_tt = 2 * tt_model.predict_proba(X_test.drop(columns='offer'))[:, 1] - 1

In [26]:
### Метрики качества

In [28]:
from sklift.metrics import uplift_at_k

In [119]:
uplift_at_k(y_test, pred_slearner, X_test.offer,
                             strategy='overall', k=0.5)

0.07408995795053706

In [117]:
uplift_at_k(y_test, pred_tlearner, X_test.offer,
                             strategy='overall', k=0.5)

0.07467117184399535

In [115]:
uplift_at_k(y_test, pred_xlearner, X_test.offer,
                             strategy='overall', k=0.5)

0.07488237141290721

In [56]:
uplift_at_k(y_test, pred_tt, X_test.offer,
                             strategy='overall', k=0.5)

0.0687058765436402

In [51]:
from sklift.metrics import uplift_by_percentile

In [71]:
uplift_by_percentile(y_test, pred_tlearner, X_test.offer,
                     strategy='overall',
                     total=True, std=True, bins=10)

  return asarray(a).ndim


Unnamed: 0_level_0,n_treatment,n_control,response_rate_treatment,response_rate_control,uplift,std_treatment,std_control,std_uplift
percentile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0-10,1404,708,0.210826,0.112994,0.097832,0.010886,0.011898,0.016127
10-20,1369,743,0.182615,0.119785,0.06283,0.010442,0.011912,0.015841
20-30,1397,715,0.170365,0.100699,0.069666,0.010059,0.011254,0.015094
30-40,1408,704,0.158381,0.082386,0.075994,0.00973,0.010363,0.014215
40-50,1393,719,0.147164,0.079277,0.067888,0.009492,0.010076,0.013843
50-60,1385,727,0.163177,0.101788,0.061389,0.009929,0.011214,0.014978
60-70,1419,693,0.117689,0.103896,0.013792,0.008554,0.011591,0.014406
70-80,1437,675,0.161447,0.112593,0.048855,0.009706,0.012166,0.015564
80-90,1411,701,0.167257,0.095578,0.07168,0.009935,0.011105,0.014901
90-100,1413,699,0.203822,0.111588,0.092234,0.010717,0.011909,0.016021


### Посмотрим результат работы обычной модели

In [120]:
model = DecisionTreeClassifier(max_depth=20)

In [124]:
model.fit(X_train.drop(columns='offer'), y_train)

DecisionTreeClassifier(max_depth=20)

In [127]:
standard_preds = model.predict_proba(X_test.drop(columns='offer'))[:, 1]

In [130]:
uplift_at_k(y_test, standard_preds, X_test.offer,
                     strategy='overall', k=0.5)

0.06703093254469168

In [128]:
uplift_by_percentile(y_test, standard_preds, X_test.offer,
                     strategy='overall',
                     total=True, std=True, bins=10)

  return asarray(a).ndim


Unnamed: 0_level_0,n_treatment,n_control,response_rate_treatment,response_rate_control,uplift,std_treatment,std_control,std_uplift
percentile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0-10,1414,698,0.199434,0.106017,0.093417,0.010626,0.011653,0.01577
10-20,1401,711,0.212705,0.144866,0.067839,0.010933,0.0132,0.01714
20-30,1423,689,0.159522,0.103048,0.056474,0.009707,0.011582,0.015112
30-40,1357,755,0.153279,0.090066,0.063213,0.00978,0.010419,0.014289
40-50,1395,717,0.127599,0.075314,0.052285,0.008933,0.009855,0.013301
50-60,1398,714,0.158798,0.105042,0.053756,0.009775,0.011474,0.015074
60-70,1428,684,0.170868,0.096491,0.074377,0.00996,0.01129,0.015055
70-80,1398,714,0.157368,0.098039,0.059328,0.009739,0.011129,0.014789
80-90,1392,720,0.173851,0.094444,0.079406,0.010158,0.010899,0.014898
90-100,1430,682,0.167832,0.108504,0.059328,0.009883,0.011909,0.015476
