In [None]:
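# Uncomment to install LightAutoML into the kernel's environment
# (consider pinning a release tag or commit instead of `master` for reproducibility):
# %pip install git+https://github.com/sberbank-ai-lab/LightAutoML.git@master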

# Imports

In [1]:
import pandas as pd

In [2]:
from lightautoml.tasks import Task
from lightautoml.addons.uplift.base import AutoUplift
from lightautoml.addons.uplift.metrics import calculate_uplift_auc

In [3]:
from sklearn.model_selection import train_test_split

# Load data & prepare

In [6]:
# Read the three CSV inputs from the working directory, in this order:
# training data, test data, and the submission template that later receives
# the 'uplift' column.
train_df, test_df, submission_df = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'submission_uplift.csv')
)

In [7]:
# Preview the first five rows of the training frame (5 is the pandas default).
train_df.head(n=5)

Unnamed: 0,id,age,cheque_count_12m_g20,cheque_count_12m_g21,cheque_count_12m_g25,cheque_count_12m_g32,cheque_count_12m_g33,cheque_count_12m_g38,cheque_count_12m_g39,cheque_count_12m_g41,...,sale_sum_3m_g26,sale_sum_3m_g32,sale_sum_3m_g33,sale_sum_6m_g24,sale_sum_6m_g25,sale_sum_6m_g26,sale_sum_6m_g32,sale_sum_6m_g33,sale_sum_6m_g44,sale_sum_6m_g54
0,0,34.0,1.0,2.0,2.0,2.0,3.0,2.0,0.0,2.0,...,55.78,42.19,637.99,0.0,47.49,55.78,42.19,637.99,553.14,114.96
1,1,27.0,35.0,38.0,4.0,50.0,7.0,38.0,2.0,11.0,...,92.77,1237.9,210.9,745.46,299.69,340.97,3543.02,524.12,677.29,707.53
2,2,28.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,...,,,,169.52,0.0,26.99,0.0,27.59,0.0,208.63
3,3,32.0,0.0,4.0,4.0,2.0,3.0,2.0,2.0,5.0,...,846.8,213.37,556.1,841.96,430.47,1015.73,213.37,556.1,169.49,288.63
4,4,51.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,...,18.89,0.0,0.0,68.93,0.0,34.98,0.0,0.0,1395.36,139.19


In [8]:
# Column names used throughout the notebook: the outcome column and the
# column that holds the treatment/control group label.
TARGET_NAME, TREATMENT_NAME = 'target', 'group'

In [9]:
# Encode the treatment flag as 0/1: rows whose group label is 'test' become 1,
# every other label becomes 0.
# The dtype guard keeps the cell safe to re-run: once the column is numeric,
# comparing it with 'test' again would silently overwrite every flag with 0.
if not pd.api.types.is_numeric_dtype(train_df[TREATMENT_NAME]):
    train_df[TREATMENT_NAME] = (train_df[TREATMENT_NAME] == 'test').astype(int)

In [10]:
# Remove the 'id' column from both frames so it is not used as a feature
# (presumably it is only a row identifier — TODO confirm).
# Reassignment with errors='ignore' replaces the in-place drop so that
# re-running the cell is a no-op instead of raising KeyError.
train_df = train_df.drop(columns='id', errors='ignore')
test_df = test_df.drop(columns='id', errors='ignore')

In [11]:
# Two-column frame (target, treatment) used as the stratification key, so the
# split below is stratified on the combination of outcome and group.
stratify_val = train_df.loc[:, [TARGET_NAME, TREATMENT_NAME]]

In [12]:
# Hold out a validation split, stratified on (target, treatment) and seeded
# for reproducibility. No test_size/train_size is passed, so scikit-learn's
# default split proportion applies.
# NOTE: `train_df` is rebound to the training part here, so this cell must not
# be re-run without re-running the cells above it first.
split_parts = train_test_split(
    train_df,
    random_state=100,
    shuffle=True,
    stratify=stratify_val,
)
train_df, valid_df = split_parts

In [13]:
# Flat arrays of the validation outcome and treatment flag, kept for the
# metric computation further down.
valid_target, valid_treatment = (
    valid_df[column_name].values.ravel()
    for column_name in (TARGET_NAME, TREATMENT_NAME)
)

# Training

In [14]:
# Time budget passed to AutoUplift as `timeout`
# (presumably in seconds — confirm against the LightAutoML documentation).
# 60 * 3 is 180, i.e. 3 minutes; use 60 * 30 if a 30-minute budget is wanted.
TIMEOUT = 60 * 3  # 3 min

In [15]:
# Column roles handed to AutoUplift.fit: which column is the outcome and
# which one is the treatment flag.
roles = dict(target=TARGET_NAME, treatment=TREATMENT_NAME)

In [16]:
# Uplift AutoML wrapper built on a binary-classification base task and
# limited by the TIMEOUT budget defined above.
autouplift = AutoUplift(timeout=TIMEOUT, base_task=Task('binary'))

In [17]:
%%time

# Fit the uplift model on the training split; `roles` names the target and
# treatment columns. The recorded run of this cell took about 5 minutes of
# wall time.
# NOTE(review): the recorded output shows a pandas SettingWithCopyWarning,
# possibly because `train_df` is a slice produced by train_test_split —
# TODO confirm.
autouplift.fit(train_df, roles)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Wall time: 5min 6s


In [18]:
# Score the validation split. predict() yields three values; only the first
# one (the uplift prediction) is used, the other two are discarded.
valid_outputs = autouplift.predict(valid_df)
uplift_pred, _, _ = valid_outputs

In [19]:
# Uplift AUC on the validation split. Arguments are passed positionally in the
# order: true outcome, predicted uplift, treatment flag.
cum_gain = calculate_uplift_auc(valid_target, uplift_pred, valid_treatment)

In [20]:
# Show the validation uplift AUC computed above.
print(f'{cum_gain}')

0.021101254029912274


# Make submission

In [21]:
# Score the test set. NOTE: this rebinds `uplift_pred`, which previously held
# the validation predictions; only the first of the three returned values is
# kept.
test_outputs = autouplift.predict(test_df)
uplift_pred, _, _ = test_outputs

In [22]:
# Attach the test-set uplift predictions to the submission template as the
# 'uplift' column (assumes the rows of test.csv and submission_uplift.csv are
# in the same order — TODO confirm).
submission_df = submission_df.assign(uplift=uplift_pred)

In [24]:
# Write the submission file to the working directory, without the index column.
submission_df.to_csv(path_or_buf='baseline_submission.csv', index=False)