In [15]:
! ls

AzureML		       catboost		lentahack		train_as_is.csv
MMLSpark	       dana2_eda.ipynb	offers_date.csv		Плавильня.ipynb
Nazar.ipynb	       dana_eda.ipynb	pytorch
SparkML		       h2o		test_Offer_ID_map.json
alisa-data-prep.ipynb  julia		test_as_is.csv


In [1]:
import json
import numpy as np

In [2]:
import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# regression models
from sklearn.linear_model import LinearRegression
from lightgbm import LGBMRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor


In [3]:
from catboost import Pool, CatBoostRegressor

In [14]:
def load_dataset(path, is_test=False, y_col='UpLift'):
    df = pd.read_csv(path)
    if is_test: 
        return df
    return df[df.columns[:-1]], df[y_col]


def build_model(mdl):
    model = Pipeline([('onehot', OneHotEncoder(handle_unknown='ignore')), 
                      ("model", mdl)])
    return model


def tune_model():
    X_train, y_train = load_dataset(X_TRAIN, Y_TRAIN)
    model = build_model()

    gs = GridSearchCV(model, GRID_PARAMS, scoring="accuracy", n_jobs=-1, cv=5)
    gs.fit(X_train, y_train)

    print("Best Hyperparameters: {}".format(gs.best_params_))
    print("Best score: {:.2f}%".format(100 * gs.best_score_))


def train_model(print_params=False):
    X_train, y_train = load_dataset(PATH)

    model = build_model()
    model.set_params(**PARAMS)

    if print_params:
        print(model.get_params())

    model.fit(X_train, y_train)

    joblib.dump(model, MODEL_NAME)


def test_model():
    X_test, y_test = load_dataset(X_TEST, Y_TEST)
    model = joblib.load(MODEL_NAME)

    y_pred = model.predict(X_test)

    print("MAE on the test set: {:.2f}%".format(
        100 * mean_absolute_error(y_test, y_pred)))

In [None]:
num_attributes = housing_X.select_dtypes(exclude='object')
cat_attributes = housing_X.select_dtypes(include='object')


In [66]:
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

def build_model2(mdl):
    global num_attribs
    global cat_attribs
    
    cat_pipeline = Pipeline([
     ('imputer', SimpleImputer(strategy='most_frequent')),
     ('onehot', OneHotEncoder(handle_unknown='ignore'))    
       ])
    num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="constant", fill_value=0)),
        ('std_scaler', StandardScaler()),
    ])
    
    preproc_pipeline = ColumnTransformer([
        ("num", num_pipeline, num_attribs),
        ("cat", cat_pipeline, cat_attribs),
    ])
        
    model = Pipeline([('preproc', preproc_pipeline), 
                      ("model", mdl)])
    return model

In [80]:
def build_model2(mdl, num_cols):
    
    num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="constant", fill_value=0)),
        ('std_scaler', StandardScaler()),
    ])
    
    model = Pipeline([('onehot', OneHotEncoder(handle_unknown='ignore')),
                      ColumnTransformer([("num", num_pipeline, num_cols)]), 
                      ("model", mdl)])
    return model

In [5]:
# submission prep 

def submission_prep(y_pred, offer_id_map_path, output_id):
    output_path = f'./data/submission{np.random.randint(1000, 2000)}.csv'   
    if  output_id: 
        output_path = f'./data/submission{output_id}.csv'
    Offer_IDs_test = json.load(open(offer_id_map_path, 'r'))
    sample_submission = pd.read_csv('./lentahack/20210521_sample_submission.csv')
    subm_offer_id_order = list(sample_submission['Offer_ID'])

    # check that we have correct Offer ID
    #set(subm_offer_id_order).difference(Offer_IDs_test)

    order_map = {k: v for v, k in enumerate(subm_offer_id_order)}

    submission = [None]*len(order_map)
    for offer_id, pred in zip(Offer_IDs_test, y_pred): 
        submission[order_map[offer_id]] = [offer_id, pred]

    print('Printing to', output_path)     
    with open(output_path, 'w') as fout: 
        print('Offer_ID,UpLift', file=fout)
        for o_id, y in submission: 
            print(f'{o_id},{y}', file=fout)

In [6]:
PATH = 'train_as_is.csv'
PATH_TEST = 'test_as_is.csv'
X_TRAIN, Y_TRAIN = load_dataset(PATH)
MODEL = LGBMRegressor()
GRID_PARAMS = _
PARAMS = _
MODEL_NAME = 'model_1'

In [2]:
X_tr, y_tr = load_dataset(PATH)

In [9]:
X_test = pd.read_csv('test_as_is.csv')

In [5]:
model = Pipeline([('onehot', OneHotEncoder(handle_unknown='ignore')), 
                  ("model", MODEL) 
                  ])

In [6]:
model.fit(X_tr, y_tr)

Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore')),
                ('model', LGBMRegressor())])

In [7]:
# checking right on the Training set 
y_pred =  model.predict(X_tr)
mean_absolute_error(y_tr, y_pred)

3.397615029802757

In [10]:
y_pred = model.predict(X_test)

In [13]:
len(y_pred), y_pred[:10]

(149,
 array([ 3.40316398,  5.35092376,  7.51223333, 10.14554485,  3.63715244,
         6.10871633,  7.56703453,  9.08632804, 10.60949484,  5.35092376]))

## 1. Feat Set: Category Counts

In [12]:
!ls data

test_Offer_ID_map_category_ct.json  test_category_ct.csv  train_category_ct.csv


In [8]:
PATH_TR = './data/train_category_ct.csv'
PATH_TEST = './data/test_category_ct.csv'
X_all = pd.read_csv(PATH_TR)
X_tr_orig, y_tr_orig = load_dataset(PATH_TR)
X_test = load_dataset(PATH_TEST, True)

X_train, X_val, y_train, y_val = train_test_split(X_tr_orig, y_tr_orig, test_size=0.2)

MODEL = LGBMRegressor()
GRID_PARAMS = _
PARAMS = _
MODEL_NAME = 'model_LGBMRegressor'

## 2. Feat Set: Other Offers

In [34]:
!ls data

off_to_skus_map.json	      submission5.csv
offers_df_agg.csv	      submission6.csv
offers_df_agg.json	      test_Offer_ID_map_category_ct.json
offers_df_agg_sales_feat.csv  test_category_ct.csv
submission1.csv		      test_other_ofrs.csv
submission1hr.csv	      test_sales_feats.csv
submission2.csv		      train_category_ct.csv
submission3.csv		      train_other_ofrs.csv
submission4.csv		      train_sales_feats.csv


## 3 Feat Set: Sales Nums

In [35]:
PATH_TR = './data/train_sales_feats.csv'
PATH_TEST = './data/test_sales_feats.csv'
X_all = pd.read_csv(PATH_TR)
X_tr_orig, y_tr_orig = load_dataset(PATH_TR)
X_test = load_dataset(PATH_TEST, True)

In [36]:
cols_to_exclude = ['num_other_promos', 'num_skus_in_other_promos']
X_tr_orig = X_tr_orig.drop(cols_to_exclude, axis=1)
X_test = X_test.drop(cols_to_exclude, axis=1)

In [38]:
X_train, X_val, y_train, y_val = train_test_split(X_tr_orig, y_tr_orig, test_size=0.2)

## 4 Feat Set: Sales Nums w Diff

In [40]:
ls data

off_to_skus_map.json                 submission6.csv
offers_df_agg.csv                    test_Offer_ID_map_category_ct.json
offers_df_agg.json                   test_category_ct.csv
offers_df_agg_sales_feat.csv         test_other_ofrs.csv
offers_df_agg_sales_feat_w_diff.csv  test_sales_feats.csv
submission1.csv                      test_sales_feats_w_diff.csv
submission1hr.csv                    train_category_ct.csv
submission2.csv                      train_other_ofrs.csv
submission3.csv                      train_sales_feats.csv
submission4.csv                      train_sales_feats_w_diff.csv
submission5.csv


In [145]:
PATH_TR = './data/train_sales_feats_w_diff.csv'
PATH_TEST = './data/test_sales_feats_w_diff.csv'
X_all = pd.read_csv(PATH_TR)
X_tr_orig, y_tr_orig = load_dataset(PATH_TR)

In [146]:
X_tr_orig.drop(['pred_start_date', 'pred_end_date'], inplace=True, axis=1)

In [147]:
X_test = load_dataset(PATH_TEST, True)
X_test.drop(['pred_start_date', 'pred_end_date'], inplace=True, axis=1)

In [148]:
X_train, X_val, y_train, y_val = train_test_split(X_tr_orig, y_tr_orig, test_size=0.2)

In [90]:
num_cols = X_train.columns[-15:]

In [None]:
cols_to_exclude = ['num_other_promos', 'num_skus_in_other_promos']


X_tr_orig = X_tr_orig.drop(cols_to_exclude, axis=1)
X_test = X_test.drop(cols_to_exclude, axis=1)

## Model Zoo

In [171]:
regressors = [LGBMRegressor(), GradientBoostingRegressor(), AdaBoostRegressor(), XGBRegressor(), Lasso(), Ridge(), 
             CatBoostRegressor(silent=True)]

In [102]:
model0 = build_model(LGBMRegressor())
model1 = build_model(GradientBoostingRegressor()) 
model2 = build_model(AdaBoostRegressor()) 
model3 = build_model(XGBRegressor()) 
model4 = build_model(Lasso()) 
model5 = build_model(CatBoostRegressor(silent=True)) 

In [103]:
models = [model0, model1, model2, model3, model4, model5]

In [105]:
scaler = StandardScaler()

In [104]:
# validation 
for m in [model0, model1, model3, model5]: 
    m.fit(X_train, y_train)
    y_pred_val = m.predict(X_val)
    y_pred_tr = m.predict(X_train)
    print(m["model"])
    print(mean_absolute_error(y_val, y_pred_val), mean_absolute_error(y_train, y_pred_tr)) 

LGBMRegressor()
3.9571454947326212 2.517985898171956
GradientBoostingRegressor()
4.1053911447005556 2.6183487513269044
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=48, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)
3.8755670332782994 0.912776474861224
<catboost.core.CatBoostRegressor object at 0x7f78d6889e50>
3.8257309888659323 1.9794538236604111


In [144]:
model5.fit(X_tr_orig, y_tr_orig)
y_pred = model5.predict(X_test)

submission_prep(y_pred, './data/test_Offer_ID_map_category_ct.json', '_diff')

Printing to ./data/submission_diff.csv


In [49]:
#model1.fit(X_tr_orig, y_tr_orig)
y_pred = model1.predict(X_test)

submission_prep(y_pred, './data/test_Offer_ID_map_category_ct.json', '_diffm1')

Printing to ./data/submission_diffm1.csv


### Model Zoo w/ Scaler

In [95]:
%who

AdaBoostRegressor	 CatBoostRegressor	 ColumnTransformer	 DecisionTreeRegressor	 ElasticNet	 GRID_PARAMS	 GradientBoostingRegressor	 GridSearchCV	 KNeighborsRegressor	 
LGBMRegressor	 Lasso	 LinearRegression	 MODEL	 MODEL_NAME	 OneHotEncoder	 PARAMS	 PATH	 PATH_TEST	 
PATH_TR	 Pipeline	 Pool	 RandomForestRegressor	 Ridge	 SVR	 SimpleImputer	 StandardScaler	 XGBRegressor	 
X_TRAIN	 X_all	 X_test	 X_test_	 X_tr_orig	 X_tr_orig_	 X_train	 X_train_	 X_val	 
X_val_	 Y_TRAIN	 accuracy_score	 build_model	 build_model2	 cat_attribs	 cat_attributes	 cols_to_exclude	 joblib	 
json	 load_dataset	 m	 mean_absolute_error	 model0	 model1	 model2	 model3	 model4	 
model5	 models	 models2	 np	 num_attribs	 num_attributes	 num_cols	 os	 pd	 
regressors	 submission_prep	 test_model	 train_model	 train_test_split	 tune_model	 y_pred	 y_pred_tr	 y_pred_val	 
y_tr_orig	 y_train	 y_train_	 y_val	 y_val_	 


In [117]:
X_train.shape

(592, 2627)

In [118]:
numerical_cols = X_train.select_dtypes(exclude = ['object']).columns
categorical_cols = X_train.select_dtypes(include = ['object'] ).columns
len(numerical_cols) + len(categorical_cols)

2627

In [120]:
categorical_cols

Index(['Promo_type', 'start_date', 'end_date', 'duration'], dtype='object')

In [167]:
def build_model22(mdl, num_cols, cat_cols):
    
    
    num_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy="constant", fill_value=0)),
        ('std_scaler', StandardScaler()),
    ])

    
    categorical_transformer_simple = Pipeline(steps=[
    ('imputer', SimpleImputer(missing_values=np.nan,strategy = 'most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))   
     ])
    
    data_transformer_simple = ColumnTransformer(
            transformers=[
                ('num', num_transformer, num_cols),
                ('cat', categorical_transformer_simple, cat_cols)
            ])
    
    model = Pipeline([
                      ('preproc', data_transformer_simple),  
                      #('onehot', OneHotEncoder(handle_unknown='ignore')),   
                      ("model", mdl)])
    return model

In [153]:
numerical_cols = X_train.select_dtypes(exclude = ['object']).columns
categorical_cols = X_train.select_dtypes(include = ['object'] ).columns

categorical_cols, len(numerical_cols) + len(categorical_cols)

(Index(['Promo_type', 'start_date', 'end_date', 'duration'], dtype='object'),
 2627)

In [168]:
m = build_model22(LGBMRegressor(), numerical_cols, categorical_cols)

In [169]:
m.fit(X_train, y_train)

Pipeline(steps=[('preproc',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value=0,
                                                                                 strategy='constant')),
                                                                  ('std_scaler',
                                                                   StandardScaler())]),
                                                  Index(['num_skus', 'num_hl1', 'num_hl2', 'num_hl3', 'num_hl4', 'hl_1_0',
       'hl_1_1', 'hl_2_0', 'hl_2_1', 'hl_2_2',
       ...
       'avg_min_suppl_pred', 'avg_mean_sales_pred', 'avg_max_sales_pred',
       'avg...vg_sls',
       'diff_min_avg_nums', 'diff_max_avg_nums', 'diff_min_avg_suppl',
       'diff_max_avg_suppl'],
      dtype='object', length=2623)),
                                                 ('ca

In [172]:
models22 = [build_model22(r, numerical_cols, categorical_cols) for r in regressors]
len(models22)

7

In [173]:
# validation 
for m in models22: 
    m.fit(X_train, y_train)
    y_pred_val = m.predict(X_val)
    y_pred_tr = m.predict(X_train)
    print(m["model"])
    print(mean_absolute_error(y_val, y_pred_val), mean_absolute_error(y_train, y_pred_tr)) 

LGBMRegressor()
3.743611086050003 1.4077008952577343
GradientBoostingRegressor()
3.894492419917168 2.1840407505706505
AdaBoostRegressor()
4.825455368892371 4.484143181350597
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=48, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)
3.5440416968676907 0.5735012002660322
Lasso()
4.520680638873011 4.553849519984007
Ridge()
5.147343451436054 1.8567322668264303
<catboost.core.CatBoostRegressor object at 0x7f78d62369d0>
3.649157701229511 1.5364111600068453


In [176]:
models22[6]["model"]

<catboost.core.CatBoostRegressor at 0x7f78d62369d0>

In [178]:
models22[3].fit(X_tr_orig, y_tr_orig)
y_pred = models22[3].predict(X_test)
submission_prep(y_pred, './data/test_Offer_ID_map_category_ct.json', 14)

Printing to ./data/submission14.csv


In [177]:
models22[6].fit(X_tr_orig, y_tr_orig)
y_pred = models22[6].predict(X_test)
submission_prep(y_pred, './data/test_Offer_ID_map_category_ct.json', 13)

Printing to ./data/submission13.csv


In [98]:
models2 = [build_model22(r, num_cols) for r in regressors]

In [99]:
models2[0]["model"]

LGBMRegressor()

In [73]:
StandardScaler?

In [101]:
type(X_train)

pandas.core.frame.DataFrame

In [100]:
# validation 
for m in models2[:1]: 
    m.fit(X_train, y_train)
    y_pred_val = m.predict(X_val)
    y_pred_tr = m.predict(X_train)
    print(m["model"])
    print(mean_absolute_error(y_val, y_pred_val), mean_absolute_error(y_train, y_pred_tr)) 

ValueError: Specifying the columns using strings is only supported for pandas DataFrames

##### Comparing w/o Offer cols

In [44]:
cols_to_exclude = ['num_other_promos', 'num_skus_in_other_promos']
X_tr_orig_ = X_tr_orig.drop(cols_to_exclude, axis=1)
X_test_ = X_test.drop(cols_to_exclude, axis=1)
X_train_, X_val_, y_train_, y_val_ = train_test_split(X_tr_orig_, y_tr_orig, test_size=0.2)

In [46]:
# validation 
for m in [model0, model1, model3, model5]: 
    m.fit(X_train_, y_train)
    y_pred_val = m.predict(X_val_)
    y_pred_tr = m.predict(X_train_)
    print(m["model"])
    print(mean_absolute_error(y_val, y_pred_val), mean_absolute_error(y_train, y_pred_tr)) 

LGBMRegressor()
4.679130404095676 3.63190277161753
GradientBoostingRegressor()
4.390980488565595 3.8180816025355515
XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=48, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)
4.587960878332384 2.0423967630515962
<catboost.core.CatBoostRegressor object at 0x7f78d623a1c0>
4.462837288619407 3.187734691125384


In [27]:
model5.fit(X_tr_orig, y_tr_orig)
y_pred = model5.predict(X_test)

submission_prep(y_pred, './data/test_Offer_ID_map_category_ct.json', 6)

Printing to ./data/submission3.csv


In [17]:
model5 = build_model(CatBoostRegressor())

In [19]:
# 3.811659175054392 2.3031921382709273
for m in [model5]:
    m.fit(X_train, y_train)
    y_pred_val = m.predict(X_val)
    y_pred_tr = m.predict(X_train)
    print(m["model"])
    print(mean_absolute_error(y_val, y_pred_val), mean_absolute_error(y_train, y_pred_tr)) 

0:	learn: 6.1758624	total: 92.5ms	remaining: 1m 32s
1:	learn: 6.1576197	total: 113ms	remaining: 56.4s
2:	learn: 6.1328365	total: 131ms	remaining: 43.5s
3:	learn: 6.1108295	total: 149ms	remaining: 37.2s
4:	learn: 6.0920717	total: 167ms	remaining: 33.1s
5:	learn: 6.0624676	total: 185ms	remaining: 30.7s
6:	learn: 6.0372404	total: 215ms	remaining: 30.5s
7:	learn: 6.0147230	total: 234ms	remaining: 29s
8:	learn: 5.9790275	total: 255ms	remaining: 28.1s
9:	learn: 5.9542382	total: 273ms	remaining: 27.1s
10:	learn: 5.9244557	total: 294ms	remaining: 26.4s
11:	learn: 5.9078697	total: 313ms	remaining: 25.8s
12:	learn: 5.8822876	total: 332ms	remaining: 25.2s
13:	learn: 5.8557884	total: 354ms	remaining: 24.9s
14:	learn: 5.8324648	total: 373ms	remaining: 24.5s
15:	learn: 5.8146196	total: 395ms	remaining: 24.3s
16:	learn: 5.8049613	total: 408ms	remaining: 23.6s
17:	learn: 5.7908378	total: 426ms	remaining: 23.2s
18:	learn: 5.7776997	total: 444ms	remaining: 22.9s
19:	learn: 5.7605040	total: 462ms	remaini

161:	learn: 4.9722370	total: 3.15s	remaining: 16.3s
162:	learn: 4.9710083	total: 3.17s	remaining: 16.3s
163:	learn: 4.9686692	total: 3.18s	remaining: 16.2s
164:	learn: 4.9632356	total: 3.2s	remaining: 16.2s
165:	learn: 4.9611832	total: 3.22s	remaining: 16.2s
166:	learn: 4.9597306	total: 3.24s	remaining: 16.1s
167:	learn: 4.9573388	total: 3.26s	remaining: 16.1s
168:	learn: 4.9543221	total: 3.27s	remaining: 16.1s
169:	learn: 4.9522510	total: 3.29s	remaining: 16.1s
170:	learn: 4.9492245	total: 3.31s	remaining: 16.1s
171:	learn: 4.9461592	total: 3.33s	remaining: 16s
172:	learn: 4.9411637	total: 3.35s	remaining: 16s
173:	learn: 4.9355369	total: 3.36s	remaining: 16s
174:	learn: 4.9312810	total: 3.38s	remaining: 15.9s
175:	learn: 4.9291353	total: 3.4s	remaining: 15.9s
176:	learn: 4.9257741	total: 3.42s	remaining: 15.9s
177:	learn: 4.9229819	total: 3.44s	remaining: 15.9s
178:	learn: 4.9203707	total: 3.45s	remaining: 15.8s
179:	learn: 4.9161624	total: 3.47s	remaining: 15.8s
180:	learn: 4.914093

323:	learn: 4.3325037	total: 6.24s	remaining: 13s
324:	learn: 4.3304324	total: 6.26s	remaining: 13s
325:	learn: 4.3267338	total: 6.28s	remaining: 13s
326:	learn: 4.3237405	total: 6.3s	remaining: 13s
327:	learn: 4.3212646	total: 6.32s	remaining: 12.9s
328:	learn: 4.3142313	total: 6.34s	remaining: 12.9s
329:	learn: 4.3109095	total: 6.36s	remaining: 12.9s
330:	learn: 4.3090450	total: 6.37s	remaining: 12.9s
331:	learn: 4.3069009	total: 6.39s	remaining: 12.9s
332:	learn: 4.3035718	total: 6.42s	remaining: 12.9s
333:	learn: 4.2974702	total: 6.44s	remaining: 12.8s
334:	learn: 4.2948428	total: 6.46s	remaining: 12.8s
335:	learn: 4.2909365	total: 6.48s	remaining: 12.8s
336:	learn: 4.2887191	total: 6.5s	remaining: 12.8s
337:	learn: 4.2803970	total: 6.52s	remaining: 12.8s
338:	learn: 4.2771583	total: 6.54s	remaining: 12.8s
339:	learn: 4.2747972	total: 6.56s	remaining: 12.7s
340:	learn: 4.2685177	total: 6.58s	remaining: 12.7s
341:	learn: 4.2662482	total: 6.59s	remaining: 12.7s
342:	learn: 4.2595730	

484:	learn: 3.8323309	total: 9.29s	remaining: 9.86s
485:	learn: 3.8308798	total: 9.31s	remaining: 9.84s
486:	learn: 3.8292590	total: 9.32s	remaining: 9.82s
487:	learn: 3.8248718	total: 9.34s	remaining: 9.8s
488:	learn: 3.8233213	total: 9.36s	remaining: 9.78s
489:	learn: 3.8217227	total: 9.38s	remaining: 9.76s
490:	learn: 3.8203734	total: 9.41s	remaining: 9.76s
491:	learn: 3.8190402	total: 9.43s	remaining: 9.74s
492:	learn: 3.8149633	total: 9.45s	remaining: 9.72s
493:	learn: 3.8130220	total: 9.47s	remaining: 9.7s
494:	learn: 3.8109092	total: 9.49s	remaining: 9.68s
495:	learn: 3.8039483	total: 9.51s	remaining: 9.66s
496:	learn: 3.8024514	total: 9.53s	remaining: 9.64s
497:	learn: 3.8012618	total: 9.55s	remaining: 9.63s
498:	learn: 3.7998723	total: 9.57s	remaining: 9.6s
499:	learn: 3.7985118	total: 9.59s	remaining: 9.59s
500:	learn: 3.7973131	total: 9.6s	remaining: 9.56s
501:	learn: 3.7960251	total: 9.62s	remaining: 9.54s
502:	learn: 3.7939470	total: 9.64s	remaining: 9.52s
503:	learn: 3.78

652:	learn: 3.4602599	total: 12.6s	remaining: 6.68s
653:	learn: 3.4587059	total: 12.6s	remaining: 6.66s
654:	learn: 3.4575096	total: 12.6s	remaining: 6.65s
655:	learn: 3.4567297	total: 12.6s	remaining: 6.63s
656:	learn: 3.4554353	total: 12.7s	remaining: 6.61s
657:	learn: 3.4522174	total: 12.7s	remaining: 6.59s
658:	learn: 3.4494032	total: 12.7s	remaining: 6.57s
659:	learn: 3.4485826	total: 12.7s	remaining: 6.55s
660:	learn: 3.4473540	total: 12.7s	remaining: 6.53s
661:	learn: 3.4462723	total: 12.8s	remaining: 6.51s
662:	learn: 3.4451776	total: 12.8s	remaining: 6.49s
663:	learn: 3.4426525	total: 12.8s	remaining: 6.47s
664:	learn: 3.4414417	total: 12.8s	remaining: 6.45s
665:	learn: 3.4401614	total: 12.8s	remaining: 6.43s
666:	learn: 3.4363732	total: 12.8s	remaining: 6.41s
667:	learn: 3.4334660	total: 12.9s	remaining: 6.39s
668:	learn: 3.4294049	total: 12.9s	remaining: 6.37s
669:	learn: 3.4281966	total: 12.9s	remaining: 6.36s
670:	learn: 3.4269955	total: 12.9s	remaining: 6.34s
671:	learn: 

812:	learn: 3.1713894	total: 15.7s	remaining: 3.6s
813:	learn: 3.1704300	total: 15.7s	remaining: 3.58s
814:	learn: 3.1687605	total: 15.7s	remaining: 3.56s
815:	learn: 3.1658346	total: 15.7s	remaining: 3.55s
816:	learn: 3.1642042	total: 15.7s	remaining: 3.53s
817:	learn: 3.1632726	total: 15.8s	remaining: 3.51s
818:	learn: 3.1622311	total: 15.8s	remaining: 3.49s
819:	learn: 3.1613433	total: 15.8s	remaining: 3.47s
820:	learn: 3.1604024	total: 15.8s	remaining: 3.45s
821:	learn: 3.1595224	total: 15.9s	remaining: 3.43s
822:	learn: 3.1587024	total: 15.9s	remaining: 3.41s
823:	learn: 3.1568395	total: 15.9s	remaining: 3.39s
824:	learn: 3.1560174	total: 15.9s	remaining: 3.37s
825:	learn: 3.1536607	total: 15.9s	remaining: 3.35s
826:	learn: 3.1516116	total: 15.9s	remaining: 3.33s
827:	learn: 3.1504737	total: 16s	remaining: 3.32s
828:	learn: 3.1478959	total: 16s	remaining: 3.3s
829:	learn: 3.1460875	total: 16s	remaining: 3.28s
830:	learn: 3.1451508	total: 16s	remaining: 3.26s
831:	learn: 3.1416018	

973:	learn: 2.9329928	total: 18.7s	remaining: 499ms
974:	learn: 2.9322946	total: 18.7s	remaining: 480ms
975:	learn: 2.9302711	total: 18.7s	remaining: 461ms
976:	learn: 2.9295316	total: 18.7s	remaining: 441ms
977:	learn: 2.9267784	total: 18.8s	remaining: 422ms
978:	learn: 2.9218663	total: 18.8s	remaining: 403ms
979:	learn: 2.9194359	total: 18.8s	remaining: 384ms
980:	learn: 2.9186685	total: 18.8s	remaining: 365ms
981:	learn: 2.9171746	total: 18.9s	remaining: 346ms
982:	learn: 2.9165126	total: 18.9s	remaining: 326ms
983:	learn: 2.9154535	total: 18.9s	remaining: 307ms
984:	learn: 2.9147754	total: 18.9s	remaining: 288ms
985:	learn: 2.9140172	total: 18.9s	remaining: 269ms
986:	learn: 2.9124259	total: 18.9s	remaining: 249ms
987:	learn: 2.9109664	total: 19s	remaining: 230ms
988:	learn: 2.9102588	total: 19s	remaining: 211ms
989:	learn: 2.9096216	total: 19s	remaining: 192ms
990:	learn: 2.9089977	total: 19s	remaining: 173ms
991:	learn: 2.9083544	total: 19s	remaining: 154ms
992:	learn: 2.9052434	

In [20]:
model5.fit(X_tr_orig, y_tr_orig)
y_pred = model5.predict(X_test)

submission_prep(y_pred, './data/test_Offer_ID_map_category_ct.json', 2)

0:	learn: 6.1985637	total: 24.4ms	remaining: 24.4s
1:	learn: 6.1645846	total: 44.2ms	remaining: 22.1s
2:	learn: 6.1333526	total: 65.2ms	remaining: 21.7s
3:	learn: 6.1075089	total: 85.4ms	remaining: 21.3s
4:	learn: 6.0826778	total: 106ms	remaining: 21.2s
5:	learn: 6.0595746	total: 127ms	remaining: 21.1s
6:	learn: 6.0438931	total: 147ms	remaining: 20.9s
7:	learn: 6.0262608	total: 167ms	remaining: 20.8s
8:	learn: 6.0030375	total: 188ms	remaining: 20.7s
9:	learn: 5.9781764	total: 208ms	remaining: 20.6s
10:	learn: 5.9636929	total: 227ms	remaining: 20.4s
11:	learn: 5.9429969	total: 246ms	remaining: 20.3s
12:	learn: 5.9199570	total: 266ms	remaining: 20.2s
13:	learn: 5.9052667	total: 285ms	remaining: 20.1s
14:	learn: 5.8885325	total: 305ms	remaining: 20s
15:	learn: 5.8748040	total: 325ms	remaining: 20s
16:	learn: 5.8617744	total: 344ms	remaining: 19.9s
17:	learn: 5.8462742	total: 363ms	remaining: 19.8s
18:	learn: 5.8284631	total: 382ms	remaining: 19.7s
19:	learn: 5.8109545	total: 401ms	remaini

164:	learn: 5.0932590	total: 3.35s	remaining: 16.9s
165:	learn: 5.0887275	total: 3.37s	remaining: 16.9s
166:	learn: 5.0872006	total: 3.38s	remaining: 16.9s
167:	learn: 5.0845746	total: 3.4s	remaining: 16.9s
168:	learn: 5.0829105	total: 3.42s	remaining: 16.8s
169:	learn: 5.0783074	total: 3.44s	remaining: 16.8s
170:	learn: 5.0760812	total: 3.46s	remaining: 16.8s
171:	learn: 5.0722396	total: 3.48s	remaining: 16.8s
172:	learn: 5.0693403	total: 3.5s	remaining: 16.7s
173:	learn: 5.0665436	total: 3.52s	remaining: 16.7s
174:	learn: 5.0644848	total: 3.54s	remaining: 16.7s
175:	learn: 5.0616726	total: 3.56s	remaining: 16.7s
176:	learn: 5.0592746	total: 3.58s	remaining: 16.6s
177:	learn: 5.0547600	total: 3.6s	remaining: 16.6s
178:	learn: 5.0526433	total: 3.62s	remaining: 16.6s
179:	learn: 5.0445480	total: 3.64s	remaining: 16.6s
180:	learn: 5.0422819	total: 3.65s	remaining: 16.5s
181:	learn: 5.0383196	total: 3.67s	remaining: 16.5s
182:	learn: 5.0343657	total: 3.69s	remaining: 16.5s
183:	learn: 5.0

330:	learn: 4.4828272	total: 6.65s	remaining: 13.4s
331:	learn: 4.4774296	total: 6.67s	remaining: 13.4s
332:	learn: 4.4757664	total: 6.69s	remaining: 13.4s
333:	learn: 4.4739403	total: 6.71s	remaining: 13.4s
334:	learn: 4.4691940	total: 6.74s	remaining: 13.4s
335:	learn: 4.4674352	total: 6.75s	remaining: 13.3s
336:	learn: 4.4617976	total: 6.78s	remaining: 13.3s
337:	learn: 4.4580522	total: 6.8s	remaining: 13.3s
338:	learn: 4.4562989	total: 6.82s	remaining: 13.3s
339:	learn: 4.4497713	total: 6.85s	remaining: 13.3s
340:	learn: 4.4480670	total: 6.87s	remaining: 13.3s
341:	learn: 4.4440380	total: 6.89s	remaining: 13.3s
342:	learn: 4.4417786	total: 6.91s	remaining: 13.2s
343:	learn: 4.4399308	total: 6.93s	remaining: 13.2s
344:	learn: 4.4364113	total: 6.96s	remaining: 13.2s
345:	learn: 4.4345659	total: 6.98s	remaining: 13.2s
346:	learn: 4.4291715	total: 7s	remaining: 13.2s
347:	learn: 4.4249032	total: 7.02s	remaining: 13.1s
348:	learn: 4.4182307	total: 7.04s	remaining: 13.1s
349:	learn: 4.41

494:	learn: 3.9986211	total: 9.97s	remaining: 10.2s
495:	learn: 3.9973073	total: 9.99s	remaining: 10.2s
496:	learn: 3.9952647	total: 10s	remaining: 10.1s
497:	learn: 3.9907544	total: 10s	remaining: 10.1s
498:	learn: 3.9884649	total: 10.1s	remaining: 10.1s
499:	learn: 3.9845944	total: 10.1s	remaining: 10.1s
500:	learn: 3.9827953	total: 10.1s	remaining: 10.1s
501:	learn: 3.9760769	total: 10.1s	remaining: 10s
502:	learn: 3.9718832	total: 10.1s	remaining: 10s
503:	learn: 3.9706264	total: 10.2s	remaining: 10s
504:	learn: 3.9694405	total: 10.2s	remaining: 9.98s
505:	learn: 3.9673422	total: 10.2s	remaining: 9.97s
506:	learn: 3.9631675	total: 10.2s	remaining: 9.95s
507:	learn: 3.9618013	total: 10.3s	remaining: 9.93s
508:	learn: 3.9599242	total: 10.3s	remaining: 9.91s
509:	learn: 3.9585383	total: 10.3s	remaining: 9.89s
510:	learn: 3.9573263	total: 10.3s	remaining: 9.87s
511:	learn: 3.9537031	total: 10.3s	remaining: 9.85s
512:	learn: 3.9487599	total: 10.4s	remaining: 9.83s
513:	learn: 3.9461586	

653:	learn: 3.6210361	total: 13.2s	remaining: 7.01s
654:	learn: 3.6178263	total: 13.3s	remaining: 6.99s
655:	learn: 3.6168861	total: 13.3s	remaining: 6.97s
656:	learn: 3.6146866	total: 13.3s	remaining: 6.95s
657:	learn: 3.6129900	total: 13.3s	remaining: 6.93s
658:	learn: 3.6120439	total: 13.3s	remaining: 6.91s
659:	learn: 3.6087644	total: 13.4s	remaining: 6.89s
660:	learn: 3.6066369	total: 13.4s	remaining: 6.87s
661:	learn: 3.6056932	total: 13.4s	remaining: 6.85s
662:	learn: 3.6030920	total: 13.4s	remaining: 6.83s
663:	learn: 3.6006722	total: 13.5s	remaining: 6.81s
664:	learn: 3.5996398	total: 13.5s	remaining: 6.79s
665:	learn: 3.5976953	total: 13.5s	remaining: 6.77s
666:	learn: 3.5947307	total: 13.5s	remaining: 6.75s
667:	learn: 3.5919593	total: 13.5s	remaining: 6.73s
668:	learn: 3.5909356	total: 13.6s	remaining: 6.71s
669:	learn: 3.5889982	total: 13.6s	remaining: 6.68s
670:	learn: 3.5879548	total: 13.6s	remaining: 6.66s
671:	learn: 3.5870475	total: 13.6s	remaining: 6.64s
672:	learn: 

814:	learn: 3.3327079	total: 16.5s	remaining: 3.74s
815:	learn: 3.3300998	total: 16.5s	remaining: 3.71s
816:	learn: 3.3293081	total: 16.5s	remaining: 3.69s
817:	learn: 3.3285004	total: 16.5s	remaining: 3.67s
818:	learn: 3.3269068	total: 16.5s	remaining: 3.65s
819:	learn: 3.3240712	total: 16.6s	remaining: 3.63s
820:	learn: 3.3233403	total: 16.6s	remaining: 3.61s
821:	learn: 3.3224466	total: 16.6s	remaining: 3.59s
822:	learn: 3.3201985	total: 16.6s	remaining: 3.57s
823:	learn: 3.3193253	total: 16.6s	remaining: 3.55s
824:	learn: 3.3185353	total: 16.7s	remaining: 3.54s
825:	learn: 3.3170435	total: 16.7s	remaining: 3.52s
826:	learn: 3.3161837	total: 16.7s	remaining: 3.5s
827:	learn: 3.3141645	total: 16.7s	remaining: 3.48s
828:	learn: 3.3131707	total: 16.8s	remaining: 3.46s
829:	learn: 3.3101415	total: 16.8s	remaining: 3.44s
830:	learn: 3.3086162	total: 16.8s	remaining: 3.42s
831:	learn: 3.3078326	total: 16.8s	remaining: 3.4s
832:	learn: 3.3056273	total: 16.8s	remaining: 3.38s
833:	learn: 3.

977:	learn: 3.1168652	total: 19.7s	remaining: 443ms
978:	learn: 3.1155455	total: 19.7s	remaining: 423ms
979:	learn: 3.1142392	total: 19.7s	remaining: 403ms
980:	learn: 3.1123989	total: 19.7s	remaining: 382ms
981:	learn: 3.1096071	total: 19.8s	remaining: 362ms
982:	learn: 3.1088510	total: 19.8s	remaining: 342ms
983:	learn: 3.1071210	total: 19.8s	remaining: 322ms
984:	learn: 3.1063953	total: 19.8s	remaining: 302ms
985:	learn: 3.1056633	total: 19.8s	remaining: 282ms
986:	learn: 3.1048319	total: 19.9s	remaining: 262ms
987:	learn: 3.1039078	total: 19.9s	remaining: 242ms
988:	learn: 3.1020947	total: 19.9s	remaining: 221ms
989:	learn: 3.1003240	total: 19.9s	remaining: 201ms
990:	learn: 3.0996112	total: 20s	remaining: 181ms
991:	learn: 3.0990203	total: 20s	remaining: 161ms
992:	learn: 3.0960313	total: 20s	remaining: 141ms
993:	learn: 3.0926516	total: 20s	remaining: 121ms
994:	learn: 3.0911472	total: 20s	remaining: 101ms
995:	learn: 3.0900284	total: 20s	remaining: 80.5ms
996:	learn: 3.0893017	t

## AzureML

In [25]:
from azureml.core.workspace import Workspace

In [21]:
!ls ./AzureML/configuration.yml

Dockerfiles  NBSETUP.md  configuration.ipynb  how-to-use-azureml  tutorials
LICENSE      README	 configuration.yml    index.md
Licenses     README.md	 contrib	      setup-environment


In [29]:
Workspace.create?

In [30]:
import os
from azureml.core.authentication import ServicePrincipalAuthentication

service_principal_password = os.environ.get("AZUREML_PASSWORD")

In [31]:
service_principal_password

In [32]:
ws = Workspace.create(name='hackpromoworkspace',
               subscription_id='5e81e9b4-742a-4c1e-bf1a-93f66c614eaa',
               resource_group='HackPromoLenta',
               create_resource_group=False,
               location='eastus2'
               )

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code C5P2GJ9Z9 to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Deploying AppInsights with name hackprominsights9bb3fb2e.
Deployed AppInsights with name hackprominsights9bb3fb2e. Took 3.86 seconds.
Deploying StorageAccount with name hackpromstoragec278afdea.
Deploying KeyVault with name hackpromkeyvault48315c33.
Deployed KeyVault with name hackpromkeyvault48315c33. Took 19.34 seconds.
Deployed StorageAccount with name hackpromstoragec278afdea. Took 25.45 seconds.
Deploying Workspace with name hackpromoworkspace.
Deployed Workspace with name hackpromoworkspace. Took 27.98 seconds.


In [34]:
import logging

automl_settings = {
    "iteration_timeout_minutes": 10,
    "experiment_timeout_hours": 0.3,
    "enable_early_stopping": True,
    "primary_metric": 'normalized_mean_absolute_error',
    "featurization": 'auto',
    "verbosity": logging.INFO,
    "n_cross_validations": 5
}

In [36]:
from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(task='regression',
                             debug_log='automated_ml_errors.log',
                             training_data=X_all,
                             label_column_name="UpLift",
                             **automl_settings)

In [18]:
from azureml.core.experiment import Experiment

In [37]:
experiment = Experiment(ws, "uplift-cat-feats-automl")
local_run = experiment.submit(automl_config, show_output=True)

ValidationException: ValidationException:
	Message: Install the required versions of packages using the requirements file. Requirements file location: /anaconda/envs/azureml_py36_automl/lib/python3.6/site-packages/azureml/automl/core/validated_linux_requirements.txt. Alternatively, use remote target to avoid dependency management. 
Package name/Required version/Installed version
azureml-train-automl-runtime/pandas<1.0.0,>=0.21.0/pandas 1.1.5
azureml-train-automl-runtime/jinja2<=2.11.2/Jinja2 2.11.3
azureml-automl-runtime/pandas<1.0.0,>=0.21.0/pandas 1.1.5
	InnerException: None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "Install the required versions of packages using the requirements file. Requirements file location: /anaconda/envs/azureml_py36_automl/lib/python3.6/site-packages/azureml/automl/core/validated_linux_requirements.txt. Alternatively, use remote target to avoid dependency management. \nPackage name/Required version/Installed version\nazureml-train-automl-runtime/pandas<1.0.0,>=0.21.0/pandas 1.1.5\nazureml-train-automl-runtime/jinja2<=2.11.2/Jinja2 2.11.3\nazureml-automl-runtime/pandas<1.0.0,>=0.21.0/pandas 1.1.5",
        "target": "azureml-train-automl-runtime,azureml-automl-runtime",
        "inner_error": {
            "code": "NotSupported",
            "inner_error": {
                "code": "IncompatibleOrMissingDependency"
            }
        },
        "reference_code": "435ab938-fd87-49bc-932e-6eec0d6aee4f"
    }
}