In [10]:
%load_ext autoreload
%autoreload 2

import os
import sys

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sklearn
import scipy
import altair as alt
from altair import datum
from tqdm.auto import tqdm, trange

from src.model import tscv
from src.feature_engineering import add_lagged_features

%run constants.py

%matplotlib inline
# Record the interpreter and key library versions alongside the results.
print("Versions:")
print(f"  Python: {sys.version}")
for module in (pd, np, sns, sklearn, alt):
    print(f"  {module.__name__}: {module.__version__}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Versions:
  Python: 3.8.2 (default, Jul 16 2020, 14:00:26) 
[GCC 9.3.0]
  pandas: 1.1.0
  numpy: 1.19.1
  seaborn: 0.10.1
  sklearn: 0.23.2
  altair: 4.1.0


Let's build the feature set by adding the shop item-category encoding we developed earlier to the training set as lagged features.

In [3]:
# Read the two processed inputs from PROCESSED_DATA_DIR (set by constants.py).
train_set = pd.read_parquet(
    os.path.join(PROCESSED_DATA_DIR, 'train-set.parquet')
)
shop_item_cat_encoding = pd.read_parquet(
    os.path.join(PROCESSED_DATA_DIR, 'shop-item-cat-encoding.parquet')
)

In [5]:
# Display the loaded encoding frame to inspect its columns before using it.
shop_item_cat_encoding

Unnamed: 0,shop_id,date_block_num,shop_item_cat_enc_0,shop_item_cat_enc_1,shop_item_cat_enc_2,shop_item_cat_enc_3,shop_item_cat_enc_4,shop_item_cat_enc_5,shop_item_cat_enc_6,shop_item_cat_enc_7,shop_item_cat_enc_8,shop_item_cat_enc_9,shop_item_cat_enc_10,shop_item_cat_enc_11,shop_item_cat_enc_12,shop_item_cat_enc_13,shop_item_cat_enc_14
0,0,0,0.025636,0.000000,0.034062,0.008426,0.161169,0.0,0.0,0.165471,0.002510,0.408032,0.012549,0.094658,0.045357,0.042130,0.000000
1,0,1,0.021707,0.000163,0.034764,0.009466,0.166476,0.0,0.0,0.150481,0.003754,0.392035,0.013547,0.105109,0.062510,0.039987,0.000000
2,1,0,0.037326,0.000000,0.038683,0.007805,0.168646,0.0,0.0,0.184595,0.003393,0.392263,0.000000,0.083135,0.045809,0.038344,0.000000
3,1,1,0.044887,0.000000,0.026159,0.009215,0.155172,0.0,0.0,0.181034,0.003864,0.379608,0.001486,0.087990,0.079667,0.030916,0.000000
4,2,0,0.028796,0.000000,0.057592,0.017452,0.209424,0.0,0.0,0.229494,0.002618,0.216405,0.023560,0.048866,0.089005,0.076789,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1581,59,29,0.016429,0.000000,0.016429,0.005476,0.213582,0.0,0.0,0.221249,0.016429,0.196057,0.004381,0.094195,0.205915,0.009858,0.000000
1582,59,30,0.036290,0.000000,0.012097,0.003024,0.183468,0.0,0.0,0.213710,0.014113,0.154234,0.008065,0.108871,0.241935,0.018145,0.006048
1583,59,31,0.021417,0.000000,0.020593,0.001647,0.254530,0.0,0.0,0.299835,0.009885,0.132619,0.011532,0.071664,0.164745,0.009885,0.001647
1584,59,32,0.020788,0.000000,0.022976,0.004376,0.256018,0.0,0.0,0.258206,0.008753,0.163020,0.014223,0.074398,0.138950,0.014223,0.024070


In [8]:
# Everything except the shop_id / date_block_num identifier columns is kept
# as a feature column.
feature_cols = [
    name
    for name in shop_item_cat_encoding.columns
    if name not in ('shop_id', 'date_block_num')
]


In [11]:
# Attach the encoding columns to the training rows with a single lag step.
# NOTE(review): presumably matched per shop_id against the previous
# date_block_num -- confirm against src.feature_engineering.
# The result is only displayed here, not assigned.
add_lagged_features(
    train_set,
    shop_item_cat_encoding,
    feature_cols,
    max_lag=1,
    index_cols=['shop_id'],
)

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




Unnamed: 0,item_id,shop_id,date_block_num,item_cnt_month,f__shop_item_cat_enc_0_1,f__shop_item_cat_enc_1_1,f__shop_item_cat_enc_2_1,f__shop_item_cat_enc_3_1,f__shop_item_cat_enc_4_1,f__shop_item_cat_enc_5_1,f__shop_item_cat_enc_6_1,f__shop_item_cat_enc_7_1,f__shop_item_cat_enc_8_1,f__shop_item_cat_enc_9_1,f__shop_item_cat_enc_10_1,f__shop_item_cat_enc_11_1,f__shop_item_cat_enc_12_1,f__shop_item_cat_enc_13_1,f__shop_item_cat_enc_14_1
0,5037,5,10,0.0,0.000719,0.0,0.025180,0.023741,0.147482,0.0,0.0,0.238129,0.004317,0.287770,0.000719,0.125180,0.141007,0.005755,0.0
1,5320,5,10,0.0,0.000719,0.0,0.025180,0.023741,0.147482,0.0,0.0,0.238129,0.004317,0.287770,0.000719,0.125180,0.141007,0.005755,0.0
2,5233,5,10,0.0,0.000719,0.0,0.025180,0.023741,0.147482,0.0,0.0,0.238129,0.004317,0.287770,0.000719,0.125180,0.141007,0.005755,0.0
3,5232,5,10,0.0,0.000719,0.0,0.025180,0.023741,0.147482,0.0,0.0,0.238129,0.004317,0.287770,0.000719,0.125180,0.141007,0.005755,0.0
4,5268,5,10,0.0,0.000719,0.0,0.025180,0.023741,0.147482,0.0,0.0,0.238129,0.004317,0.287770,0.000719,0.125180,0.141007,0.005755,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5140795,18454,45,33,1.0,0.009972,0.0,0.019943,0.004274,0.223647,0.0,0.0,0.126781,0.018519,0.180912,0.079772,0.133903,0.189459,0.012821,0.0
5140796,16188,45,33,0.0,0.009972,0.0,0.019943,0.004274,0.223647,0.0,0.0,0.126781,0.018519,0.180912,0.079772,0.133903,0.189459,0.012821,0.0
5140797,15757,45,33,0.0,0.009972,0.0,0.019943,0.004274,0.223647,0.0,0.0,0.126781,0.018519,0.180912,0.079772,0.133903,0.189459,0.012821,0.0
5140798,19648,45,33,0.0,0.009972,0.0,0.019943,0.004274,0.223647,0.0,0.0,0.126781,0.018519,0.180912,0.079772,0.133903,0.189459,0.012821,0.0
