In [1]:
from itertools import product
import pandas as pd
import numpy as np

from src.settings import RAW_PATH, PROCESSED_PATH, WINS_SHIFTS, ROLL_FUNCS

# Initial merge of `sales_train` and `items`

In [23]:
sales_train = pd.read_csv(RAW_PATH + 'sales_train.csv')
items = pd.read_csv(RAW_PATH + 'items.csv')[['item_id', 'item_category_id']]

In [24]:
sales_train.head()

Unnamed: 0,date,date_block_num,shop_id,item_id,item_price,item_cnt_day
0,02.01.2013,0,59,22154,999.0,1.0
1,03.01.2013,0,25,2552,899.0,1.0
2,05.01.2013,0,25,2552,899.0,-1.0
3,06.01.2013,0,25,2554,1709.05,1.0
4,15.01.2013,0,25,2555,1099.0,1.0


In [25]:
items.head()

Unnamed: 0,item_id,item_category_id
0,0,40
1,1,76
2,2,40
3,3,40
4,4,40


In [26]:
merged_df = sales_train.merge(items, how='left', on='item_id')

In [27]:
merged_df.isna().sum()

date                0
date_block_num      0
shop_id             0
item_id             0
item_price          0
item_cnt_day        0
item_category_id    0
dtype: int64

In [29]:
merged_df.to_parquet(PROCESSED_PATH + 'merged_train_df.parquet', index=False)

# Constructing target

This will be a simpler version of target where instead of rolling window of 30 days we will simply use the month id to sum aggregate number of items sold. We will calculate this for each shop and item, add missing months with 0 sales and shift the aggregate by 1 month.

In [261]:
merged_df = pd.read_parquet(PROCESSED_PATH + 'merged_train_df.parquet')

In [262]:
# creating groupping for particular month, shop and item
grouping_cols = ['shop_id', 'item_id', 'date_block_num']
target_df = merged_df[grouping_cols + ['item_cnt_day']].sort_values(grouping_cols)
target_df = target_df.groupby(grouping_cols)['item_cnt_day'].sum().reset_index() 

In [263]:
# creating dataframe where for each combination of shop and item every month is present
index_backbone = pd.DataFrame(product(
    range(target_df['shop_id'].min(), target_df['shop_id'].max()+1),
    range(target_df['item_id'].min(), target_df['item_id'].max()+1),
    range(target_df['date_block_num'].min(), target_df['date_block_num'].max()+1)
), columns = ['shop_id', 'item_id', 'date_block_num'])

In [264]:
extended_target_df = index_backbone.merge(target_df, how='left', on=grouping_cols).fillna(0)
extended_target_df = extended_target_df.sort_values(grouping_cols)

In [7]:
# grouping by shop_id and item_id and shifting by 1 row "into the future"
extended_target_df['target'] = extended_target_df.groupby(grouping_cols[:-1])['item_cnt_day'].shift(-1)

# leaving only rows with deals in current month as was in the dataset before expansion
shrinked_target_df = extended_target_df[extended_target_df['item_cnt_day'] > 0].reset_index(drop=True).fillna(0)
shrinked_target_df.head()

# target_df = target_df[((target_df['item_id']==5822) & (target_df['shop_id'] == 2) & \
#                        (target_df['date_block_num'] < 6)) | ((target_df['item_id']==100) & \
#                        (target_df['shop_id'] == 57))]

Unnamed: 0,shop_id,item_id,date_block_num,item_cnt_day,target
0,0,30,1,31.0,0.0
1,0,31,1,11.0,0.0
2,0,32,0,6.0,10.0
3,0,32,1,10.0,0.0
4,0,33,0,3.0,3.0


In [55]:
shrinked_target_df.shape

(1605626, 5)

In [10]:
shrinked_target_df.to_parquet(PROCESSED_PATH + 'target_df.parquet', index=False)

## Building features

## Features describing sales for `shop_id` and `date_block_num`

In [30]:
list(merged_df)

['date',
 'date_block_num',
 'shop_id',
 'item_id',
 'item_price',
 'item_cnt_day',
 'item_category_id']

For all feature construction we have to use expanded dataset as was built in the previous section to preserve the sequence of months. If some months are missing, than shifts and window aggregates would not be correct since we are not working with the datetime column here.

In [7]:
merged_df = pd.read_parquet(PROCESSED_PATH + 'merged_train_df.parquet')

# creating dataframe where for each combination of shop and item every month is present
shop_month_index_backbone = pd.DataFrame(product(
    range(merged_df['shop_id'].min(), merged_df['shop_id'].max()+1),
    range(merged_df['date_block_num'].min(), merged_df['date_block_num'].max()+1)
), columns = ['shop_id', 'date_block_num'])

### Counts of deals per month and per shop, lags, rolling aggregates

In [8]:
# adding deals count column
deals_cnt_df = merged_df[['date_block_num', 'shop_id', 'item_id']].reset_index(drop=True).copy()
group_cols = ['shop_id', 'date_block_num']
deals_cnt_df = deals_cnt_df.sort_values(group_cols).groupby(group_cols)\
        ['item_id'].count().reset_index().rename(columns={'item_id': 'deals_cnt'})
deals_cnt_df = shop_month_index_backbone.merge(deals_cnt_df, how='left', on=group_cols).fillna(0)

# calculating lags
deals_cnt_df = deals_cnt_df.set_index('shop_id')
for shift in WINS_SHIFTS:
    deals_cnt_df[f'deals_cnt_shift_{shift}'] = deals_cnt_df.groupby('shop_id')['deals_cnt'].shift(periods=shift, fill_value=0)
deals_cnt_df = deals_cnt_df.reset_index()

# calculating rolling window aggregates
deals_cnt_df = deals_cnt_df.sort_values(group_cols)
roll_funcs = ROLL_FUNCS
for func in roll_funcs:
    for win_len in WINS_SHIFTS:
        deals_cnt_df[f'deals_cnt_roll_{func}_{win_len}'] = deals_cnt_df.groupby('shop_id').rolling(win_len, min_periods=1)\
                .agg({'deals_cnt': func}).reset_index(drop=True).fillna(0)

In [6]:
deals_cnt_df

Unnamed: 0,shop_id,date_block_num,deals_cnt,deals_cnt_shift_2,deals_cnt_shift_6,deals_cnt_shift_12,deals_cnt_roll_sum_2,deals_cnt_roll_sum_6,deals_cnt_roll_sum_12,deals_cnt_roll_mean_2,...,deals_cnt_roll_mean_12,deals_cnt_roll_std_2,deals_cnt_roll_std_6,deals_cnt_roll_std_12,deals_cnt_roll_min_2,deals_cnt_roll_min_6,deals_cnt_roll_min_12,deals_cnt_roll_max_2,deals_cnt_roll_max_6,deals_cnt_roll_max_12
0,0,0,4793.0,0.0,0.0,0.0,4793.0,4793.0,4793.0,4793.0,...,4793.000000,0.000000,0.000000,0.000000,4793.0,4793.0,4793.0,4793.0,4793.0,4793.0
1,0,1,5064.0,0.0,0.0,0.0,9857.0,9857.0,9857.0,4928.5,...,4928.500000,191.625938,191.625938,191.625938,4793.0,4793.0,4793.0,5064.0,5064.0,5064.0
2,0,2,0.0,4793.0,0.0,0.0,5064.0,9857.0,9857.0,2532.0,...,3285.666667,3580.788740,2848.695198,2848.695198,0.0,0.0,0.0,5064.0,5064.0,5064.0
3,0,3,0.0,5064.0,0.0,0.0,0.0,9857.0,9857.0,0.0,...,2464.250000,0.000000,2847.620805,2847.620805,0.0,0.0,0.0,0.0,5064.0,5064.0
4,0,4,0.0,0.0,0.0,0.0,0.0,9857.0,9857.0,0.0,...,1971.400000,0.000000,2701.150459,2701.150459,0.0,0.0,0.0,0.0,5064.0,5064.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2035,59,29,836.0,792.0,1651.0,1013.0,1585.0,5427.0,12409.0,792.5,...,1034.083333,61.518290,133.824886,240.913964,749.0,749.0,749.0,836.0,1091.0,1651.0
2036,59,30,916.0,749.0,1091.0,974.0,1752.0,5252.0,12351.0,876.0,...,1029.250000,56.568542,99.791115,242.803372,836.0,749.0,749.0,916.0,1011.0,1651.0
2037,59,31,973.0,836.0,948.0,1127.0,1889.0,5277.0,12197.0,944.5,...,1016.416667,40.305087,103.870593,241.231861,916.0,749.0,749.0,973.0,1011.0,1651.0
2038,59,32,778.0,916.0,1011.0,941.0,1751.0,5044.0,12034.0,875.5,...,1002.833333,137.885822,87.071618,250.283778,778.0,749.0,749.0,973.0,973.0,1651.0


In [77]:
deals_cnt_df[((deals_cnt_df['shop_id'] == 9)) ]

Unnamed: 0,shop_id,date_block_num,deals_cnt
306,9,0,0.0
307,9,1,0.0
308,9,2,0.0
309,9,3,0.0
310,9,4,0.0
311,9,5,0.0
312,9,6,0.0
313,9,7,0.0
314,9,8,0.0
315,9,9,1488.0


In [539]:
deals_cnt_df[deals_cnt_df['shop_id']==59].head()

Unnamed: 0,shop_id,date_block_num,deals_cnt,deals_cnt_shift_1,deals_cnt_shift_3,deals_cnt_shift_7,deals_cnt_shift_12,deals_cnt_shift_20,deals_cnt_sum_3,deals_cnt_sum_6,...,deals_cnt_min_3,deals_cnt_min_6,deals_cnt_min_9,deals_cnt_min_15,deals_cnt_min_20,deals_cnt_max_3,deals_cnt_max_6,deals_cnt_max_9,deals_cnt_max_15,deals_cnt_max_20
1552,59,0,1847,0,0,0,0,0,1847.0,1847.0,...,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0
1553,59,1,1696,1847,0,0,0,0,3543.0,3543.0,...,1696.0,1696.0,1696.0,1696.0,1696.0,1847.0,1847.0,1847.0,1847.0,1847.0
1554,59,2,1743,1696,0,0,0,0,5286.0,5286.0,...,1696.0,1696.0,1696.0,1696.0,1696.0,1847.0,1847.0,1847.0,1847.0,1847.0
1555,59,3,1271,1743,1847,0,0,0,4710.0,6557.0,...,1271.0,1271.0,1271.0,1271.0,1271.0,1743.0,1847.0,1847.0,1847.0,1847.0
1556,59,4,1194,1271,1696,0,0,0,4208.0,7751.0,...,1194.0,1194.0,1194.0,1194.0,1194.0,1743.0,1847.0,1847.0,1847.0,1847.0


In [78]:
deals_cnt_df.shape

(2040, 3)

### Aggregates over prices per month and per shop, lags, window aggregates

In [9]:
# adding simple aggregates of prices over various deals
prices_df = merged_df.reset_index()[['shop_id', 'date_block_num', 'item_price']]
group_cols = ['shop_id', 'date_block_num']
simple_agg_funcs = ROLL_FUNCS
prices_df = prices_df.groupby(group_cols).agg({'item_price': simple_agg_funcs}).fillna(0)
prices_df.columns = ['_'.join(col) for col in prices_df.columns]
prices_df = prices_df.reset_index()
prices_df = shop_month_index_backbone.merge(prices_df, how='left').fillna(0)
simple_agg_cols = [f'item_price_{agg}' for agg in simple_agg_funcs]

# adding lags
prices_df = prices_df.sort_values(group_cols).set_index('shop_id')
for shift in WINS_SHIFTS:
    for col in simple_agg_cols:
        prices_df[f'{col}_shift_{shift}'] = prices_df.groupby('shop_id')[col].shift(periods=shift, fill_value=0)
prices_df = prices_df.reset_index()

# adding window aggregates
prices_df = prices_df.sort_values(group_cols)
roll_funcs = ROLL_FUNCS
cols_to_agg = ['item_price_sum', 'item_price_mean']

for func in roll_funcs:
    for win_len in WINS_SHIFTS:
        for col in cols_to_agg:
            prices_df[f'{col}_roll_{func}_{win_len}'] = prices_df.groupby('shop_id').rolling(win_len, min_periods=1)\
                    .agg({col: func}).reset_index(drop=True).fillna(0)

## Features describing sales for `shop_id`, `category_id` abd `date_block_num`

## Feature describing sales for `shop_id`, `item_id` and `date_block_num`

## Autoregression features

# Final merge

We are left joining all the feature datasets to the target dataset here

In [16]:
shrinked_target_df.head()

Unnamed: 0,shop_id,item_id,date_block_num,item_cnt_day,target
0,0,30,1,31.0,0.0
1,0,31,1,11.0,0.0
2,0,32,0,6.0,10.0
3,0,32,1,10.0,0.0
4,0,33,0,3.0,3.0


In [17]:
deals_cnt_df.head()


NameError: name 'deals_cnt_df' is not defined

In [18]:
prices_df.head()

Unnamed: 0,shop_id,date_block_num,item_price_sum,item_price_min,item_price_max,item_price_mean,item_price_std,item_price_sum_shift_2,item_price_min_shift_2,item_price_max_shift_2,...,item_price_sum_roll_min_6,item_price_mean_roll_min_6,item_price_sum_roll_min_12,item_price_mean_roll_min_12,item_price_sum_roll_max_2,item_price_mean_roll_max_2,item_price_sum_roll_max_6,item_price_mean_roll_max_6,item_price_sum_roll_max_12,item_price_mean_roll_max_12
0,0,0,2546339.0,14.0,13679.0,531.262049,1148.438481,0.0,0.0,0.0,...,2546339.0,531.262049,2546339.0,531.262049,2546339.0,531.262049,2546339.0,531.262049,2546339.0,531.262049
1,0,1,3007530.0,13.0,15653.0,593.904028,1327.146796,0.0,0.0,0.0,...,2546339.0,531.262049,2546339.0,531.262049,3007530.0,593.904028,3007530.0,593.904028,3007530.0,593.904028
2,0,2,0.0,0.0,0.0,0.0,0.0,2546339.0,14.0,13679.0,...,0.0,0.0,0.0,0.0,3007530.0,593.904028,3007530.0,593.904028,3007530.0,593.904028
3,0,3,0.0,0.0,0.0,0.0,0.0,3007530.0,13.0,15653.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3007530.0,593.904028,3007530.0,593.904028
4,0,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,3007530.0,593.904028,3007530.0,593.904028


In [20]:
fin_df = shrinked_target_df.merge(deals_cnt_df, how='left').merge(prices_df, how='left')

In [21]:
fin_df = fin_df.astype('float32')

In [22]:
fin_df.to_parquet(PROCESSED_PATH+'fin_training_df.parquet')

So we've created a dataset where for every month we have some descriptive features about sales and profits. We also have a target column that contains the number of items sold in the given shop. We can now train a model to predic number of items sold in the following month.

# Feature generation at inference time

Let's verify that we can merge it to the test set.

In [6]:
fin_df = pd.read_parquet(PROCESSED_PATH+'fin_training_df.parquet')

In [3]:
test_df = pd.read_csv(RAW_PATH + 'test.csv')

In [4]:
test_df

Unnamed: 0,ID,shop_id,item_id
0,0,5,5037
1,1,5,5320
2,2,5,5233
3,3,5,5232
4,4,5,5268
...,...,...,...
214195,214195,45,18454
214196,214196,45,16188
214197,214197,45,15757
214198,214198,45,19648


In [10]:
test_df.merge(fin_df, how='left')['date_block_num'].isna().sum()

102838

In [7]:
fin_df

Unnamed: 0,shop_id,item_id,date_block_num,item_cnt_day,target,deals_cnt,deals_cnt_shift_2,deals_cnt_shift_6,deals_cnt_shift_12,deals_cnt_roll_sum_2,...,item_price_sum_roll_min_6,item_price_mean_roll_min_6,item_price_sum_roll_min_12,item_price_mean_roll_min_12,item_price_sum_roll_max_2,item_price_mean_roll_max_2,item_price_sum_roll_max_6,item_price_mean_roll_max_6,item_price_sum_roll_max_12,item_price_mean_roll_max_12
0,0.0,30.0,1.0,31.0,0.0,5064.0,0.0,0.0,0.0,9857.0,...,2.546339e+06,531.262024,2.546339e+06,531.262024,3.007530e+06,593.904053,3007530.000,593.904053,3007530.000,593.904053
1,0.0,31.0,1.0,11.0,0.0,5064.0,0.0,0.0,0.0,9857.0,...,2.546339e+06,531.262024,2.546339e+06,531.262024,3.007530e+06,593.904053,3007530.000,593.904053,3007530.000,593.904053
2,0.0,32.0,0.0,6.0,10.0,4793.0,0.0,0.0,0.0,4793.0,...,2.546339e+06,531.262024,2.546339e+06,531.262024,2.546339e+06,531.262024,2546339.000,531.262024,2546339.000,531.262024
3,0.0,32.0,1.0,10.0,0.0,5064.0,0.0,0.0,0.0,9857.0,...,2.546339e+06,531.262024,2.546339e+06,531.262024,3.007530e+06,593.904053,3007530.000,593.904053,3007530.000,593.904053
4,0.0,33.0,0.0,3.0,3.0,4793.0,0.0,0.0,0.0,4793.0,...,2.546339e+06,531.262024,2.546339e+06,531.262024,2.546339e+06,531.262024,2546339.000,531.262024,2546339.000,531.262024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1605621,59.0,22164.0,27.0,2.0,0.0,792.0,948.0,1039.0,1050.0,1803.0,...,7.287827e+05,875.018799,7.287827e+05,823.831360,8.846440e+05,920.180176,2225274.250,1352.869141,2225274.250,1352.869141
1605622,59.0,22164.0,30.0,1.0,0.0,916.0,749.0,1091.0,974.0,1752.0,...,7.287827e+05,875.018799,7.287827e+05,875.018799,8.085937e+05,942.271362,981327.000,1035.155029,2225274.250,1352.869141
1605623,59.0,22167.0,9.0,1.0,0.0,1461.0,1830.0,1271.0,0.0,3041.0,...,7.731048e+05,597.036743,7.731048e+05,597.036743,1.153746e+06,769.317383,1153746.500,769.317383,1514862.375,820.174561
1605624,59.0,22167.0,11.0,2.0,0.0,2043.0,1461.0,1465.0,0.0,3586.0,...,1.006289e+06,597.036743,7.731048e+05,597.036743,1.955512e+06,957.176880,1955512.375,957.176880,1955512.375,957.176880


Let's create functionality to generate test set for a given month

In [10]:
class FeatureGenerator():
    def __init__(self):
        self.merged_df = pd.read_parquet(PROCESSED_PATH + 'merged_train_df.parquet')
        self.shop_id_min, self.shop_id_max = self.merged_df['shop_id'].min(), self.merged_df['shop_id'].max()
        self.month_min, self.month_max = self.merged_df['date_block_num'].min(), self.merged_df['date_block_num'].max()
        
    def gen_shop_month_backbone(self):
        """Creating dataframe where for each combination of shop and item every month is present"""
        self.shop_month_index_backbone = pd.DataFrame(product(
            range(self.shop_id_min, self.shop_id_max+1),
            range(self.month_min, self.month_max+1)
        ), columns = ['shop_id', 'date_block_num'])
    
    def gen_deals_per_month_feats(self):
        """Calculating features based on number of deals per month"""
        # adding deals count column
        deals_cnt_df = self.merged_df[['date_block_num', 'shop_id', 'item_id']]\
                            .reset_index(drop=True).copy()
        group_cols = ['shop_id', 'date_block_num']
        deals_cnt_df = deals_cnt_df.sort_values(group_cols).groupby(group_cols)\
                            ['item_id'].count().reset_index()\
                            .rename(columns={'item_id': 'deals_cnt'})
        deals_cnt_df = self.shop_month_index_backbone.merge(deals_cnt_df, how='left', 
                                                            on=group_cols).fillna(0)

        # calculating lags
        deals_cnt_df = deals_cnt_df.set_index('shop_id')
        for shift in WINS_SHIFTS:
            deals_cnt_df[f'deals_cnt_shift_{shift}'] = deals_cnt_df.groupby('shop_id')['deals_cnt']\
                                                        .shift(periods=shift, fill_value=0)
        deals_cnt_df = deals_cnt_df.reset_index()

        # calculating rolling window aggregates
        deals_cnt_df = deals_cnt_df.sort_values(group_cols)
        for func in ROLL_FUNCS:
            for win_len in WINS_SHIFTS:
                deals_cnt_df[f'deals_cnt_roll_{func}_{win_len}'] = deals_cnt_df.groupby('shop_id').rolling(win_len, min_periods=1)\
                        .agg({'deals_cnt': func}).reset_index(drop=True).fillna(0)
        return deals_cnt_df
        

    def gen_revenue_per_shop_month_feats(self):
        """Calculating features based on prices of sold items per month per shop"""
        # adding simple aggregates of prices over various deals
        prices_df = self.merged_df.reset_index()[['shop_id', 'date_block_num', 'item_price']]
        group_cols = ['shop_id', 'date_block_num']
        prices_df = prices_df.groupby(group_cols).agg({'item_price': ROLL_FUNCS}).fillna(0)
        prices_df.columns = ['_'.join(col) for col in prices_df.columns]
        prices_df = prices_df.reset_index()
        prices_df = self.shop_month_index_backbone.merge(prices_df, how='left').fillna(0)
        simple_agg_cols = [f'item_price_{agg}' for agg in ROLL_FUNCS]

        # adding lags
        prices_df = prices_df.sort_values(group_cols).set_index('shop_id')
        for shift in WINS_SHIFTS:
            for col in simple_agg_cols:
                prices_df[f'{col}_shift_{shift}'] = prices_df.groupby('shop_id')[col].shift(periods=shift, fill_value=0)
        prices_df = prices_df.reset_index()

        # adding window aggregates
        prices_df = prices_df.sort_values(group_cols)
        cols_to_agg = ['item_price_sum', 'item_price_mean']

        for func in ROLL_FUNCS:
            for win_len in WINS_SHIFTS:
                for col in cols_to_agg:
                    prices_df[f'{col}_roll_{func}_{win_len}'] = prices_df.groupby('shop_id').rolling(win_len, min_periods=1)\
                            .agg({col: func}).reset_index(drop=True).fillna(0)
        return prices_df
    
    def generate_features(self):
        """Calculating all features and merging them in one dataset"""
        self.gen_shop_month_backbone()
        deals_cnt_df = self.gen_deals_per_month_feats()
        prices_df = self.gen_revenue_per_shop_month_feats()
        return deals_cnt_df.merge(prices_df, how='left')

In [11]:
def generate_test_backbone(test_size: int, 
                           month_num: int,
                           shop_id_min_max: tuple[int, int]=(0, 59),
                           item_id_min_max: tuple[int, int]=(30, 22167),
                           num_items_variety: float=.3) -> pd.DataFrame:
    """
    Function generates backbone dataframe with 
    shop_id, item_id and month number approximately of the passed size.
    """
    num_shops = int(np.sqrt(test_size))
    ideal_num_items = int(test_size / num_shops)
    test_shops = np.random.choice(range(shop_id_min_max[0], 
                                        shop_id_min_max[1]+1), 
                                  num_shops, replace=False)
    back = []
    for val in test_shops:
        num_items_to_pick = np.random.choice(range(int(ideal_num_items * (1-num_items_variety)),
                                                int(ideal_num_items * (1+num_items_variety))))
        test_items = np.random.choice(range(item_id_min_max[0], 
                                            item_id_min_max[1]+1), 
                                    num_items_to_pick, replace=False)
        back.append(pd.DataFrame(product([val], test_items), columns = ['shop_id', 'item_id']))
    back_df = pd.concat(back, ignore_index=True).drop_duplicates()
    back_df['date_block_num'] = month_num - 1
    return back_df

def generate_test(test_backbone: pd.DataFrame,
                  feat_generator: FeatureGenerator) -> pd.DataFrame:
    """
    Function takes in test backbone, i.e. df of shops and items and a given month, 
    calls feature generator and returns the merged result
    """
    feats = feat_generator.generate_features()
    return test_backbone.merge(feats, how='left')

In [15]:
feat_generator = FeatureGenerator()
test_back = generate_test_backbone(month_num=35, test_size=1000)
generate_test(test_backbone=test_back,
              feat_generator=feat_generator)

Unnamed: 0,shop_id,item_id,date_block_num,deals_cnt,deals_cnt_shift_2,deals_cnt_shift_6,deals_cnt_shift_12,deals_cnt_roll_sum_2,deals_cnt_roll_sum_6,deals_cnt_roll_sum_12,...,item_price_sum_roll_min_6,item_price_mean_roll_min_6,item_price_sum_roll_min_12,item_price_mean_roll_min_12,item_price_sum_roll_max_2,item_price_mean_roll_max_2,item_price_sum_roll_max_6,item_price_mean_roll_max_6,item_price_sum_roll_max_12,item_price_mean_roll_max_12
0,1,10468,34,,,,,,,,...,,,,,,,,,,
1,1,12338,34,,,,,,,,...,,,,,,,,,,
2,1,1865,34,,,,,,,,...,,,,,,,,,,
3,1,903,34,,,,,,,,...,,,,,,,,,,
4,1,14530,34,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,53,9712,34,,,,,,,,...,,,,,,,,,,
996,53,1761,34,,,,,,,,...,,,,,,,,,,
997,53,4468,34,,,,,,,,...,,,,,,,,,,
998,53,20724,34,,,,,,,,...,,,,,,,,,,
