In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [2]:
def verify(df):
    print(df.shape)
    display(df.head())
    display(df.tail())

# Sales

In [3]:
df = pd.read_csv('data/train_clean.csv')
verify(df)

(3008016, 6)


Unnamed: 0,index,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-02,1,AUTOMOTIVE,2.0,0
2,2,2013-01-03,1,AUTOMOTIVE,3.0,0
3,3,2013-01-04,1,AUTOMOTIVE,3.0,0
4,4,2013-01-05,1,AUTOMOTIVE,5.0,0


Unnamed: 0,index,date,store_nbr,family,sales,onpromotion
3008011,3008011,2017-08-11,9,SEAFOOD,23.831,0
3008012,3008012,2017-08-12,9,SEAFOOD,16.859001,4
3008013,3008013,2017-08-13,9,SEAFOOD,20.0,0
3008014,3008014,2017-08-14,9,SEAFOOD,17.0,0
3008015,3008015,2017-08-15,9,SEAFOOD,16.0,0


In [4]:
df.drop(columns=['index'], inplace=True)
df['date'] = pd.to_datetime(df['date'])
df = df[df['date'] >= pd.to_datetime('2013-01-02')]

In [5]:
df['year'] = pd.to_datetime(df['date']).dt.year
df['month'] = pd.to_datetime(df['date']).dt.month
df['day'] = pd.to_datetime(df['date']).dt.day
df['day_of_week'] = pd.to_datetime(df['date']).dt.dayofweek
df['business_day'] = df['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)

In [6]:
# def is_business_day(date):
#     return bool(len(pd.bdate_range(date, date)))

# df_dates = df[['date']].drop_duplicates()
# df_dates['business_day'] = df_dates['date'].apply(lambda x: is_business_day(x)).astype(int)

# df = df.merge(df_dates, on='date', how='inner')
df = df.sort_values(['store_nbr', 'family', 'date']).reset_index(drop=True)
verify(df)

(3006234, 10)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day
0,2013-01-02,1,AUTOMOTIVE,2.0,0,2013,1,2,2,0
1,2013-01-03,1,AUTOMOTIVE,3.0,0,2013,1,3,3,0
2,2013-01-04,1,AUTOMOTIVE,3.0,0,2013,1,4,4,0
3,2013-01-05,1,AUTOMOTIVE,5.0,0,2013,1,5,5,1
4,2013-01-06,1,AUTOMOTIVE,2.0,0,2013,1,6,6,1


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day
3006229,2017-08-11,54,SEAFOOD,0.0,0,2017,8,11,4,0
3006230,2017-08-12,54,SEAFOOD,1.0,1,2017,8,12,5,1
3006231,2017-08-13,54,SEAFOOD,2.0,0,2017,8,13,6,1
3006232,2017-08-14,54,SEAFOOD,0.0,0,2017,8,14,0,0
3006233,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0


In [7]:
cols_week = []
for lag in list(range(16,23)):
    df[f'sales_store_family_lag{lag}'] = df.groupby(['store_nbr', 'family']).sales.shift(lag)
    cols_week.append(f'sales_store_family_lag{lag}')

cols_year = []
for lag in list(range(30,361,30)):
    df[f'sales_store_family_lag{lag}'] = df.groupby(['store_nbr', 'family']).sales.shift(lag)
    cols_year.append(f'sales_store_family_lag{lag}')
    
df['ema_sales_store_family_week'] = df.groupby(['store_nbr', 'family']).sales.ewm(span=7, adjust=False, min_periods=7).mean().reset_index(drop=True)
df['ema_sales_store_family_lag_16_22'] = df.groupby(['store_nbr', 'family']).ema_sales_store_family_week.shift(16)

df['ema_sales_store_family_month'] = df.groupby(['store_nbr', 'family']).sales.rolling(30, min_periods=30).mean().reset_index(drop=True)
df['ema_sales_store_family_lag_16_45'] = df.groupby(['store_nbr', 'family']).ema_sales_store_family_month.shift(16)

df['ema_sales_store_family_year'] = df.groupby(['store_nbr', 'family']).sales.rolling(365, min_periods=365).mean().reset_index(drop=True)
df['ema_sales_store_family_lag_16_380'] = df.groupby(['store_nbr', 'family']).ema_sales_store_family_year.shift(16)

df.drop(columns=['ema_sales_store_family_week', 'ema_sales_store_family_month', 'ema_sales_store_family_year'], 
        inplace=True)

verify(df)
print(df[[i for i in df.columns if 'sales_store_family_lag' in i]].isna().sum())

(3006234, 32)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380
0,2013-01-02,1,AUTOMOTIVE,2.0,0,2013,1,2,2,0,,,,,,,,,,,,,,,,,,,,,,
1,2013-01-03,1,AUTOMOTIVE,3.0,0,2013,1,3,3,0,,,,,,,,,,,,,,,,,,,,,,
2,2013-01-04,1,AUTOMOTIVE,3.0,0,2013,1,4,4,0,,,,,,,,,,,,,,,,,,,,,,
3,2013-01-05,1,AUTOMOTIVE,5.0,0,2013,1,5,5,1,,,,,,,,,,,,,,,,,,,,,,
4,2013-01-06,1,AUTOMOTIVE,2.0,0,2013,1,6,6,1,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380
3006229,2017-08-11,54,SEAFOOD,0.0,0,2017,8,11,4,0,3.0,7.0,1.0,5.0,3.0,0.0,3.0,1.0,5.0,4.0,1.0,2.0,2.0,2.0,1.0,7.0,1.0,0.0,5.0,3.470481,3.066667,1.934247
3006230,2017-08-12,54,SEAFOOD,1.0,1,2017,8,12,5,1,2.0,3.0,7.0,1.0,5.0,3.0,0.0,0.0,0.0,9.0,6.0,8.0,0.0,1.0,1.0,4.0,0.0,2.0,1.0,3.10286,2.766667,1.939726
3006231,2017-08-13,54,SEAFOOD,2.0,0,2017,8,13,6,1,4.0,2.0,3.0,7.0,1.0,5.0,3.0,0.0,3.0,9.0,0.0,1.0,0.0,2.0,2.0,2.0,3.0,2.0,2.0,3.327145,2.866667,1.945205
3006232,2017-08-14,54,SEAFOOD,0.0,0,2017,8,14,0,0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,0.0,4.0,2.0,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,3.495359,2.966667,1.956164
3006233,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0,4.0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,2.0,0.0,3.0,1.0,2.0,5.0,0.0,0.0,0.0,2.0,3.621519,3.1,1.967123


sales_store_family_lag16              28512
sales_store_family_lag17              30294
sales_store_family_lag18              32076
sales_store_family_lag19              33858
sales_store_family_lag20              35640
sales_store_family_lag21              37422
sales_store_family_lag22              39204
sales_store_family_lag30              53460
sales_store_family_lag60             106920
sales_store_family_lag90             160380
sales_store_family_lag120            213840
sales_store_family_lag150            267300
sales_store_family_lag180            320760
sales_store_family_lag210            374220
sales_store_family_lag240            427680
sales_store_family_lag270            481140
sales_store_family_lag300            534600
sales_store_family_lag330            588060
sales_store_family_lag360            641520
ema_sales_store_family_lag_16_22      39204
ema_sales_store_family_lag_16_45      80190
ema_sales_store_family_lag_16_380    677160
dtype: int64


In [8]:
df_stores = df.groupby(['date', 'store_nbr']).sales.sum().reset_index()
df_stores = df_stores.sort_values(['store_nbr', 'date']).reset_index(drop=True)

cols_week = []
for lag in list(range(16,23)):
    df_stores[f'sales_store_lag{lag}'] = df_stores.groupby(['store_nbr']).sales.shift(lag)
    cols_week.append(f'sales_store_lag{lag}')
    
cols_year = []
for lag in list(range(30,361,30)):
    df_stores[f'sales_store_lag{lag}'] = df_stores.groupby(['store_nbr']).sales.shift(lag)
    cols_year.append(f'sales_store_lag{lag}')

df_stores['ema_sales_store_week'] = df_stores.groupby(['store_nbr']).sales.ewm(span=7, adjust=False, min_periods=7).mean().reset_index(drop=True)
df_stores['ema_sales_store_lag_16_22'] = df_stores.groupby(['store_nbr']).ema_sales_store_week.shift(16)

df_stores['ema_sales_store_month'] = df_stores.groupby(['store_nbr']).sales.rolling(30, min_periods=30).mean().reset_index(drop=True)
df_stores['ema_sales_store_lag_16_45'] = df_stores.groupby(['store_nbr']).ema_sales_store_month.shift(16)

df_stores['ema_sales_store_year'] = df_stores.groupby(['store_nbr']).sales.rolling(365, min_periods=365).mean().reset_index(drop=True)
df_stores['ema_sales_store_lag_16_380'] = df_stores.groupby(['store_nbr']).ema_sales_store_year.shift(16)

df_stores.drop(columns=['ema_sales_store_week', 'ema_sales_store_month', 'ema_sales_store_year', 'sales'], 
        inplace=True)

verify(df_stores)
print(df_stores[[i for i in df_stores.columns if 'sales_store_lag' in i]].isna().sum())

(91098, 24)


Unnamed: 0,date,store_nbr,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380
0,2013-01-02,1,,,,,,,,,,,,,,,,,,,,,,
1,2013-01-03,1,,,,,,,,,,,,,,,,,,,,,,
2,2013-01-04,1,,,,,,,,,,,,,,,,,,,,,,
3,2013-01-05,1,,,,,,,,,,,,,,,,,,,,,,
4,2013-01-06,1,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,date,store_nbr,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380
91093,2017-08-11,54,6572.494,8241.906,10245.246998,16841.417,9739.265,10845.986,7097.149003,8042.298,11821.521,11318.824,7956.353002,8192.171,11721.063,7262.782,6872.459002,10855.031002,8966.58,6323.353,7951.739006,9412.730787,9832.126766,8773.782915
91094,2017-08-12,54,6911.345,6572.494,8241.906,10245.246998,16841.417,9739.265,10845.986,6722.425,9262.571,11251.186,6093.959,7520.89,8257.622,8834.506,6923.343,9892.957,13494.297,6812.701002,6246.614,8787.384341,9776.4357,8776.847039
91095,2017-08-13,54,9531.089998,6911.345,6572.494,8241.906,10245.246998,16841.417,9739.265,6765.057998,9678.241998,13391.214,8845.043,5348.335998,6807.944,11633.912,8206.350995,7604.733,8572.51,8997.745,6128.879997,8973.310755,9865.9205,8789.411924
91096,2017-08-14,54,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16841.417,9306.72,6506.621,9195.975,10599.092,7928.36,7077.091998,9775.383,9473.449002,9304.739002,8637.075,12835.125,6315.9,9449.669566,9927.3663,8801.114677
91097,2017-08-15,54,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789


sales_store_lag16               864
sales_store_lag17               918
sales_store_lag18               972
sales_store_lag19              1026
sales_store_lag20              1080
sales_store_lag21              1134
sales_store_lag22              1188
sales_store_lag30              1620
sales_store_lag60              3240
sales_store_lag90              4860
sales_store_lag120             6480
sales_store_lag150             8100
sales_store_lag180             9720
sales_store_lag210            11340
sales_store_lag240            12960
sales_store_lag270            14580
sales_store_lag300            16200
sales_store_lag330            17820
sales_store_lag360            19440
ema_sales_store_lag_16_22      1188
ema_sales_store_lag_16_45      2430
ema_sales_store_lag_16_380    20520
dtype: int64


In [9]:
df_families = df.groupby(['date', 'family']).sales.sum().reset_index()
df_families = df_families.sort_values(['family', 'date']).reset_index(drop=True)

cols_week = []
for lag in list(range(16,23)):
    df_families[f'sales_family_lag{lag}'] = df_families.groupby(['family']).sales.shift(lag)
    cols_week.append(f'sales_family_lag{lag}')

cols_year = []
for lag in list(range(30,361,30)):
    df_families[f'sales_family_lag{lag}'] = df_families.groupby(['family']).sales.shift(lag)
    cols_year.append(f'sales_family_lag{lag}')

df_families['ema_sales_family_week'] = df_families.groupby(['family']).sales.ewm(span=7, adjust=False, min_periods=7).mean().reset_index(drop=True)
df_families['ema_sales_family_lag_16_22'] = df_families.groupby(['family']).ema_sales_family_week.shift(16)

df_families['ema_sales_family_month'] = df_families.groupby(['family']).sales.rolling(30, min_periods=30).mean().reset_index(drop=True)
df_families['ema_sales_family_lag_16_45'] = df_families.groupby(['family']).ema_sales_family_month.shift(16)

df_families['ema_sales_family_year'] = df_families.groupby(['family']).sales.rolling(365, min_periods=365).mean().reset_index(drop=True)
df_families['ema_sales_family_lag_16_380'] = df_families.groupby(['family']).ema_sales_family_year.shift(16)

df_families.drop(columns=['ema_sales_family_week', 'ema_sales_family_month', 'ema_sales_family_year', 'sales'], 
        inplace=True)

verify(df_families)
print(df_families[[i for i in df_families.columns if 'sales_family_lag' in i]].isna().sum())

(55671, 24)


Unnamed: 0,date,family,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380
0,2013-01-02,AUTOMOTIVE,,,,,,,,,,,,,,,,,,,,,,
1,2013-01-03,AUTOMOTIVE,,,,,,,,,,,,,,,,,,,,,,
2,2013-01-04,AUTOMOTIVE,,,,,,,,,,,,,,,,,,,,,,
3,2013-01-05,AUTOMOTIVE,,,,,,,,,,,,,,,,,,,,,,
4,2013-01-06,AUTOMOTIVE,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,date,family,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380
55666,2017-08-11,SEAFOOD,839.655002,842.090999,995.327001,1336.117001,1237.136004,1279.617002,832.490001,910.025003,733.046999,1586.752001,1338.790996,1122.169993,1549.751998,1499.210012,1000.610003,1168.591997,1412.043001,914.278999,1231.333995,994.971666,1106.508366,1214.055734
55667,2017-08-12,SEAFOOD,652.069999,839.655002,842.090999,995.327001,1336.117001,1237.136004,1279.617002,763.185,731.403995,1109.968,1663.270998,1316.079002,1092.972998,1449.434,668.240001,1081.247003,1833.255003,1648.302995,1272.509,909.246249,1097.978233,1213.060436
55668,2017-08-13,SEAFOOD,1307.945999,652.069999,839.655002,842.090999,995.327001,1336.117001,1237.136004,1168.711002,853.077994,1176.515991,1219.864,1143.293005,889.708,1597.529992,1297.765,1279.605003,1152.918007,1526.577002,1005.604001,1008.921187,1110.9081,1214.387918
55669,2017-08-14,SEAFOOD,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1336.117001,1116.871994,677.453,1207.565999,1380.766001,1855.046,981.652003,1101.452004,1157.602995,787.699001,1006.996007,1624.924003,1577.567993,1081.069392,1130.908133,1213.776773
55670,2017-08-15,SEAFOOD,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934


sales_family_lag16               528
sales_family_lag17               561
sales_family_lag18               594
sales_family_lag19               627
sales_family_lag20               660
sales_family_lag21               693
sales_family_lag22               726
sales_family_lag30               990
sales_family_lag60              1980
sales_family_lag90              2970
sales_family_lag120             3960
sales_family_lag150             4950
sales_family_lag180             5940
sales_family_lag210             6930
sales_family_lag240             7920
sales_family_lag270             8910
sales_family_lag300             9900
sales_family_lag330            10890
sales_family_lag360            11880
ema_sales_family_lag_16_22       726
ema_sales_family_lag_16_45      1485
ema_sales_family_lag_16_380    12540
dtype: int64


In [10]:
df.merge(df_stores, on=['date', 'store_nbr'], how='inner')

Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380
0,2013-01-02,1,AUTOMOTIVE,2.000,0,2013,1,2,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2013-01-02,1,BABY CARE,0.000,0,2013,1,2,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2013-01-02,1,BEAUTY,2.000,0,2013,1,2,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2013-01-02,1,BEVERAGES,1091.000,0,2013,1,2,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2013-01-02,1,BOOKS,0.000,0,2013,1,2,2,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3006229,2017-08-15,54,POULTRY,59.619,0,2017,8,15,1,0,72.895996,82.296,46.656,52.086,34.856,50.686,51.209,103.902,72.770,44.233,56.169,47.291,46.572,57.161,66.212,91.916,34.801,49.364998,29.669,6.453319e+01,69.364066,62.222482,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789
3006230,2017-08-15,54,PREPARED FOODS,94.000,0,2017,8,15,1,0,124.000000,81.000,68.000,73.000,81.000,65.000,112.000,50.000,79.000,81.000,29.000,70.000,49.000,98.000,103.000,74.000,70.000,80.000000,52.000,9.153548e+01,85.000000,75.816438,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789
3006231,2017-08-15,54,PRODUCE,915.371,76,2017,8,15,1,0,793.147000,729.844,562.097,520.489,559.658,914.959,654.930,981.712,580.389,506.987,559.538,659.408,426.046,862.417,816.129,613.487,555.718,549.655000,713.194,6.889917e+02,698.255533,655.923181,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789
3006232,2017-08-15,54,SCHOOL AND OFFICE SUPPLIES,0.000,0,2017,8,15,1,0,0.000000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,1.000,1.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000000,0.000,2.320649e-09,0.000000,1.679452,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789


In [11]:
df = df.dropna(how='any')
verify(df)

df_stores = df_stores.dropna(how='any')
verify(df_stores)

df_families = df_families.dropna(how='any')
verify(df_families)

(2329074, 32)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380
380,2014-01-17,1,AUTOMOTIVE,2.0,0,2014,1,17,4,0,0.0,2.0,1.0,0.0,4.0,6.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,5.0,1.0,2.0,0.0,1.0,1.0,1.474098,1.833333,2.142466
381,2014-01-18,1,AUTOMOTIVE,4.0,0,2014,1,18,5,1,3.0,0.0,2.0,1.0,0.0,4.0,6.0,0.0,4.0,1.0,5.0,4.0,2.0,4.0,6.0,5.0,1.0,2.0,3.0,1.855573,1.866667,2.145205
382,2014-01-19,1,AUTOMOTIVE,0.0,0,2014,1,19,6,1,0.0,3.0,0.0,2.0,1.0,0.0,4.0,5.0,6.0,1.0,2.0,4.0,3.0,3.0,2.0,3.0,4.0,4.0,0.0,1.39168,1.833333,2.136986
383,2014-01-20,1,AUTOMOTIVE,2.0,0,2014,1,20,0,0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,5.0,3.0,7.0,1.0,2.0,7.0,1.0,1.0,2.0,10.0,0.0,5.0,1.04376,1.833333,2.128767
384,2014-01-21,1,AUTOMOTIVE,9.0,0,2014,1,21,1,0,1.0,0.0,0.0,3.0,0.0,2.0,1.0,0.0,2.0,1.0,1.0,2.0,1.0,2.0,2.0,3.0,3.0,1.0,4.0,1.03282,1.8,2.117808


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380
3006229,2017-08-11,54,SEAFOOD,0.0,0,2017,8,11,4,0,3.0,7.0,1.0,5.0,3.0,0.0,3.0,1.0,5.0,4.0,1.0,2.0,2.0,2.0,1.0,7.0,1.0,0.0,5.0,3.470481,3.066667,1.934247
3006230,2017-08-12,54,SEAFOOD,1.0,1,2017,8,12,5,1,2.0,3.0,7.0,1.0,5.0,3.0,0.0,0.0,0.0,9.0,6.0,8.0,0.0,1.0,1.0,4.0,0.0,2.0,1.0,3.10286,2.766667,1.939726
3006231,2017-08-13,54,SEAFOOD,2.0,0,2017,8,13,6,1,4.0,2.0,3.0,7.0,1.0,5.0,3.0,0.0,3.0,9.0,0.0,1.0,0.0,2.0,2.0,2.0,3.0,2.0,2.0,3.327145,2.866667,1.945205
3006232,2017-08-14,54,SEAFOOD,0.0,0,2017,8,14,0,0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,0.0,4.0,2.0,0.0,4.0,2.0,1.0,1.0,2.0,0.0,0.0,3.495359,2.966667,1.956164
3006233,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0,4.0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,2.0,0.0,3.0,1.0,2.0,5.0,0.0,0.0,0.0,2.0,3.621519,3.1,1.967123


(70578, 24)


Unnamed: 0,date,store_nbr,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380
380,2014-01-17,1,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628
381,2014-01-18,1,12034.864999,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,7930.986,6475.073999,2550.309995,5621.10698,5531.862,5047.601,5257.657,5426.115,5085.243,2056.88,5781.510002,6259.346,6070.274318,6722.666533,5227.030907
382,2014-01-19,1,11700.4514,12034.864999,0.0,4618.153,5754.39498,1819.251996,4532.439,10906.77801,7099.558,5879.459,5573.482,4568.639,5644.173,2207.966,2195.219,7112.197,5233.41299,5426.989018,4861.524,7477.818588,6807.393413,5242.995859
383,2014-01-20,1,10863.582,11700.4514,12034.864999,0.0,4618.153,5754.39498,1819.251996,8320.063,5481.078,5429.208,2216.21,5678.172998,5855.491,5225.463,4255.778004,4722.563002,4812.276,2295.712,5577.701,8324.259441,6972.171147,5256.540251
384,2014-01-21,1,4634.573,10863.582,11700.4514,12034.864999,0.0,4618.153,5754.39498,5450.846,6937.855,6429.41099,5487.169,4615.946,5112.657,5698.107,1621.783,6316.556,6387.455,5333.274984,5290.32701,7401.837831,7023.522413,5251.925971


Unnamed: 0,date,store_nbr,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380
91093,2017-08-11,54,6572.494,8241.906,10245.246998,16841.417,9739.265,10845.986,7097.149003,8042.298,11821.521,11318.824,7956.353002,8192.171,11721.063,7262.782,6872.459002,10855.031002,8966.58,6323.353,7951.739006,9412.730787,9832.126766,8773.782915
91094,2017-08-12,54,6911.345,6572.494,8241.906,10245.246998,16841.417,9739.265,10845.986,6722.425,9262.571,11251.186,6093.959,7520.89,8257.622,8834.506,6923.343,9892.957,13494.297,6812.701002,6246.614,8787.384341,9776.4357,8776.847039
91095,2017-08-13,54,9531.089998,6911.345,6572.494,8241.906,10245.246998,16841.417,9739.265,6765.057998,9678.241998,13391.214,8845.043,5348.335998,6807.944,11633.912,8206.350995,7604.733,8572.51,8997.745,6128.879997,8973.310755,9865.9205,8789.411924
91096,2017-08-14,54,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16841.417,9306.72,6506.621,9195.975,10599.092,7928.36,7077.091998,9775.383,9473.449002,9304.739002,8637.075,12835.125,6315.9,9449.669566,9927.3663,8801.114677
91097,2017-08-15,54,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789


(43131, 24)


Unnamed: 0,date,family,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380
380,2014-01-17,AUTOMOTIVE,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616
381,2014-01-18,AUTOMOTIVE,331.0,4.0,239.0,389.0,386.0,432.0,297.0,196.0,176.0,381.0,190.0,240.0,296.0,377.0,308.0,203.0,313.0,181.0,198.0,264.366073,266.4,253.843836
382,2014-01-19,AUTOMOTIVE,299.0,331.0,4.0,239.0,389.0,386.0,432.0,225.0,238.0,193.0,306.0,215.0,258.0,390.0,362.0,268.0,227.0,275.0,156.0,273.024555,268.7,254.221918
383,2014-01-20,AUTOMOTIVE,404.0,299.0,331.0,4.0,239.0,389.0,386.0,325.0,200.0,206.0,346.0,204.0,294.0,212.0,368.0,244.0,183.0,390.0,177.0,305.768416,274.733333,254.865753
384,2014-01-21,AUTOMOTIVE,393.0,404.0,299.0,331.0,4.0,239.0,389.0,365.0,182.0,174.0,201.0,317.0,223.0,256.0,360.0,194.0,216.0,220.0,302.0,327.576312,280.566667,255.005479


Unnamed: 0,date,family,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380
55666,2017-08-11,SEAFOOD,839.655002,842.090999,995.327001,1336.117001,1237.136004,1279.617002,832.490001,910.025003,733.046999,1586.752001,1338.790996,1122.169993,1549.751998,1499.210012,1000.610003,1168.591997,1412.043001,914.278999,1231.333995,994.971666,1106.508366,1214.055734
55667,2017-08-12,SEAFOOD,652.069999,839.655002,842.090999,995.327001,1336.117001,1237.136004,1279.617002,763.185,731.403995,1109.968,1663.270998,1316.079002,1092.972998,1449.434,668.240001,1081.247003,1833.255003,1648.302995,1272.509,909.246249,1097.978233,1213.060436
55668,2017-08-13,SEAFOOD,1307.945999,652.069999,839.655002,842.090999,995.327001,1336.117001,1237.136004,1168.711002,853.077994,1176.515991,1219.864,1143.293005,889.708,1597.529992,1297.765,1279.605003,1152.918007,1526.577002,1005.604001,1008.921187,1110.9081,1214.387918
55669,2017-08-14,SEAFOOD,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1336.117001,1116.871994,677.453,1207.565999,1380.766001,1855.046,981.652003,1101.452004,1157.602995,787.699001,1006.996007,1624.924003,1577.567993,1081.069392,1130.908133,1213.776773
55670,2017-08-15,SEAFOOD,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934


In [12]:
df = df.merge(df_stores, on=['date', 'store_nbr'], how='inner')
del df_stores

df = df.merge(df_families, on=['date', 'family'], how='inner')
del df_families

verify(df)

(2329074, 76)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380
0,2014-01-17,1,AUTOMOTIVE,2.0,0,2014,1,17,4,0,0.0,2.0,1.0,0.0,4.0,6.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,5.0,1.0,2.0,0.0,1.0,1.0,1.474098,1.833333,2.142466,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616
1,2014-01-17,2,AUTOMOTIVE,7.0,0,2014,1,17,4,0,0.0,8.0,6.0,3.0,5.0,8.0,2.0,6.0,2.0,6.0,4.0,3.0,2.0,1.0,2.0,1.0,6.0,4.0,2.0,4.404975,4.3,3.693151,0.0,10784.24,10471.023,7575.10397,8975.967,8798.078,8370.09303,10808.051,7932.985,10556.369999,5589.83701,7364.131996,8322.357,7312.826996,6642.774,7047.772,10146.848,5138.403004,6157.080001,7584.884739,9909.063967,7884.668154,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616
2,2014-01-17,3,AUTOMOTIVE,9.0,0,2014,1,17,4,0,0.0,2.0,10.0,15.0,9.0,6.0,8.0,3.0,1.0,12.0,4.0,7.0,5.0,11.0,3.0,3.0,7.0,3.0,3.0,5.542701,5.9,7.005479,0.0,20350.2601,25879.015,18258.696,21290.041985,19884.88404,21118.82403,24811.254,17359.445998,24412.431,12854.207,15038.87098,20147.16797,15494.618,13900.119,15323.494,20366.3,12110.657,12857.139024,16574.644444,22231.457402,17765.34112,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616
3,2014-01-17,4,AUTOMOTIVE,5.0,0,2014,1,17,4,0,0.0,2.0,6.0,11.0,5.0,2.0,7.0,2.0,5.0,1.0,1.0,5.0,3.0,1.0,2.0,2.0,8.0,5.0,0.0,3.920018,4.233333,4.010959,0.0,10644.794,10491.676,7533.10901,8934.769,8538.423,7036.212998,7780.486,6558.057,8659.599,4549.53699,6233.079,8906.79,5507.104,5259.091998,6066.549011,8921.207978,4989.73,5389.75199,7432.554631,8902.319868,7189.951863,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616
4,2014-01-17,5,AUTOMOTIVE,4.0,0,2014,1,17,4,0,0.0,2.0,5.0,6.0,5.0,11.0,2.0,3.0,4.0,6.0,2.0,6.0,8.0,1.0,2.0,2.0,3.0,2.0,7.0,3.284882,4.3,4.446575,0.0,11653.234,14030.74,9906.029,10650.37904,10926.648,9302.268,8913.484,6423.434,7281.10198,4874.799,5928.438,8489.368,5749.733,5472.49699,6319.16301,6389.314,5130.57,6079.274,8935.970934,9842.768936,7106.665439,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380
2329069,2017-08-15,50,SEAFOOD,15.314,0,2017,8,15,1,0,19.564,38.183,45.765,19.574,15.565001,18.085,11.97,22.605999,8.0,18.102,17.511,23.0,7.0,25.07,23.528,25.161,27.047,28.487999,40.402,25.780637,21.412933,21.474304,28905.006,29523.784,22359.163,17047.749,18063.433001,18572.603,20176.261004,27946.379999,21381.688,17301.502,18540.736,30445.085,14912.749,17249.56,37987.356995,19786.152,14007.13798,16744.163999,19723.38602,24268.343902,22556.048994,21448.769026,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934
2329070,2017-08-15,51,SEAFOOD,52.876,0,2017,8,15,1,0,75.666,54.773,55.965,24.253,40.159,36.005,47.489,59.424,58.518,66.958,37.850998,53.438,26.071,44.687,58.608,66.229004,54.069,63.306,88.277,54.055013,50.628067,53.342526,31740.339,25381.37296,25944.219,17206.981,25735.891,21278.683,25309.882,31024.755,30187.64102,28664.33003,23268.793998,25029.97804,18654.792,22051.198,36331.785,25291.539004,23468.32797,20768.45201,27109.459,25726.025906,24509.603702,23697.171516,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934
2329071,2017-08-15,52,SEAFOOD,7.0,0,2017,8,15,1,0,12.0,12.0,12.0,8.0,5.0,8.0,5.414,17.0,6.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.390481,9.696067,2.945975,26874.19797,33950.49397,27263.18097,17526.184004,23978.732,17653.922,20484.642,30216.529,22425.545,24600.065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25903.03636,23262.360796,6385.428436,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934
2329072,2017-08-15,53,SEAFOOD,5.0,0,2017,8,15,1,0,2.0,2.0,4.0,4.0,0.0,6.0,2.0,5.0,7.0,3.0,4.0,7.0,6.0,12.0,6.0,14.0,3.0,2.0,1.0,2.911979,3.933333,6.314082,17141.285,14961.53703,11183.506,7726.762,12397.486,8396.316,11461.163,16387.04,12498.094,11484.32,10786.439,14720.722,9047.348,10468.129,19251.229,10280.446,9647.294,9941.111,15296.490004,13194.485283,12230.967169,12358.954583,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934
2329073,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0,4.0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,2.0,0.0,3.0,1.0,2.0,5.0,0.0,0.0,0.0,2.0,3.621519,3.1,1.967123,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934


# Oil 

In [13]:
df_oil = pd.read_csv('data/oil_clean.csv')

cols_week = []
for lag in list(range(1,8)):
    df_oil[f'dcoilwtico_lag{lag}'] = df_oil.dcoilwtico.shift(lag)
    cols_week.append(f'dcoilwtico_lag{lag}')

df_oil['ema_dcoilwtico_week'] = df_oil.dcoilwtico.ewm(span=7, adjust=False, min_periods=7).mean().reset_index(drop=True)
df_oil['ema_dcoilwtico_lag_1_7'] = df_oil.ema_dcoilwtico_week.shift(1)

df_oil['ema_dcoilwtico_month'] = df_oil.dcoilwtico.rolling(30, min_periods=30).mean().reset_index(drop=True)
df_oil['ema_dcoilwtico_lag_1_30'] = df_oil.ema_dcoilwtico_month.shift(1)

df_oil['ema_dcoilwtico_year'] = df_oil.dcoilwtico.rolling(365, min_periods=365).mean().reset_index(drop=True)
df_oil['ema_dcoilwtico_lag_1_365'] = df_oil.ema_dcoilwtico_month.shift(1)

df_oil.drop(columns=['ema_dcoilwtico_week', 'ema_dcoilwtico_month', 'ema_dcoilwtico_year'], 
            inplace=True)
verify(df_oil)

(1704, 12)


Unnamed: 0,date,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365
0,2013-01-01,93.14,,,,,,,,,,
1,2013-01-02,93.14,93.14,,,,,,,,,
2,2013-01-03,92.97,93.14,93.14,,,,,,,,
3,2013-01-04,93.12,92.97,93.14,93.14,,,,,,,
4,2013-01-05,93.12,93.12,92.97,93.14,93.14,,,,,,


Unnamed: 0,date,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365
1699,2017-08-27,47.65,47.65,47.65,47.24,48.45,47.65,47.39,48.59,47.765076,48.657667,48.657667
1700,2017-08-28,46.4,47.65,47.65,47.65,47.24,48.45,47.65,47.39,47.736307,48.588667,48.588667
1701,2017-08-29,46.46,46.4,47.65,47.65,47.65,47.24,48.45,47.65,47.40223,48.478,48.478
1702,2017-08-30,45.96,46.46,46.4,47.65,47.65,47.65,47.24,48.45,47.166673,48.369333,48.369333
1703,2017-08-31,47.26,45.96,46.46,46.4,47.65,47.65,47.65,47.24,46.865004,48.227667,48.227667


In [14]:
df_oil['date'] = pd.to_datetime(df_oil['date'])

In [15]:
df = df.merge(df_oil, on='date', how='left')
verify(df)

(2329074, 87)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365
0,2014-01-17,1,AUTOMOTIVE,2.0,0,2014,1,17,4,0,0.0,2.0,1.0,0.0,4.0,6.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,5.0,1.0,2.0,0.0,1.0,1.0,1.474098,1.833333,2.142466,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333
1,2014-01-17,2,AUTOMOTIVE,7.0,0,2014,1,17,4,0,0.0,8.0,6.0,3.0,5.0,8.0,2.0,6.0,2.0,6.0,4.0,3.0,2.0,1.0,2.0,1.0,6.0,4.0,2.0,4.404975,4.3,3.693151,0.0,10784.24,10471.023,7575.10397,8975.967,8798.078,8370.09303,10808.051,7932.985,10556.369999,5589.83701,7364.131996,8322.357,7312.826996,6642.774,7047.772,10146.848,5138.403004,6157.080001,7584.884739,9909.063967,7884.668154,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333
2,2014-01-17,3,AUTOMOTIVE,9.0,0,2014,1,17,4,0,0.0,2.0,10.0,15.0,9.0,6.0,8.0,3.0,1.0,12.0,4.0,7.0,5.0,11.0,3.0,3.0,7.0,3.0,3.0,5.542701,5.9,7.005479,0.0,20350.2601,25879.015,18258.696,21290.041985,19884.88404,21118.82403,24811.254,17359.445998,24412.431,12854.207,15038.87098,20147.16797,15494.618,13900.119,15323.494,20366.3,12110.657,12857.139024,16574.644444,22231.457402,17765.34112,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333
3,2014-01-17,4,AUTOMOTIVE,5.0,0,2014,1,17,4,0,0.0,2.0,6.0,11.0,5.0,2.0,7.0,2.0,5.0,1.0,1.0,5.0,3.0,1.0,2.0,2.0,8.0,5.0,0.0,3.920018,4.233333,4.010959,0.0,10644.794,10491.676,7533.10901,8934.769,8538.423,7036.212998,7780.486,6558.057,8659.599,4549.53699,6233.079,8906.79,5507.104,5259.091998,6066.549011,8921.207978,4989.73,5389.75199,7432.554631,8902.319868,7189.951863,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333
4,2014-01-17,5,AUTOMOTIVE,4.0,0,2014,1,17,4,0,0.0,2.0,5.0,6.0,5.0,11.0,2.0,3.0,4.0,6.0,2.0,6.0,8.0,1.0,2.0,2.0,3.0,2.0,7.0,3.284882,4.3,4.446575,0.0,11653.234,14030.74,9906.029,10650.37904,10926.648,9302.268,8913.484,6423.434,7281.10198,4874.799,5928.438,8489.368,5749.733,5472.49699,6319.16301,6389.314,5130.57,6079.274,8935.970934,9842.768936,7106.665439,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365
2329069,2017-08-15,50,SEAFOOD,15.314,0,2017,8,15,1,0,19.564,38.183,45.765,19.574,15.565001,18.085,11.97,22.605999,8.0,18.102,17.511,23.0,7.0,25.07,23.528,25.161,27.047,28.487999,40.402,25.780637,21.412933,21.474304,28905.006,29523.784,22359.163,17047.749,18063.433001,18572.603,20176.261004,27946.379999,21381.688,17301.502,18540.736,30445.085,14912.749,17249.56,37987.356995,19786.152,14007.13798,16744.163999,19723.38602,24268.343902,22556.048994,21448.769026,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274
2329070,2017-08-15,51,SEAFOOD,52.876,0,2017,8,15,1,0,75.666,54.773,55.965,24.253,40.159,36.005,47.489,59.424,58.518,66.958,37.850998,53.438,26.071,44.687,58.608,66.229004,54.069,63.306,88.277,54.055013,50.628067,53.342526,31740.339,25381.37296,25944.219,17206.981,25735.891,21278.683,25309.882,31024.755,30187.64102,28664.33003,23268.793998,25029.97804,18654.792,22051.198,36331.785,25291.539004,23468.32797,20768.45201,27109.459,25726.025906,24509.603702,23697.171516,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274
2329071,2017-08-15,52,SEAFOOD,7.0,0,2017,8,15,1,0,12.0,12.0,12.0,8.0,5.0,8.0,5.414,17.0,6.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.390481,9.696067,2.945975,26874.19797,33950.49397,27263.18097,17526.184004,23978.732,17653.922,20484.642,30216.529,22425.545,24600.065,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25903.03636,23262.360796,6385.428436,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274
2329072,2017-08-15,53,SEAFOOD,5.0,0,2017,8,15,1,0,2.0,2.0,4.0,4.0,0.0,6.0,2.0,5.0,7.0,3.0,4.0,7.0,6.0,12.0,6.0,14.0,3.0,2.0,1.0,2.911979,3.933333,6.314082,17141.285,14961.53703,11183.506,7726.762,12397.486,8396.316,11461.163,16387.04,12498.094,11484.32,10786.439,14720.722,9047.348,10468.129,19251.229,10280.446,9647.294,9941.111,15296.490004,13194.485283,12230.967169,12358.954583,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274
2329073,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0,4.0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,2.0,0.0,3.0,1.0,2.0,5.0,0.0,0.0,0.0,2.0,3.621519,3.1,1.967123,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274


# Stores

In [16]:
df_stores = pd.read_csv('data/stores.csv')
verify(df_stores)

(54, 5)


Unnamed: 0,store_nbr,city,state,type,cluster
0,1,Quito,Pichincha,D,13
1,2,Quito,Pichincha,D,13
2,3,Quito,Pichincha,D,8
3,4,Quito,Pichincha,D,9
4,5,Santo Domingo,Santo Domingo de los Tsachilas,D,4


Unnamed: 0,store_nbr,city,state,type,cluster
49,50,Ambato,Tungurahua,A,14
50,51,Guayaquil,Guayas,A,17
51,52,Manta,Manabi,A,11
52,53,Manta,Manabi,D,13
53,54,El Carmen,Manabi,C,3


In [17]:
df = df.merge(df_stores, on='store_nbr', how='inner')
verify(df)

(2329074, 91)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365,city,state,type,cluster
0,2014-01-17,1,AUTOMOTIVE,2.0,0,2014,1,17,4,0,0.0,2.0,1.0,0.0,4.0,6.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,5.0,1.0,2.0,0.0,1.0,1.0,1.474098,1.833333,2.142466,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13
1,2014-01-17,1,BABY CARE,0.0,0,2014,1,17,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13
2,2014-01-17,1,BEAUTY,4.0,0,2014,1,17,4,0,0.0,1.0,3.0,0.0,2.0,0.0,5.0,0.0,4.0,3.0,0.0,2.0,1.0,0.0,3.0,1.0,1.0,4.0,1.0,1.23805,1.5,1.780822,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,2.0,107.0,163.0,161.0,198.0,143.0,169.0,102.0,101.0,197.0,88.0,134.0,195.0,84.0,104.0,104.0,132.0,96.0,106.0,113.078043,132.666667,130.208219,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13
3,2014-01-17,1,BEVERAGES,2085.0,0,2014,1,17,4,0,0.0,840.0,1001.0,242.0,803.0,1025.0,1009.0,1122.0,901.0,981.0,848.0,721.0,279.0,901.0,871.0,853.0,985.0,910.0,1037.0,652.12298,942.233333,858.071233,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,2175.0,92017.0,96102.0,78396.0,88614.0,77409.0,73168.0,63989.0,56867.0,80806.0,43939.0,50801.0,69215.0,50770.0,47331.0,47858.0,75968.0,41292.0,42931.0,65460.06741,71811.233333,59902.942466,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13
4,2014-01-17,1,BOOKS,0.0,0,2014,1,17,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365,city,state,type,cluster
2329069,2017-08-15,54,POULTRY,59.619,0,2017,8,15,1,0,72.895996,82.296,46.656,52.086,34.856,50.686,51.209,103.902,72.77,44.233,56.169,47.291,46.572,57.161,66.212,91.916,34.801,49.364998,29.669,64.53319,69.364066,62.222482,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,25089.244942,22477.071865,28521.282225,14552.290992,16548.68701,16769.651937,19464.66103,26360.76801,31804.989046,18491.03,17711.450004,23457.77709,16029.954,17694.390035,26878.77905,28038.540014,15365.961035,16143.457978,21158.702034,22225.768154,21734.075462,21059.267403,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3
2329070,2017-08-15,54,PREPARED FOODS,94.0,0,2017,8,15,1,0,124.0,81.0,68.0,73.0,81.0,65.0,112.0,50.0,79.0,81.0,29.0,70.0,49.0,98.0,103.0,74.0,70.0,80.0,52.0,91.53548,85.0,75.816438,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,5507.523005,5408.918016,4722.614983,4168.786016,4456.418986,4129.020989,4823.321997,5534.052983,5001.44601,4738.09202,4342.841024,7189.56103,5153.443006,5341.388989,7448.360985,5268.237992,4946.68898,5305.362987,6106.435004,4968.95479,4894.940064,5550.034376,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3
2329071,2017-08-15,54,PRODUCE,915.371,76,2017,8,15,1,0,793.147,729.844,562.097,520.489,559.658,914.959,654.93,981.712,580.389,506.987,559.538,659.408,426.046,862.417,816.129,613.487,555.718,549.655,713.194,688.9917,698.255533,655.923181,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,146045.795,131988.66197,111948.89,90481.2861,145917.9191,115457.6587,117117.09501,157883.056,124173.9535,166802.7749,115072.02088,139267.04003,95892.37803,124250.2049,155663.9546,102969.51783,138579.652,104423.301,120558.7378,127348.465736,129450.195936,125742.800885,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3
2329072,2017-08-15,54,SCHOOL AND OFFICE SUPPLIES,0.0,0,2017,8,15,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.320649e-09,0.0,1.679452,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,2027.0,1345.0,567.0,244.0,166.0,174.0,192.0,121.0,58.0,215.0,718.0,150.0,60.0,80.0,129.0,71.0,69.0,146.0,787.0,925.102321,239.033333,300.334247,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3
2329073,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0,4.0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,2.0,0.0,3.0,1.0,2.0,5.0,0.0,0.0,0.0,2.0,3.621519,3.1,1.967123,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3


# Holidays Events

In [18]:
df_holidays = pd.read_csv('data/holidays_events.csv')
verify(df_holidays)

(350, 6)


Unnamed: 0,date,type,locale,locale_name,description,transferred
0,2012-03-02,Holiday,Local,Manta,Fundacion de Manta,False
1,2012-04-01,Holiday,Regional,Cotopaxi,Provincializacion de Cotopaxi,False
2,2012-04-12,Holiday,Local,Cuenca,Fundacion de Cuenca,False
3,2012-04-14,Holiday,Local,Libertad,Cantonizacion de Libertad,False
4,2012-04-21,Holiday,Local,Riobamba,Cantonizacion de Riobamba,False


Unnamed: 0,date,type,locale,locale_name,description,transferred
345,2017-12-22,Additional,National,Ecuador,Navidad-3,False
346,2017-12-23,Additional,National,Ecuador,Navidad-2,False
347,2017-12-24,Additional,National,Ecuador,Navidad-1,False
348,2017-12-25,Holiday,National,Ecuador,Navidad,False
349,2017-12-26,Additional,National,Ecuador,Navidad+1,False


In [19]:
df_holidays['date'] = pd.to_datetime(df_holidays['date'])
df_holidays = df_holidays[df_holidays['transferred'] == False]
df_holidays.shape

(338, 6)

In [20]:
df_holidays.type.unique()

array(['Holiday', 'Transfer', 'Additional', 'Bridge', 'Work Day', 'Event'],
      dtype=object)

In [21]:
day_off = df_holidays[(df_holidays['locale'] == 'National') & 
                      (df_holidays['type'].isin(['Transfer', 'Additional', 'Bridge']))].date.unique()
df['national_holiday'] = [1 if date in day_off else 0 for date in df.date]
df.national_holiday.value_counts()

0    2275614
1      53460
Name: national_holiday, dtype: int64

In [22]:
event = df_holidays[(df_holidays['locale'] == 'National') & 
                    (df_holidays['type'] == 'Event')].date.unique()
df['national_event'] = [1 if date in event else 0 for date in df.date]
df.national_event.value_counts()

0    2232846
1      96228
Name: national_event, dtype: int64

In [23]:
print(set(df_holidays.locale_name) & set(df.city))

dates_local_holidays = df_holidays[(df_holidays['locale'] == 'Local')].set_index('date').locale_name.to_dict()
df['local_holiday'] = [1 if date in dates_local_holidays and dates_local_holidays[date]== city else 0 for city,date in zip(df.city,df.date)]
df.local_holiday.value_counts()

{'El Carmen', 'Puyo', 'Guaranda', 'Salinas', 'Quevedo', 'Loja', 'Ibarra', 'Cuenca', 'Esmeraldas', 'Cayambe', 'Manta', 'Riobamba', 'Quito', 'Ambato', 'Machala', 'Santo Domingo', 'Guayaquil', 'Libertad', 'Latacunga'}


0    2320329
1       8745
Name: local_holiday, dtype: int64

In [24]:
print(set(df_holidays.locale_name) & set(df.state))

dates_regional_holidays = df_holidays[(df_holidays['locale'] == 'Regional')].set_index('date').locale_name.to_dict()
df['regional_holiday'] = [1 if date in dates_regional_holidays and dates_regional_holidays[date]== state else 0 for state,date in zip(df.state,df.date)]
df.regional_holiday.value_counts()

{'Santa Elena', 'Santo Domingo de los Tsachilas', 'Esmeraldas', 'Loja', 'Imbabura', 'Cotopaxi'}


0    2328282
1        792
Name: regional_holiday, dtype: int64

# Transactions

In [25]:
df_transactions = pd.read_csv('data/transactions_clean.csv')
verify(df_transactions)

(91152, 3)


Unnamed: 0,date,store_nbr,transactions
0,2013-01-01,25.0,770.0
1,2013-01-02,25.0,1038.0
2,2013-01-03,25.0,887.0
3,2013-01-04,25.0,1054.0
4,2013-01-05,25.0,1355.0


Unnamed: 0,date,store_nbr,transactions
91147,2017-08-11,52.0,2957.0
91148,2017-08-12,52.0,2804.0
91149,2017-08-13,52.0,2433.0
91150,2017-08-14,52.0,2074.0
91151,2017-08-15,52.0,2255.0


In [26]:
df_transactions['date'] = pd.to_datetime(df_transactions['date'])
df_transactions = df_transactions[df_transactions['date'] >= pd.to_datetime('2013-01-02')]

In [27]:
df_stores = df_transactions.copy()
df_stores = df_stores.sort_values(['store_nbr', 'date']).reset_index(drop=True)

cols_week = []
for lag in list(range(16,23)):
    df_stores[f'transactions_store_lag{lag}'] = df_stores.groupby(['store_nbr']).transactions.shift(lag)
    cols_week.append(f'transactions_store_lag{lag}')

cols_year = []
for lag in list(range(30,361,30)):
    df_stores[f'transactions_store_lag{lag}'] = df_stores.groupby(['store_nbr']).transactions.shift(lag)
    cols_year.append(f'transactions_store_lag{lag}')

df_stores['ema_transactions_store_week'] = df_stores.groupby(['store_nbr']).transactions.ewm(span=7, adjust=False, min_periods=7).mean().reset_index(drop=True)
df_stores['ema_transactions_store_lag_16_22'] = df_stores.groupby(['store_nbr']).ema_transactions_store_week.shift(16)

df_stores['ema_transactions_store_month'] = df_stores.groupby(['store_nbr']).transactions.rolling(30, min_periods=30).mean().reset_index(drop=True)
df_stores['ema_transactions_store_lag_16_45'] = df_stores.groupby(['store_nbr']).ema_transactions_store_month.shift(16)

df_stores['ema_transactions_store_year'] = df_stores.groupby(['store_nbr']).transactions.rolling(365, min_periods=365).mean().reset_index(drop=True)
df_stores['ema_transactions_store_lag_16_380'] = df_stores.groupby(['store_nbr']).ema_transactions_store_year.shift(16)

df_stores.drop(columns=['ema_transactions_store_week', 'ema_transactions_store_month', 'ema_transactions_store_year', 
                        'transactions'], 
        inplace=True)

verify(df_stores)
print(df_stores[[i for i in df_stores.columns if 'transactions_store_lag' in i]].isna().sum())

(91098, 24)


Unnamed: 0,date,store_nbr,transactions_store_lag16,transactions_store_lag17,transactions_store_lag18,transactions_store_lag19,transactions_store_lag20,transactions_store_lag21,transactions_store_lag22,transactions_store_lag30,transactions_store_lag60,transactions_store_lag90,transactions_store_lag120,transactions_store_lag150,transactions_store_lag180,transactions_store_lag210,transactions_store_lag240,transactions_store_lag270,transactions_store_lag300,transactions_store_lag330,transactions_store_lag360,ema_transactions_store_lag_16_22,ema_transactions_store_lag_16_45,ema_transactions_store_lag_16_380
0,2013-01-02,1.0,,,,,,,,,,,,,,,,,,,,,,
1,2013-01-03,1.0,,,,,,,,,,,,,,,,,,,,,,
2,2013-01-04,1.0,,,,,,,,,,,,,,,,,,,,,,
3,2013-01-05,1.0,,,,,,,,,,,,,,,,,,,,,,
4,2013-01-06,1.0,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,date,store_nbr,transactions_store_lag16,transactions_store_lag17,transactions_store_lag18,transactions_store_lag19,transactions_store_lag20,transactions_store_lag21,transactions_store_lag22,transactions_store_lag30,transactions_store_lag60,transactions_store_lag90,transactions_store_lag120,transactions_store_lag150,transactions_store_lag180,transactions_store_lag210,transactions_store_lag240,transactions_store_lag270,transactions_store_lag300,transactions_store_lag330,transactions_store_lag360,ema_transactions_store_lag_16_22,ema_transactions_store_lag_16_45,ema_transactions_store_lag_16_380
91093,2017-08-11,54.0,700.0,727.0,792.0,926.0,876.0,676.0,647.0,692.0,781.0,1112.0,826.0,699.0,1073.0,766.0,746.0,832.0,927.0,712.0,804.0,761.164081,788.733333,829.961644
91094,2017-08-12,54.0,662.0,700.0,727.0,792.0,926.0,876.0,676.0,683.0,770.0,1212.0,834.0,800.0,886.0,958.0,729.0,798.0,1142.0,778.0,679.0,736.373061,785.233333,829.821918
91095,2017-08-13,54.0,766.0,662.0,700.0,727.0,792.0,926.0,876.0,676.0,731.0,884.0,898.0,613.0,1035.0,1029.0,862.0,741.0,806.0,954.0,709.0,743.779796,788.7,830.079452
91096,2017-08-14,54.0,870.0,766.0,662.0,700.0,727.0,792.0,926.0,855.0,672.0,841.0,976.0,787.0,742.0,904.0,984.0,705.0,778.0,1091.0,752.0,775.334847,793.8,830.383562
91097,2017-08-15,54.0,1108.0,870.0,766.0,662.0,700.0,727.0,792.0,920.0,754.0,706.0,724.0,968.0,662.0,745.0,1165.0,714.0,678.0,847.0,916.0,858.501135,807.233333,830.684932


transactions_store_lag16               864
transactions_store_lag17               918
transactions_store_lag18               972
transactions_store_lag19              1026
transactions_store_lag20              1080
transactions_store_lag21              1134
transactions_store_lag22              1188
transactions_store_lag30              1620
transactions_store_lag60              3240
transactions_store_lag90              4860
transactions_store_lag120             6480
transactions_store_lag150             8100
transactions_store_lag180             9720
transactions_store_lag210            11340
transactions_store_lag240            12960
transactions_store_lag270            14580
transactions_store_lag300            16200
transactions_store_lag330            17820
transactions_store_lag360            19440
ema_transactions_store_lag_16_22      1188
ema_transactions_store_lag_16_45      2430
ema_transactions_store_lag_16_380    20520
dtype: int64


In [28]:
df_transactions = df_transactions.groupby("date").transactions.sum().reset_index()

cols_week = []
for lag in list(range(16,23)):
    df_transactions[f'transactions_lag{lag}'] = df_transactions.transactions.shift(lag)
    cols_week.append(f'transactions_lag{lag}')

cols_year = []
for lag in list(range(30,361,30)):
    df_transactions[f'transactions_lag{lag}'] = df_transactions.transactions.shift(lag)
    cols_year.append(f'transactions_lag{lag}')

df_transactions['ema_transactions_week'] = df_transactions.transactions.ewm(span=7, adjust=False, min_periods=7).mean().reset_index(drop=True)
df_transactions['ema_transactions_lag_16_22'] = df_transactions.ema_transactions_week.shift(16)

df_transactions['ema_transactions_month'] = df_transactions.transactions.rolling(30, min_periods=30).mean().reset_index(drop=True)
df_transactions['ema_transactions_lag_16_45'] = df_transactions.ema_transactions_month.shift(16)

df_transactions['ema_transactions_year'] = df_transactions.transactions.rolling(365, min_periods=365).mean().reset_index(drop=True)
df_transactions['ema_transactions_lag_16_380'] = df_transactions.ema_transactions_year.shift(16)

df_transactions.drop(columns=['ema_transactions_week', 'ema_transactions_month', 'ema_transactions_year',
                              'transactions'], 
        inplace=True)

verify(df_transactions)
print(df_transactions[[i for i in df_transactions.columns if 'transactions_lag' in i]].isna().sum())

(1687, 23)


Unnamed: 0,date,transactions_lag16,transactions_lag17,transactions_lag18,transactions_lag19,transactions_lag20,transactions_lag21,transactions_lag22,transactions_lag30,transactions_lag60,transactions_lag90,transactions_lag120,transactions_lag150,transactions_lag180,transactions_lag210,transactions_lag240,transactions_lag270,transactions_lag300,transactions_lag330,transactions_lag360,ema_transactions_lag_16_22,ema_transactions_lag_16_45,ema_transactions_lag_16_380
0,2013-01-02,,,,,,,,,,,,,,,,,,,,,,
1,2013-01-03,,,,,,,,,,,,,,,,,,,,,,
2,2013-01-04,,,,,,,,,,,,,,,,,,,,,,
3,2013-01-05,,,,,,,,,,,,,,,,,,,,,,
4,2013-01-06,,,,,,,,,,,,,,,,,,,,,,


Unnamed: 0,date,transactions_lag16,transactions_lag17,transactions_lag18,transactions_lag19,transactions_lag20,transactions_lag21,transactions_lag22,transactions_lag30,transactions_lag60,transactions_lag90,transactions_lag120,transactions_lag150,transactions_lag180,transactions_lag210,transactions_lag240,transactions_lag270,transactions_lag300,transactions_lag330,transactions_lag360,ema_transactions_lag_16_22,ema_transactions_lag_16_45,ema_transactions_lag_16_380
1682,2017-08-11,82113.0,81930.0,85120.0,94170.0,97358.0,88613.0,82423.0,84155.0,81080.0,121889.0,90752.0,78200.0,93429.0,84068.0,89417.0,77317.0,92768.0,74741.0,81063.0,86004.902876,88614.466667,87278.339726
1683,2017-08-12,78743.0,82113.0,81930.0,85120.0,94170.0,97358.0,88613.0,78720.0,81063.0,101257.0,86517.0,84574.0,93688.0,99921.0,91525.0,76651.0,93951.0,80789.0,81405.0,84189.427157,88598.366667,87281.112329
1684,2017-08-13,89197.0,78743.0,82113.0,81930.0,85120.0,94170.0,97358.0,87671.0,82752.0,83716.0,97165.0,79030.0,99371.0,97107.0,99717.0,81603.0,76614.0,94298.0,75159.0,85441.320368,88886.566667,87328.405479
1685,2017-08-14,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,94170.0,100293.0,81831.0,85528.0,90664.0,87303.0,80501.0,84038.0,114465.0,74999.0,73632.0,92375.0,82021.0,89137.740276,89630.066667,87377.958904
1686,2017-08-15,94899.0,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,96119.0,92852.0,85560.0,83043.0,99094.0,77887.0,80093.0,116289.0,81854.0,72931.0,76255.0,91798.0,90578.055207,89847.766667,87374.260274


transactions_lag16              16
transactions_lag17              17
transactions_lag18              18
transactions_lag19              19
transactions_lag20              20
transactions_lag21              21
transactions_lag22              22
transactions_lag30              30
transactions_lag60              60
transactions_lag90              90
transactions_lag120            120
transactions_lag150            150
transactions_lag180            180
transactions_lag210            210
transactions_lag240            240
transactions_lag270            270
transactions_lag300            300
transactions_lag330            330
transactions_lag360            360
ema_transactions_lag_16_22      22
ema_transactions_lag_16_45      45
ema_transactions_lag_16_380    380
dtype: int64


In [29]:
df = df.merge(df_stores, on=['date', 'store_nbr'], how='inner')
del df_stores

df = df.merge(df_transactions, on=['date'], how='inner')
del df_transactions

verify(df)

(2329074, 139)


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365,city,state,type,cluster,national_holiday,national_event,local_holiday,regional_holiday,transactions_store_lag16,transactions_store_lag17,transactions_store_lag18,transactions_store_lag19,transactions_store_lag20,transactions_store_lag21,transactions_store_lag22,transactions_store_lag30,transactions_store_lag60,transactions_store_lag90,transactions_store_lag120,transactions_store_lag150,transactions_store_lag180,transactions_store_lag210,transactions_store_lag240,transactions_store_lag270,transactions_store_lag300,transactions_store_lag330,transactions_store_lag360,ema_transactions_store_lag_16_22,ema_transactions_store_lag_16_45,ema_transactions_store_lag_16_380,transactions_lag16,transactions_lag17,transactions_lag18,transactions_lag19,transactions_lag20,transactions_lag21,transactions_lag22,transactions_lag30,transactions_lag60,transactions_lag90,transactions_lag120,transactions_lag150,transactions_lag180,transactions_lag210,transactions_lag240,transactions_lag270,transactions_lag300,transactions_lag330,transactions_lag360,ema_transactions_lag_16_22,ema_transactions_lag_16_45,ema_transactions_lag_16_380
0,2014-01-17,1,AUTOMOTIVE,2.0,0,2014,1,17,4,0,0.0,2.0,1.0,0.0,4.0,6.0,2.0,1.0,1.0,1.0,2.0,1.0,1.0,5.0,1.0,2.0,0.0,1.0,1.0,1.474098,1.833333,2.142466,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,4.0,239.0,389.0,386.0,432.0,297.0,317.0,198.0,200.0,329.0,159.0,224.0,364.0,206.0,284.0,206.0,344.0,164.0,163.0,242.154764,264.933333,253.635616,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13,0,0,0,0,0.0,1105.0,1388.0,416.0,1070.0,2022.0,1980.0,2337.0,1881.0,1346.0,1685.0,1670.0,448.0,1700.0,1865.0,1838.0,1358.0,1756.0,1762.0,965.07268,1592.7,1514.463014,1327.0,117214.0,117645.0,88729.0,102858.0,98330.0,96002.0,102356.0,76133.0,93205.0,67209.0,76629.0,80799.0,73717.0,71763.0,75157.0,91923.0,68246.0,68988.0,79226.96782,92817.166667,79747.205479
1,2014-01-17,1,BABY CARE,0.0,0,2014,1,17,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13,0,0,0,0,0.0,1105.0,1388.0,416.0,1070.0,2022.0,1980.0,2337.0,1881.0,1346.0,1685.0,1670.0,448.0,1700.0,1865.0,1838.0,1358.0,1756.0,1762.0,965.07268,1592.7,1514.463014,1327.0,117214.0,117645.0,88729.0,102858.0,98330.0,96002.0,102356.0,76133.0,93205.0,67209.0,76629.0,80799.0,73717.0,71763.0,75157.0,91923.0,68246.0,68988.0,79226.96782,92817.166667,79747.205479
2,2014-01-17,1,BEAUTY,4.0,0,2014,1,17,4,0,0.0,1.0,3.0,0.0,2.0,0.0,5.0,0.0,4.0,3.0,0.0,2.0,1.0,0.0,3.0,1.0,1.0,4.0,1.0,1.23805,1.5,1.780822,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,2.0,107.0,163.0,161.0,198.0,143.0,169.0,102.0,101.0,197.0,88.0,134.0,195.0,84.0,104.0,104.0,132.0,96.0,106.0,113.078043,132.666667,130.208219,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13,0,0,0,0,0.0,1105.0,1388.0,416.0,1070.0,2022.0,1980.0,2337.0,1881.0,1346.0,1685.0,1670.0,448.0,1700.0,1865.0,1838.0,1358.0,1756.0,1762.0,965.07268,1592.7,1514.463014,1327.0,117214.0,117645.0,88729.0,102858.0,98330.0,96002.0,102356.0,76133.0,93205.0,67209.0,76629.0,80799.0,73717.0,71763.0,75157.0,91923.0,68246.0,68988.0,79226.96782,92817.166667,79747.205479
3,2014-01-17,1,BEVERAGES,2085.0,0,2014,1,17,4,0,0.0,840.0,1001.0,242.0,803.0,1025.0,1009.0,1122.0,901.0,981.0,848.0,721.0,279.0,901.0,871.0,853.0,985.0,910.0,1037.0,652.12298,942.233333,858.071233,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,2175.0,92017.0,96102.0,78396.0,88614.0,77409.0,73168.0,63989.0,56867.0,80806.0,43939.0,50801.0,69215.0,50770.0,47331.0,47858.0,75968.0,41292.0,42931.0,65460.06741,71811.233333,59902.942466,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13,0,0,0,0,0.0,1105.0,1388.0,416.0,1070.0,2022.0,1980.0,2337.0,1881.0,1346.0,1685.0,1670.0,448.0,1700.0,1865.0,1838.0,1358.0,1756.0,1762.0,965.07268,1592.7,1514.463014,1327.0,117214.0,117645.0,88729.0,102858.0,98330.0,96002.0,102356.0,76133.0,93205.0,67209.0,76629.0,80799.0,73717.0,71763.0,75157.0,91923.0,68246.0,68988.0,79226.96782,92817.166667,79747.205479
4,2014-01-17,1,BOOKS,0.0,0,2014,1,17,4,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4618.153,5754.39498,1819.251996,4532.439,7440.179,6151.595,9382.83803,6396.04003,5723.31102,5064.11799,4658.069996,1905.67,5608.436,5896.853,5504.771,5518.596,5325.731,5298.986,4082.077424,6532.610967,5214.379628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,93.96,93.54,93.78,92.15,91.45,92.39,92.39,92.39,92.915199,95.927333,95.927333,Quito,Pichincha,D,13,0,0,0,0,0.0,1105.0,1388.0,416.0,1070.0,2022.0,1980.0,2337.0,1881.0,1346.0,1685.0,1670.0,448.0,1700.0,1865.0,1838.0,1358.0,1756.0,1762.0,965.07268,1592.7,1514.463014,1327.0,117214.0,117645.0,88729.0,102858.0,98330.0,96002.0,102356.0,76133.0,93205.0,67209.0,76629.0,80799.0,73717.0,71763.0,75157.0,91923.0,68246.0,68988.0,79226.96782,92817.166667,79747.205479


Unnamed: 0,date,store_nbr,family,sales,onpromotion,year,month,day,day_of_week,business_day,sales_store_family_lag16,sales_store_family_lag17,sales_store_family_lag18,sales_store_family_lag19,sales_store_family_lag20,sales_store_family_lag21,sales_store_family_lag22,sales_store_family_lag30,sales_store_family_lag60,sales_store_family_lag90,sales_store_family_lag120,sales_store_family_lag150,sales_store_family_lag180,sales_store_family_lag210,sales_store_family_lag240,sales_store_family_lag270,sales_store_family_lag300,sales_store_family_lag330,sales_store_family_lag360,ema_sales_store_family_lag_16_22,ema_sales_store_family_lag_16_45,ema_sales_store_family_lag_16_380,sales_store_lag16,sales_store_lag17,sales_store_lag18,sales_store_lag19,sales_store_lag20,sales_store_lag21,sales_store_lag22,sales_store_lag30,sales_store_lag60,sales_store_lag90,sales_store_lag120,sales_store_lag150,sales_store_lag180,sales_store_lag210,sales_store_lag240,sales_store_lag270,sales_store_lag300,sales_store_lag330,sales_store_lag360,ema_sales_store_lag_16_22,ema_sales_store_lag_16_45,ema_sales_store_lag_16_380,sales_family_lag16,sales_family_lag17,sales_family_lag18,sales_family_lag19,sales_family_lag20,sales_family_lag21,sales_family_lag22,sales_family_lag30,sales_family_lag60,sales_family_lag90,sales_family_lag120,sales_family_lag150,sales_family_lag180,sales_family_lag210,sales_family_lag240,sales_family_lag270,sales_family_lag300,sales_family_lag330,sales_family_lag360,ema_sales_family_lag_16_22,ema_sales_family_lag_16_45,ema_sales_family_lag_16_380,dcoilwtico,dcoilwtico_lag1,dcoilwtico_lag2,dcoilwtico_lag3,dcoilwtico_lag4,dcoilwtico_lag5,dcoilwtico_lag6,dcoilwtico_lag7,ema_dcoilwtico_lag_1_7,ema_dcoilwtico_lag_1_30,ema_dcoilwtico_lag_1_365,city,state,type,cluster,national_holiday,national_event,local_holiday,regional_holiday,transactions_store_lag16,transactions_store_lag17,transactions_store_lag18,transactions_store_lag19,transactions_store_lag20,transactions_store_lag21,transactions_store_lag22,transactions_store_lag30,transactions_store_lag60,transactions_store_lag90,transactions_store_lag120,transactions_store_lag150,transactions_store_lag180,transactions_store_lag210,transactions_store_lag240,transactions_store_lag270,transactions_store_lag300,transactions_store_lag330,transactions_store_lag360,ema_transactions_store_lag_16_22,ema_transactions_store_lag_16_45,ema_transactions_store_lag_16_380,transactions_lag16,transactions_lag17,transactions_lag18,transactions_lag19,transactions_lag20,transactions_lag21,transactions_lag22,transactions_lag30,transactions_lag60,transactions_lag90,transactions_lag120,transactions_lag150,transactions_lag180,transactions_lag210,transactions_lag240,transactions_lag270,transactions_lag300,transactions_lag330,transactions_lag360,ema_transactions_lag_16_22,ema_transactions_lag_16_45,ema_transactions_lag_16_380
2329069,2017-08-15,54,POULTRY,59.619,0,2017,8,15,1,0,72.895996,82.296,46.656,52.086,34.856,50.686,51.209,103.902,72.77,44.233,56.169,47.291,46.572,57.161,66.212,91.916,34.801,49.364998,29.669,64.53319,69.364066,62.222482,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,25089.244942,22477.071865,28521.282225,14552.290992,16548.68701,16769.651937,19464.66103,26360.76801,31804.989046,18491.03,17711.450004,23457.77709,16029.954,17694.390035,26878.77905,28038.540014,15365.961035,16143.457978,21158.702034,22225.768154,21734.075462,21059.267403,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3,0,0,0,0,1108.0,870.0,766.0,662.0,700.0,727.0,792.0,920.0,754.0,706.0,724.0,968.0,662.0,745.0,1165.0,714.0,678.0,847.0,916.0,858.501135,807.233333,830.684932,94899.0,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,96119.0,92852.0,85560.0,83043.0,99094.0,77887.0,80093.0,116289.0,81854.0,72931.0,76255.0,91798.0,90578.055207,89847.766667,87374.260274
2329070,2017-08-15,54,PREPARED FOODS,94.0,0,2017,8,15,1,0,124.0,81.0,68.0,73.0,81.0,65.0,112.0,50.0,79.0,81.0,29.0,70.0,49.0,98.0,103.0,74.0,70.0,80.0,52.0,91.53548,85.0,75.816438,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,5507.523005,5408.918016,4722.614983,4168.786016,4456.418986,4129.020989,4823.321997,5534.052983,5001.44601,4738.09202,4342.841024,7189.56103,5153.443006,5341.388989,7448.360985,5268.237992,4946.68898,5305.362987,6106.435004,4968.95479,4894.940064,5550.034376,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3,0,0,0,0,1108.0,870.0,766.0,662.0,700.0,727.0,792.0,920.0,754.0,706.0,724.0,968.0,662.0,745.0,1165.0,714.0,678.0,847.0,916.0,858.501135,807.233333,830.684932,94899.0,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,96119.0,92852.0,85560.0,83043.0,99094.0,77887.0,80093.0,116289.0,81854.0,72931.0,76255.0,91798.0,90578.055207,89847.766667,87374.260274
2329071,2017-08-15,54,PRODUCE,915.371,76,2017,8,15,1,0,793.147,729.844,562.097,520.489,559.658,914.959,654.93,981.712,580.389,506.987,559.538,659.408,426.046,862.417,816.129,613.487,555.718,549.655,713.194,688.9917,698.255533,655.923181,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,146045.795,131988.66197,111948.89,90481.2861,145917.9191,115457.6587,117117.09501,157883.056,124173.9535,166802.7749,115072.02088,139267.04003,95892.37803,124250.2049,155663.9546,102969.51783,138579.652,104423.301,120558.7378,127348.465736,129450.195936,125742.800885,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3,0,0,0,0,1108.0,870.0,766.0,662.0,700.0,727.0,792.0,920.0,754.0,706.0,724.0,968.0,662.0,745.0,1165.0,714.0,678.0,847.0,916.0,858.501135,807.233333,830.684932,94899.0,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,96119.0,92852.0,85560.0,83043.0,99094.0,77887.0,80093.0,116289.0,81854.0,72931.0,76255.0,91798.0,90578.055207,89847.766667,87374.260274
2329072,2017-08-15,54,SCHOOL AND OFFICE SUPPLIES,0.0,0,2017,8,15,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.320649e-09,0.0,1.679452,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,2027.0,1345.0,567.0,244.0,166.0,174.0,192.0,121.0,58.0,215.0,718.0,150.0,60.0,80.0,129.0,71.0,69.0,146.0,787.0,925.102321,239.033333,300.334247,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3,0,0,0,0,1108.0,870.0,766.0,662.0,700.0,727.0,792.0,920.0,754.0,706.0,724.0,968.0,662.0,745.0,1165.0,714.0,678.0,847.0,916.0,858.501135,807.233333,830.684932,94899.0,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,96119.0,92852.0,85560.0,83043.0,99094.0,77887.0,80093.0,116289.0,81854.0,72931.0,76255.0,91798.0,90578.055207,89847.766667,87374.260274
2329073,2017-08-15,54,SEAFOOD,3.0,0,2017,8,15,1,0,4.0,4.0,4.0,2.0,3.0,7.0,1.0,5.0,2.0,2.0,0.0,3.0,1.0,2.0,5.0,0.0,0.0,0.0,2.0,3.621519,3.1,1.967123,15815.709996,10878.746,9531.089998,6911.345,6572.494,8241.906,10245.246998,16959.135,10540.249999,17721.085,8402.924,10604.055,5518.609,7374.671,12540.281,5940.149002,6340.114,8185.356998,8424.711,11041.179674,10163.7067,8816.963789,1379.617003,1297.514006,1307.945999,652.069999,839.655002,842.090999,995.327001,1413.877,1315.639001,1195.405,998.932001,1597.305998,858.070005,990.695003,1289.671995,1362.009002,1080.609002,1138.018999,1447.197998,1155.706294,1131.1439,1213.79934,47.57,47.59,48.81,48.81,48.81,48.54,49.59,49.07,48.62108,48.274,48.274,El Carmen,Manabi,C,3,0,0,0,0,1108.0,870.0,766.0,662.0,700.0,727.0,792.0,920.0,754.0,706.0,724.0,968.0,662.0,745.0,1165.0,714.0,678.0,847.0,916.0,858.501135,807.233333,830.684932,94899.0,100227.0,89197.0,78743.0,82113.0,81930.0,85120.0,96119.0,92852.0,85560.0,83043.0,99094.0,77887.0,80093.0,116289.0,81854.0,72931.0,76255.0,91798.0,90578.055207,89847.766667,87374.260274


In [30]:
df.to_parquet('data/modelling.parquet', index=False)