In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
from datetime import datetime, timedelta
import gc

In [3]:
from fastai import *
from fastai.vision import *
from fastai.tabular import *

In [4]:
pd.options.display.max_columns = 50

## Set up the path

In [5]:
path = Path('/kaggle/m5_forecasting/')
assert(path.exists())

In [6]:
path.ls()

[PosixPath('/kaggle/m5_forecasting/sales_train_validation.csv'),
 PosixPath('/kaggle/m5_forecasting/m5_model_0.lgb'),
 PosixPath('/kaggle/m5_forecasting/m5_model_1.lgb'),
 PosixPath('/kaggle/m5_forecasting/m5_model_3.lgb'),
 PosixPath('/kaggle/m5_forecasting/m5_best_2.pth'),
 PosixPath('/kaggle/m5_forecasting/m5_best_1.pth'),
 PosixPath('/kaggle/m5_forecasting/walmartTrends0.csv'),
 PosixPath('/kaggle/m5_forecasting/m5_model_2.lgb'),
 PosixPath('/kaggle/m5_forecasting/m5_model.lgb'),
 PosixPath('/kaggle/m5_forecasting/calendar.csv'),
 PosixPath('/kaggle/m5_forecasting/sample_submission.csv'),
 PosixPath('/kaggle/m5_forecasting/m5_model_4.lgb'),
 PosixPath('/kaggle/m5_forecasting/m5_dt'),
 PosixPath('/kaggle/m5_forecasting/sell_prices.csv')]

## Read Data

In [7]:
# Column dtypes passed to pd.read_csv when loading sell_prices.csv.
PRICE_DTYPES = {"store_id": "category", "item_id": "category", "wm_yr_wk": "int16","sell_price":"float32" }
# Earlier variant that also read event_name_1 as a category (kept for reference):
# CAL_DTYPES = {"event_name_1": "category", "event_name_2": "category", "event_type_1": "category", 
#          "event_type_2": "category", "weekday": "category", 'wm_yr_wk': 'int16', "wday": "int16",
#         "month": "int16", "year": "int16", "snap_CA": "float32", 'snap_TX': 'float32', 'snap_WI': 'float32' }
# Column dtypes passed to pd.read_csv when loading calendar.csv. event_name_1 is
# deliberately not listed, so it is read with pandas' default dtype; it is
# integer-encoded separately further down via create_event_map / cal.replace.
CAL_DTYPES = {"event_name_2": "category", "event_type_1": "category", 
         "event_type_2": "category", "weekday": "category", 'wm_yr_wk': 'int16', "wday": "int16",
        "month": "int16", "year": "int16", "snap_CA": "float32", 'snap_TX': 'float32', 'snap_WI': 'float32' }

In [8]:
sales_train_validation = pd.read_csv(path/"sales_train_validation.csv", nrows=10)

In [9]:
def read_data():
    """Load the three auxiliary CSV files that live under `path`.

    Returns:
        A `(prices, cal, walmart_trends)` tuple of DataFrames. `prices` and
        `cal` are parsed with the PRICE_DTYPES / CAL_DTYPES column dtypes;
        `walmart_trends` is read with pandas' default dtype inference.
    """
    typed_sources = {"sell_prices.csv": PRICE_DTYPES, "calendar.csv": CAL_DTYPES}
    prices, cal = (
        pd.read_csv(path/file_name, dtype=column_dtypes)
        for file_name, column_dtypes in typed_sources.items()
    )
    walmart_trends = pd.read_csv(path/"walmartTrends0.csv")
    return prices, cal, walmart_trends

In [10]:
prices, cal, walmart_trends = read_data()

In [11]:
cal.head()

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,d,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
0,2011-01-29,11101,Saturday,1,1,2011,d_1,,,,,0.0,0.0,0.0
1,2011-01-30,11101,Sunday,2,1,2011,d_2,,,,,0.0,0.0,0.0
2,2011-01-31,11101,Monday,3,1,2011,d_3,,,,,0.0,0.0,0.0
3,2011-02-01,11101,Tuesday,4,2,2011,d_4,,,,,1.0,1.0,0.0
4,2011-02-02,11101,Wednesday,5,2,2011,d_5,,,,,1.0,0.0,1.0


#### Pre-process calendar

In [12]:
cal["date"] = pd.to_datetime(cal["date"], infer_datetime_format=True)

In [13]:
cal['date'].min(), cal['date'].max()

(Timestamp('2011-01-29 00:00:00'), Timestamp('2016-06-19 00:00:00'))

In [14]:
cal.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1969 entries, 0 to 1968
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          1969 non-null   datetime64[ns]
 1   wm_yr_wk      1969 non-null   int16         
 2   weekday       1969 non-null   category      
 3   wday          1969 non-null   int16         
 4   month         1969 non-null   int16         
 5   year          1969 non-null   int16         
 6   d             1969 non-null   object        
 7   event_name_1  162 non-null    object        
 8   event_type_1  162 non-null    category      
 9   event_name_2  5 non-null      category      
 10  event_type_2  5 non-null      category      
 11  snap_CA       1969 non-null   float32       
 12  snap_TX       1969 non-null   float32       
 13  snap_WI       1969 non-null   float32       
dtypes: category(4), datetime64[ns](1), float32(3), int16(4), object(2)
memory usage: 93.2+ K

In [15]:
def create_event_map(field):
    """Map every distinct value of `cal[field]` to an integer code.

    Codes are assigned in the order `Series.unique()` returns the values,
    starting at 0. Reads the module-level `cal` DataFrame.

    Args:
        field: name of the calendar column to encode.

    Returns:
        dict of `{value: code}`.
    """
    distinct_values = cal[field].unique()
    return dict(zip(distinct_values, range(len(distinct_values))))

In [16]:
event_name_1_map = create_event_map('event_name_1')
cal.replace({'event_name_1': event_name_1_map}, inplace=True)

In [17]:
def get_elapsed(dt, event_name='Christmas', col='event_name_1', event_map=event_name_1_map, before=False):
    """Add a column with the number of days between each row and the last seen event.

    `dt` is sorted IN PLACE by 'date' (ascending, or descending when `before`
    is True) and then walked row by row. Whenever `dt[col]` equals the code
    `event_map[event_name]`, that row's date becomes the reference date. Each
    row stores `date - reference date` in whole days, so values are negative
    in the descending (`before=True`) pass. Rows visited before the first
    occurrence have no reference date and end up as 0.

    Args:
        dt: DataFrame with a 'date' column and the column named by `col`.
        event_name: key into `event_map`; also used to build the column name.
        col: column holding the integer event codes.
        event_map: mapping from event name to integer code.
        before: walk backwards in time instead of forwards.

    Returns:
        None. The result is written to `dt['after_<event>']` or
        `dt['before_<event>']` as int16.
    """
    dt.sort_values(['date'], ascending=[not before], inplace=True)
    one_day = np.timedelta64(1, 'D')
    reference_date = np.datetime64()  # NaT until the event is first seen
    target_code = event_map[event_name]

    day_counts = []
    for code, current_date in zip(dt[col].values, dt.date.values):
        reference_date = current_date if code == target_code else reference_date
        gap = (current_date - reference_date).astype('timedelta64[D]')
        day_counts.append(gap / one_day)

    prefix = 'before' if before else 'after'
    field_name = f"{prefix}_{event_name.lower().replace(' ', '_')}"
    # NaN marks rows with no reference date yet; they are stored as 0.
    dt[field_name] = pd.Series(day_counts, index=dt.index).fillna(0).astype('int16')

In [18]:
# get_elapsed(cal, 'Christmas', 'event_name_1', event_name_1_map, False)
# get_elapsed(cal, 'Easter', 'event_name_1', event_name_1_map, False)

In [19]:
def numericalize(df, type_map):
    """Replace categorical columns of `df` with non-negative int16 codes, in place.

    Only columns whose entry in `type_map` is the string "category" are
    touched. Codes are shifted so the smallest one is 0; because pandas uses
    code -1 for missing values, a column with missing values maps them to 0
    and every real category to 1 and above.

    Args:
        df: DataFrame whose listed columns already have a categorical dtype.
        type_map: dict of column name -> dtype string.
    """
    categorical_cols = [name for name, kind in type_map.items() if kind == "category"]
    for name in categorical_cols:
        codes = df[name].cat.codes.astype('int16')
        df[name] = codes - codes.min()

In [20]:
numericalize(prices, PRICE_DTYPES)
numericalize(cal, CAL_DTYPES)

In [21]:
def convert_to_type(df, cols, dt_type):
    """Cast each column in `cols` of `df` to `dt_type`, in place.

    The column name is printed before each cast as a progress trace.

    Args:
        df: DataFrame to modify.
        cols: iterable of column names.
        dt_type: target dtype accepted by `Series.astype`.
    """
    for column in cols:
        print(column)
        converted = df[column].astype(dt_type)
        df[column] = converted

def convert_uint8(df, cols):
    """Cast the columns `cols` of `df` to uint8 in place (delegates to convert_to_type)."""
    convert_to_type(df, cols, dt_type="uint8")
    
def convert_float16(df, cols):
    """Cast the columns `cols` of `df` to float16 in place (delegates to convert_to_type)."""
    convert_to_type(df, cols, dt_type="float16")

In [22]:
uint8_types= ['event_name_1', 'event_type_1', 'event_name_2', 'event_type_2', 'month', 'wday', 'weekday', 
              'snap_CA', 'snap_TX', 'snap_WI']
convert_uint8(cal, uint8_types)
cal.info()

event_name_1
event_type_1
event_name_2
event_type_2
month
wday
weekday
snap_CA
snap_TX
snap_WI
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1969 entries, 0 to 1968
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          1969 non-null   datetime64[ns]
 1   wm_yr_wk      1969 non-null   int16         
 2   weekday       1969 non-null   uint8         
 3   wday          1969 non-null   uint8         
 4   month         1969 non-null   uint8         
 5   year          1969 non-null   int16         
 6   d             1969 non-null   object        
 7   event_name_1  1969 non-null   uint8         
 8   event_type_1  1969 non-null   uint8         
 9   event_name_2  1969 non-null   uint8         
 10  event_type_2  1969 non-null   uint8         
 11  snap_CA       1969 non-null   uint8         
 12  snap_TX       1969 non-null   uint8         
 13  snap_WI       1969 non-null   uint8        

In [23]:
# Number of future days for which read_dt appends empty columns and the
# prediction loop produces forecasts.
pred_days = 28
# Length (in days) of the history window kept before each forecast day when
# test-time features are built.
max_lags = pred_days * 2 + 1
print('max_lags', max_lags)
# Names of the daily sales columns ("d_<n>") taken from the CSV header; only
# two rows are read because just the column names are needed.
num_cols = [c for c in pd.read_csv(path/"sales_train_validation.csv", nrows=2).columns if c.find('d_') == 0]
# Count of daily columns; read_dt uses it as the number of the last day column.
tr_last = len(num_cols)
# Non-daily columns of the sales file that read_dt keeps as id_vars.
catcols = ['id', 'item_id', 'dept_id','store_id', 'cat_id', 'state_id']
# For more training data use a lower value
FIRST_DAY=1

max_lags 57


In [24]:
def read_dt(is_train = True, nrows = None, first_day = 1200):
    """Build the long-format sales table joined with calendar and prices.

    Steps: read the wide sales CSV (day columns `d_<start_day>`..`d_<tr_last>`
    plus `catcols`), integer-encode every `catcols` column except 'id',
    optionally append `pred_days` empty future day columns, melt to one row
    per (series, day), then merge the module-level `cal` on 'd' and `prices`
    on store/item/week. Both merges use pandas' default join type.

    Args:
        is_train: when False, only the last `max_lags` days of history are
            kept (unless `first_day` is later) and empty columns for the
            forecast days are appended.
        nrows: forwarded to `pd.read_csv` to limit the number of series read.
            NOTE(review): codes are derived from the categories present in
            the rows actually read, so with `nrows` set they may not line up
            with the codes in `prices` - confirm before relying on it.
        first_day: earliest day number to load; the effective start is the
            larger of this and the mode-dependent minimum.

    Returns:
        The merged long-format DataFrame with a 'sales' value column.
    """
    earliest_allowed = 1 if is_train else tr_last - max_lags
    start_day = max(earliest_allowed, first_day)
    print('start_day', start_day)

    day_cols = [f"d_{day}" for day in range(start_day,tr_last+1)]
    encoded_cols = [name for name in catcols if name != 'id']
    dtype = {
        **{num: 'float32' for num in num_cols},
        **{cat: 'category' for cat in encoded_cols},
    }
    dt = pd.read_csv(path/"sales_train_validation.csv", nrows=nrows, usecols = catcols + day_cols, dtype=dtype)

    # Turn the categorical id columns into zero-based int16 codes.
    for name in encoded_cols:
        codes = dt[name].cat.codes.astype('int16')
        dt[name] = codes - codes.min()

    if not is_train:
        # Placeholder columns for the days that still have to be predicted.
        for day in range(tr_last + 1, tr_last + 1 + pred_days):
            dt[f'd_{day}'] = np.nan

    value_cols = [col for col in dt.columns if col.startswith("d_")]
    dt = dt.melt(id_vars=catcols, value_vars=value_cols, var_name='d', value_name='sales')
    dt = dt.merge(cal, on='d', copy=False)
    return dt.merge(prices, on=['store_id', 'item_id', 'wm_yr_wk'], copy=False)

In [25]:
%%time

dt = read_dt(first_day=FIRST_DAY)

start_day 1
CPU times: user 16.8 s, sys: 2.95 s, total: 19.7 s
Wall time: 19.7 s


In [26]:
dt.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 46027957 entries, 0 to 46027956
Data columns (total 22 columns):
 #   Column        Dtype         
---  ------        -----         
 0   id            object        
 1   item_id       int16         
 2   dept_id       int16         
 3   store_id      int16         
 4   cat_id        int16         
 5   state_id      int16         
 6   d             object        
 7   sales         float32       
 8   date          datetime64[ns]
 9   wm_yr_wk      int16         
 10  weekday       uint8         
 11  wday          uint8         
 12  month         uint8         
 13  year          int16         
 14  event_name_1  uint8         
 15  event_type_1  uint8         
 16  event_name_2  uint8         
 17  event_type_2  uint8         
 18  snap_CA       uint8         
 19  snap_TX       uint8         
 20  snap_WI       uint8         
 21  sell_price    float32       
dtypes: datetime64[ns](1), float32(2), int16(7), object(2), uint8(10)

In [27]:
dt

Unnamed: 0,id,item_id,dept_id,store_id,cat_id,state_id,d,sales,date,wm_yr_wk,weekday,wday,month,year,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI,sell_price
0,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_1,12.0,2011-01-29,11101,2,1,1,2011,0,0,0,0,0,0,0,0.46
1,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_2,15.0,2011-01-30,11101,3,2,1,2011,0,0,0,0,0,0,0,0.46
2,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_3,0.0,2011-01-31,11101,1,3,1,2011,0,0,0,0,0,0,0,0.46
3,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_4,0.0,2011-02-01,11101,5,4,2,2011,0,0,0,0,1,1,0,0.46
4,HOBBIES_1_008_CA_1_validation,7,0,0,0,0,d_5,0.0,2011-02-02,11101,6,5,2,2011,0,0,0,0,1,0,1,0.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46027952,FOODS_3_825_WI_3_validation,3046,6,9,2,2,d_1913,0.0,2016-04-24,11613,3,2,4,2016,0,0,0,0,0,0,0,3.98
46027953,FOODS_3_826_WI_3_validation,3047,6,9,2,2,d_1912,1.0,2016-04-23,11613,2,1,4,2016,0,0,0,0,0,0,0,1.28
46027954,FOODS_3_826_WI_3_validation,3047,6,9,2,2,d_1913,3.0,2016-04-24,11613,3,2,4,2016,0,0,0,0,0,0,0,1.28
46027955,FOODS_3_827_WI_3_validation,3048,6,9,2,2,d_1912,0.0,2016-04-23,11613,2,1,4,2016,0,0,0,0,0,0,0,1.00


In [28]:
dt.date.min(), dt.date.max()

(Timestamp('2011-01-29 00:00:00'), Timestamp('2016-04-24 00:00:00'))

## Create features

In [29]:
def prepare_date_boolean_attrs(df):
    """Add six uint8 calendar-boundary flags derived from `df['date']`, in place.

    Columns created, in this order: Is_month_end, Is_month_start,
    Is_quarter_end, Is_quarter_start, Is_year_end, Is_year_start. Each is 1
    where the matching `Series.dt.is_*` accessor is True, else 0.

    Args:
        df: DataFrame with a datetime 'date' column.
    """
    boundaries = ('month_end', 'month_start', 'quarter_end', 'quarter_start', 'year_end', 'year_start')
    date_accessor = df['date'].dt
    for boundary in boundaries:
        df[f'Is_{boundary}'] = getattr(date_accessor, f'is_{boundary}').astype('uint8')

In [30]:
def prepare_day_of_year(df):
    """Add a uint16 'Dayofyear' column (1-based day within the year) from `df['date']`, in place."""
    df['Dayofyear'] = df['date'].dt.dayofyear.astype('uint16')

In [31]:
def _week_of_year(dates):
    """Return the ISO week number for each timestamp in the datetime Series `dates`.

    `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0;
    `Series.dt.isocalendar().week` is its replacement. Prefer the replacement
    when the installed pandas has it and fall back to the legacy accessor
    otherwise, so the notebook runs on both.
    """
    accessor = dates.dt
    if hasattr(accessor, "isocalendar"):
        return accessor.isocalendar().week
    return accessor.weekofyear


def create_fea(dt):
    """Add lag, rolling-mean and calendar features to `dt`, in place.

    Columns are created in this order (downstream code takes the model input
    order from `dt.columns`): lag_7, lag_28, lag_1, rmean_7_7, rmean_28_7,
    rmean_7_28, rmean_28_28, then any of wday/week/month/quarter/year/mday
    that are missing, then the Is_* boundary flags and Dayofyear.

    Lags are taken with a positional `shift` inside each 'id' group, so rows
    must already be in chronological order within every id. The first rows of
    each series therefore hold NaN in the lag and rolling columns.

    Args:
        dt: long-format DataFrame with at least 'id', 'sales' and a datetime
            'date' column.

    Returns:
        None; `dt` is modified.
    """
    lags = [7, 28]
    simple_lags = [1]
    wins = [7, 28]

    # Per-series sales shifted back by `lag` rows. The windowed lags come first,
    # then the simple ones, to keep the established column order.
    for lag in lags + simple_lags:
        dt[f"lag_{lag}"] = dt.groupby("id")["sales"].shift(lag)

    # Rolling mean of each windowed lag column, again computed per series.
    for win in wins:
        for lag in lags:
            lag_col = f"lag_{lag}"
            dt[f"rmean_{lag}_{win}"] = dt[["id", lag_col]].groupby("id")[lag_col].transform(lambda x : x.rolling(win).mean())

    # Feature name -> name of the `Series.dt` attribute that produces it.
    date_features = {
        "wday": "weekday",
        "week": "weekofyear",
        "month": "month",
        "quarter": "quarter",
        "year": "year",
        "mday": "day",
    }

    for date_feat_name, date_feat_func in date_features.items():
        if date_feat_name in dt.columns:
            # Already supplied (e.g. by the calendar merge): only normalise the dtype.
            dt[date_feat_name] = dt[date_feat_name].astype("int16")
        elif date_feat_func == "weekofyear":
            dt[date_feat_name] = _week_of_year(dt["date"]).astype("int16")
        else:
            dt[date_feat_name] = getattr(dt["date"].dt, date_feat_func).astype("int16")

    prepare_date_boolean_attrs(dt)
    prepare_day_of_year(dt)

    # These all fit in 0..255, so shrink them to save memory.
    uint8_types= ['month', 'wday', 'quarter', 'mday', 'week']
    convert_uint8(dt, uint8_types)

In [32]:
%%time
create_fea(dt)

event_name_1
event_type_1
event_name_2
event_type_2
month
wday
weekday
snap_CA
snap_TX
snap_WI
CPU times: user 2min 1s, sys: 6.65 s, total: 2min 7s
Wall time: 2min 7s


In [33]:
dt.dropna(inplace = True)

In [34]:
dt.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 44351007 entries, 617364 to 46027956
Data columns (total 39 columns):
 #   Column            Dtype         
---  ------            -----         
 0   id                object        
 1   item_id           int16         
 2   dept_id           int16         
 3   store_id          int16         
 4   cat_id            int16         
 5   state_id          int16         
 6   d                 object        
 7   sales             float32       
 8   date              datetime64[ns]
 9   wm_yr_wk          int16         
 10  weekday           uint8         
 11  wday              uint8         
 12  month             uint8         
 13  year              int16         
 14  event_name_1      uint8         
 15  event_type_1      uint8         
 16  event_name_2      uint8         
 17  event_type_2      uint8         
 18  snap_CA           uint8         
 19  snap_TX           uint8         
 20  snap_WI           uint8         
 21  s

## Training Preparation

In [35]:
dt.columns

Index(['id', 'item_id', 'dept_id', 'store_id', 'cat_id', 'state_id', 'd',
       'sales', 'date', 'wm_yr_wk', 'weekday', 'wday', 'month', 'year',
       'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
       'snap_CA', 'snap_TX', 'snap_WI', 'sell_price', 'lag_7', 'lag_28',
       'lag_1', 'rmean_7_7', 'rmean_28_7', 'rmean_7_28', 'rmean_28_28', 'week',
       'quarter', 'mday', 'Is_month_end', 'Is_month_start', 'Is_quarter_end',
       'Is_quarter_start', 'Is_year_end', 'Is_year_start', 'Dayofyear'],
      dtype='object')

In [36]:
dt.to_pickle(path/'m5_dt')

In [37]:
!du -h {path/'m5_dt'}

4.2G	/kaggle/m5_forecasting/m5_dt


In [38]:
# Names of the integer-coded categorical feature columns.
# NOTE(review): not referenced again in the visible notebook - confirm it is still needed.
cat_feats = ['item_id', 'dept_id', 'store_id', 'cat_id', 'state_id', 'event_name_1', 
             'event_type_1', 'event_name_2', 'event_type_2']
# Columns kept out of the model inputs; this includes the target column 'sales'.
ignore_cols = ['id', 'date', 'sales', 'd', 'wm_yr_wk', 'weekday']
# Model input columns, in the order they appear in dt.columns.
train_cols = [c for c in dt.columns if c not in ignore_cols]

In [39]:
# Hold out 10% of the rows for validation; the fixed seed makes the draw repeatable.
valid_size = int(dt.shape[0] * 0.1)
np.random.seed(777)

# Index labels are drawn uniformly at random without replacement.
# NOTE(review): the split is not by date, so validation rows are interleaved in
# time with training rows - confirm this is the intended validation scheme.
valid_idx = np.random.choice(dt.index.values, valid_size, replace=False)
# Every remaining index label goes to training.
train_idx = np.setdiff1d(dt.index.values, valid_idx)
# Sanity check: the two index sets together account for every row.
assert valid_idx.size + train_idx.size == dt.shape[0]

In [40]:
class ArrayDataset(Dataset):
    """In-memory dataset backed by two float32 tensors.

    `x` and `y` are pandas objects (anything exposing `.values`); their data
    is copied into tensors once at construction time. Indexing returns the
    `(features, target)` pair at that position.
    """

    def __init__(self, x, y):
        features = torch.tensor(x.values, dtype=torch.float32)
        targets = torch.tensor(y.values, dtype=torch.float32)
        self.x, self.y = features, targets
        # Inputs and targets must describe the same number of samples.
        assert(len(self.x) == len(self.y))

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, i):
        sample, target = self.x[i], self.y[i]
        return sample, target

    def __repr__(self):
        x_shape, y_shape = self.x.shape, self.y.shape
        return f'x: {x_shape} y: {y_shape}'

In [41]:
train_x = dt[train_cols].loc[train_idx]
train_y = dt['sales'].loc[train_idx]

In [42]:
del dt
gc.collect()

100

In [43]:
train_ds = ArrayDataset(train_x, train_y)

In [44]:
del train_x, train_y
gc.collect()

40

In [45]:
dt = pd.read_pickle(path/'m5_dt')

In [46]:
valid_x = dt[train_cols].loc[valid_idx]
valid_y = dt['sales'].loc[valid_idx]

In [47]:
valid_ds = ArrayDataset(valid_x, valid_y)

In [48]:
del dt, valid_x, valid_y
gc.collect()

80

In [49]:
bs = 2048
data = DataBunch.create(train_ds, valid_ds, bs=bs, num_workers=11)
data.one_batch()[0]

tensor([[2.0770e+03, 5.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         3.1600e+02],
        [1.3080e+03, 3.0000e+00, 3.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         1.7000e+02],
        [2.0300e+03, 3.0000e+00, 4.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         2.3900e+02],
        ...,
        [4.6700e+02, 1.0000e+00, 9.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         2.3100e+02],
        [1.7490e+03, 4.0000e+00, 3.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         9.2000e+01],
        [3.0150e+03, 6.0000e+00, 4.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         1.7000e+01]])

## Training

In [50]:
def create_composed_layer(nh, nh2, is_last=False):
    """Return the modules of one Linear -> ReLU -> BatchNorm1d stage as a tuple.

    A Dropout(p=0.001) module is appended unless `is_last` is True.

    Args:
        nh: number of input features of the Linear layer.
        nh2: number of output features (also the BatchNorm1d width).
        is_last: omit the trailing Dropout when True.

    Returns:
        Tuple of 3 modules (last stage) or 4 modules (otherwise), ready to be
        unpacked into `nn.Sequential`.
    """
    stage = (nn.Linear(nh, nh2), nn.ReLU(), nn.BatchNorm1d(nh2, momentum=0.1))
    if not is_last:
        stage += (nn.Dropout(p=0.001),)
    return stage

In [51]:
ni = train_ds.x.shape[1]
nh = 100
nh2 = 50

In [52]:
model = nn.Sequential(*create_composed_layer(ni, nh), *create_composed_layer(nh, nh2), *create_composed_layer(nh2, 1, is_last=True))
model

Sequential(
  (0): Linear(in_features=33, out_features=100, bias=True)
  (1): ReLU()
  (2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.001, inplace=False)
  (4): Linear(in_features=100, out_features=50, bias=True)
  (5): ReLU()
  (6): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.001, inplace=False)
  (8): Linear(in_features=50, out_features=1, bias=True)
  (9): ReLU()
  (10): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [53]:
# Learning rate used for training. The trailing comments presumably name the
# loss function each candidate value was used with.
lr = 5e-5 # mse
# lr = 1e-4 # mae
# lr = 1e-3 # mape
# lr = 1e-7 # mqe
# NOTE(review): `optimizer` is not passed to the Learner created further down,
# so it appears to be unused - confirm before relying on it.
optimizer = optim.Adam(model.parameters(), lr=lr)

In [54]:
!ls {path}
!rm {path/'m5_best_1.pth'}

calendar.csv   m5_model_0.lgb  m5_model_4.lgb		   sell_prices.csv
m5_best_1.pth  m5_model_1.lgb  m5_model.lgb		   walmartTrends0.csv
m5_best_2.pth  m5_model_2.lgb  sales_train_validation.csv
m5_dt	       m5_model_3.lgb  sample_submission.csv


In [55]:
[f for f in dir(F) if f.find('loss') > -1]

['_pointwise_loss',
 '_smooth_l1_loss',
 'cosine_embedding_loss',
 'ctc_loss',
 'hinge_embedding_loss',
 'l1_loss',
 'margin_ranking_loss',
 'mse_loss',
 'multi_margin_loss',
 'multilabel_margin_loss',
 'multilabel_soft_margin_loss',
 'nll_loss',
 'poisson_nll_loss',
 'smooth_l1_loss',
 'soft_margin_loss',
 'triplet_margin_loss']

In [56]:
torch.randn([10, 10]).contiguous().view(-1).shape

torch.Size([100])

In [57]:
def mse_loss(pred, targ):
    """Mean squared error between `pred` and `targ`.

    Both tensors are flattened to 1-D first so that differently shaped but
    equally sized inputs are compared element by element rather than broadcast.
    """
    flat_pred = pred.reshape(-1)
    flat_targ = targ.reshape(-1)
    return F.mse_loss(flat_pred, flat_targ)

def mqe_loss(pred, targ):
    """Mean of the fourth power of the error between `pred` and `targ`.

    Both tensors are flattened to 1-D first so that differently shaped but
    equally sized inputs are compared element by element rather than broadcast.
    """
    error = pred.reshape(-1) - targ.reshape(-1)
    return error.pow(4).mean()

def mae_loss(pred, targ):
    """Mean absolute error between `pred` and `targ`.

    Both tensors are flattened to 1-D first so that differently shaped but
    equally sized inputs are compared element by element rather than broadcast.
    """
    error = pred.reshape(-1) - targ.reshape(-1)
    return error.abs().mean()

def mape_loss(pred, targ):
    """Mean absolute percentage error of `pred` relative to `targ`.

    Both tensors are flattened to 1-D first. A constant 1e-5 is added to the
    denominator, so a target of exactly zero divides by 1e-5 instead of 0.
    """
    flat_pred = pred.reshape(-1)
    flat_targ = targ.reshape(-1)
    relative_error = (flat_targ - flat_pred) / (flat_targ + 1e-5)
    return relative_error.abs().mean()

def poisson_loss(pred, targ):
    """Poisson negative log-likelihood loss on flattened inputs.

    `flatten_check` is applied to both tensors first and its outputs are
    handed to `F.poisson_nll_loss` with that function's default arguments.
    """
    flat_pred, flat_targ = flatten_check(pred, targ)
    return F.poisson_nll_loss(flat_pred, flat_targ)

In [58]:
# ??flatten_check

In [59]:
learn = Learner(data=data, model=model, model_dir = path, metrics=[rmse], loss_func=mse_loss)

In [60]:
%%time
# Train for 10 epochs with the one-cycle schedule. SaveModelCallback monitors the
# validation RMSE (mode='min') and saves under the name 'm5_best_1'.
# NOTE(review): patience=30 is larger than the 10 epochs requested, so early
# stopping presumably cannot trigger in this run - confirm.
learn.fit_one_cycle(10, lr, callbacks=[callbacks.EarlyStoppingCallback(learn, monitor="root_mean_squared_error", 
                                                                       mode="min", patience=30),
                                     callbacks.SaveModelCallback(learn, monitor='root_mean_squared_error',mode='min', 
                                                                 name='m5_best_1')])

epoch,train_loss,valid_loss,root_mean_squared_error,time
0,12.870758,12.984248,3.547443,01:38
1,10.131829,9.418804,3.008631,01:44
2,7.38931,6.562395,2.502014,01:47
3,6.306353,5.738266,2.340715,01:49
4,5.977338,5.343266,2.258778,01:47
5,5.726808,5.371672,2.264397,01:48
6,6.724778,5.314131,2.252314,01:50
7,6.769021,5.300624,2.249187,01:49
8,5.542991,5.286193,2.247263,01:49
9,5.913835,5.279285,2.244982,01:50


Better model found at epoch 0 with root_mean_squared_error value: 3.547442674636841.
Better model found at epoch 1 with root_mean_squared_error value: 3.008631467819214.
Better model found at epoch 2 with root_mean_squared_error value: 2.502014398574829.
Better model found at epoch 3 with root_mean_squared_error value: 2.340714931488037.
Better model found at epoch 4 with root_mean_squared_error value: 2.258777618408203.
Better model found at epoch 6 with root_mean_squared_error value: 2.252314329147339.
Better model found at epoch 7 with root_mean_squared_error value: 2.2491865158081055.
Better model found at epoch 8 with root_mean_squared_error value: 2.247262716293335.
Better model found at epoch 9 with root_mean_squared_error value: 2.2449824810028076.
CPU times: user 16min 24s, sys: 1min 35s, total: 17min 59s
Wall time: 17min 57s


In [None]:
%%time

# NOTE(review): the first assignment is overwritten on the next line, so only
# lr = 5e-5 takes effect.
lr = 1e-5
lr = 5e-5
# Continue training for 3 more epochs; SaveModelCallback saves under 'm5_best_2'.
# NOTE(review): patience=30 is larger than the 3 epochs requested, so early
# stopping presumably cannot trigger in this run - confirm.
learn.fit_one_cycle(3, lr, callbacks=[callbacks.EarlyStoppingCallback(learn, monitor="root_mean_squared_error", 
                                                                       mode="min", patience=30),
                                     callbacks.SaveModelCallback(learn, monitor='root_mean_squared_error',mode='min', 
                                                                 name='m5_best_2')])

In [None]:
learn.save(file = str(path/'m5_model'))

In [None]:
# Load a previously saved booster from 'm5_model.lgb'; the prediction loop calls
# m_lgb.predict on it.
# NOTE(review): `lgb` is not imported in any visible cell - presumably
# `import lightgbm as lgb` is required before this line; confirm.
m_lgb = lgb.Booster(model_file=str(path/'m5_model.lgb'))

## Prediction

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Multipliers applied to the raw model predictions, one full forecast pass each.
alphas = [1.028, 1.023, 1.018]
# Equal blending weights for the per-alpha forecasts.
weights = [1 / len(alphas)] * len(alphas)
# Compare with a tolerance: exact float equality only holds by luck for some
# list lengths (for example, six equal weights sum to 0.9999999999999999).
assert abs(sum(weights) - 1.0) < 1e-9
# First day to forecast.
fday = datetime(2016, 4, 25)
# Cross-check: day 1 is 2011-01-29, so day 1914 must fall on fday.
assert datetime(2011, 1, 29) + timedelta(days=1914 - 1) == fday

In [None]:
# Submission columns F1..F<pred_days>.
cols = [f'F{i}' for i in range(1, pred_days + 1)]
sub = pd.DataFrame()
# Test frame: recent history plus empty rows for the days to be forecast.
te = read_dt(False)

for icount, (alpha, weight) in tqdm(enumerate(zip(alphas, weights)), total=len(alphas)):
    # Forecast one day at a time so that later days can use earlier predictions
    # as lag inputs. Every forecast day is overwritten again in each alpha pass
    # before any later day reads it.
    for tdelta in tqdm(range(0, pred_days), total=pred_days):
        day = fday + timedelta(days=tdelta)
        print(tdelta, day)
        tst = te[(te.date >= day - timedelta(days=max_lags)) & (te.date <= day)].copy()
        # create_fea is the feature builder defined in this notebook; it also adds
        # the date flags and day-of-year. The previously called create_features /
        # prepare_date_cols are not defined anywhere in the notebook.
        create_fea(tst)
        tst = tst.loc[tst.date == day, train_cols]
        te.loc[te.date == day, 'sales'] = alpha * m_lgb.predict(tst) # magic multiplier by kyakovlev

    # Reshape this pass's forecasts to one row per id with columns F1..F<pred_days>.
    te_sub = te.loc[te.date >= fday, ['id', 'sales']].copy()
    te_sub['F'] = [f"F{rank}" for rank in te_sub.groupby("id")["id"].cumcount()+1]
    te_sub = te_sub.set_index(["id", "F" ]).unstack()['sales'][cols]
    te_sub.fillna(0., inplace=True)
    te_sub.sort_values(["id"], inplace=True)
    te_sub.reset_index(drop=False, inplace = True)
    te_sub.to_csv(f"submission_{icount}.csv",index=False)

    # Accumulate the weighted blend across alphas.
    if icount == 0:
        sub = te_sub
        sub[cols] *= weight
    else:
        sub[cols] += te_sub[cols] * weight

# Duplicate the rows under the "evaluation" ids. regex=True is explicit because
# the pattern uses the `$` anchor and pandas 2.0 changed the default of
# Series.str.replace to a literal (non-regex) match.
sub2 = sub.copy()
sub2["id"] = sub2["id"].str.replace("validation$", "evaluation", regex=True)
sub = pd.concat([sub, sub2], axis=0, sort=False)
sub.to_csv("submission.csv",index=False)

In [None]:
!wc -l submission.csv

In [None]:
pd.read_csv('submission.csv')