In [None]:
import numpy as np
import pandas as pd
# !pip install pytorch_lightning
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from torchsummary import summary

from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler, LabelEncoder, OrdinalEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
import numpy as np

from pathlib import Path
from argparse import ArgumentParser
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pickle


data_dir = Path.home()/'data/kaggle/m5-forecasting-accuracy'

x_cat_cols = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id',
        'weekday', 'wday', 'month', 'year',
       'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
       'snap_CA', 'snap_TX', 'snap_WI']
x_cont_cols = ['sell_price']
num_train_val_days = 1913
num_test1_days = 28
num_test2_days = 28

#### TODO
 - normalize y
 - sales price is 0. fix it. 

In [44]:
!ls $data_dir

calendar.csv		     sales_train_validation.csv  sell_prices.csv
m5-forecasting-accuracy.zip  sample_submission.csv


#### Sales

In [96]:
%%time
sales = pd.read_csv(data_dir/'sales_train_validation.csv')
print(f'sales.shape: {sales.shape}')
cat_cols = ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']

# encode cat cols
encoders = {}
for col in cat_cols:
    encoder =  OrdinalEncoder()
    sales[[col]] = encoder.fit_transform(sales[[col]])
    sales[col] = sales[col].astype(np.long)
    encoders[col] = encoder
    
# change day column names to just day number
train_day_cols = {col: col.split('_')[1] for col in sales.columns if col.startswith('d_')}
sales.rename(columns=train_day_cols, inplace=True)

sales.shape: (30490, 1919)
CPU times: user 7.52 s, sys: 5.37 s, total: 12.9 s
Wall time: 12.9 s


#### Add test data

In [97]:
test_day_cols = [str(num_train_val_days + 1 + o) for o in range(56)]
for col in test_day_cols:
    sales[col] = 0
print(sales.shape)

(30490, 1975)


In [50]:
sales.tail(2)

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,1,2,3,4,...,1904,1905,1906,1907,1908,1909,1910,1911,1912,1913
30488,FOODS_3_826_WI_3_validation,1435,2,0,9,2,0,0,0,0,...,0,0,1,0,0,1,0,3,1,3
30489,FOODS_3_827_WI_3_validation,1436,2,0,9,2,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [74]:
sample = pd.read_csv(data_dir/'sample_submission.csv')
sample.tail(2)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
60978,FOODS_3_826_WI_3_evaluation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60979,FOODS_3_827_WI_3_evaluation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [100]:
num_days = len(train_day_cols) + len(test_day_cols)
num_stores = sales['store_id'].nunique()
num_items = sales['item_id'].nunique()
print('total days : ', num_days)
print('num store_items - ', num_stores * num_items)

total days :  1969
num store_items -  30490


In [76]:
sales['item_id'].nunique()

3049

#### Calendar

In [77]:
calendar = pd.read_csv(data_dir/'calendar.csv')\
            .rename(columns={'d':'day'})

cat_cal_cols = ['wm_yr_wk', 'weekday', 'wday', 'month', 'year',
       'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
       'snap_CA', 'snap_TX', 'snap_WI']
# ignore_cal_cols = ['wm_yr_wk']

for col in cat_cal_cols:
    
    # impute
    if str(calendar[col].dtype)[:3] == 'obj':
        fill_value = 'abcxyz' 
    elif str(calendar[col].dtype)[:3] == 'int':
        fill_value = -1
    calendar[[col]] = SimpleImputer(strategy='constant', fill_value=fill_value).fit_transform(calendar[[col]])
    
    # encode
    if col not in encoders:
        encoders[col] = OrdinalEncoder().fit(calendar[[col]])
    calendar[[col]] = encoders[col].transform(calendar[[col]])
    calendar[col] = calendar[col].astype(np.long)
    
# change day column names to just day number
calendar['day'] = calendar['day'].apply(lambda x: x.split('_')[1])
calendar['day'] = calendar['day'].astype(np.long)

calendar.tail(2)

Unnamed: 0,date,wm_yr_wk,weekday,wday,month,year,day,event_name_1,event_type_1,event_name_2,event_type_2,snap_CA,snap_TX,snap_WI
1967,2016-06-18,281,2,0,5,5,1968,30,4,4,2,0,0,0
1968,2016-06-19,281,3,1,5,5,1969,16,3,2,0,0,0,0


#### Prices

In [78]:
%%time
prices = pd.read_csv(data_dir/'sell_prices.csv')
for col in ['store_id', 'item_id', 'wm_yr_wk']:
    prices[[col]] = encoders[col].transform(prices[[col]])
    prices[col] = prices[col].astype(np.long)

CPU times: user 9.58 s, sys: 2.16 s, total: 11.7 s
Wall time: 11.7 s


In [79]:
prices.sort_values('wm_yr_wk',ascending=False).head(2)

Unnamed: 0,store_id,item_id,wm_yr_wk,sell_price
6841120,9,1436,281,1.0
4220997,6,1922,281,9.97


### Merge

In [80]:
%%time
sales2 = pd.melt(sales, id_vars=['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id'], 
                                       var_name='day', value_name='demand')
sales2['day'] = sales2['day'].astype(np.long)

sales2.sort_values('day', inplace=True)
calendar.sort_values('day', inplace=True)

sales2 = sales2.merge(calendar, on='day', how='left')
sales2 = sales2.merge(prices, on=['store_id', 'item_id', 'wm_yr_wk'], how='left')
sales2['sell_price'] = sales2['sell_price'].astype(np.float32)
sales2['sell_price'] = sales2['sell_price'].fillna(0.0)

sales2.sort_values(['item_id', 'store_id','day'], inplace=True)

# scale continuous columns
scalers = {}
for col in ['sell_price','demand']:
    scaler = MinMaxScaler()
    sales2[[col]] = scaler.fit_transform(sales2[[col]])
    scalers[col] = scaler

CPU times: user 2min 38s, sys: 1min 52s, total: 4min 31s
Wall time: 4min 30s


In [81]:
sales2.to_parquet('combined.pq')
with open('encoders.pkl','wb') as f:
    pickle.dump(encoders,f)
    
with open('scalers.pkl','wb') as f:
    pickle.dump(scalers, f)

### Creating tensors

In [82]:
%%time
sales2 = pd.read_parquet('combined.pq')
print(sales2.shape)
sales2.columns

(60034810, 22)
CPU times: user 27.7 s, sys: 34.4 s, total: 1min 2s
Wall time: 14.9 s


Index(['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id', 'day',
       'demand', 'date', 'wm_yr_wk', 'weekday', 'wday', 'month', 'year',
       'event_name_1', 'event_type_1', 'event_name_2', 'event_type_2',
       'snap_CA', 'snap_TX', 'snap_WI', 'sell_price'],
      dtype='object')

In [83]:
%%time
x = torch.tensor(sales2[x_cat_cols + x_cont_cols].values)
y = torch.tensor(sales2['demand'].values)

CPU times: user 13.2 s, sys: 31.4 s, total: 44.6 s
Wall time: 18.9 s


In [84]:
sales2[x_cat_cols + x_cont_cols].dtypes

item_id           int64
dept_id           int64
cat_id            int64
store_id          int64
state_id          int64
weekday           int64
wday              int64
month             int64
year              int64
event_name_1      int64
event_type_1      int64
event_name_2      int64
event_type_2      int64
snap_CA           int64
snap_TX           int64
snap_WI           int64
sell_price      float64
dtype: object

In [85]:
%%time

# from fastai v2
def get_emb_size(nunique):
    return min(600, round(1.6 * nunique**0.56))

emb_sizes = [(sales2[col].nunique(), get_emb_size(sales2[col].nunique())) for col in x_cat_cols]

CPU times: user 11 s, sys: 562 ms, total: 11.6 s
Wall time: 11.6 s


In [86]:
with open('emb_sz.pkl','wb') as f:
    pickle.dump(emb_sizes,f )

In [87]:
# group_size = num_items * num_stores
# group_size

In [101]:
%%time
num_features = x.size(1)
x1 = x.view(-1, num_days, num_features).refine_names('item_store', 'day','features')\
        .align_to('day','item_store','features').contiguous()

y1 = y.view(-1, num_days).refine_names('item_store', 'day')\
    .align_to('day', 'item_store').contiguous()

print(f'x1.shape - {x1.shape} y1.shape - {y1.shape}')

x1.shape - torch.Size([1969, 30490, 17]) y1.shape - torch.Size([1969, 30490])
CPU times: user 40.3 s, sys: 28.3 s, total: 1min 8s
Wall time: 2.93 s


In [102]:
%%time
torch.save(x1.rename(None), 'x.pt')
torch.save(y1.rename(None), 'y.pt')


CPU times: user 4.88 ms, sys: 8.85 s, total: 8.85 s
Wall time: 11.2 s


### Training

In [103]:
class M5DataSet(Dataset):
    def __init__(self,x, y, src_len, tgt_len, bsz, dstype='train'):
        assert dstype in ['train', 'test1', 'test2', 'val']
        self.x = x
        self.y = y
        self.src_len = src_len
        self.tgt_len = tgt_len
        self.bsz = bsz
        self.dstype = dstype
        
    def __len__(self):
        if self.dstype == 'train':
            l = (self.x.size(0) - (self.src_len + self.tgt_len + num_test1_days + num_test2_days)) 
            return l
        
        if self.dstype == 'test1':
            return 1
        
        return l
    
    def __getitem__(self, idx):
        if self.dstype == 'train':
            # we have 30490 item_stores. We may not be able to load them all. So randomly pick bsz items. 
            item_store_mask = list(np.random.randint(0, self.x.size(1),(self.bsz,)))
        elif self.dstype == 'test1':
            item_store_mask = list(np.arange(self.x.size(1)))
            idx = self.x.size(0) - (self.src_len + self.tgt_len + num_test2_days)
            print('test1 index - ', idx)
        
        x_src = self.x.rename(None)[idx:idx+self.src_len, item_store_mask, :]
        x_tgt = self.x.rename(None)[idx+self.src_len:idx+self.src_len+self.tgt_len, item_store_mask, :]
        y_src = self.y.rename(None)[idx:idx+self.src_len, item_store_mask]
        y_tgt = self.y.rename(None)[idx+self.src_len:idx+self.src_len+self.tgt_len, item_store_mask]
#         print(f'x.shape - {self.x.shape} y.shape - {self.y.shape} idx - {idx}. x_item.shape - {x_item.shape} y_item.shape - {y_item.shape}')
        return x_src, x_tgt, y_src, y_tgt, item_store_mask

# train_ds = M5DataSet(x1, y1, src_len, tgt_len, 200)
# train_dl = DataLoader(train_ds, batch_size=1, shuffle=True, pin_memory=True)

In [104]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
    
# def insert_embedding(inp, dim, index, emb):
#     """
#     Replace columns with their embeddings. Works only with 2-d tensors.
#     TODO - make it work for multi-dim tensors

#     :param inp: tensor of two or more dimensions
#     :param dim: dimension along which tensor should be expanded by inserting the embedding
#     :param i: index of tensor along dim which is to be embedded
#     :param emb: Embedding of shape [v,d], where v vocab_size and d is embedding dimension
#     :return: 
#     """
#     # create a slice of the data to be replaced with embedding. 
#     s = inp.index_select(dim, torch.tensor([index])).squeeze(dim)
#     embedded = emb(s.type(torch.long))
    
#     first_indices = torch.arange(0,index)
#     last_indices = torch.arange(index+1,inp.size(dim))

#     return torch.cat([inp.index_select(dim, first_indices), embedded.type(inp.dtype), inp.index_select(dim, last_indices)], axis=dim)

In [107]:
%%time
gx = torch.load('x.pt')
gy = torch.load('y.pt')
print(f'gx.shape - {gx.shape} gy.shape - {gy.shape}')

gx.shape - torch.Size([1969, 30490, 17]) gy.shape - torch.Size([1969, 30490])
CPU times: user 13.6 ms, sys: 8.15 s, total: 8.17 s
Wall time: 8.14 s


In [110]:
class SalesModel(LightningModule):
    def __init__(self, hparams):
        super(SalesModel, self).__init__()
        self.hparams = hparams
        self.x_cat_cols = x_cat_cols
        self.x_cont_cols = x_cont_cols
        self.pos_encoder = PositionalEncoding(hparams.ninp, hparams.dropout)
#         encoder_layers = nn.TransformerEncoderLayer(hparams.ninp, hparams.nhead, hparams.nhid, hparams.dropout)
#         decoder_layers = nn.TransformerDecoderLayer(hparams.ninp, hparams.nhead, hparams.nhid, hparams.dropout)
#         self.transformer_encoder = nn.TransformerEncoder(encoder_layers, hparams.nlayers)
#         self.transformer_decoder = nn.TransformerDecoder(decoder_layers, hparams.nlayers)
#         self.lin = nn.Linear()
        self.transformer = nn.Transformer(d_model=hparams.ninp, nhead=hparams.nhead, 
                                          num_encoder_layers=hparams.nlayers,
                                          num_decoder_layers=hparams.nlayers,
                                          dim_feedforward=hparams.nhid)
        self.criterion = nn.MSELoss()
        self.lin = nn.Linear(hparams.ninp, 1)
        self.sigmoid = nn.Sigmoid()
        
        print('reading data', flush=True)
        self.x = gx
        self.y = gy

        with open('emb_sz.pkl','rb') as f:
            emb_szs = pickle.load(f)
        print(f'emb_szs - {emb_szs}')
                    
        self.embs = nn.ModuleList([nn.Embedding(e[0],e[1]) for e in emb_szs])
        
#     def init_weights(self):
#         initrange = 0.1
#         self.src_embedding.weight.data.uniform_(-initrange, initrange)
#         self.decoder.bias.data.zero_()
#         self.decoder.weight.data.uniform_(-initrange, initrange)
        
    @staticmethod
    def add_model_specifi_args(parent_parser):
        parser = ArgumentParser(parents=[parent_parser], add_help=False)
        parser.add_argument('--bsz', default=20, type=int, help='batch_size', )
        parser.add_argument('--src-len', default=90, type=int, help='source length')
        parser.add_argument('--tgt-len', default=28, type=int, help='target length')
        parser.add_argument('--ninp', default=320, type=int, help='expected features in the input')
        parser.add_argument('--nhead', default=4, type=int, help='number of attention heads')
        parser.add_argument('--nhid', default=256, type=int, help='dimesion of feed-forward network model')
        parser.add_argument('--nlayers', default=2, type=int, help='number of encoder layers')
        parser.add_argument('--dropout', default=0.2, type=float, help='dropout')
        
        # they are not hyper params, but adding them as pytorch lightening can save them
        parser.add_argument('--num-cat-cols', default=len(x_cat_cols), type=int, help='number of categorical columns')
        parser.add_argument('--num-cont-cols', default=len(x_cont_cols), type=int, help='number of numeric columns')
        return parser
    
#     def _generate_square_subsequent_mask(self, sz):
#         # populate the lower triangle with True and rest with False
#         return torch.tril(torch.ones(sz, sz)) == 1.0
    
    def prepare_data(self):
        pass
        
    def train_dataloader(self):
        train_ds = M5DataSet(self.x, self.y, self.hparams.src_len, self.hparams.tgt_len, self.hparams.bsz,dstype='train')  
        print(f'train_ds.length - {len(train_ds)}')
        train_dl = DataLoader(train_ds, batch_size=1, shuffle=True, pin_memory=True)
        return train_dl
    
    def test_dataloader(self):
        test_ds = M5DataSet(self.x, self.y, self.hparams.src_len, self.hparams.tgt_len, self.hparams.bsz, dstype='test1')  
        test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)
        return test_dl

    
    def emb_lookups(self, xb, yb=None):
        embs_t = []
        for idx in range(self.hparams.num_cat_cols):
#             print('looking up for ', idx)
            embs_t.append(self.embs[idx](xb[:,:,idx].type(torch.long)))
        xb_cat = torch.cat(embs_t, dim=2)
        xb_cont = xb[:,:,self.hparams.num_cat_cols:]
        
        if yb is not None:
            xb = torch.cat([xb_cat, xb_cont.type(xb_cat.dtype), yb.unsqueeze(2).type(xb_cat.dtype)], dim=2)
        else:
            xb = torch.cat([xb_cat, xb_cont.type(xb_cat.dtype)], dim=2)
            
        #pad to adjust the feature dimension
        dim3_shortfall = self.hparams.ninp - xb.size(2)
        assert dim3_shortfall >= 0
        pad = nn.ConstantPad1d(padding=(0,dim3_shortfall),value=0)
        xb = pad(xb) 

        return xb

    def forward(self, x_src, y_src, x_tgt):
        offset = 0
        
        x_src = self.emb_lookups(x_src, y_src)
        x_tgt = self.emb_lookups(x_tgt)
            
        x_src = self.pos_encoder(x_src)
#         print('shape after pos encoder - ', x_src.size())
        out = self.transformer(x_src, x_tgt)
#         print('shape after transformer - ', out.size())
        out = self.sigmoid(self.lin(out))
        
        return out
    
    def training_step(self, batch, batch_idx):
        x_src, x_tgt, y_src, y_tgt, item_store_mask = batch
        x_src = x_src.squeeze(0)
        x_tgt = x_tgt.squeeze(0)
        y_src = y_src.squeeze(0)
        y_tgt = y_tgt.squeeze(0)
        
#         print(f'x_src.shape - {tuple(x_src.shape)} \t x_tgt.shape - {tuple(x_tgt.shape)} \t y_src.shape - {y_src.shape} \t y_tgt.shape - {y_tgt.shape}')
        yhat_tgt = self(x_src, y_src, x_tgt)

        # apply the mask (due to random selection of item_stores) to output
#         idxs = list(np.arange(0,x_src.size(1)))
#         idxs = [1 if o in item_store_mask else 0 for o in idxs]
#         mask = torch.tensor(idxs) * torch.ones(y_tgt.size(0), y_tgt.size(1))
#         print(f'mask.shape: {mask.shape}')
#         
        loss = self.criterion((yhat_tgt).reshape(-1).type(torch.float32), (y_tgt).reshape(-1).type(torch.float32))
        if batch_idx%10 == 0:
            print(f'{batch_idx} loss: {loss}  yhat_tgt.sum: {yhat_tgt.sum().item()}  y_tgt.sum: {y_tgt.sum().item()}')
            
        return {'loss': loss}
    
    def test(self):
        dl = self.test_dataloader()
        batch = next(iter(dl))
        return batch
        

In [139]:
src_len = 90
tgt_len = 28
# bsz = 200
# model = SalesModel(hparams)

parser = ArgumentParser()
parser = SalesModel.add_model_specifi_args(parser)
hparams = parser.parse_args('--bsz 2000 --ninp 320 --nhid 128 --nlayers 1'.split())

checkpoint_callback = ModelCheckpoint(
    filepath='models/weights.ckpt',
    verbose=True
)

# hparams.__setattr__('x_cat_cols', x_cat_cols)
# hparams.__setattr__('x_cont_cols', x_cont_cols)
model = SalesModel(hparams)
trainer = Trainer(gpus=1,max_epochs=1)
trainer.fit(model)
trainer.save_checkpoint('models/weights.ckpt')

reading data
emb_szs - [(3049, 143), (7, 5), (3, 3), (10, 6), (3, 3), (7, 5), (7, 5), (12, 6), (6, 4), (31, 11), (5, 4), (5, 4), (3, 3), (2, 2), (2, 2), (2, 2)]


HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(flex='2'), max=1.0), HTML(value='')), …

train_ds.length - 1795
0 loss: 0.15710467100143433  yhat_tgt.sum: 21292.99609375  y_tgt.sum: 75.07994757536042
10 loss: 2.5262781491619535e-05  yhat_tgt.sum: 23.34113121032715  y_tgt.sum: 84.40235910878113
20 loss: 3.300821845186874e-05  yhat_tgt.sum: 14.7197265625  y_tgt.sum: 96.45347313237221
30 loss: 4.034461744595319e-05  yhat_tgt.sum: 12.68321704864502  y_tgt.sum: 90.77195281782437
40 loss: 3.176669270033017e-05  yhat_tgt.sum: 10.200115203857422  y_tgt.sum: 69.34600262123197
50 loss: 3.116010702797212e-05  yhat_tgt.sum: 10.485611915588379  y_tgt.sum: 64.73263433813892
60 loss: 2.9207660190877505e-05  yhat_tgt.sum: 10.546513557434082  y_tgt.sum: 83.98558322411535
70 loss: 2.4427195967291482e-05  yhat_tgt.sum: 11.79220962524414  y_tgt.sum: 94.59370904325033
80 loss: 3.5520213714335114e-05  yhat_tgt.sum: 10.69764518737793  y_tgt.sum: 75.59633027522936
90 loss: 2.465873876644764e-05  yhat_tgt.sum: 10.677375793457031  y_tgt.sum: 89.7549148099607
100 loss: 1.7101960111176595e-05  yhat_t

### testing

In [175]:
%%time
model = SalesModel.load_from_checkpoint('models/weights.ckpt')
x_src, x_tgt, y_src, y_tgt, item_store_mask = model.test()
x_src = x_src.squeeze(0)
x_tgt = x_tgt.squeeze(0)
y_src = y_src.squeeze(0)
y_tgt = y_tgt.squeeze(0)
print(f'x_src.shape - {x_src.shape}, x_tgt.shape - {x_tgt.shape} , y_src.shape - {y_src.shape} , y_tgt.shape - {y_tgt.shape}')

model.eval()
print('starting inference...')
yhat_tgt = model(x_src, y_src, x_tgt)
yhat_tgt.shape

reading data
emb_szs - [(3049, 143), (7, 5), (3, 3), (10, 6), (3, 3), (7, 5), (7, 5), (12, 6), (6, 4), (31, 11), (5, 4), (5, 4), (3, 3), (2, 2), (2, 2), (2, 2)]
test1 index -  1823
x_src.shape - torch.Size([90, 30490, 17]), x_tgt.shape - torch.Size([28, 30490, 17]) , y_src.shape - torch.Size([90, 30490]) , y_tgt.shape - torch.Size([28, 30490])
starting inference...
CPU times: user 6min 36s, sys: 3min 33s, total: 10min 9s
Wall time: 32.5 s


torch.Size([28, 30490, 1])

In [176]:
yhat_tgt = yhat_tgt.refine_names('days','item_store','demand')
yhat_tgt_aligned = yhat_tgt.align_to('item_store','days','demand').squeeze(2).detach().numpy()
print(f'yhat.shape: ', yhat_tgt_aligned.shape)

# create preds df
preds = pd.DataFrame()
preds['id'] = sales['id']

pred_ids = preds['id'].tolist()
# eval df should also be submitted (days 1942 to 1969)
eval_ids = ['_'.join(o.split('_')[:5] + ['evaluation']) for o in pred_ids]
eval_df = pd.DataFrame({'id': eval_ids})

for idx in range(num_test1_days):
    preds['F' + str(idx+1)] = yhat_tgt_aligned[:,idx]
    preds['F' + str(idx+1)] = scalers['demand'].inverse_transform(preds[['F' + str(idx+1)]])
    
    eval_df['F' + str(idx+1)] = 0.0
    
out_df = pd.concat([preds,eval_df],axis=0)
print(out_df.shape)
preds.head()

yhat.shape:  (30490, 28)
(60980, 29)


Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,1.108182,1.116894,1.21529,1.158227,1.163145,1.256685,1.212015,1.138547,1.13439,...,1.168011,1.238219,1.234245,1.133531,1.142793,1.237923,1.186019,1.182022,1.245165,1.259626
1,HOBBIES_1_002_CA_1_validation,0.935378,0.941673,1.023826,0.974043,0.985986,1.066044,1.021846,0.95567,0.95145,...,0.990332,1.052222,1.037988,0.952125,0.95716,1.049217,0.993071,1.00595,1.056365,1.057076
2,HOBBIES_1_003_CA_1_validation,1.251421,1.263109,1.371955,1.304427,1.312041,1.423179,1.377433,1.287387,1.282629,...,1.3318,1.409868,1.399217,1.280757,1.294161,1.410588,1.335355,1.348387,1.415704,1.42839
3,HOBBIES_1_004_CA_1_validation,0.869793,0.876802,0.956443,0.914308,0.919486,0.985046,0.944984,0.891143,0.884281,...,0.919028,0.976054,0.962968,0.887811,0.893871,0.977575,0.932792,0.937595,0.98166,0.985463
4,HOBBIES_1_005_CA_1_validation,1.08934,1.099927,1.192606,1.138067,1.153022,1.238311,1.204229,1.121159,1.122337,...,1.161201,1.223614,1.224107,1.115594,1.127026,1.220751,1.167549,1.173281,1.228912,1.245078


In [177]:
out_df.describe()

Unnamed: 0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
count,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,...,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0,60980.0
mean,0.510347,0.514764,0.555583,0.530852,0.542067,0.580666,0.543317,0.51658,0.51553,0.565537,...,0.541112,0.563676,0.558505,0.515609,0.51983,0.561643,0.536299,0.547292,0.567017,0.567922
std,0.74119,0.7453,0.802324,0.770411,0.784412,0.844512,0.784359,0.750507,0.742602,0.817655,...,0.779348,0.812113,0.801313,0.751043,0.754361,0.811718,0.779569,0.792454,0.819031,0.821313
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.114739,0.115585,0.122402,0.119072,0.120476,0.122654,0.121559,0.116891,0.116262,0.1245,...,0.120484,0.123982,0.123007,0.116333,0.117319,0.124061,0.120393,0.121852,0.123813,0.124411
75%,0.852114,0.860831,0.928988,0.887467,0.906841,0.96578,0.907936,0.861836,0.863415,0.944888,...,0.906052,0.942198,0.934531,0.859383,0.868487,0.93847,0.895804,0.914852,0.946577,0.947637
max,23.712971,23.811947,24.955412,24.822319,25.026979,26.005703,24.331423,24.076591,23.437435,25.417122,...,24.598057,24.781763,24.302635,24.163206,24.05509,25.204046,24.970743,25.225618,25.01766,25.162182


In [178]:
out_df.to_csv('preds.csv', index=False)
!kaggle competitions submit -c m5-forecasting-accuracy -f preds.csv -m "transformers 2"

In [172]:
# !head preds.csv

## Playground

In [None]:
x_src.shape

In [None]:
item_store_mask = list(np.random.randint(0, 10,3))
item_store_mask

In [None]:
torch.randn(10).sum().item()

In [118]:
sample.head(2)

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,HOBBIES_1_002_CA_1_validation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
dir(hparams)`

In [152]:
sub = pd.read_csv(data_dir/'sample_submission.csv')

In [174]:

    
eval_df.head()

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_evaluation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,HOBBIES_1_002_CA_1_evaluation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,HOBBIES_1_003_CA_1_evaluation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,HOBBIES_1_004_CA_1_evaluation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,HOBBIES_1_005_CA_1_evaluation,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [155]:
sub.shape

(60980, 29)

In [156]:
preds.shape

(30490, 29)

In [157]:
preds.head()

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,1.108182,1.116894,1.21529,1.158227,1.163145,1.256685,1.212015,1.138547,1.13439,...,1.168011,1.238219,1.234245,1.133531,1.142793,1.237923,1.186019,1.182022,1.245165,1.259626
1,HOBBIES_1_002_CA_1_validation,0.935378,0.941673,1.023826,0.974043,0.985986,1.066044,1.021846,0.95567,0.95145,...,0.990332,1.052222,1.037988,0.952125,0.95716,1.049217,0.993071,1.00595,1.056365,1.057076
2,HOBBIES_1_003_CA_1_validation,1.251421,1.263109,1.371955,1.304427,1.312041,1.423179,1.377433,1.287387,1.282629,...,1.3318,1.409868,1.399217,1.280757,1.294161,1.410588,1.335355,1.348387,1.415704,1.42839
3,HOBBIES_1_004_CA_1_validation,0.869793,0.876802,0.956443,0.914308,0.919486,0.985046,0.944984,0.891143,0.884281,...,0.919028,0.976054,0.962968,0.887811,0.893871,0.977575,0.932792,0.937595,0.98166,0.985463
4,HOBBIES_1_005_CA_1_validation,1.08934,1.099927,1.192606,1.138067,1.153022,1.238311,1.204229,1.121159,1.122337,...,1.161201,1.223614,1.224107,1.115594,1.127026,1.220751,1.167549,1.173281,1.228912,1.245078
