In [4]:
import torch
import numpy as np
import pandas as pd
import category_encoders as ce
from tqdm import notebook
import matplotlib.pyplot as plt
import gc
import pickle as pkl
%matplotlib inline

In [2]:
# Load the raw competition tables; paths are relative to the notebook's working directory.
train_data = pd.read_csv('../../data/sales_train_validation.csv')  # wide sales table: one row per series, one d_<n> column per day
sell_prices = pd.read_csv('../../data/sell_prices.csv')  # prices keyed by item_id / store_id / wm_yr_wk
calendar = pd.read_csv('../../data/calendar.csv')  # one row per day `d` with date parts, events and SNAP flags
sample_submission = pd.read_csv('../../data/sample_submission.csv')  # submission template (id + F1..F28)

### Process date features

In [3]:
# Parse `date` into datetimes so the `.dt` accessors used in the next cell are available.
calendar.date =  pd.to_datetime(calendar.date)

In [4]:
# Number of years before 2016 (0 for 2016, larger for older rows).
calendar['relative_year'] = 2016 - calendar.year

# Encode month, day-of-month and weekday as (sin, cos) pairs so that the end of a cycle
# sits next to its start. The day-of-month period is the actual length of that month.
cyclic_specs = [
    ('month_sin', 'month_cos', 2 * np.pi * calendar.month/12.0),
    ('day_sin', 'day_cos', 2 * np.pi * calendar.date.dt.day/calendar.date.dt.days_in_month),
    ('weekday_sin', 'weekday_cos', 2 * np.pi * calendar.wday/7.0),
]
for sin_col, cos_col, angle in cyclic_specs:
    calendar[sin_col] = np.sin(angle)
    calendar[cos_col] = np.cos(angle)

In [5]:
# Events that occupy a single calendar day; each one becomes its own 0/1 column below.
one_day_events = ['SuperBowl', 'ValentinesDay', 'PresidentsDay', 'StPatricksDay',
                  'OrthodoxEaster', 'Cinco De Mayo', "Mother's day", 'MemorialDay',
                  "Father's day", 'IndependenceDay', 'Eid al-Fitr', 'LaborDay',
                  'ColumbusDay', 'Halloween', 'EidAlAdha', 'VeteransDay',
                  'Thanksgiving', 'Christmas', 'NewYear', 'OrthodoxChristmas', 
                  'MartinLutherKingDay', 'Easter']
# Marker names for events that span several days (the calendar only marks a start or an end day).
# NOTE(review): this list is not referenced by the cells below, which spell out the markers
# they need directly; 'LentWeek2' is not used anywhere in the visible notebook.
multi_day_events = ['LentStart', 'LentWeek2', 'Purim End', 'Pesach End',
                    'NBAFinalsStart', 'NBAFinalsEnd', 'Ramadan starts', 'Chanukah End']

In [6]:
# create a separate 0/1 column for each single-day event
# (an event may be named in either of the two event slots of the calendar)
for event in one_day_events:
    on_event = (calendar.event_name_1 == event) | (calendar.event_name_2 == event)
    calendar[event] = on_event.astype(np.int64)

# multi-day events: first flag only the single marker day the calendar provides;
# the flags are stretched over the full duration further down
multi_day_markers = {'Lent': 'LentStart', 'Purim': 'Purim End', 'Pesach': 'Pesach End',
                     'Ramadan': 'Ramadan starts', 'Chanukah': 'Chanukah End'}
for column, marker in multi_day_markers.items():
    on_marker = (calendar.event_name_1 == marker) | (calendar.event_name_2 == marker)
    calendar[column] = on_marker.astype(np.int64)

# NBA finals have both a start and an end marker: 1 on the start day, 0 on the end day and
# NaN everywhere else, so that a forward fill paints 1s from the start up to the end
nba_start = (calendar.event_name_1 == 'NBAFinalsStart') | (calendar.event_name_2 == 'NBAFinalsStart')
nba_end = (calendar.event_name_1 == 'NBAFinalsEnd') | (calendar.event_name_2 == 'NBAFinalsEnd')
calendar['NBAFinals'] = np.where(nba_start, 1.0, np.nan)
calendar.loc[nba_end, 'NBAFinals'] = 0


## for multi-day events, fill value as 1 from start to end
# Lent ends approx 6 weeks from the start
calendar['Lent'] = calendar['Lent'].rolling(min_periods=1, window=7*6).sum()
# Purim lasts just 2 days (the marker is the END day, hence the shift back by one day)
calendar['Purim'] = calendar['Purim'].shift(-1).rolling(min_periods=1, window=2).sum()
# Pesach usually lasts for 9 days (marker is the end day)
calendar['Pesach'] = calendar['Pesach'].shift(-8).rolling(min_periods=1, window=9).sum()
# both start and end dates for NBA Finals have been given
# (Series.ffill() replaces the deprecated fillna(method='ffill'))
calendar['NBAFinals'] = calendar['NBAFinals'].ffill().fillna(0)
# the end day itself was used as the 0 "stop" marker above; it still belongs to the finals
calendar.loc[nba_end, 'NBAFinals'] = 1
# Ramadan ends approx 30 days from the start
calendar['Ramadan'] = calendar['Ramadan'].rolling(min_periods=1, window=30).sum()
# Chanukah lasts for 9 days (marker is the end day)
calendar['Chanukah'] = calendar['Chanukah'].shift(-8).rolling(min_periods=1, window=9).sum()

In [7]:
# Keep only the join keys (`wm_yr_wk`, `d`) and the model features.
# `.copy()` makes this an independent frame: later cells assign new dtypes into it, and doing
# that on a slice of `calendar` triggers pandas' SettingWithCopyWarning.
# NOTE: later cells rely on the two key columns coming first (they take `iloc[:, 2:]`).
calendar_df = calendar[['wm_yr_wk', 'd', 'snap_CA', 'snap_TX', 'snap_WI', 'relative_year',
                        'month_sin', 'month_cos', 'day_sin', 'day_cos', 'weekday_sin', 'weekday_cos',
                        'SuperBowl', 'ValentinesDay', 'PresidentsDay', 'StPatricksDay', 'OrthodoxEaster',
                        'Cinco De Mayo', "Mother's day", 'MemorialDay', "Father's day", 'IndependenceDay',
                        'Eid al-Fitr', 'LaborDay', 'ColumbusDay', 'Halloween', 'EidAlAdha', 'VeteransDay',
                        'Thanksgiving', 'Christmas', 'NewYear', 'OrthodoxChristmas', 'MartinLutherKingDay',
                        'Easter', 'Lent', 'Purim', 'Pesach', 'Ramadan', 'Chanukah', 'NBAFinals']].copy()

### Merge all dfs, keep calendar_df features separate and just concat them for each batch

In [8]:
# Drop the trailing '_validation' suffix (11 characters) so the ids line up with the
# '<item_id>_<store_id>' ids built for the price table on the next line.
train_data.id = train_data.id.str[:-11]
sell_prices['id'] = sell_prices['item_id'] + '_' + sell_prices['store_id']

# add empty (NaN) columns for future data, d_1914 .. d_1969
# reindex() appends the columns without touching the dtypes of the existing ones; concatenating
# with an empty frame instead depends on pandas' deprecated handling of empty entries.
future_days = ['d_'+str(i) for i in range(1914, 1970)]
train_data = train_data.reindex(columns=list(train_data.columns) + future_days)

#### Encode categorical features using either one-hot or label encoding (for embeddings)

In [9]:
# Low-cardinality columns: expanded into one 0/1 column per value.
one_hot = ['cat_id', 'state_id'] 
# Columns turned into integer codes, later fed to embedding layers.
label = ['item_id', 'dept_id', 'store_id']

In [10]:
# Encode copies named '<col>_enc' so the original string columns stay available for the
# group-bys further down.
one_hot_enc_cols = [str(name)+'_enc' for name in one_hot]
train_data[one_hot_enc_cols] = train_data[one_hot]

# use_cat_names=True names the outputs '<col>_enc_<VALUE>' (e.g. cat_id_enc_FOODS).
one_hot_encoder = ce.OneHotEncoder(cols=one_hot_enc_cols, use_cat_names=True)
one_hot_encoder.fit(train_data)
train_data = one_hot_encoder.transform(train_data)

In [11]:
# Same pattern as the one-hot step: encode '<col>_enc' copies and keep the raw columns.
label_enc_cols = [str(name)+'_enc' for name in label]
train_data[label_enc_cols] = train_data[label]

# Ordinal encoding replaces each distinct value with an integer code.
label_encoder = ce.OrdinalEncoder(cols=label_enc_cols)
label_encoder.fit(train_data)
train_data = label_encoder.transform(train_data)

In [12]:
# Subtract one from the label-encoded columns: PyTorch embedding lookups are 0-indexed.
zero_based_cols = [str(name)+'_enc' for name in label]
train_data[zero_based_cols] = train_data[zero_based_cols] - 1

#### Reshape, change dtypes and add previous day sales

In [14]:
# Wide -> long: one row per (series, day). Everything that is not a day column identifies the series.
series_id_cols = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id',
                  'cat_id_enc_HOBBIES', 'cat_id_enc_HOUSEHOLD', 'cat_id_enc_FOODS',
                  'state_id_enc_CA', 'state_id_enc_TX', 'state_id_enc_WI',
                  'item_id_enc', 'dept_id_enc', 'store_id_enc']
day_cols = ['d_'+str(i) for i in range(1, 1970)]
data_df = pd.melt(train_data, id_vars=series_id_cols, value_vars=day_cols,
                  var_name='d', value_name='sales')

# change dtypes to reduce memory usage
data_df[['sales']] = data_df[['sales']].fillna(-1).astype(np.int16)  # fill future sales as -1

# calendar flags and small integers fit into int8
calendar_int8_cols = one_day_events + ['Lent', 'Purim', 'Pesach', 'Ramadan', 'Chanukah', 'NBAFinals',
                                       'snap_CA', 'snap_TX', 'snap_WI', 'relative_year']
calendar_df[calendar_int8_cols] = calendar_df[calendar_int8_cols].astype(np.int8)

# one-hot indicators -> int8
one_hot_flag_cols = ['cat_id_enc_HOBBIES', 'cat_id_enc_HOUSEHOLD', 'cat_id_enc_FOODS',
                     'state_id_enc_CA', 'state_id_enc_TX', 'state_id_enc_WI']
data_df[one_hot_flag_cols] = data_df[one_hot_flag_cols].astype(np.int8)

# embedding codes -> int16
label_code_cols = ['item_id_enc', 'dept_id_enc', 'store_id_enc']
data_df[label_code_cols] = data_df[label_code_cols].astype(np.int16)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [14]:
# Attach the week key to every (series, day) row, then use it to look up that week's price.
# Both are left joins, so every sales row is kept; rows without a matching price get NaN.
data_df = data_df.merge(right=calendar_df[['d', 'wm_yr_wk']], on=['d'], how='left')
data_df = data_df.merge(right=sell_prices[['id', 'wm_yr_wk', 'sell_price']], on=['id', 'wm_yr_wk'], how='left')

In [15]:
# Rows with no price match in the left merge above are encoded as a price of 0.0.
data_df.sell_price = data_df.sell_price.fillna(0.0)
# Lag-1 sales within each series; the first row of every series has no predecessor and stays NaN.
data_df['prev_day_sales'] = data_df.groupby(['id'])['sales'].shift(1)

In [16]:
# remove data for d_1: its prev_day_sales is NaN because there is no earlier day to shift from
# NOTE(review): dropna() drops rows with a NaN in *any* column; presumably only the d_1 rows
# qualify at this point — confirm.
data_df = data_df.dropna()
# keep the calendar aligned with the sales rows (also starts at d_2 now)
calendar_df = calendar_df[calendar_df.d != 'd_1']

# change dtypes (prev_day_sales was float only because of the NaN introduced by shift)
data_df[['prev_day_sales']] = data_df[['prev_day_sales']].astype(np.int16)
data_df[['sell_price']] = data_df[['sell_price']].astype(np.float16)

#### Add previous day totals of aggregated series as features

In [21]:
# Grand total of the previous day's sales across all series -> column 'prev_day_sales_all'.
total_prev_day = data_df.groupby(['d'])[['prev_day_sales']].sum().astype(np.int32)
data_df = data_df.merge(right=total_prev_day.add_suffix('_all').reset_index(),
                        on=['d'], how='left')

# Per-level totals (category, state, store, department): sum per (day, group), pivot the groups
# into columns named '<prefix><group value>' and attach them to every row of that day.
aggregation_levels = [('cat_id', 'prev_d_cat_'),
                      ('state_id', 'prev_d_state_'),
                      ('store_id', 'prev_d_store_'),
                      ('dept_id', 'prev_d_dept_')]
for group_col, col_prefix in aggregation_levels:
    level_totals = data_df.groupby(['d', group_col])[['prev_day_sales']].sum().astype(np.int32)
    level_wide = level_totals.reset_index().pivot(
        index='d', columns=group_col, values='prev_day_sales').add_prefix(col_prefix)
    data_df = data_df.merge(right=level_wide, on=['d'], how='left')

In [1]:
# The raw string columns and the week key were only needed for the merges and group-bys above;
# the model consumes their encoded counterparts.
for raw_col in ['wm_yr_wk', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']:
    del data_df[raw_col]

NameError: name 'data_df' is not defined

In [23]:
# Dimensions used to reshape the long table into (timesteps, samples, ...) arrays below.
num_samples = data_df.id.nunique()  # number of distinct series
num_timesteps = data_df.d.nunique()  # number of distinct days
data_df = data_df.set_index(['id', 'd'])

In [24]:
# Features fed to both encoder and decoder. The LAST three are the integer codes that go
# through embedding layers; the dataset class slices them off by position.
enc_dec_feats = ['sell_price', 'cat_id_enc_HOBBIES', 'cat_id_enc_HOUSEHOLD', 'cat_id_enc_FOODS', 'state_id_enc_CA',
                 'state_id_enc_TX', 'state_id_enc_WI', 'item_id_enc', 'dept_id_enc', 'store_id_enc']
# Everything else except the target and the lag: the aggregated previous-day totals.
enc_only_feats = data_df.columns.difference(['sales', 'sell_price', 'prev_day_sales'] + enc_dec_feats)

sales_data_index = data_df.index
# NOTE(review): every reshape(num_timesteps, num_samples, ...) below assumes the rows are
# ordered day-major (all series of one day, then the next day), which is the order melt
# produced — confirm nothing upstream re-sorts data_df.
Y = data_df.sales.values.reshape(num_timesteps, num_samples).T  # transposed to (samples, timesteps)

X_enc_only_feats = np.array(data_df[enc_only_feats]).reshape(num_timesteps, num_samples, -1)
gc.collect()

X_enc_dec_feats = np.array(data_df[enc_dec_feats]).reshape(num_timesteps, num_samples, -1)

X_prev_day_sales = data_df.prev_day_sales.values.reshape(num_timesteps, num_samples)

calendar_index = calendar_df.d
# Skip the first two columns of calendar_df (the join keys 'wm_yr_wk' and 'd').
X_calendar = np.array(calendar_df.iloc[:, 2:])
X_calendar_cols = list(calendar_df.columns[2:])

#### save processed data

In [25]:
# data_dict = {'sales_data_index' : sales_data_index, 'calendar_index' : calendar_index, 
#              'X_prev_day_sales' : X_prev_day_sales, 
#              'X_enc_only_feats': X_enc_only_feats, 'X_enc_dec_feats' : X_enc_dec_feats,
#              'enc_dec_feat_names': enc_dec_feats, 'enc_only_feat_names': enc_only_feats,
#              'X_calendar' : X_calendar, 'X_calendar_cols' : X_calendar_cols, 
#              'Y' : Y,
#             'one_hot_encoder': one_hot_encoder, 'label_encoder': label_encoder}

# # pickle data
# with open('../data/data.pickle', 'wb') as f:
#     pkl.dump(data_dict, f, protocol=pkl.HIGHEST_PROTOCOL)

In [5]:
# Restore the preprocessed arrays from disk instead of recomputing them.
# NOTE(review): pickle.load executes arbitrary code from the file — only load files you
# produced yourself. The cell that writes this pickle is commented out above, and this path
# ('../data') differs from the '../../data' directory the CSVs are read from — confirm.
with open('../data/data.pickle', 'rb') as f:
    data_dict = pkl.load(f)
    
sales_data_index = data_dict['sales_data_index']
calendar_index = data_dict['calendar_index']
X_prev_day_sales = data_dict['X_prev_day_sales']
X_enc_only_feats = data_dict['X_enc_only_feats']
X_enc_dec_feats = data_dict['X_enc_dec_feats']
X_calendar = data_dict['X_calendar']
X_calendar_cols = data_dict['X_calendar_cols']
Y = data_dict['Y']

In [6]:
# Force a garbage-collection pass after loading the large arrays.
gc.collect()

44

## Build PyTorch Model

In [2]:
import torch 
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
import torch.utils.data
import torch.utils.data as data_utils

# Fix the random seeds of torch (CPU and, when present, CUDA) and numpy for repeatable runs.
seed = 0
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
np.random.seed(seed)

In [3]:
# Dataset (Input Pipeline)
class CustomDataset(data_utils.Dataset):
    '''
    Per-series dataset for the encoder-decoder model.

    One item corresponds to one series (one column of the time-major input arrays).

    Let:
    training (encoder) period timesteps = [0, N]
    prediction (decoder) period timesteps = [N+1, N+P]

    Arguments:
    X_prev_day_sales : array (enc_timesteps, num_series), previous day sales for the encoder period
    X_enc_only_feats : aggregated series' previous day sales for the encoder period
                       (stored, but currently not included in the returned features)
    X_enc_dec_feats : array (enc_timesteps + dec_timesteps, num_series, num_feats) with sell price
                      and categorical features; the LAST `num_embedding` features are integer
                      codes destined for embedding layers
    X_calendar : array (enc_timesteps + dec_timesteps, num_calendar_feats), shared by all series
    X_last_day_sales : array (num_series,), the actual sales for the day before the start of the
                       prediction period (first-timestep input of the decoder)
    Y : optional array (num_series, dec_timesteps) of targets for the prediction period
    transform : stored on the instance; not applied by this class
    num_embedding : number of trailing columns of X_enc_dec_feats that hold embedding codes
                    (default 3, must be >= 1)

    Returns (from __getitem__):
    List of torch tensors:
    x_enc: float, (enc_timesteps, dense feats + calendar feats + 1) encoder features
    x_enc_emb: long, (enc_timesteps, num_embedding) encoder embedding codes
    x_dec: float, (dec_timesteps, dense feats + calendar feats) decoder features
    x_dec_emb: long, (dec_timesteps, num_embedding) decoder embedding codes
    x_last_day_sales: float, (1,) the actual sales for the day before the prediction period
    y: float, (dec_timesteps,) targets (only when Y was given)
    '''

    def __init__(self, X_prev_day_sales, X_enc_only_feats,
                 X_enc_dec_feats, X_calendar, X_last_day_sales,
                 Y=None, transform=None, num_embedding=3):
        self.X_prev_day_sales = X_prev_day_sales
        self.X_enc_only_feats = X_enc_only_feats
        self.X_enc_dec_feats = X_enc_dec_feats
        self.X_calendar = X_calendar
        self.X_last_day_sales = X_last_day_sales

        # targets are converted once here instead of on every item access
        self.Y = torch.from_numpy(Y).float() if Y is not None else None

        self.transform = transform
        self.num_embedding = num_embedding

    def __len__(self):
        # one item per series (second axis of the time-major sales array)
        return self.X_prev_day_sales.shape[1]

    def __getitem__(self, idx):
        # the encoder period is as long as the lagged-sales history; whatever remains of the
        # shared feature arrays belongs to the decoder period
        enc_timesteps = self.X_prev_day_sales.shape[0]
        num_embedding = self.num_embedding

        # split this series' features into dense columns and trailing embedding codes
        series_feats = self.X_enc_dec_feats[:, idx, :]
        dense_feats = series_feats[:, :-num_embedding]
        emb_codes = series_feats[:, -num_embedding:]

        # input data for encoder: dense features + calendar + previous day sales
        x_prev_day_sales_enc = self.X_prev_day_sales[:, idx].reshape(-1, 1)
        x_enc = np.concatenate([dense_feats[:enc_timesteps],
                                self.X_calendar[:enc_timesteps, :],
                                x_prev_day_sales_enc], axis=1)
        x_enc_emb = emb_codes[:enc_timesteps]

        # input data for decoder: dense features + calendar (the sales input is added by the model)
        x_dec = np.concatenate([dense_feats[enc_timesteps:],
                                self.X_calendar[enc_timesteps:, :]], axis=1)
        x_dec_emb = emb_codes[enc_timesteps:]
        x_last_day_sales = self.X_last_day_sales[idx].reshape(-1)

        item = [torch.from_numpy(x_enc).float(), torch.from_numpy(x_enc_emb).long(),
                torch.from_numpy(x_dec).float(), torch.from_numpy(x_dec_emb).long(),
                torch.from_numpy(x_last_day_sales).float()]
        if self.Y is not None:
            item.append(self.Y[idx, :])
        return item

In [95]:
# Timestep indices are positions in the arrays built above. Each split predicts one 28-step
# window; the windows follow one another: train, then validation, then test.
last_timestep = 1969 - 1
window = 28
train_t_b, train_t_e = last_timestep - (window*4), last_timestep - (window*3)
val_t_b, val_t_e = last_timestep - (window*3), last_timestep - (window*2)
test_t_b, test_t_e = last_timestep - (window*2), last_timestep - (window*1)


def _window_dataset(t_b, t_e, with_targets=True):
    # encoder sees [0, t_b); decoder features and targets cover [t_b, t_e);
    # X_prev_day_sales[t_b] seeds the first decoder step
    targets = Y[:, t_b:t_e] if with_targets else None
    return CustomDataset(X_prev_day_sales[:t_b], X_enc_only_feats[:t_b],
                         X_enc_dec_feats[:t_e],
                         X_calendar[:t_e], X_prev_day_sales[t_b], Y=targets)


train_dataset = _window_dataset(train_t_b, train_t_e)
val_dataset = _window_dataset(val_t_b, val_t_e)
# the test window has no targets, so its items carry five tensors instead of six
test_dataset = _window_dataset(test_t_b, test_t_e, with_targets=False)

In [96]:
# NOTE(review): `config` is only created in a later cell (`config = Config`), so on a fresh
# top-to-bottom run this cell raises NameError. `train_loader` is also re-created further down
# without num_workers / pin_memory, which replaces this loader.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=config.batch_size, shuffle=True, 
                                           num_workers=3, pin_memory=True)

In [4]:
# Build a seq2seq model

# Encoder
class Encoder(nn.Module):
    '''
    Bidirectional LSTM encoder over the history window.

    Arguments:
    input_size : width of the LSTM input (dense features plus the summed embedding widths)
    embedding_sizes : iterable of (num_classes, embedding_dim), one per embedded column
    config : object providing rnn_num_hidden, rnn_num_layers and enc_rnn_dropout
    '''

    def __init__(self, input_size, embedding_sizes, config):
        super().__init__()
        self.input_size = input_size

        # one embedding table per categorical code column, in column order
        embedding_layers = []
        for num_classes, emb_dim in embedding_sizes:
            embedding_layers.append(nn.Embedding(num_classes, emb_dim))
        self.embeddings = nn.ModuleList(embedding_layers)

        self.rnn = nn.LSTM(self.input_size, config.rnn_num_hidden,
                           config.rnn_num_layers,
                           dropout=config.enc_rnn_dropout,
                           bidirectional=True)

    def forward(self, x, x_emb):
        '''
        x : batch-major dense features; x_emb : batch-major integer codes.
        Returns the LSTM's (output, hidden) pair.
        '''
        # the LSTM is time-major, so swap the batch and time axes
        dense_tm = x.permute(1, 0, 2)
        codes_tm = x_emb.permute(1, 0, 2)

        # look up every code column in its own table and join along the feature axis
        embedded = torch.cat(
            [table(codes_tm[:, :, col]) for col, table in enumerate(self.embeddings)], 2)

        rnn_input = torch.cat([dense_tm, embedded], 2)
        output, hidden = self.rnn(rnn_input)
        return output, hidden


# Decoder
class Decoder(nn.Module):
    '''
    LSTM decoder followed by a linear prediction head.

    Arguments:
    input_size : width of the LSTM input (dense features plus the summed embedding widths)
    embedding_sizes : iterable of (num_classes, embedding_dim), one per embedded column
    output_size : width of the prediction produced by the linear head
    config : object providing rnn_num_hidden, rnn_num_layers and dec_rnn_dropout
    '''

    def __init__(self, input_size, embedding_sizes, output_size, config):
        super().__init__()
        self.input_size = input_size

        # one embedding table per categorical code column, in column order
        embedding_layers = []
        for num_classes, emb_dim in embedding_sizes:
            embedding_layers.append(nn.Embedding(num_classes, emb_dim))
        self.embeddings = nn.ModuleList(embedding_layers)

        self.rnn = nn.LSTM(self.input_size, config.rnn_num_hidden,
                           config.rnn_num_layers,
                           dropout=config.dec_rnn_dropout,
                           bidirectional=True)
        # bidirectional -> forward and backward hidden states are concatenated
        self.pred = nn.Linear(config.rnn_num_hidden*2, output_size)

    def forward(self, x, x_emb, hidden):
        '''
        x : batch-major dense features; x_emb : batch-major integer codes;
        hidden : LSTM state to start from.
        Returns (prediction for the first timestep of x, new LSTM state).
        '''
        # the LSTM is time-major, so swap the batch and time axes
        dense_tm = x.permute(1, 0, 2)
        codes_tm = x_emb.permute(1, 0, 2)

        embedded = torch.cat(
            [table(codes_tm[:, :, col]) for col, table in enumerate(self.embeddings)], 2)
        rnn_input = torch.cat([dense_tm, embedded], 2)

        rnn_output, hidden = self.rnn(rnn_input, hidden)
        # only the first timestep is passed to the head
        prediction = self.pred(rnn_output[0])
        return prediction, hidden


class Seq2Seq(nn.Module):
    '''
    Encoder-decoder wrapper that rolls the decoder forward one timestep at a time.

    Arguments:
    encoder : module called as encoder(x_enc, x_enc_emb) -> (output, hidden)
    decoder : module called as decoder(x, x_emb, hidden) -> (prediction, hidden), where the
              prediction has one value per batch row
    '''

    def __init__(self, encoder, decoder):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales):
        '''
        Returns a (batch_size, pred_len) tensor of predictions, where pred_len is the
        second dimension of x_dec.
        '''
        batch_size, pred_len = x_dec.shape[0:2]

        # create a tensor to store the outputs; it is allocated on the device of the inputs so
        # the module does not depend on a global config object
        predictions = torch.zeros(batch_size, pred_len, device=x_dec.device)

        # the hidden state of the encoder initializes the decoder's hidden state
        encoder_output, hidden = self.encoder(x_enc, x_enc_emb)

        # for each prediction timestep, use the output of the previous step,
        # concatenated with other features as the input;
        # the first timestep is seeded with the last known actual sales
        prev_sales = x_last_day_sales
        for timestep in range(pred_len):
            dec_input = torch.cat([x_dec[:, timestep, :], prev_sales], dim=1).unsqueeze(1)

            decoder_output, hidden = self.decoder(dec_input, x_dec_emb[:, timestep, :].unsqueeze(1), hidden)

            # add predictions to predictions tensor
            predictions[:, timestep] = decoder_output.view(-1)
            # current timestep's output serves as the sales input of the next timestep
            prev_sales = decoder_output

        return predictions

In [5]:
class Config():
    '''Hyper-parameters and runtime settings, read as class attributes (`config = Config`).'''

    # hidden dimension and no. of layers will be the same for both encoder and decoder
    rnn_num_hidden = 256
    rnn_num_layers = 2
    enc_rnn_dropout = 0.0
    dec_rnn_dropout = 0.0

    num_epochs = 10
    batch_size = 64
    learning_rate = 0.001

    # use the GPU when one is available, otherwise fall back to the CPU so the notebook
    # still runs on machines without CUDA
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# the class itself (not an instance) is used as the config object
config = Config

In [6]:
# (number of classes, embedding width) per embedded column.
embedding_sizes = [(3049, 50), (7, 4), (10, 5)] # for item_id, dept_id, store_id respectively
# 46 dense inputs = 7 non-embedded enc/dec features (sell_price + 6 one-hot flags)
#                 + 38 calendar features + 1 sales value
# (previous-day sales for the encoder; last actual / previously predicted sales for the decoder).
num_features_enc = 46 + sum([j for i, j in embedding_sizes])
num_features_dec = 46 + sum([j for i, j in embedding_sizes])
enc = Encoder(num_features_enc, embedding_sizes, config)
dec = Decoder(num_features_dec, embedding_sizes, 1, config)  # one predicted value per step
model = Seq2Seq(enc, dec)
model.to(config.device)
# writer = SummaryWriter()

Seq2Seq(
  (encoder): Encoder(
    (embeddings): ModuleList(
      (0): Embedding(3049, 50)
      (1): Embedding(7, 4)
      (2): Embedding(10, 5)
    )
    (rnn): LSTM(105, 256, num_layers=2, bidirectional=True)
  )
  (decoder): Decoder(
    (embeddings): ModuleList(
      (0): Embedding(3049, 50)
      (1): Embedding(7, 4)
      (2): Embedding(10, 5)
    )
    (rnn): LSTM(105, 256, num_layers=2, bidirectional=True)
    (pred): Linear(in_features=512, out_features=1, bias=True)
  )
)

In [7]:
# Loss and Optimizer: mean squared error on the forecasts, Adam over all model parameters.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

NameError: name 'model' is not defined

In [10]:
# Training batches are reshuffled every epoch; validation batches keep dataset order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=config.batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=config.batch_size)

In [6]:
def get_val_loss():
    '''
    Print the mean per-batch validation loss.

    Reads the notebook-level `model`, `val_loader`, `criterion` and `config`.
    Leaves `model` in eval mode; callers that continue training must call model.train().
    '''
    model.eval()
    losses = []
    # evaluation only: disable autograd so no graph is built for the whole decoding loop
    with torch.no_grad():
        for x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales, y in notebook.tqdm(val_loader):
            x_enc, x_dec = x_enc.to(config.device), x_dec.to(config.device)
            x_enc_emb, x_dec_emb = x_enc_emb.to(config.device), x_dec_emb.to(config.device)
            x_last_day_sales = x_last_day_sales.to(config.device)
            y = y.to(config.device)

            preds = model(x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales)
            losses.append(criterion(preds, y).item())

    print('Validation Loss: %.4f' % np.mean(losses))

In [14]:
# Train for config.num_epochs epochs with a progress bar per epoch; validate after every epoch.
# len(train_loader) counts the final partial batch, unlike len(train_dataset)//batch_size.
num_batches = len(train_loader)
for epoch in range(config.num_epochs):
    # get_val_loss() switches the model to eval mode, so switch back once per epoch
    model.train()
    progbar = notebook.tqdm(train_loader)
    losses = []
    for i, (x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales, y) in enumerate(progbar):
        x_enc, x_dec = x_enc.to(config.device), x_dec.to(config.device)
        x_enc_emb, x_dec_emb = x_enc_emb.to(config.device), x_dec_emb.to(config.device)
        x_last_day_sales = x_last_day_sales.to(config.device)
        y = y.to(config.device)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        preds = model(x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales)
        loss = criterion(preds, y)  # already a scalar: nn.MSELoss averages by default
        loss.backward()
        optimizer.step()

        loss_iter = loss.item()
        progbar.set_description("loss = %0.3f " % loss_iter)
        losses.append(loss_iter)

    print('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
          % (epoch+1, config.num_epochs, i+1, num_batches, np.mean(losses)))
    get_val_loss()

# writer.close()

HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [1/10], Iter [477/476] Loss: 2.3053


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.3677


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [2/10], Iter [477/476] Loss: 2.2163


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2312


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [3/10], Iter [477/476] Loss: 2.1666


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2313


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [4/10], Iter [477/476] Loss: 2.1116


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.1783


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [5/10], Iter [477/476] Loss: 2.0902


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2125


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [6/10], Iter [477/476] Loss: 2.0432


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2756


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [7/10], Iter [477/476] Loss: 2.0266


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.1981


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [8/10], Iter [477/476] Loss: 1.9879


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2167


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [9/10], Iter [477/476] Loss: 1.9722


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2564


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Epoch [10/10], Iter [477/476] Loss: 1.9353


HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 2.2858


In [17]:
# Persist only the learned parameters (state dict), not the whole model object.
torch.save(model.state_dict(), '../submissions/sub1/model.pth.tar')

#### Generate submission file

In [10]:
# Restore the saved parameters into the already-constructed model and switch to eval mode.
model.load_state_dict(torch.load('../submissions/sub1/model.pth.tar'))
model.eval()

Seq2Seq(
  (encoder): Encoder(
    (embeddings): ModuleList(
      (0): Embedding(3049, 50)
      (1): Embedding(7, 4)
      (2): Embedding(10, 5)
    )
    (rnn): LSTM(105, 256, num_layers=2, bidirectional=True)
  )
  (decoder): Decoder(
    (embeddings): ModuleList(
      (0): Embedding(3049, 50)
      (1): Embedding(7, 4)
      (2): Embedding(10, 5)
    )
    (rnn): LSTM(105, 256, num_layers=2, bidirectional=True)
    (pred): Linear(in_features=512, out_features=1, bias=True)
  )
)

In [11]:
# No shuffling: predictions must stay in dataset order to line up with the submission rows.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=config.batch_size)

In [53]:
# Collect per-batch forecasts for the test window (items carry no targets: five tensors each).
preds = []
# inference only: disable autograd so no graph is kept for the unrolled decoder
with torch.no_grad():
    for x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales in notebook.tqdm(test_loader):
        x_enc, x_dec = x_enc.to(config.device), x_dec.to(config.device)
        x_enc_emb, x_dec_emb = x_enc_emb.to(config.device), x_dec_emb.to(config.device)
        x_last_day_sales = x_last_day_sales.to(config.device)

        preds.append(model(x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales).cpu().numpy())

HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))

ValueError: too many values to unpack (expected 5)

In [18]:
# Stack the per-batch arrays along the series axis.
predictions = np.concatenate(preds, 0)

In [26]:
# Write the forecasts into the first len(predictions) rows, skipping the leading id column;
# the remaining rows of the template are left untouched.
sample_submission.iloc[:predictions.shape[0], 1:] = predictions

In [31]:
# Save the filled template as a gzip-compressed CSV without the DataFrame index.
sample_submission.to_csv('../submissions/sub1/submission.csv.gz', compression='gzip', index=False)

In [34]:
# Re-create the validation loader (same arguments as the definition next to train_loader).
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=config.batch_size)

In [35]:
# Validation loss of the model currently in memory.
get_val_loss()

HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))


Validation Loss: 1.9693


In [36]:
# Scratch cell: display the sales table.
train_data

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,0,0,2,2,...,2,0,0,0,0,0,1,0,0,1
30486,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
30487,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,0,6,0,2,...,2,1,0,2,0,1,0,0,1,0
30488,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,0,0,0,0,...,0,0,1,0,0,1,0,3,1,3


In [39]:
# Scratch cell: show the [begin, end) timestep indices of the test window.
test_t_b, test_t_e

(1912, 1940)

In [47]:
# Scratch arithmetic — presumably checking the distance between days d_1914 and d_1941.
1941-1914

27

In [37]:
# Scratch cell: display the filled submission table.
sample_submission

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0.948881,0.818636,0.834599,0.884738,0.988404,1.120274,1.269215,1.171244,1.076180,...,1.075340,1.220519,1.376329,1.103644,0.979420,0.934300,0.943113,1.029701,1.175751,1.238253
1,HOBBIES_1_002_CA_1_validation,0.667062,0.418846,0.454884,0.442493,0.471557,0.527329,0.580185,0.485661,0.440305,...,0.463578,0.565727,0.692143,0.484468,0.435793,0.441920,0.459359,0.497804,0.574895,0.581653
2,HOBBIES_1_003_CA_1_validation,0.980776,0.854881,0.910752,0.979821,1.090250,1.198755,1.203921,1.045154,1.003860,...,1.082921,1.206002,1.246968,0.972033,0.934462,0.958826,1.016848,1.128089,1.271931,1.235085
3,HOBBIES_1_004_CA_1_validation,2.781727,2.314433,2.333030,2.248819,2.350440,2.592122,2.818342,2.500714,2.382025,...,2.314852,2.642580,2.921061,2.104538,1.960665,1.912035,1.912758,2.017019,2.209183,2.143029
4,HOBBIES_1_005_CA_1_validation,1.882275,1.600861,1.684266,1.818450,2.112558,2.437965,2.440682,1.948949,1.707919,...,1.813270,2.178214,2.371747,1.740101,1.583631,1.612978,1.788136,2.171239,2.618240,2.512470
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60975,FOODS_3_823_WI_3_evaluation,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
60976,FOODS_3_824_WI_3_evaluation,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
60977,FOODS_3_825_WI_3_evaluation,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
60978,FOODS_3_826_WI_3_evaluation,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [54]:
# Collect targets and forecasts for the validation window, batch by batch.
y_true, y_pred = [], []
# inference only: disable autograd so no graph is kept for the unrolled decoder
with torch.no_grad():
    for x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales, y in notebook.tqdm(val_loader):
        x_enc, x_dec = x_enc.to(config.device), x_dec.to(config.device)
        x_enc_emb, x_dec_emb = x_enc_emb.to(config.device), x_dec_emb.to(config.device)
        x_last_day_sales = x_last_day_sales.to(config.device)
        y_true.append(y)  # targets stay on the CPU as tensors; concatenated in the next cell

        y_pred.append(model(x_enc, x_enc_emb, x_dec, x_dec_emb, x_last_day_sales).cpu().numpy())

HBox(children=(FloatProgress(value=0.0, max=477.0), HTML(value='')))




In [58]:
# Join the per-batch pieces into single arrays with one row per series.
y_true = torch.cat(y_true, 0).data.cpu().numpy()
y_pred = np.concatenate(y_pred, 0)

In [59]:
from sklearn.metrics import mean_squared_error
# RMSE per forecast day (column), averaged over the days. This is the quantity
# `mean_squared_error(..., squared=False)` returned for 2-D targets; the `squared` argument
# was deprecated and later removed from scikit-learn, so the root is taken explicitly here.
np.sqrt(mean_squared_error(y_true, y_pred, multioutput='raw_values')).mean()

2.2369509