In [1]:
import pickle
import os
import numpy as np
base_dir = "C://DATA//train//49_kaggle//KaggleFun//Sales//DATA"

In [2]:
from torch.utils.data import Dataset, DataLoader
class NumpyDataset(Dataset):
    def __init__(self, ds, lag=15):
        self.x_cat = ds[:,0:3]
        self.x_price = ds[:,3:3+lag]
        self.x_time = np.expand_dims(ds[:,33-lag:32], -1)
        self.y = ds[:,-1]
    def __len__(self): 
        return len(self.y)
    def __getitem__(self, idx):
        return self.x_cat[idx], self.x_time[idx], self.x_price[idx], self.y[idx]

In [3]:
train_ds = pickle.load( open( os.path.join(base_dir,"train_15.pkl"), "rb" ) )
test_ds = pickle.load( open( os.path.join(base_dir,"test_15.pkl"), "rb" ) )

In [4]:
train_ds = NumpyDataset(train_ds, lag=15)
test_ds = NumpyDataset(test_ds, lag=15)

batch_size = 200

train_loader = DataLoader(train_ds, shuffle=True, batch_size=batch_size, drop_last = True)
test_loader = DataLoader(test_ds, shuffle=True, batch_size=batch_size, drop_last = True)

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):   
    def __init__(self, lag = 15, batch_size = 200):
        super(Net, self).__init__()
        self.hidden_size = 10
        self.lstm1 = nn.LSTM(1, hidden_size= self.hidden_size, num_layers = 4, batch_first=True)
        self.dropout = nn.Dropout(0.25)
        self.lag = lag
        self.linear1 = nn.Linear(self.hidden_size * (lag-1), 32)
        
#         self.lstm2 = nn.LSTM(1, num_layers = 2, batch_first=True)
#         self.linear2 = nn.Linear(self.hidden_size * (lag-1), 32)
        self.linear2 = nn.Linear(lag, 16)
    
        self.linear_layers = nn.Sequential(
            nn.Linear(3, 16),
            nn.Dropout(0.25),
            nn.Linear(16,32),
            nn.Dropout(0.25),
        )
        
        self.fc = nn.Sequential(
            nn.Linear(48+32,32),
            nn.BatchNorm1d(32),
            nn.Dropout(0.3),
            nn.Linear(32, 32), 
            nn.Dropout(0.3),
            nn.Linear(32, 16), 
            nn.Dropout(0.3),
            nn.Linear(16,1),
        )

    # Defining the forward pass    
    def forward(self, x1, x2, x3):
        x1 = self.linear_layers(x1)
        x2, _ = self.lstm1(x2)
        x2 = x2.contiguous().view(200,-1)
        x2 = self.linear1(x2)
        x2 = self.dropout(x2)
        
        x3 = self.linear2(x3)
        x3 = self.dropout(x3)
        
        x = torch.cat([x1, x2, x3], axis = 1)
        x = self.fc(x)
        return x

In [9]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = Net(lag=15)
model.to(device)
lr=0.05
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
epochs = 20
print(model)

Net(
  (lstm1): LSTM(1, 10, num_layers=4, batch_first=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (linear1): Linear(in_features=140, out_features=32, bias=True)
  (linear2): Linear(in_features=15, out_features=16, bias=True)
  (linear_layers): Sequential(
    (0): Linear(in_features=3, out_features=16, bias=True)
    (1): Dropout(p=0.25, inplace=False)
    (2): Linear(in_features=16, out_features=32, bias=True)
    (3): Dropout(p=0.25, inplace=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=80, out_features=32, bias=True)
    (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=32, out_features=32, bias=True)
    (4): Dropout(p=0.3, inplace=False)
    (5): Linear(in_features=32, out_features=16, bias=True)
    (6): Dropout(p=0.3, inplace=False)
    (7): Linear(in_features=16, out_features=1, bias=True)
  )
)


In [None]:
model.train()
loss_min = np.inf
for i in range(epochs):
    for batch_idx, (x1, x2, x3, y) in enumerate(train_loader):
        x1, x2, x3, y = x1.to(device).float(), x2.to(device).float(), x3.to(device).float(), y.to(device).float()
        model.zero_grad()
        output = model(x1, x2, x3)
        loss = criterion(output.flatten(), y)
        loss.backward()
        optimizer.step()
        if batch_idx == 0:
            test_loss = []
            for xx1, xx2, xx3, yy in test_loader:
                xx1, xx2, xx3, yy = xx1.to(device).float(), xx2.to(device).float(), xx3.to(device).float(), yy.to(device).float()
                model.zero_grad()
                yyout = model(xx1, xx2, xx3)
                test_loss.append(criterion(yyout.flatten(), yy).detach().cpu().numpy())
            print("Epoch: {}, Loss: {:.2f}, Val Loss: {:.2f}".format(i, loss, np.mean(test_loss)))

Epoch: 0, Loss: 2.31, Val Loss: 24.35
Epoch: 1, Loss: 0.26, Val Loss: 24.27
Epoch: 2, Loss: 0.56, Val Loss: 24.27


In [None]:
# Epoch: 0, Loss: 1.84, Val Loss: 25.30
# Epoch: 1, Loss: 1.25, Val Loss: 24.68
# Epoch: 2, Loss: 0.68, Val Loss: 24.46
# Epoch: 3, Loss: 4.10, Val Loss: 26.74
# Epoch: 4, Loss: 0.46, Val Loss: 23.77
# Epoch: 5, Loss: 0.77, Val Loss: 24.23
# Epoch: 6, Loss: 4.06, Val Loss: 23.51
# Epoch: 7, Loss: 0.28, Val Loss: 24.08
# Epoch: 8, Loss: 0.22, Val Loss: 23.92
# Epoch: 9, Loss: 1.90, Val Loss: 23.50
# Epoch: 10, Loss: 1.17, Val Loss: 23.79
# Epoch: 11, Loss: 7.23, Val Loss: 24.23
# Epoch: 12, Loss: 0.47, Val Loss: 24.24
# Epoch: 13, Loss: 0.54, Val Loss: 23.81
# Epoch: 14, Loss: 0.73, Val Loss: 24.24
# Epoch: 15, Loss: 0.55, Val Loss: 24.22
# Epoch: 16, Loss: 8.27, Val Loss: 23.84
# Epoch: 17, Loss: 1.60, Val Loss: 23.38