In [59]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

In [60]:
# load pickle files

with open('data_sales/train_data.pkl', 'rb') as file:
    train_data = pickle.load(file)
    
with open('data_sales/test_data.pkl', 'rb') as file:
    test_data = pickle.load(file)

In [61]:
# Columns (left to right): shop_id, item_id, item_price, item_category, sales counts at 10 time points

print(train_data.shape)

(16648987, 14)


In [62]:
# dataset and dataloader

train_inputs = np.expand_dims(train_data[:, :13], -1)
train_outputs = (train_data[:, 13:])

test_inputs = np.expand_dims(test_data[:, :13], -1)
test_outputs = (test_data[:, 13:])

from torch.utils.data import TensorDataset, DataLoader

train_set = TensorDataset(torch.Tensor(train_inputs), torch.Tensor(train_outputs))
test_set = TensorDataset(torch.Tensor(test_inputs), torch.Tensor(test_outputs))

train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=32, shuffle=True, num_workers=4)

In [63]:
class sales_LSTM(nn.Module):
    
    def __init__(self, num_lin_features, seq_len):
        super(sales_LSTM, self).__init__()
        
        # SELF VARIABLES
        self.num_LSTM = 1 # num of LSTM layers
        self.num_hidden = 10 # num of hidden states retained
        self.seq_len = seq_len # length of sequences passed in
        self.num_lin_features = num_lin_features # number of linear (non-time-series) inputs
        
        # Layers
        self.lstm = nn.LSTM(input_size=1, hidden_size=self.num_hidden, num_layers=self.num_LSTM, batch_first=True) # LSTM layer for sales series data
        self.fc1 = nn.Linear(num_lin_features, 64) # Initial dense layer for ids, price, item cat.
        self.fc2 = nn.Linear(self.num_hidden*self.seq_len+64, 1) # Final fc layer for output
        
    # Forward pass given input vector x of size [batch_size, 14]
    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        lstm_out, self.hidden = self.lstm(x[:, self.num_lin_features:, :]) # pass time series sales counts into lstm
        lstm_out = lstm_out.contiguous().view(batch_size, -1) # change shape to (batch_size, seq_len * num_hidden)
        lin1_out = self.fc1(np.reshape(x[:, :self.num_lin_features, :], (batch_size, 4))) # run non-time-series features through fc layer
        x = torch.cat((lstm_out, lin1_out), dim=1) # concatenate outputs into single [batch_size, x] tensor
        x = self.fc2(x) # run everything through final fc layer
        return x
        
    # init hidden and cell states
    def init_hidden(self, batch_size):
        temp_state = torch.zeros(self.num_LSTM, batch_size, self.num_hidden)
        self.hidden = (temp_state, temp_state)

In [64]:
net = sales_LSTM(4, 9)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

In [65]:
print(net)

sales_LSTM(
  (lstm): LSTM(1, 10, batch_first=True)
  (fc1): Linear(in_features=4, out_features=64, bias=True)
  (fc2): Linear(in_features=154, out_features=1, bias=True)
)


In [68]:
# training loop

import random

epochs = 3

for epoch in range(epochs):
    
    num_correct = 0
    
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data
        optimizer.zero_grad() # zero gradient vector
        
        # net.init_hidden(32)
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        num_correct += (outputs == labels).float().sum()
        
    # accuracy for epoch
    acc = (num_correct / len(train_data)) * 100
    print('Accuracy for epoch ' + str(epoch) + ': ' + str(acc) + '%')

Accuracy for epoch 0: tensor(0.0001)%
Accuracy for epoch 1: tensor(9.0096e-05)%
Accuracy for epoch 2: tensor(0.0002)%


In [69]:
# save state dict

torch.save(net.state_dict(), 'trained_models/sales_predictor.pt')