In [28]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import math
import json

torch.manual_seed(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
daily_category_sales = pd.read_csv('../data/daily_category_sales.csv', header=[0,1], index_col=0)
data = daily_category_sales['quantity_sum'].copy()
print(data.isna().sum())
data.fillna(0, inplace=True) # use zero to replace NaN

category_name
Aquatic Roots and Tubers     0
Cauliflower                  1
Chili Peppers                0
Edible Mushrooms             0
Leafy Greens                 0
Solanaceous Vegetables      35
dtype: int64


### category sales quantity prediction: <br>use sales quantity of last N days to predict the current sales

In [5]:
lag = 14
X = np.array([np.array(data.iloc[idx-lag:idx,]) for idx in range(lag, data.shape[0], 1)])
y = np.array([np.array(data.iloc[idx,]) for idx in range(lag, data.shape[0], 1)])

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, shuffle=True)

X_tensor = torch.FloatTensor(X_train).to(device)
y_tensor = torch.FloatTensor(y_train).to(device)
X_val_tensor = torch.FloatTensor(X_val).to(device)
y_val_tensor = torch.FloatTensor(y_val).to(device)
print(X_tensor.shape, y_tensor.shape)
# (batch, seq_len, input_size)

torch.Size([749, 14, 6]) torch.Size([749, 6])


In [8]:
# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, fc_size=24):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, fc_size)
        self.fc2= nn.Linear(fc_size, output_size)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc1(out[:, -1, :])  # Take the output from the last time step
        out = self.fc2(out)
        return out

In [25]:
# Initialize the LSTM model
input_size = 6
hidden_size = 128
num_layers = 2
output_size = 6

model = LSTMModel(input_size, hidden_size, num_layers,output_size).to(device)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

LSTMModel(
  (lstm): LSTM(6, 128, num_layers=2, batch_first=True)
  (fc1): Linear(in_features=128, out_features=24, bias=True)
  (fc2): Linear(in_features=24, out_features=6, bias=True)
)


In [26]:
# load data in batches
dataset = TensorDataset(X_tensor, y_tensor)
batch_size = 128
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
n_batch = math.ceil(X_tensor.shape[0] / batch_size)


# train
num_epochs = 200
for epoch in range(num_epochs):
    epoch_train_loss = 0
    model.train()
    for X_batch, y_batch in data_loader:
        # forward pass
        outputs = model(X_batch)

        # Compute the RMSE loss
        loss = torch.sqrt(criterion(outputs, y_batch))

        # backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item() / n_batch
    
    # validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = torch.sqrt(criterion(val_outputs, y_val_tensor))
    
    if (epoch+1) % 5 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Validation Loss: {val_loss:.4f}')

# Test the model on new data (you can replace this with your own test data)
# test_input = torch.FloatTensor(np.random.rand(10, m, t))
# with torch.no_grad():
#     predicted_output = model(test_input)

# Print the predicted output
# print("Predicted Output:")
# print(predicted_output.numpy())

Epoch [5/200], Train Loss: 96.8876, Validation Loss: 95.9333
Epoch [10/200], Train Loss: 84.5648, Validation Loss: 82.5474
Epoch [15/200], Train Loss: 67.9945, Validation Loss: 65.8948
Epoch [20/200], Train Loss: 55.3418, Validation Loss: 54.1174
Epoch [25/200], Train Loss: 50.9733, Validation Loss: 49.3223
Epoch [30/200], Train Loss: 48.8540, Validation Loss: 48.0607
Epoch [35/200], Train Loss: 48.8507, Validation Loss: 47.9249
Epoch [40/200], Train Loss: 48.5017, Validation Loss: 47.9224
Epoch [45/200], Train Loss: 48.5467, Validation Loss: 47.9190
Epoch [50/200], Train Loss: 48.5783, Validation Loss: 47.9221
Epoch [55/200], Train Loss: 47.5570, Validation Loss: 46.1855
Epoch [60/200], Train Loss: 40.7017, Validation Loss: 41.6644
Epoch [65/200], Train Loss: 38.6021, Validation Loss: 39.9742
Epoch [70/200], Train Loss: 38.5305, Validation Loss: 39.5123
Epoch [75/200], Train Loss: 37.2117, Validation Loss: 38.8817
Epoch [80/200], Train Loss: 37.0461, Validation Loss: 38.9446
Epoch [85

In [29]:
model_config = {
    "input_size": input_size,
    "hidden_size": hidden_size,
    "num_layers": num_layers,
    "output_size": output_size,
    "fc_size": 24
}

with open('../model/lstm_config.json', 'w') as f:
    json.dump(model_config, f)

torch.save(model.state_dict(),'../model/lstm_for_category_sales.pt')