In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import math
import json

torch.manual_seed(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
daily_category_sales = pd.read_csv('../data/daily_category_sales.csv', header=[0,1], index_col=0)
data = daily_category_sales['quantity_sum'].copy()
print(data.isna().sum())
data.fillna(0, inplace=True) # use zero to replace NaN

# 1. LSTM for sales volume prediction

## Build dataset
### Use sales quantity of last 30 days to predict sales quantity of next 7 days

In [None]:
lag = 30
X = np.array([np.array(data.iloc[idx-lag:idx,]) for idx in range(lag, data.shape[0]-7, 1)])
y = np.array([np.array(data.iloc[idx:idx+7,]) for idx in range(lag, data.shape[0]-7, 1)])

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.FloatTensor(y_train).to(device)
X_val_tensor = torch.FloatTensor(X_val).to(device)
y_val_tensor = torch.FloatTensor(y_val).to(device)
print(X_train_tensor.shape, y_train_tensor.shape)
# (batch, seq_len, input_size)

## Define & initialize model

In [None]:
class MultiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, output_seq_len):
        super(MultiLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size*output_seq_len)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_output = lstm_out[:, -1, :]
        output = self.fc(last_output)
        output = output.view(output.size(0), output_seq_len, output_size)  # 将输出变形为 (batch_size, m, 7)
        return output


In [None]:
# Initialize the LSTM model
input_size = 6
hidden_size = 256
num_layers = 2
output_size = 6
output_seq_len = 7

model = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)

## Training

In [None]:
# load data in batches
dataset = TensorDataset(X_train_tensor, y_train_tensor)
batch_size = 128
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
n_batch = math.ceil(X_train_tensor.shape[0] / batch_size)

train_loss, val_loss = [], [] 
# train
num_epochs = 2000
for epoch in range(num_epochs):
    epoch_train_loss = 0
    model.train()
    for X_batch, y_batch in data_loader:
        # forward pass
        outputs = model(X_batch)

        # Compute the RMSE loss
        loss = torch.sqrt(criterion(outputs, y_batch))

        # backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item() / n_batch
    
    # validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        epoch_val_loss = torch.sqrt(criterion(val_outputs, y_val_tensor))

    train_loss.append(epoch_train_loss)
    val_loss.append(epoch_val_loss.item())

    if (epoch+1) % 50 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Validation Loss: {epoch_val_loss:.4f}')

# Test the model on new data (you can replace this with your own test data)
# test_input = torch.FloatTensor(np.random.rand(10, m, t))
# with torch.no_grad():
#     predicted_output = model(test_input)

# Print the predicted output
# print("Predicted Output:")
# print(predicted_output.numpy())

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(train_loss, label='Train Loss')
ax.plot(val_loss, label='Validation Loss')
ax.set_xlabel('Epoch', fontsize=16)
ax.set_ylabel('RMSE Loss', fontsize=16)
ax.set_title('Train and Validation Loss',fontsize=20)
ax.set_ylim(0, 110)
ax.legend(fontsize=16)
ax.grid(True)
plt.show()

In [None]:
fig.savefig('../figures/lstm1_train_val_loss.png',dpi=600)

In [None]:
model_config = {
    "input_size": input_size,
    "hidden_size": hidden_size,
    "num_layers": num_layers,
    "output_size": output_size,
    "fc_size": 24
}

with open('../model/lstm1_config.json', 'w') as f:
    json.dump(model_config, f)

torch.save(model.state_dict(),'../model/lstm1_for_category_sales_volume.pt')

## Model Metrics: R-square & RMSE

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
y_pred = model(X_val_tensor).cpu().detach().numpy()
y_true = y_val_tensor.cpu().detach().numpy()
print(f'R-square: {r2_score(y_true.reshape(-1), y_pred.reshape(-1)):.4f};\t RMSE: {torch.sqrt(criterion(val_outputs, y_val_tensor)).item():.4f}' )

### Visualize prediction of each category

In [None]:
# load model weights
model = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
model.load_state_dict(torch.load('../model/lstm1_for_category_sales_volume.pt'))

In [None]:
X_tensor = torch.FloatTensor(X).to(device)
y_tensor = torch.FloatTensor(y).to(device)

model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_tensor)

y_true = y_tensor[:,0,:].cpu().detach().numpy().T
y_pred = y_pred_tensor[:,0,:].cpu().detach().numpy().T

In [61]:
math.sqrt(mean_squared_error(y_true.reshape(-1), y_pred.reshape(-1)))

25.55943641865099

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(y_true[0].reshape(-1), label='True')
ax.plot(y_pred[0].reshape(-1), label='Predicted')

# 2. LSTM for wholesale price prediction