In [None]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import math
import json
import matplotlib.pyplot as plt
import seaborn as sns

torch.manual_seed(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
daily_category_sales = pd.read_csv('../data/daily_category_sales.csv', header=[0,1], index_col=0)
quantity_data = daily_category_sales['quantity_sum'].copy()
print(quantity_data.isna().sum())
quantity_data.fillna(0, inplace=True) # use zero to replace NaN

# 1. LSTM for sales volume prediction

## Build dataset
### Use sales quantity of last 30 days to predict sales quantity of next 7 days

In [89]:
lag = 30
X = np.array([np.array(quantity_data.iloc[idx-lag:idx,]) for idx in range(lag, quantity_data.shape[0]-7, 1)])
y = np.array([np.array(quantity_data.iloc[idx:idx+7,]) for idx in range(lag, quantity_data.shape[0]-7, 1)])

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.FloatTensor(y_train).to(device)
X_val_tensor = torch.FloatTensor(X_val).to(device)
y_val_tensor = torch.FloatTensor(y_val).to(device)
print(X_train_tensor.shape, y_train_tensor.shape)
# (batch, seq_len, input_size)

torch.Size([733, 30, 6]) torch.Size([733, 7, 6])


## Configure & initialize model

In [None]:
class MultiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, output_seq_len):
        super(MultiLSTM, self).__init__()

        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size*output_seq_len)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        last_output = lstm_out[:, -1, :]
        output = self.fc(last_output)
        output = output.view(output.size(0), output_seq_len, output_size)  # 将输出变形为 (batch_size, m, 7)
        return output


In [None]:
# Initialize the LSTM model
input_size = 6
hidden_size = 256
num_layers = 2
output_size = 6
output_seq_len = 7

model = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.002)

## Training

In [None]:
# load data in batches
dataset = TensorDataset(X_train_tensor, y_train_tensor)
batch_size = 128
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
n_batch = math.ceil(X_train_tensor.shape[0] / batch_size)

train_loss, val_loss = [], [] 
# train
num_epochs = 2000
for epoch in range(num_epochs):
    epoch_train_loss = 0
    model.train()
    for X_batch, y_batch in data_loader:
        # forward pass
        outputs = model(X_batch)

        # Compute the RMSE loss
        loss = torch.sqrt(criterion(outputs, y_batch))

        # backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item() / n_batch
    
    # validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        epoch_val_loss = torch.sqrt(criterion(val_outputs, y_val_tensor))

    train_loss.append(epoch_train_loss)
    val_loss.append(epoch_val_loss.item())

    if (epoch+1) % 50 == 0 or epoch == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Validation Loss: {epoch_val_loss:.4f}')

In [None]:
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(train_loss, label='Train Loss')
ax.plot(val_loss, label='Validation Loss')
ax.set_xlabel('Epoch', fontsize=16)
ax.set_ylabel('RMSE Loss', fontsize=16)
ax.set_title('Train and Validation Loss',fontsize=20)
ax.set_ylim(0, 110)
ax.legend(fontsize=16)
ax.grid(True)
plt.show()

In [None]:
fig.savefig('../figures/lstm1_train_val_loss.png',dpi=600)

In [None]:
model_config = {
    "input_size": input_size,
    "hidden_size": hidden_size,
    "num_layers": num_layers,
    "output_size": output_size,
    "fc_size": 24
}

with open('../model/lstm1_config.json', 'w') as f:
    json.dump(model_config, f)

torch.save(model.state_dict(),'../model/lstm1_for_category_sales_volume.pt')

## Model Metrics: R-square & RMSE

In [None]:
from sklearn.metrics import r2_score, mean_squared_error
y_pred = model(X_val_tensor).cpu().detach().numpy()
y_true = y_val_tensor.cpu().detach().numpy()
print(f'R-square: {r2_score(y_true.reshape(-1), y_pred.reshape(-1)):.4f};\t RMSE: {torch.sqrt(criterion(val_outputs, y_val_tensor)).item():.4f}' )

### TODO: Visualize prediction of each category

In [None]:
# load model weights
model = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
model.load_state_dict(torch.load('../model/lstm1_for_category_sales_volume.pt'))

In [None]:
X_tensor = torch.FloatTensor(X).to(device)
y_tensor = torch.FloatTensor(y).to(device)

model.eval()
with torch.no_grad():
    y_pred_tensor = model(X_tensor)

y_true = y_tensor[:,0,:].cpu().detach().numpy().T 
y_pred = y_pred_tensor[:,0,:].cpu().detach().numpy().T

math.sqrt(mean_squared_error(y_true.reshape(-1), y_pred.reshape(-1)))

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(y_true[0].reshape(-1), label='True')
ax.plot(y_pred[0].reshape(-1), label='Predicted')

# 2. LSTM for wholesale price prediction

In [None]:
# average wholesale price
price_data = daily_category_sales['cost_sum'] / daily_category_sales['quantity_sum']
price_data.fillna(0, inplace=True) 

lag = 30
X = np.array([np.array(price_data.iloc[idx-lag:idx,]) for idx in range(lag, price_data.shape[0]-7, 1)])
y = np.array([np.array(price_data.iloc[idx:idx+7,]) for idx in range(lag, price_data.shape[0]-7, 1)])

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

X_train_tensor = torch.FloatTensor(X_train).to(device)
y_train_tensor = torch.FloatTensor(y_train).to(device)
X_val_tensor = torch.FloatTensor(X_val).to(device)
y_val_tensor = torch.FloatTensor(y_val).to(device)
print(X_train_tensor.shape, y_train_tensor.shape)
# (batch, seq_len, input_size)

In [None]:
model = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
print(model)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# load data in batches
dataset = TensorDataset(X_train_tensor, y_train_tensor)
batch_size = 128
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
n_batch = math.ceil(X_train_tensor.shape[0] / batch_size)

train_loss, val_loss = [], [] 
# train
num_epochs = 1000
for epoch in range(num_epochs):
    epoch_train_loss = 0
    model.train()
    for X_batch, y_batch in data_loader:
        # forward pass
        outputs = model(X_batch)

        # Compute the RMSE loss
        loss = torch.sqrt(criterion(outputs, y_batch))

        # backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item() / n_batch
    
    # validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        epoch_val_loss = torch.sqrt(criterion(val_outputs, y_val_tensor))

    train_loss.append(epoch_train_loss)
    val_loss.append(epoch_val_loss.item())

    if (epoch+1) % 50 == 0 or epoch == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_train_loss:.4f}, Validation Loss: {epoch_val_loss:.4f}')


In [None]:
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(train_loss, label='Train Loss')
ax.plot(val_loss, label='Validation Loss')
ax.set_xlabel('Epoch', fontsize=16)
ax.set_ylabel('RMSE Loss', fontsize=16)
ax.set_title('Train and Validation Loss',fontsize=20)
ax.set_ylim(0, 3)
ax.legend(fontsize=16)
ax.grid(True)
plt.show()

In [None]:
fig.savefig('../figures/lstm2_train_val_loss.png',dpi=600)

In [None]:
model_config = {
    "input_size": input_size,
    "hidden_size": hidden_size,
    "num_layers": num_layers,
    "output_size": output_size,
    "fc_size": 24
}

with open('../model/lstm2_config.json', 'w') as f:
    json.dump(model_config, f)

torch.save(model.state_dict(),'../model/lstm2_for_category_wholesale_price.pt')

# 3. Make predictions for the following week

In [None]:
lstm1 = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
lstm1.load_state_dict(torch.load('../model/lstm1_for_category_sales_volume.pt'))
lstm2 = MultiLSTM(input_size, hidden_size, output_size, output_seq_len).to(device)
lstm2.load_state_dict(torch.load('../model/lstm2_for_category_wholesale_price.pt'))

In [None]:
pred_X1 = quantity_data.iloc[-lag:,].values.reshape(1, lag, input_size)
pred_X1_tensor = torch.FloatTensor(pred_X1).to(device)

pred_X2 = price_data.iloc[-lag:,].values.reshape(1, lag, input_size)
pred_X2_tensor = torch.FloatTensor(pred_X2).to(device)

print(pred_X1_tensor.shape, pred_X2_tensor.shape)

In [None]:
with torch.no_grad():
    pred_y1 = lstm1(pred_X1_tensor)
    pred_y2 = lstm2(pred_X2_tensor)

pred_y1 = pred_y1.cpu().detach().numpy()
pred_y2 = pred_y2.cpu().detach().numpy()
pred_y1 = np.squeeze(pred_y1)
pred_y2 = np.squeeze(pred_y2)


In [None]:
np.save('../results/pred_sale_volume.npy', pred_y1)
np.save('../results/pred_wholesale_price.npy', pred_y2)

## Evaluation

In [96]:
# lag = 30
# q_X = np.array([np.array(quantity_data.iloc[idx-lag:idx,]) for idx in range(lag, quantity_data.shape[0]-7, 1)])
# q_y = np.array(quantity_data[30:-7]).T

# price_data = daily_category_sales['cost_sum'] / daily_category_sales['quantity_sum']
# price_data.fillna(0, inplace=True) 

# p_X = np.array([np.array(price_data.iloc[idx-lag:idx,]) for idx in range(lag, price_data.shape[0]-7, 1)])
# p_y = np.array(price_data[30:-7]).T

# print(q_X.shape, q_y.shape, p_X.shape, p_y.shape)

lag = 30
price_data = daily_category_sales['cost_sum'] / daily_category_sales['quantity_sum']
price_data.fillna(0, inplace=True) 

_q_X = np.array([np.array(quantity_data.iloc[idx-lag:idx,]) for idx in range(lag, quantity_data.shape[0]-7, 1)])
_q_y = np.array([np.array(quantity_data.iloc[idx:idx+7,]) for idx in range(lag, quantity_data.shape[0]-7, 1)])

q_X_train, q_X_val, q_y_train, q_y_val = train_test_split(_q_X, _q_y, test_size=0.3, shuffle=True, random_state=42)

_p_X = np.array([np.array(price_data.iloc[idx-lag:idx,]) for idx in range(lag, price_data.shape[0]-7, 1)])
_p_y = np.array([np.array(price_data.iloc[idx:idx+7,]) for idx in range(lag, price_data.shape[0]-7, 1)])

p_X_train, p_X_val, p_y_train, p_y_val = train_test_split(_p_X, _p_y, test_size=0.3, shuffle=True, random_state=42)
q_y_val = q_y_val.T
p_y_val = p_y_val.T
print(q_y_val.shape, p_y_val.shape)

(6, 7, 315) (6, 7, 315)


In [97]:
with torch.no_grad():
    q_pred_y = lstm1(torch.FloatTensor(q_X_val).to(device))
    p_pred_y = lstm2(torch.FloatTensor(p_X_val).to(device))

# q_pred_y = q_pred_y[:,0,:].cpu().detach().numpy().T
# p_pred_y = p_pred_y[:,0,:].cpu().detach().numpy().T
q_pred_y = q_pred_y.cpu().detach().numpy().T
p_pred_y = p_pred_y.cpu().detach().numpy().T

print(q_pred_y.shape, p_pred_y.shape)

(6, 7, 315) (6, 7, 315)


In [98]:
categories_enc =  {'Aquatic Roots and Tubers':'1', 'Cauliflower':'2' ,'Chili Peppers':'3', 'Edible Mushrooms':'4', 'Leafy Greens':'5', 'Solanaceous Vegetables':'6'}

In [None]:
# t_span = range(q_y.shape[1])
# fig, ax = plt.subplots(3,2,figsize=(20, 24), sharex=True)
# sns.lineplot(x=t_span, y=q_y[0], ax=ax[0,0], label='True',color='green', alpha=0.8)
# sns.lineplot(x=t_span, y=q_pred_y[0], ax=ax[0,0], label='Predicted', color='orange', alpha=0.8)
# ax[0,0].set_title(list(categories_enc.keys())[0], fontsize=20)


# sns.lineplot(x=t_span, y=q_y[1], ax=ax[0,1], label='True',color='green', alpha=0.8)
# sns.lineplot(x=t_span, y=q_pred_y[1], ax=ax[0,1], label='Predicted', color='orange', alpha=0.8)
# ax[0,1].set_title(list(categories_enc.keys())[1], fontsize=20)

# sns.lineplot(x=t_span, y=q_y[2], ax=ax[1,0], label='True',color='green', alpha=0.8)
# sns.lineplot(x=t_span, y=q_pred_y[2], ax=ax[1,0], label='Predicted', color='orange', alpha=0.8)
# ax[1,0].set_title(list(categories_enc.keys())[2], fontsize=20)

# sns.lineplot(x=t_span, y=q_y[3], ax=ax[1,1], label='True',color='green', alpha=0.8)
# sns.lineplot(x=t_span, y=q_pred_y[3], ax=ax[1,1], label='Predicted', color='orange', alpha=0.8)
# ax[1,1].set_title(list(categories_enc.keys())[3], fontsize=20)

# sns.lineplot(x=t_span, y=q_y[4], ax=ax[2,0], label='True',color='green', alpha=0.8)
# sns.lineplot(x=t_span, y=q_pred_y[4], ax=ax[2,0], label='Predicted', color='orange', alpha=0.8)
# ax[2,0].set_title(list(categories_enc.keys())[4], fontsize=20)

# sns.lineplot(x=t_span, y=q_y[5], ax=ax[2,1], label='True',color='green', alpha=0.8)
# sns.lineplot(x=t_span, y=q_pred_y[5], ax=ax[2,1], label='Predicted', color='orange', alpha=0.8)
# ax[2,1].set_title(list(categories_enc.keys())[5], fontsize=20)

# ax[0,0].set_ylabel('Sales Volume', fontsize=16)
# ax[1,0].set_ylabel('Sales Volume', fontsize=16)
# ax[2,0].set_ylabel('Sales Volume', fontsize=16)
# ax[2,0].set_xlabel('Time', fontsize=16)
# ax[2,1].set_xlabel('Time', fontsize=16)




In [84]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
print(math.sqrt(mean_squared_error(q_y[0], q_pred_y[0])))
print(mean_absolute_error(q_y[0], q_pred_y[0]))

14.57787449853382
9.750038890149076


In [99]:
q_eval_df = pd.DataFrame(columns=['RMSE', 'MAE'], index=list(categories_enc.keys())+['Global Average'])
p_eval_df = pd.DataFrame(columns=['RMSE', 'MAE'], index=list(categories_enc.keys())+['Global Average'])
for i in range(6):
    q_eval_df.iloc[i,0] = round(math.sqrt(mean_squared_error(q_y_val[i], q_pred_y[i])),2)
    q_eval_df.iloc[i,1] = round(mean_absolute_error(q_y_val[i], q_pred_y[i]),2)
    p_eval_df.iloc[i,0] = round(math.sqrt(mean_squared_error(p_y_val[i], p_pred_y[i])),2)
    p_eval_df.iloc[i,1] = round(mean_absolute_error(p_y_val[i], p_pred_y[i]),2)

q_eval_df.iloc[6,0] = round(math.sqrt(mean_squared_error(q_y_val.reshape(-1), q_pred_y.reshape(-1))),2)
q_eval_df.iloc[6,1] = round(mean_absolute_error(q_y_val.reshape(-1), q_pred_y.reshape(-1)),2)
p_eval_df.iloc[6,0] = round(math.sqrt(mean_squared_error(p_y_val.reshape(-1), p_pred_y.reshape(-1))),2)
p_eval_df.iloc[6,1] = round(mean_absolute_error(p_y_val.reshape(-1), p_pred_y.reshape(-1)),2)
display(q_eval_df)
display(p_eval_df)

Unnamed: 0,RMSE,MAE
Aquatic Roots and Tubers,18.46,11.69
Cauliflower,15.15,11.11
Chili Peppers,27.34,15.98
Edible Mushrooms,26.06,15.83
Leafy Greens,43.0,23.6
Solanaceous Vegetables,8.61,6.05
Global Average,25.56,14.05


Unnamed: 0,RMSE,MAE
Aquatic Roots and Tubers,1.08,0.64
Cauliflower,0.58,0.37
Chili Peppers,0.5,0.35
Edible Mushrooms,0.85,0.63
Leafy Greens,0.34,0.25
Solanaceous Vegetables,0.55,0.37
Global Average,0.69,0.43
