#### Order Prediction and Prioritisation with Long Short-Term Memory (LSTM) Networks

In [None]:
# spare cell to install packages


In [1]:
# import necessary libraries

import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


Data Preparation

In [2]:
# generate sample data - a DataFrame containing simulated daily order volumes for one year

np.random.seed(0)
date_range = pd.date_range(start='1/1/2020', periods=365, freq='D')
order_volumes = np.random.poisson(lam=20, size=len(date_range))

data = pd.DataFrame({'date': date_range, 'order_volume': order_volumes})
data.head()

Unnamed: 0,date,order_volume
0,2020-01-01,21
1,2020-01-02,21
2,2020-01-03,19
3,2020-01-04,19
4,2020-01-05,32


Data Preprocessing

In [3]:
# normalise the order volumes - scale the order volumes to a range between 0 and 1

scaler = MinMaxScaler(feature_range=(0, 1))
data['order_volume'] = scaler.fit_transform(data[['order_volume']])

In [4]:
# create sequences for LSTM

# each sequence is a sliding window of 30 days, and the label is the order volume on the next day
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        sequence = data[i:i+seq_length]
        label = data[i+seq_length]
        sequences.append((sequence, label))
    return sequences

seq_length = 30
sequences = create_sequences(data['order_volume'].values, seq_length)

In [5]:
# unpacking the sequences and labels into two separate lists: 
# X will contain all the sequences, y will contain all the labels
X, y = zip(*sequences)

# convert to numpy arrays
X = np.array(X)
y = np.array(y)

# changes the shape of X to fit the expected input format for the LSTM model (number_of_samples, seq_length, 1)
# X.shape[0] is the number of samples (or sequences)
# X.shape[1] is the sequence length (seq_length, e.g., 30 days)
X = X.reshape((X.shape[0], X.shape[1], 1))


Model Definition

In [6]:
# defining LSTM model

# sequential model
model = Sequential()

# first LSTM layer with 50 units, which returns sequences
model.add(LSTM(50, return_sequences=True, input_shape=(seq_length, 1)))

# second LSTM layer with 50 units, which does not return sequences
model.add(LSTM(50))

# dense output layer with 1 unit for predicting the next day's order volume
model.add(Dense(1))

# adam optimizer and mean squared error (MSE) loss function
model.compile(optimizer='adam', loss='mse')

model.summary()

  super().__init__(**kwargs)


In [7]:
# training the model

history = model.fit(X, y, epochs=50, batch_size=32, validation_split=0.2)


Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 64ms/step - loss: 0.1808 - val_loss: 0.0441
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0484 - val_loss: 0.0363
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0363 - val_loss: 0.0353
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0351 - val_loss: 0.0312
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0358 - val_loss: 0.0318
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0288 - val_loss: 0.0306
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0329 - val_loss: 0.0301
Epoch 8/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0342 - val_loss: 0.0314
Epoch 9/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

Prediction and Prioritisation

In [8]:
# predict future order volumes

# to predict for the next 10 days
future_steps = 10

# initialise empty list to store the predicted order volumes
predictions = []

# start first prediction with the last sequence in the training data
last_sequence = X[-1]

# iteratively predict by looping through future steps
for _ in range(future_steps):

    # use model to predict the next order volume
    # reshape last_sequence to match the input shape expected by the model
    next_pred = model.predict(last_sequence.reshape((1, seq_length, 1)))[0][0]

    # append prediction to list
    predictions.append(next_pred)

    # update last_sequence by removing first element and appending the predicted value
    # this new sequence is used for the next prediction
    last_sequence = np.append(last_sequence[1:], [[next_pred]], axis=0)

# convert list into a numpy array and reshape it to have one column
# inverse transform to convert the normalized predictions back to the original scale of order volumes
predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 270ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


In [9]:
# create dataframe to display the predicted order volumes and their corresponding dates

predicted_volumes = pd.DataFrame({
    'date': pd.date_range(start=data['date'].iloc[-1] + pd.Timedelta(days=1), periods=future_steps),
    'predicted_order_volume': predictions.flatten()
})

print(predicted_volumes)

# predicted_volumes dataFrame shows the forecasted order volumes for a future period beyond the original dataset

        date  predicted_order_volume
0 2020-12-31               19.884140
1 2021-01-01               19.905342
2 2021-01-02               19.907501
3 2021-01-03               19.890549
4 2021-01-04               19.863300
5 2021-01-05               19.832382
6 2021-01-06               19.799273
7 2021-01-07               19.764429
8 2021-01-08               19.727570
9 2021-01-09               19.694050


In [10]:
# prioritize picking sequences based on urgency

# sort dataframe by predicted order volumes in descending order: higher predicted volumes indicate higher urgency
urgent_orders = predicted_volumes.sort_values(by='predicted_order_volume', ascending=False)

print(urgent_orders)
# outputs sorted list of predicted order volumes for future dates, with higher predicted volumes indicating higher urgency
# so that we can prioritize orders w higher urgency 

        date  predicted_order_volume
2 2021-01-02               19.907501
1 2021-01-01               19.905342
3 2021-01-03               19.890549
0 2020-12-31               19.884140
4 2021-01-04               19.863300
5 2021-01-05               19.832382
6 2021-01-06               19.799273
7 2021-01-07               19.764429
8 2021-01-08               19.727570
9 2021-01-09               19.694050
