LSTM forecast model for Walmart's top 10 best-selling products, built to detailed specifications for cross-validation, train-test splits, and sequence handling.

In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit

2024-05-14 11:12:51.858722: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load data
# Read the cleaned CSV, convert `date` to datetimes, and use it as the row index.
# NOTE(review): the later TimeSeriesSplit relies on row order being chronological;
# nothing here sorts the rows -- confirm the file is already ordered by date.
sales_csv = '../raw_data/cleaned_merge_df_top10.csv'
df = pd.read_csv(sales_csv)
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

In [3]:
# Prepare data for LSTM
# Min-max scale `sales` into [0, 1]; the fitted scaler is kept so that model
# outputs can later be mapped back to the original sales units.
# NOTE(review): the scaler is fitted on the whole column before any
# train/test split, so test-fold min/max values influence the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_column = scaler.fit_transform(df[['sales']])
df['scaled_sales'] = scaled_column[:, 0]

def create_sequences(data, input_length, output_length):
    """Slice a series into overlapping (input window, target window) pairs.

    Window ``i`` uses ``data[i : i + input_length]`` as the input and the
    ``output_length`` values that immediately follow it as the target.
    Consecutive windows are shifted by one position.

    Parameters
    ----------
    data : array-like
        Ordered observations; the first axis is the one being windowed.
    input_length : int
        Number of observations in each input window.
    output_length : int
        Number of observations in each target window.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` where ``X`` has shape ``(n_windows, input_length, ...)``
        and ``y`` has shape ``(n_windows, output_length, ...)``. When ``data``
        is too short to hold a single window, both arrays have zero rows but
        keep their trailing dimensions, so callers can still read
        ``X.shape[1]`` or reshape without an IndexError.
    """
    data = np.asarray(data)
    window_span = input_length + output_length
    n_windows = len(data) - window_span + 1

    if n_windows <= 0:
        # Not enough observations for even one window: return correctly
        # shaped empty arrays rather than 1-D arrays of shape (0,).
        trailing_shape = data.shape[1:]
        return (
            np.empty((0, input_length) + trailing_shape, dtype=data.dtype),
            np.empty((0, output_length) + trailing_shape, dtype=data.dtype),
        )

    X = np.array([data[start:start + input_length] for start in range(n_windows)])
    y = np.array([data[start + input_length:start + window_span] for start in range(n_windows)])
    return X, y

# Cross-validation setup
n_splits = 10  # folds passed to TimeSeriesSplit

# Window geometry used when slicing the series and shaping the LSTM input
n_features = 1  # sales is the only feature
input_length = 200  # observations per input window
output_length = 28  # observations predicted per window


In [4]:
# Time Series Cross-validation
tscv = TimeSeriesSplit(n_splits=n_splits)

# Define the LSTM model outside the loop: a single LSTM layer followed by a
# dense layer that emits all output_length forecast steps at once.
model = Sequential()
model.add(LSTM(50, activation='relu'))
model.add(Dense(output_length))
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error'],
)


2024-05-14 11:12:54.039113: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-05-14 11:12:54.039653: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [5]:
# Training and evaluation
# For every TimeSeriesSplit fold: cut input/target windows from the train and
# test rows, fit the model on the train windows, and record the test MAE in
# original sales units.
# NOTE(review): `model` is created once before this loop, so each fold keeps
# training from the previous fold's weights instead of starting fresh --
# confirm this warm start is intended.
# NOTE(review): `scaler` was fitted on the full `sales` column before the
# split, so test-fold values influenced the scaling used here.
fold_results = []

for train_index, test_index in tscv.split(df):
    train, test = df.iloc[train_index], df.iloc[test_index]

    # Create sequences
    X_train, y_train = create_sequences(train['scaled_sales'].values, input_length, output_length)
    X_test, y_test = create_sequences(test['scaled_sales'].values, input_length, output_length)

    # A fold with fewer than input_length + output_length rows yields no
    # windows; fail with a clear message instead of an opaque reshape/fit error.
    if len(X_train) == 0 or len(X_test) == 0:
        raise ValueError(
            "Fold too short to build any sequence: need at least "
            f"{input_length + output_length} rows, got {len(train)} train "
            f"and {len(test)} test rows."
        )

    # Reshape for LSTM input: (samples, timesteps, features)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_features))

    # Fit the model
    model.fit(X_train, y_train, epochs=10, batch_size=16, verbose=0)

    # Evaluate the model
    predictions = model.predict(X_test)

    # The scaler was fitted on one column, so inverse_transform expects shape
    # (n_samples, 1). Flatten every forecast step into that single column,
    # undo the scaling, then restore the (n_windows, output_length) layout.
    predictions = scaler.inverse_transform(predictions.reshape(-1, 1)).reshape(predictions.shape)
    y_test = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(y_test.shape)

    mae = mean_absolute_error(y_test, predictions)
    fold_results.append(mae)

[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step


In [7]:
# Show the raw list of per-fold MAE values collected by the training loop.
print(fold_results)

[24.300954867704963, 17.80040276465064, 20.875092110706454, 17.507685447234213, 20.81703526627847, 12.060208551523202, 12.464558223213617, 10.620830240844686, 9.662697408328611, 9.394611731343602]


In [6]:
# Report results
print("MAE per fold:", fold_results)
print("Average MAE:", np.mean(fold_results))

# Forecast the next output_length steps from the most recent input_length
# observations of the full series, using the model as left by the final fold.
last_sequence = df['scaled_sales'].values[-input_length:]
last_sequence = last_sequence.reshape((1, input_length, n_features))
future_sales = model.predict(last_sequence)

# The scaler was fitted on one column, so inverse_transform expects shape
# (n_samples, 1): flatten the forecast steps into that column, undo the
# scaling, then restore the original (1, output_length) layout.
future_sales = scaler.inverse_transform(future_sales.reshape(-1, 1)).reshape(future_sales.shape)
print(f"Forecast for the next {output_length} days:", future_sales.flatten())

MAE per fold: [24.300954867704963, 17.80040276465064, 20.875092110706454, 17.507685447234213, 20.81703526627847, 12.060208551523202, 12.464558223213617, 10.620830240844686, 9.662697408328611, 9.394611731343602]
Average MAE: 15.550407661182845
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Forecast for the next 28 days: [19.929655   17.335506   43.516296   39.287666   31.189104   19.33259
 32.195198   17.676855   17.938084    2.0670536  14.995823   17.32747
 34.288174   33.34412    24.17708    12.287469   19.785622   13.292426
 10.820728    0.64970976 16.070171   17.874363   36.542915   34.42241
 23.064554   16.216448   25.215542   12.286285  ]
