In [5]:
!pip install tensorflow --quiet

In [9]:
# LSTM Neural Network for Transaction Volume Forecasting
# Deep learning approach using sequential patterns

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from google.cloud import storage
import pickle

# Configuration
PROJECT_ID = "transaction-forecast-mlops"
BUCKET_NAME = "transaction-forecast-data"

print("="*60)
print("LSTM NEURAL NETWORK TRAINING")
print("="*60)
print(f"TensorFlow version: {tf.__version__}")

# Load enriched data
print("\nLoading enriched dataset...")
df = pd.read_csv(f'gs://{BUCKET_NAME}/processed_data/daily_volumes_enriched.csv')
df['date'] = pd.to_datetime(df['date'])

print(f"✓ Loaded {len(df)} days with {len(df.columns)} features")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")

LSTM NEURAL NETWORK TRAINING
TensorFlow version: 2.20.0

Loading enriched dataset...
✓ Loaded 610 days with 28 features
Date range: 2016-09-04 00:00:00 to 2018-08-22 00:00:00


In [11]:
# LSTM with TOP features (multivariate approach)
print("\nPreparing multivariate sequential data for LSTM...")

# Use top features from XGBoost importance
selected_features = [
    'transaction_volume',  # Target (must include)
    'rolling_max_7',       # 48% importance
    'rolling_mean_7',      # 16.7%
    'rolling_min_7',       # 13.9%
    'momentum_7',          # 7.9%
    'lag_1',               # 6%
    'day_of_week',         # 1.7%
    'is_weekend'           # Helpful temporal
]

data = df[selected_features].values
print(f"✓ Using {len(selected_features)} features: {selected_features}")

# Scale data to 0-1 range
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

print(f"✓ Data scaled to range: [{data_scaled.min():.2f}, {data_scaled.max():.2f}]")

# Create sequences: use last 14 days to predict next day
lookback = 14

X, y = [], []
for i in range(lookback, len(data_scaled)):
    X.append(data_scaled[i-lookback:i, :])   # Past 14 days, ALL features
    y.append(data_scaled[i, 0])              # Next day volume (first column)

X, y = np.array(X), np.array(y)

print(f"✓ Created {len(X)} sequences with lookback={lookback} days")
print(f"  Input shape: {X.shape} (samples, timesteps, features)")
print(f"  Output shape: {y.shape}")

# Train/test split (last 14 days as test)
train_size = len(X) - 14
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

print(f"\n✓ Train/test split:")
print(f"  Training sequences: {len(X_train)}")
print(f"  Testing sequences: {len(X_test)}")
print(f"  X_train shape: {X_train.shape}")
print(f"  X_test shape: {X_test.shape}")


Preparing multivariate sequential data for LSTM...
✓ Using 8 features: ['transaction_volume', 'rolling_max_7', 'rolling_mean_7', 'rolling_min_7', 'momentum_7', 'lag_1', 'day_of_week', 'is_weekend']
✓ Data scaled to range: [0.00, 1.00]
✓ Created 596 sequences with lookback=14 days
  Input shape: (596, 14, 8) (samples, timesteps, features)
  Output shape: (596,)

✓ Train/test split:
  Training sequences: 582
  Testing sequences: 14
  X_train shape: (582, 14, 8)
  X_test shape: (14, 14, 8)


In [13]:
# Build LSTM model (updated syntax)
print("\n" + "="*60)
print("BUILDING LSTM MODEL")
print("="*60)

from tensorflow.keras.layers import Input

model = Sequential([
    # Input layer (explicit)
    Input(shape=(lookback, len(selected_features))),
    
    # First LSTM layer
    LSTM(50, activation='relu', return_sequences=True),
    Dropout(0.2),
    
    # Second LSTM layer
    LSTM(50, activation='relu'),
    Dropout(0.2),
    
    # Output layer
    Dense(1)
])

# Compile model
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae']
)

print("✓ Model architecture:")
model.summary()


BUILDING LSTM MODEL
✓ Model architecture:


In [14]:
# Train LSTM model
print("\n" + "="*60)
print("TRAINING LSTM MODEL")
print("="*60)

# Early stopping to prevent overfitting
early_stop = EarlyStopping(
    monitor='loss',
    patience=10,
    restore_best_weights=True
)

print("Training in progress...")
print("(This may take 2-3 minutes on CPU)\n")

# Train model
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)

print(f"\n✓ Training complete!")
print(f"  Stopped at epoch: {len(history.history['loss'])}")
print(f"  Final training loss: {history.history['loss'][-1]:.6f}")
print(f"  Final validation loss: {history.history['val_loss'][-1]:.6f}")


TRAINING LSTM MODEL
Training in progress...
(This may take 2-3 minutes on CPU)

Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 74ms/step - loss: 0.0108 - mae: 0.0754 - val_loss: 0.0037 - val_mae: 0.0495
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0045 - mae: 0.0433 - val_loss: 0.0035 - val_mae: 0.0487
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0034 - mae: 0.0350 - val_loss: 0.0031 - val_mae: 0.0468
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0031 - mae: 0.0325 - val_loss: 0.0032 - val_mae: 0.0470
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0031 - mae: 0.0310 - val_loss: 0.0034 - val_mae: 0.0479
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0031 - mae: 0.0307 - val_loss: 0.0029 - val_mae: 0.0450
Epoch 7

In [15]:
# Evaluate LSTM model
print("\n" + "="*60)
print("LSTM MODEL EVALUATION")
print("="*60)

# Make predictions
y_pred_scaled = model.predict(X_test, verbose=0)

# Inverse transform to get actual values
# Need to create array with same shape as original scaler input
y_pred_full = np.zeros((len(y_pred_scaled), len(selected_features)))
y_pred_full[:, 0] = y_pred_scaled.flatten()
y_pred_inv = scaler.inverse_transform(y_pred_full)[:, 0]

y_test_full = np.zeros((len(y_test), len(selected_features)))
y_test_full[:, 0] = y_test
y_test_inv = scaler.inverse_transform(y_test_full)[:, 0]

# Calculate metrics
mae = mean_absolute_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mean_squared_error(y_test_inv, y_pred_inv))
mape = mean_absolute_percentage_error(y_test_inv, y_pred_inv) * 100

# Get test dates
test_dates = df['date'].iloc[-14:].reset_index(drop=True)

# Create comparison dataframe
results = pd.DataFrame({
    'date': test_dates,
    'actual': y_test_inv,
    'predicted': y_pred_inv
})
results['error'] = results['actual'] - results['predicted']

print(f"MAE:  {mae:.2f} transactions/day")
print(f"RMSE: {rmse:.2f} transactions/day")
print(f"MAPE: {mape:.2f}%")

print(f"\n{'='*60}")
print("PREDICTIONS VS ACTUALS")
print(f"{'='*60}")
print(results)

print(f"\n{'='*60}")
print("MODEL COMPARISON")
print(f"{'='*60}")
print(f"Prophet:  9.89% MAPE")
print(f"XGBoost:  6.41% MAPE")
print(f"LSTM:     {mape:.2f}% MAPE")


LSTM MODEL EVALUATION
MAE:  30.53 transactions/day
RMSE: 35.47 transactions/day
MAPE: 12.26%

PREDICTIONS VS ACTUALS
         date  actual   predicted      error
0  2018-08-09   289.0  272.587875  16.412125
1  2018-08-10   256.0  256.620701  -0.620701
2  2018-08-11   188.0  204.815928 -16.815928
3  2018-08-12   197.0  231.437228 -34.437228
4  2018-08-13   292.0  261.972110  30.027890
5  2018-08-14   316.0  259.098765  56.901235
6  2018-08-15   288.0  254.323013  33.676987
7  2018-08-16   320.0  248.108346  71.891654
8  2018-08-17   257.0  223.443311  33.556689
9  2018-08-18   198.0  165.390448  32.609552
10 2018-08-19   204.0  190.823632  13.176368
11 2018-08-20   256.0  226.764725  29.235275
12 2018-08-21   243.0  230.410981  12.589019
13 2018-08-22   187.0  232.404786 -45.404786

MODEL COMPARISON
Prophet:  9.89% MAPE
XGBoost:  6.41% MAPE
LSTM:     12.26% MAPE


In [16]:
# Save LSTM model
print("\n" + "="*60)
print("SAVING LSTM MODEL")
print("="*60)

# Save model
model.save('/tmp/lstm_model.keras')

# Save scaler (needed for inference)
with open('/tmp/lstm_scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Save feature list
with open('/tmp/lstm_features.pkl', 'wb') as f:
    pickle.dump(selected_features, f)

# Upload to GCS
client = storage.Client()
bucket = client.bucket(BUCKET_NAME)

bucket.blob('models/lstm_model.keras').upload_from_filename('/tmp/lstm_model.keras')
bucket.blob('models/lstm_scaler.pkl').upload_from_filename('/tmp/lstm_scaler.pkl')
bucket.blob('models/lstm_features.pkl').upload_from_filename('/tmp/lstm_features.pkl')

print(f"✓ Model saved to gs://{BUCKET_NAME}/models/lstm_model.keras")
print(f"✓ Scaler saved to gs://{BUCKET_NAME}/models/lstm_scaler.pkl")
print(f"✓ Features saved to gs://{BUCKET_NAME}/models/lstm_features.pkl")

print(f"\n{'='*60}")
print("LSTM TRAINING COMPLETE")
print(f"{'='*60}")
print(f"Performance: {mape:.2f}% MAPE")
print(f"Note: XGBoost (6.41%) outperformed LSTM due to limited data size")
print(f"      and effectiveness of engineered features for this problem.")


SAVING LSTM MODEL
✓ Model saved to gs://transaction-forecast-data/models/lstm_model.keras
✓ Scaler saved to gs://transaction-forecast-data/models/lstm_scaler.pkl
✓ Features saved to gs://transaction-forecast-data/models/lstm_features.pkl

LSTM TRAINING COMPLETE
Performance: 12.26% MAPE
Note: XGBoost (6.41%) outperformed LSTM due to limited data size
      and effectiveness of engineered features for this problem.
