In [12]:
# Transaction Volume Forecasting - Model Training
# Train Prophet model on daily transaction volumes

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet
from google.cloud import storage

# Configuration
PROJECT_ID = "transaction-forecast-mlops"
BUCKET_NAME = "transaction-forecast-data"

# Load preprocessed data
print("Loading preprocessed data...")
df = pd.read_csv(f'gs://{BUCKET_NAME}/processed_data/daily_volumes_clean.csv')
df['date'] = pd.to_datetime(df['date'])

print(f"✓ Loaded {len(df)} days of data")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")
print(f"\nData preview:")
print(df.head())

Loading preprocessed data...
✓ Loaded 610 days of data
Date range: 2016-09-04 00:00:00 to 2018-08-22 00:00:00

Data preview:
        date  transaction_volume
0 2016-09-04                   1
1 2016-09-05                   1
2 2016-09-13                   1
3 2016-09-15                   1
4 2016-10-02                   1


In [13]:
# Split data into train/test sets
# For time series: use last 14 days as test (forecast horizon)
train_size = len(df) - 14
train_df = df[:train_size].copy()
test_df = df[train_size:].copy()

print(f"Training set: {len(train_df)} days ({train_df['date'].min()} to {train_df['date'].max()})")
print(f"Test set: {len(test_df)} days ({test_df['date'].min()} to {test_df['date'].max()})")

# Prophet requires specific column names: 'ds' (datestamp) and 'y' (value)
train_prophet = train_df.rename(columns={'date': 'ds', 'transaction_volume': 'y'})
test_prophet = test_df.rename(columns={'date': 'ds', 'transaction_volume': 'y'})

print("\n✓ Data prepared for Prophet")
print("\nTraining data:")
print(train_prophet.tail())

Training set: 596 days (2016-09-04 00:00:00 to 2018-08-08 00:00:00)
Test set: 14 days (2018-08-09 00:00:00 to 2018-08-22 00:00:00)

✓ Data prepared for Prophet

Training data:
            ds    y
591 2018-08-04  245
592 2018-08-05  276
593 2018-08-06  372
594 2018-08-07  370
595 2018-08-08  316


In [14]:
# Initialize and train Prophet model
print("Training Prophet model...")

# Create model with basic settings
model = Prophet(
    daily_seasonality=True,     # Capture daily patterns
    weekly_seasonality=True,     # Capture weekly patterns (weekday vs weekend)
    yearly_seasonality=True,     # Capture yearly patterns
    seasonality_mode='multiplicative'  # Seasonality scales with trend
)

# Fit model on training data
model.fit(train_prophet)

print("✓ Model trained successfully!")
print("\nModel components:")
print("- Trend: Overall growth pattern")
print("- Weekly seasonality: Day-of-week effects")
print("- Yearly seasonality: Annual patterns")

Training Prophet model...


21:46:49 - cmdstanpy - INFO - Chain [1] start processing
21:46:49 - cmdstanpy - INFO - Chain [1] done processing


✓ Model trained successfully!

Model components:
- Trend: Overall growth pattern
- Weekly seasonality: Day-of-week effects
- Yearly seasonality: Annual patterns


In [15]:
# Create future dataframe for predictions (14 days)
future = model.make_future_dataframe(periods=14, freq='D')

print(f"Predicting for {len(future)} total dates (train + test)")
print(f"Last training date: {train_prophet['ds'].max()}")
print(f"Forecast through: {future['ds'].max()}")

# Make predictions
forecast = model.predict(future)

print("\n✓ Predictions generated")
print("\nForecast columns:", forecast.columns.tolist()[:10], "...")
print("\nLast 14 days (test set predictions):")
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(14))

Predicting for 610 total dates (train + test)
Last training date: 2018-08-08 00:00:00
Forecast through: 2018-08-22 00:00:00

✓ Predictions generated

Forecast columns: ['ds', 'trend', 'yhat_lower', 'yhat_upper', 'trend_lower', 'trend_upper', 'daily', 'daily_lower', 'daily_upper', 'multiplicative_terms'] ...

Last 14 days (test set predictions):
            ds        yhat  yhat_lower  yhat_upper
596 2018-08-09  282.545031  223.593088  341.520577
597 2018-08-10  268.140678  206.768756  330.366365
598 2018-08-11  204.249775  143.506901  258.341567
599 2018-08-12  220.927713  158.409521  275.169757
600 2018-08-13  295.823398  235.056751  359.043357
601 2018-08-14  285.528825  225.451310  347.172397
602 2018-08-15  276.448784  218.598752  335.320068
603 2018-08-16  255.327978  196.028128  317.086705
604 2018-08-17  241.105799  177.145108  305.293156
605 2018-08-18  177.627318  123.969613  236.471107
606 2018-08-19  196.005343  136.612238  253.930256
607 2018-08-20  273.645731  214.431176  3

In [16]:
# Save trained model to Cloud Storage
import pickle
import os

# Save model locally first
model_path = '/tmp/prophet_model.pkl'
with open(model_path, 'wb') as f:
    pickle.dump(model, f)

# Upload to GCS
client = storage.Client()
bucket = client.bucket(BUCKET_NAME)
blob = bucket.blob('models/prophet_baseline.pkl')
blob.upload_from_filename(model_path)

print(f"✓ Model saved to gs://{BUCKET_NAME}/models/prophet_baseline.pkl")
print("\nModel ready for evaluation!")

✓ Model saved to gs://transaction-forecast-data/models/prophet_baseline.pkl

Model ready for evaluation!
