In [1]:
import tensorflow as tf

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Loading the data

In [2]:
# Load the CTA daily boarding totals and tidy them in a single chain, so that
# re-running this cell always starts again from the raw CSV.
df = (
    pd.read_csv("data/CTA_-_Ridership_-_Daily_Boarding_Totals.csv", parse_dates=["service_date"])
    # Shorten the column names
    .set_axis(["date", "day_type", "bus", "rail", "total"], axis=1)
    # Drop the total column because it is only bus + rail
    .drop(columns="total")
    # Drop duplicated rows while the date is still a regular column:
    # drop_duplicates() ignores the index, so deduplicating after
    # set_index("date") would also throw away distinct days that happen to
    # share the same day_type/bus/rail values.
    .drop_duplicates()
    # Arrange the rows in chronological order and set the date as index
    .sort_values("date")
    .set_index("date")
)

df.tail()

Unnamed: 0_level_0,day_type,bus,rail
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-10-27,W,528826,395129
2022-10-28,W,501985,352842
2022-10-29,A,311437,254149
2022-10-30,U,217204,177961
2022-10-31,W,454862,318942


# Preparing the model for time series prediction in Keras

The goal now is to forecast tomorrow's ridership based on the data of the past 8 weeks (56 days). We first split the data into training, validation, and test periods. To keep the values roughly within the range [0, 1], we divide the training, validation, and test data by 1,000,000.

In [3]:
# Chronological split of the rail series by date label; every split is
# expressed in millions of riders so the values stay roughly within [0, 1].
rail_train = df.loc["2016-01":"2018-12", "rail"] / 1e6
rail_valid = df.loc["2019-01":"2019-05", "rail"] / 1e6
rail_test = df.loc["2019-06":, "rail"] / 1e6

# We use `timeseries_dataset_from_array()` to build the training, validation, and test datasets

In [4]:
seq_length = 56  # length of each input window, in days (8 weeks)

# Window settings shared by all three splits
window_opts = dict(sequence_length=seq_length, batch_size=32)

# For every split, the targets are the same series shifted by `seq_length`,
# so each input window is paired with the value that immediately follows it.
train_ds = tf.keras.utils.timeseries_dataset_from_array(
    rail_train.to_numpy(),
    targets=rail_train[seq_length:],
    shuffle=True,  # shuffle the training windows
    seed=42,
    **window_opts,
)

valid_ds = tf.keras.utils.timeseries_dataset_from_array(
    rail_valid.to_numpy(),
    targets=rail_valid[seq_length:],
    **window_opts,
)

test_ds = tf.keras.utils.timeseries_dataset_from_array(
    rail_test.to_numpy(),
    targets=rail_test[seq_length:],
    **window_opts,
)

# Linear model first

In [5]:
# Seed TensorFlow before the layers are created
tf.random.set_seed(42)

# Baseline: a single Dense unit applied to the `seq_length` past values,
# i.e. a purely linear forecast.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(1, input_shape=[seq_length])]
)

# Stop once the validation MAE has not improved for 50 epochs and keep the
# best weights seen during training.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_mae",
    patience=50,
    restore_best_weights=True,
)

opt = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)

# Train on the Huber loss and track MAE as the reported metric
model.compile(
    optimizer=opt,
    loss=tf.keras.losses.Huber(),
    metrics=["mae"],
)

history = model.fit(
    train_ds,
    epochs=500,
    validation_data=valid_ds,
    callbacks=[early_stopping_cb],
)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500


Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500


Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500
Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500


Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500
Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500


In [6]:
# evaluate() returns the loss followed by the compiled metrics, so index 1 is
# the validation MAE (still in millions of riders).
linear_valid_metrics = model.evaluate(valid_ds)
linear_valid_metrics[1]



0.03765106201171875

The validation MAE is around 37,651 (0.03765 × 1,000,000), which is better than the naive forecast.

# Next, we use a simple RNN model

In [7]:
# Seed TensorFlow before the layers are created
tf.random.set_seed(42)

# One recurrent layer of 32 units followed by a single-unit Dense output
univar_model = tf.keras.Sequential(
    [
        tf.keras.layers.SimpleRNN(32, input_shape=[None, 1]),
        tf.keras.layers.Dense(1),  # no activation function by default
    ]
)

# Stop once the validation MAE has not improved for 50 epochs and keep the
# best weights seen during training.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_mae",
    patience=50,
    restore_best_weights=True,
)

opt = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)

univar_model.compile(
    optimizer=opt,
    loss=tf.keras.losses.Huber(),
    metrics=["mae"],
)

history = univar_model.fit(
    train_ds,
    epochs=500,
    validation_data=valid_ds,
    callbacks=[early_stopping_cb],
)

# Index 1 is the MAE metric; scale it back from millions to riders
univar_valid_mae = univar_model.evaluate(valid_ds)[1]
print(univar_valid_mae * 1e6)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500


Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
30145.313590765


We got a validation MAE of around 30,145. This is better than the SARIMA model!

# Deep RNN model

We stack multiple RNN layers:

![image.png](attachment:image.png)

In [8]:
tf.random.set_seed(42)  # extra code – ensures reproducibility

# Three stacked recurrent layers of 32 units each. The first two return their
# full output sequence so that the next recurrent layer receives one input per
# time step; the last one returns only its final output, which feeds Dense(1).
deep_model = tf.keras.Sequential(
    [
        tf.keras.layers.SimpleRNN(32, return_sequences=True, input_shape=[None, 1]),
        tf.keras.layers.SimpleRNN(32, return_sequences=True),
        tf.keras.layers.SimpleRNN(32),
        tf.keras.layers.Dense(1),
    ]
)

# Stop once the validation MAE has not improved for 50 epochs and keep the
# best weights seen during training.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="val_mae",
    patience=50,
    restore_best_weights=True,
)

opt = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

deep_model.compile(
    optimizer=opt,
    loss=tf.keras.losses.Huber(),
    metrics=["mae"],
)

history = deep_model.fit(
    train_ds,
    epochs=500,
    validation_data=valid_ds,
    callbacks=[early_stopping_cb],
)

# Index 1 is the MAE metric; scale it back from millions to riders
deep_valid_mae = deep_model.evaluate(valid_ds)[1]
print(deep_valid_mae * 1e6)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500


Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500
Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
29998.857527971268


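With a validation MAE of about 29,999, it is only marginally better than our shallower model (about 30,145), so the extra depth brings no meaningful improvement here.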