In [1]:
#Yi-Lin Lou #301226659
import pandas as pd
import tensorflow as tf
import numpy as np

data_path = './dataset/Metro_Interstate_Traffic_Volume.csv'
df = pd.read_csv(data_path)
df.head(5)


Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,scattered clouds,2012-10-02 09:00:00,5545
1,,289.36,0.0,0.0,75,Clouds,broken clouds,2012-10-02 10:00:00,4516
2,,289.58,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 11:00:00,4767
3,,290.13,0.0,0.0,90,Clouds,overcast clouds,2012-10-02 12:00:00,5026
4,,291.14,0.0,0.0,75,Clouds,broken clouds,2012-10-02 13:00:00,4918


In [2]:
# Convert 'date_time' to datetime format
df['date_time'] = pd.to_datetime(df['date_time'])

# One-hot encode categorical variables
df = pd.get_dummies(df, columns=['holiday', 'weather_main', 'weather_description'], drop_first=True)

# Normalize the features using MinMaxScaler
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

# Avoid scaling the 'date_time' and target variable 'traffic_volume'
features_to_scale = [col for col in df.columns if col not in ['date_time', 'traffic_volume']]
df[features_to_scale] = scaler.fit_transform(df[features_to_scale])


In [3]:
def create_sequences(data, n_steps):
    X, y = [], []
    for i in range(n_steps, len(data)):
        X.append(data.iloc[i-n_steps:i].drop(['date_time', 'traffic_volume'], axis=1).values)
        y.append(data.iloc[i]['traffic_volume'])
    return np.array(X), np.array(y)

n_steps = 24  # Number of timesteps as 24 hours
X, y = create_sequences(df, n_steps)


In [4]:
# Assuming 'df' is already sorted by 'date_time'
cutoff = pd.to_datetime('2017-01-01')  # Adjust based on your dataset's date range
train_val_df = df[df['date_time'] < cutoff]
test_df = df[df['date_time'] >= cutoff]

# Generate sequences for training/validation and test datasets
X_train_val, y_train_val = create_sequences(train_val_df, n_steps)
X_test, y_test = create_sequences(test_df, n_steps)

# Further split the training and validation set (80-20 split)
split_idx = int(len(X_train_val) * 0.8)
X_train, X_val = X_train_val[:split_idx], X_train_val[split_idx:]
y_train, y_val = y_train_val[:split_idx], y_train_val[split_idx:]


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

model = Sequential([
    LSTM(256, activation='tanh', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True),
    Dropout(0.2),
    LSTM(128, activation='tanh', return_sequences=False),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])


  super().__init__(**kwargs)


In [6]:
history = model.fit(X_train, y_train, epochs=30, batch_size=64, validation_data=(X_val, y_val), verbose=1)


Epoch 1/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 50ms/step - loss: 13624497.0000 - mae: 3092.9875 - val_loss: 6571949.5000 - val_mae: 2134.8171
Epoch 2/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 52ms/step - loss: 6316391.0000 - mae: 2113.3811 - val_loss: 3526949.2500 - val_mae: 1655.2970
Epoch 3/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 57ms/step - loss: 4183377.0000 - mae: 1796.1133 - val_loss: 3551904.0000 - val_mae: 1652.7096
Epoch 4/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 55ms/step - loss: 4159231.0000 - mae: 1789.8094 - val_loss: 3553002.7500 - val_mae: 1652.7815
Epoch 5/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 51ms/step - loss: 4159961.5000 - mae: 1786.1377 - val_loss: 3541701.2500 - val_mae: 1652.1122
Epoch 6/30
[1m371/371[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 50ms/step - loss: 4199317.0000 - mae: 1795.0481 - val_loss: 3

In [7]:
test_loss, test_mae = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")


[1m580/580[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - loss: 3911924.7500 - mae: 1737.5234
Test Loss: 3930416.0, Test MAE: 1739.697509765625
