In [94]:
# Imports and Libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import holidays
import plotly.express as px
import plotly.graph_objects as go

In [95]:
# Data and Preprocessing
# Load your time series data
data = pd.read_csv('00_MRT_2023_LSTM(Test Format).csv')

In [96]:
# Ensure 'Date' column is in datetime format
data['Date'] = pd.to_datetime(data['Date'])

# Extract year, month, and day
data['Year'] = data['Date'].dt.year
data['Month'] = data['Date'].dt.month
data['Day'] = data['Date'].dt.day

# Aggregate data by year
yearly_data = data.groupby('Year')['Entry'].sum().reset_index()

# Aggregate data by month
monthly_data = data.groupby(['Year', 'Month'])['Entry'].sum().reset_index()
monthly_data['Date'] = pd.to_datetime(monthly_data[['Year', 'Month']].assign(DAY=1)) + pd.offsets.MonthEnd(1)

# Aggregate data by day
daily_data = data.groupby(['Year', 'Month', 'Day'])['Entry'].sum().reset_index()
daily_data['Date'] = pd.to_datetime(daily_data[['Year', 'Month', 'Day']])

In [97]:
daily_data.head()


Unnamed: 0,Year,Month,Day,Entry,Date
0,2023,1,1,30264,2023-01-01
1,2023,1,2,37952,2023-01-02
2,2023,1,3,46134,2023-01-03
3,2023,1,4,48791,2023-01-04
4,2023,1,5,42915,2023-01-05


In [98]:
monthly_data.head(12)

Unnamed: 0,Year,Month,Entry,Date
0,2023,1,1466919,2023-01-31
1,2023,2,1406923,2023-02-28
2,2023,3,1525850,2023-03-31
3,2023,4,1241611,2023-04-30
4,2023,5,1520365,2023-05-31
5,2023,6,1494518,2023-06-30
6,2023,7,1561701,2023-07-31
7,2023,8,1659979,2023-08-31
8,2023,9,1630212,2023-09-30
9,2023,10,1618931,2023-10-31


In [99]:
# Add a column to determine if the date is a holiday in the Philippines
ph_holidays = holidays.PH()
data['Holiday'] = data['Date'].apply(lambda x: 1 if x in ph_holidays else 0)

In [100]:
data.head()

Unnamed: 0,Day,Date,Time,Entry,Year,Month,Holiday
0,1,2023-01-01,0:00,0,2023,1,1
1,1,2023-01-01,1:00,0,2023,1,1
2,1,2023-01-01,2:00,0,2023,1,1
3,1,2023-01-01,3:00,0,2023,1,1
4,1,2023-01-01,4:00,0,2023,1,1


In [101]:
# Extract the 'Entry' column for prediction
entry_data = data['Entry'].values

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(entry_data.reshape(-1, 1))

In [102]:
# Plot hourly data by year
fig_year = px.line(data, x='Date', y='Entry', color='Year',
                   title='Hourly Data by Year', labels={'color': 'Year'})
fig_year.update_xaxes(title_text='Date')
fig_year.update_yaxes(title_text='Entry')
fig_year.show()

In [103]:
# Plot the total sum of 'Entry' by month
fig_month = px.line(monthly_data, x='Date', y='Entry',
                    title='Total Sum of Entry by Month', labels={'Entry': 'Total Entry'})
fig_month.update_xaxes(title_text='Date')
fig_month.update_yaxes(title_text='Total Entry')
fig_month.show()

In [104]:
# Plot the total sum of 'Entry' by day
fig_day = px.line(daily_data, x='Date', y='Entry',
                  title='Total Sum of Entry by Day', labels={'Entry': 'Total Entry'})
fig_day.update_xaxes(title_text='Date')
fig_day.update_yaxes(title_text='Total Entry')
fig_day.show()

In [105]:
# Prepare the data for LSTM
def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        sequences.append(data[i:i + seq_length])
        labels.append(data[i + seq_length])
    return np.array(sequences), np.array(labels)

seq_length = 50  # Example sequence length
X, y = create_sequences(scaled_data, seq_length)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the data to fit the LSTM input requirements
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [106]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(seq_length, 1)))
model.add(LSTM(50))
model.add(Dense(1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# Evaluate the model
loss = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')


Epoch 1/20



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 45ms/step - loss: 0.0570 - val_loss: 0.0167
Epoch 2/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 38ms/step - loss: 0.0162 - val_loss: 0.0088
Epoch 3/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 39ms/step - loss: 0.0098 - val_loss: 0.0081
Epoch 4/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 39ms/step - loss: 0.0083 - val_loss: 0.0054
Epoch 5/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 67ms/step - loss: 0.0063 - val_loss: 0.0051
Epoch 6/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 54ms/step - loss: 0.0048 - val_loss: 0.0033
Epoch 7/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 49ms/step - loss: 0.0045 - val_loss: 0.0032
Epoch 8/20
[1m175/175[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 51ms/step - loss: 0.0038 - val_loss: 0.0026
Epoch 9/20
[1m175/175[0m [32m━━━━━━━━━

In [107]:
# Make predictions
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

# Clip the predictions to ensure non-negative values
predictions = np.clip(predictions, 0, None)

# Inverse transform the test data for comparison
y_test = scaler.inverse_transform(y_test)

# Calculate evaluation metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R^2 Score: {r2}')


[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step
MSE: 83555.40120536636
RMSE: 289.0595115289693
MAE: 170.09740391120434
R^2 Score: 0.9658799055986536


In [108]:
# Extract dates for the test set
test_indices = data.index[len(X_train) + seq_length:len(X_train) + seq_length + len(X_test)]
test_dates = data['Date'].iloc[test_indices]

In [109]:
# Plot the results using Plotly
fig = go.Figure()

fig.add_trace(go.Scatter(x=test_dates, y=y_test.flatten(), mode='lines', name='True Values'))
fig.add_trace(go.Scatter(x=test_dates, y=predictions.flatten(), mode='lines', name='Predictions'))

fig.update_layout(title='True Values vs Predictions',
                  xaxis_title='Date',
                  yaxis_title='Entry',
                  legend=dict(x=0, y=1))

fig.show()