In [60]:
# Mount Google Drive at /content/drive so the CSV stored under MyDrive can be
# read by the cells below (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [61]:
# Libraries
import pandas as pd
import numpy as np
from numpy import array
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from datetime import datetime, timedelta

In [62]:
# Load the 2021-06-10 CSV export from the mounted Drive. Later cells read its
# "time", "date", "count" and "station" columns.
df = pd.read_csv("/content/drive/MyDrive/First Year/Big Data Technologies/Traffic Project/db/2021-06-10.csv")

In [63]:
# Parse the "date" column into pandas datetimes (explicit column indexing
# rather than attribute-style access).
df["date"] = pd.to_datetime(df["date"])

In [64]:
# Encode a datetime or string column as integer category codes
def codify(df, column_name):
  """Return the integer category codes of one column of ``df``.

  The column is converted to a pandas categorical on a temporary copy and
  each value is replaced by the position of its category. pandas orders
  inferred categories by sorted value when they are sortable and uses -1
  for missing values.

  The input frame is NOT modified: earlier versions overwrote
  ``df[column_name]`` with a Categorical as a side effect, which silently
  changed the dtype of the caller's data.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame holding the column to encode.
  column_name : str
      Name of the column to encode.

  Returns
  -------
  pandas.Series
      Integer codes, indexed like ``df``.

  Raises
  ------
  KeyError
      If ``column_name`` is not a column of ``df``.
  """
  # astype returns a new Series, so the caller's column keeps its dtype
  return df[column_name].astype("category").cat.codes

In [65]:
# Assemble the encoded frame in one step: time, date and station are replaced
# by their category codes, while count is carried over unchanged.
df_new = pd.DataFrame({
    "time": codify(df, "time"),
    "date": codify(df, "date"),
    "count": df["count"],
    "station": codify(df, "station"),
})

In [66]:
# Hold out the final 200 rows for testing; every row before them is training data.
# NOTE(review): this treats row order as chronological - confirm the CSV is sorted.
train = df_new.head(-200)
test = df_new.tail(200)

In [67]:
# Turn a sequence into sliding-window samples for supervised learning
def split_sequence(sequence, n_steps):
    """Cut ``sequence`` into (window, next value) training pairs.

    For each start position whose window is followed by at least one more
    element, the ``n_steps`` items beginning there form one input sample and
    the element immediately after the window is its target.

    Parameters
    ----------
    sequence : list or numpy.ndarray
        Ordered observations; must support ``len``, slicing and indexing.
    n_steps : int
        Number of consecutive observations per input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)``: the stacked windows and their targets. Both arrays are
        empty when the sequence is too short to produce any window.
    """
    total = len(sequence)
    # a start position is usable only while the element after its window exists
    starts = [start for start in range(total) if start + n_steps < total]
    windows = [sequence[start:start + n_steps] for start in starts]
    targets = [sequence[start + n_steps] for start in starts]
    return array(windows), array(targets)

In [68]:
# Example subset: keep only the rows recorded at the Torricelli station
es = df.loc[df['station'] == 'Torricelli']

In [69]:
# Double brackets select 'count' as a one-column DataFrame, so the resulting
# array is 2-D (rows, 1) rather than flat.
raw_seq = array(es[['count']])

In [70]:
# Inspect the array dimensions as (rows, columns) before windowing
raw_seq.shape

(297, 1)

In [71]:
# Window hyper-parameters. They were not assigned in any cell shown above, so
# on a fresh kernel this cell failed with NameError.
n_steps = 3                    # look-back length; matches the 3-value window used in the prediction demo
n_features = raw_seq.shape[1]  # one value per time step (the single 'count' column)

# split into samples: each X row is n_steps consecutive counts, y is the count that follows
X, y = split_sequence(raw_seq, n_steps)
# enforce the [samples, timesteps, features] layout used as the LSTM input shape
X = X.reshape((X.shape[0], X.shape[1], n_features))

In [72]:
# Stacked network: two 50-unit LSTM layers with ReLU activation feeding a
# single-output Dense layer. The first LSTM returns the full sequence so the
# second one receives one vector per time step.
model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)),
    LSTM(50, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')
# Train silently for 200 epochs; the returned History object is the cell output
model.fit(X, y, epochs=200, verbose=0)

<keras.callbacks.History at 0x7fdbaa797510>

In [73]:
# Demo: forecast the value that follows the hand-picked window 40, 50, 60
x_input = array([40, 50, 60]).reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)

[[94.507095]]
