<a href="https://colab.research.google.com/github/claudiosegala/Monografia/blob/master/code/tcc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Dependencies

In this phase we install all the dependencies that our code needs.

In [0]:
!pip install tensorflow
!pip install pandas
!pip install matplotlib
!pip install numpy
!pip install sklearn
!pip install keras



# Define headers

In this phase we have to declare all the libraries that we will use.

In [0]:
import tensorflow as tf # machine learning library
import pandas as pd # data manipulation library
import matplotlib.pyplot as plt # plot library
import numpy as np # math library
import datetime as dt # to discover week day
import time as tm # to convert to seconds
import sklearn as skl # regression templates library

from sklearn.metrics import mean_absolute_error, mean_squared_error, precision_score, accuracy_score, max_error
from keras.models import Sequential
from keras.layers import LSTM, GRU, SimpleRNN, Dense

# Mount Drive

Connect to Google Drive of 'alfredcoinworth'

In [1]:
# Import the Colab submodule explicitly: `import google` alone does not
# guarantee that `google.colab` has been loaded as an attribute.
from google.colab import drive # To connect with google drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


# Configure Hyperparameters

In [0]:
# Width of one flow block, in seconds (5 minutes)
FLOW_INTERVAL = 5 * 60

# Length of the input window: how many past flow blocks the models look at
N_STEPS = 20

# Forecast horizon, counted in blocks after the one that follows the window
# (0 = predict the flow of the next 5 minutes)
N_FUTURE = 12

# Size of the last axis of the model input (values per time step)
N_FEATURES = 1

# Data Retrieval & Transformation

In this phase we have to get the data stored in Google Drive and remove the columns that we won't need. Also, convert some of them to other types.


In [0]:
def prepare_data (data):
  """ Prepare the data

  Returns a new dataframe (the input is not modified) in which:

  + the columns 'Unnamed: 0', 'Sensor', 'Max Speed', 'Size' and 'Lane' are dropped;
  + 'Time' ('%H:%M:%S' strings) becomes the integer number of seconds since midnight;
  + 'Date' ('%Y/%m/%d' strings) becomes a datetime column;
  + 'WeekDay' is added with the day of the week of 'Date' (Monday = 0);
  + 'Speed' is converted to float.

  Raises KeyError if one of the columns above is missing and ValueError if a
  'Time', 'Date' or 'Speed' value cannot be parsed.
  """

  def to_seconds (text):
    # strptime validates the format; the fields are then folded into seconds
    parsed = tm.strptime(text, '%H:%M:%S')
    return parsed.tm_hour * 3600 + parsed.tm_min * 60 + parsed.tm_sec

  data = data.drop(columns=['Unnamed: 0', 'Sensor', 'Max Speed', 'Size', 'Lane'])

  data['Time'] = data['Time'].apply(to_seconds)

  data['Date'] = pd.to_datetime(data['Date'], format='%Y/%m/%d')

  data['WeekDay'] = data['Date'].dt.weekday

  # The converted column must be assigned back, otherwise 'Speed' keeps its
  # original (string) values.
  data['Speed'] = data['Speed'].astype(float)

  return data



# Read the first chunk of sensor records from the mounted Drive and clean it
data = prepare_data(
  pd.read_csv('/content/drive/My Drive/TCC/chunks/chunk_00.csv', sep=',')
)


FileNotFoundError: ignored

# Get Flow

This transforms the time series of registered cars into an array with the flow (number of cars) per 5-minute interval.

In [0]:
def get_flow (data, interval):
  """ Extract flow from data

  Transforms the time series of registered cars into a list with the number of
  cars counted in each consecutive block of 'interval' seconds.

  Args:
    data: table with a 'Date' column (values comparable with '>') and a 'Time'
      column (seconds since midnight), one row per registered car. Rows are
      assumed to be in chronological order.
    interval: size of each block in seconds; must be positive.

  Returns:
    A list of ints, one per closed block, in chronological order. Blocks in
    which no car passed are reported as 0. A block is closed when a later car
    of the same day falls outside it or when the day changes; the block still
    open after the last row is not reported, and empty blocks between the last
    car of a day and midnight are not synthesized.

  Raises:
    ValueError: if 'interval' is not positive.
  """

  if interval <= 0:
    raise ValueError("interval must be positive")

  date = np.asarray(data['Date'])
  time = np.asarray(data['Time'])

  if len(time) == 0:
    return []

  dateControl = date[0] # start the date control at the first day of the chunk
  timeBlock = interval  # exclusive upper bound (in seconds) of the open block
  countFlow = 0
  flows = []

  for i in range(len(time)):
    if date[i] > dateControl: # day change: close the previous day's open block
      flows.append(countFlow)
      dateControl = date[i]
      timeBlock = interval
      countFlow = 0

    # Close every block that ends before this car, so gaps longer than one
    # interval yield explicit zero-flow blocks instead of dropping cars.
    while time[i] >= timeBlock:
      flows.append(countFlow)
      timeBlock += interval
      countFlow = 0

    countFlow += 1 # the car always belongs to the block that is now open

  return flows


# Flow series: one car count per FLOW_INTERVAL-second block
raw_seq = get_flow(data, interval=FLOW_INTERVAL)

# Plot Data

In [0]:
# A very wide canvas keeps the individual blocks of the series distinguishable
fig, ax = plt.subplots(figsize=(80, 10))
ax.plot(raw_seq)
ax.set_title("Traffic flow per interval")
ax.set_xlabel("Interval index")
ax.set_ylabel("Cars per interval")
plt.show()

# Prepare the dataset for training

+ Adjust the dataset
+ Split the dataset
+ Create storage for the results

In [0]:
def split_sequence(sequence, n_steps, n_future=0):
  """ Split a univariate sequence into samples

  Slides a window of size n_steps over the sequence. Each window becomes one
  input sample and the element n_future positions after the one that follows
  the window becomes its target.

  Args:
    sequence: indexable, sliceable sequence of values (list or array).
    n_steps: number of consecutive elements in each input sample.
    n_future: how many positions to skip between the end of the window and
      the target (0 = the element right after the window).

  Returns:
    A tuple (X, Y) of numpy arrays. X holds the windows, one per row, and Y
    the matching targets. When the sequence is too short to produce a sample,
    X has shape (0, n_steps) and Y has shape (0,).

  Example:

  split_sequence([1, 2, 3, 4, 5], 3) #=> ([[1, 2, 3], [2, 3, 4]], [4, 5])
  split_sequence([1, 2, 3, 4, 5], 3, 1) #=> ([[1, 2, 3]], [5])
  """

  # The last usable window start i satisfies i + n_steps + n_future <= len - 1
  n_samples = max(len(sequence) - n_steps - n_future, 0)

  if n_samples == 0:
    # keep X two-dimensional so callers can still read its window length
    return np.empty((0, n_steps)), np.empty((0,))

  X = [sequence[i:i + n_steps] for i in range(n_samples)]
  Y = [sequence[i + n_steps + n_future] for i in range(n_samples)]

  return np.array(X), np.array(Y)


def reshape_flow (raw_seq, n_steps, n_future, n_features, train_fraction=0.8):
  """ Split a flow sequence into training and test samples

  The first train_fraction of the sequence is used for training and the rest
  for testing. Each part is windowed independently with split_sequence, so no
  window crosses the boundary between them.

  Args:
    raw_seq: sequence of flow values.
    n_steps: number of past values in each input sample.
    n_future: offset of the target after the window (see split_sequence).
    n_features: size of the last axis of the returned inputs.
    train_fraction: share of raw_seq used for training (default 0.8).

  Returns:
    X_train, Y_train, X_test, Y_test. The X arrays are reshaped to
    (samples, n_steps, n_features); a part too short to produce any sample
    yields an X with zero samples instead of raising.
  """

  # define what is test and what is training
  training_side = int(len(raw_seq) * train_fraction)

  # split into samples
  X_train, Y_train = split_sequence(raw_seq[:training_side], n_steps, n_future)
  X_test, Y_test = split_sequence(raw_seq[training_side:], n_steps, n_future)

  # reshape from [samples, timesteps] into [samples, timesteps, features];
  # len() is used for the sample count so an empty 1-D result is also accepted
  X_train = X_train.reshape((len(X_train), n_steps, n_features))
  X_test = X_test.reshape((len(X_test), n_steps, n_features))

  return X_train, Y_train, X_test, Y_test

# Predictions of every trained model, keyed by model name
res = dict()
X_train, Y_train, X_test, Y_test = reshape_flow(
  raw_seq,
  n_steps=N_STEPS,
  n_future=N_FUTURE,
  n_features=N_FEATURES,
)

# Train GRU

In [0]:
def gru (X_train, Y_train, X_test, Y_test, n_steps, n_features, units=50, epochs=50, batch_size=64):
  """ Train a single-layer GRU regressor and predict the test inputs

  Args:
    X_train: training inputs, fed to a layer with input_shape (n_steps, n_features).
    Y_train: training targets, one per sample.
    X_test: inputs to predict after training.
    Y_test: unused; kept so the three model functions share one signature.
    n_steps: number of time steps per sample.
    n_features: number of values per time step.
    units: size of the recurrent layer (default 50).
    epochs: number of training epochs (default 50).
    batch_size: training batch size (default 64).

  Returns:
    The result of model.predict(X_test); the network ends in Dense(1), i.e.
    one predicted value per sample.
  """

  # define model
  model = Sequential()
  model.add(GRU(units, activation='relu', input_shape=(n_steps, n_features)))
  model.add(Dense(1))

  # compile model; this is a regression, so track MAE instead of the
  # classification metric "accuracy"
  model.compile(optimizer='adam', loss='mse', metrics = ["mae"])

  # fit model
  model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=0) # verbose = 2

  return model.predict(X_test, verbose=0) # verbose = 2

# Train the GRU model and keep its predictions for the test inputs
res["gru"] = gru(X_train, Y_train, X_test, Y_test, n_steps=N_STEPS, n_features=N_FEATURES)

# Train LSTM



In [0]:
def lstm (X_train, Y_train, X_test, Y_test, n_steps, n_features, units=50, epochs=50, batch_size=64):
  """ Train a single-layer LSTM regressor and predict the test inputs

  Args:
    X_train: training inputs, fed to a layer with input_shape (n_steps, n_features).
    Y_train: training targets, one per sample.
    X_test: inputs to predict after training.
    Y_test: unused; kept so the three model functions share one signature.
    n_steps: number of time steps per sample.
    n_features: number of values per time step.
    units: size of the recurrent layer (default 50).
    epochs: number of training epochs (default 50).
    batch_size: training batch size (default 64).

  Returns:
    The result of model.predict(X_test); the network ends in Dense(1), i.e.
    one predicted value per sample.
  """

  # define model
  model = Sequential()
  model.add(LSTM(units, activation='relu', input_shape=(n_steps, n_features)))
  model.add(Dense(1))

  # compile model; this is a regression, so track MAE instead of the
  # classification metric "accuracy"
  model.compile(optimizer='adam', loss='mse', metrics = ["mae"])

  # fit model
  model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=0) # verbose = 2

  return model.predict(X_test, verbose=0) # verbose = 2

# Train the LSTM model and keep its predictions for the test inputs
res["lstm"] = lstm(X_train, Y_train, X_test, Y_test, n_steps=N_STEPS, n_features=N_FEATURES)

# Train RNN

In [0]:
def rnn (X_train, Y_train, X_test, Y_test, n_steps, n_features, units=50, epochs=50, batch_size=64):
  """ Train a single-layer SimpleRNN regressor and predict the test inputs

  Args:
    X_train: training inputs, fed to a layer with input_shape (n_steps, n_features).
    Y_train: training targets, one per sample.
    X_test: inputs to predict after training.
    Y_test: unused; kept so the three model functions share one signature.
    n_steps: number of time steps per sample.
    n_features: number of values per time step.
    units: size of the recurrent layer (default 50).
    epochs: number of training epochs (default 50).
    batch_size: training batch size (default 64).

  Returns:
    The result of model.predict(X_test); the network ends in Dense(1), i.e.
    one predicted value per sample.
  """

  # define model
  model = Sequential()
  model.add(SimpleRNN(units, activation='relu', input_shape=(n_steps, n_features)))
  model.add(Dense(1))

  # compile model; this is a regression, so track MAE instead of the
  # classification metric "accuracy"
  model.compile(optimizer='adam', loss='mse', metrics = ["mae"])

  # fit model
  model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=0) # verbose = 2

  return model.predict(X_test, verbose=0) # verbose = 2

# Train the SimpleRNN model and keep its predictions for the test inputs
res["rnn"] = rnn(X_train, Y_train, X_test, Y_test, n_steps=N_STEPS, n_features=N_FEATURES)

# Test & Validate

In [0]:
def print_metrics (Y_hat, Y_test):
  """ Print the regression error metrics of a set of predictions

  Compares the predictions Y_hat with the expected values Y_test and prints,
  one per line, the mean absolute error, the mean squared error, its square
  root and the maximum error. Nothing is returned.
  """

  abs_err = mean_absolute_error(Y_test, Y_hat)
  sq_err = mean_squared_error(Y_test, Y_hat)
  worst = max_error(Y_test, Y_hat)
  root_sq_err = np.sqrt(sq_err)

  print(f"MAE: {abs_err}")
  print(f"MSE: {sq_err}")
  print(f"RMSE: {root_sq_err}")
  print(f"Max Error: {worst}")
  
  
# Report every model against the same expected values; predictions are
# rounded and flattened before being compared with Y_test
expected = Y_test.tolist()

for banner, key in (("--- GRU ---", "gru"), ("--- LSTM ---", "lstm"), ("--- RNN ---", "rnn")):
  if key != "gru":
    print() # blank line between reports
  print(banner)
  print_metrics(res[key].round().flatten().tolist(), expected)