In [None]:
# Importing the libraries
import time

# Logging the start time of code execution
start_time = time.time()

import keras
import scikitplot as skplt

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# NOTE(review): plot_confusion_matrix was removed from sklearn.metrics in scikit-learn 1.2
# and is not referenced in the cells shown here; this import fails on newer versions
from sklearn.metrics import plot_confusion_matrix

# Required by the under-sampling cell, which instantiates RandomUnderSampler
from imblearn.under_sampling import RandomUnderSampler

# Suppressing warnings
from warnings import simplefilter
simplefilter(action = 'ignore', category = FutureWarning)

# Configuring the style of the plots with Seaborn
sns.set_style('dark')

In [None]:
# Reading the refined training split: input features first, then the outputs
inputs_training_df, outputs_training_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/refined/inputs_training_df.csv',
        '../data/refined/outputs_training_df.csv',
    )
)

# Reading the refined test split in the same order
inputs_test_df, outputs_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/refined/inputs_test_df.csv',
        '../data/refined/outputs_test_df.csv',
    )
)

In [None]:
# Turning both training dataframes into NumPy arrays
input_training_array, output_training_array = map(
    np.array, (inputs_training_df, outputs_training_df)
)

# Same conversion for both test dataframes
input_test_array, output_test_array = map(
    np.array, (inputs_test_df, outputs_test_df)
)

In [None]:
# Using under-sampling only to separate "Não Grave" and "Grave" accidents at opposite ends...
# which will facilitate the interpretation of only "Grave" accidents
# 'not minority' resamples every class except the minority one
# A fixed random_state keeps the sampled rows identical from one run to the next
rus = RandomUnderSampler(sampling_strategy = 'not minority', random_state = 42)
x_res_test, y_res_test = rus.fit_resample(input_test_array, output_test_array)

# To see the balance and distribution of the classes
print(pd.Series(y_res_test).value_counts())

# Plotting the new class distribution
# The vector is passed as x= because seaborn >= 0.12 reads the first positional
# argument as `data`, not as the variable to count
sns.countplot(x = y_res_test);

In [None]:
# Giving every array the three-dimensional layout the LSTM layers are fed with:
# (rows, 1, number of input columns) for inputs and (rows, 1, 1) for outputs
# Training
input_training_array = input_training_array.reshape((-1, 1, inputs_training_df.shape[1]))
output_training_array = output_training_array.reshape((-1, 1, 1))

# Test
input_test_array = input_test_array.reshape((-1, 1, inputs_test_df.shape[1]))

# Interpretation (the under-sampled test data)
x_res_test = x_res_test.reshape((-1, 1, inputs_test_df.shape[1]))
y_res_test = y_res_test.reshape((-1, 1, 1))

In [None]:
# Building the neural network: two stacked LSTM layers followed by a single sigmoid unit
model = Sequential()

# The first layer takes sequences of length 1 with one feature per input column;
# return_sequences = True keeps the sequence axis so the second LSTM can consume it
model.add(LSTM(80, return_sequences = True, kernel_initializer = 'random_normal', input_shape = (1, len(inputs_training_df.columns))))
model.add(LSTM(80, return_sequences = False, kernel_initializer = 'random_normal'))
model.add(Dense(1, activation = 'sigmoid', kernel_initializer = 'random_normal'))

# metrics is given as a list: that is the documented form, and Keras 3 rejects a bare string
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [None]:
# Fitting the network on the training arrays; the value returned by fit is kept in r
# because the loss plot reads r.history
r = model.fit(
    input_training_array,
    output_training_array,
    epochs = 5,
    batch_size = 250,
)

In [None]:
# Plotting the training loss stored in the fit history
plt.plot(r.history['loss'], label = 'loss')

# Axis labels so the figure can be read on its own
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Trailing semicolon hides the Legend repr in the cell output
plt.legend();

In [None]:
# Running the network on the test data and thresholding the result in one step:
# True for values greater than 0.5, False for values less than or equal to 0.5
y_pred = model.predict(input_test_array) > 0.5

In [None]:
# Plotting the confusion matrix (rows = true label, columns = predicted label)
# TN, FP
# FN, TP
# 0 == "Não Grave" / 1 == "Grave"

labels = ['Não Grave', 'Grave']

# Both vectors are flattened to 1-D and the boolean predictions are cast to int,
# so true and predicted labels share one shape and one label type
# NOTE(review): assumes the outputs file has a single column - confirm
skplt.metrics.plot_confusion_matrix(np.ravel(output_test_array), np.ravel(y_pred).astype(int))

plt.xticks(ticks = [0, 1], labels = labels, size = 12)
plt.yticks(ticks = [0, 1], labels = labels, size = 12)

plt.ylabel('True label', size = 13, labelpad = 15)
# Trailing semicolon hides the Text repr in the cell output
plt.xlabel('Predicted label', size = 13, labelpad = 15);

In [None]:
# Taking the end timestamp and deriving the elapsed time since start_time
end_time = time.time()
execution_time = end_time - start_time

# Reporting the runtime in seconds
print(f"Execution time: {execution_time} seconds.")