In [None]:
# Script for running the pre-trained network

# Importing the libraries
import time
import shap

# Logging the start time of code execution.
# The clock starts after `time` and `shap` are imported, so the time spent
# importing them is not part of the reported runtime.
start_time = time.time()

import pickle
import scikitplot as skplt

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

# `sklearn.metrics.plot_confusion_matrix` was removed in scikit-learn 1.2.
# The import is kept for older environments but guarded, so the notebook still
# starts on newer releases; the confusion matrix further down is drawn with
# `skplt.metrics.plot_confusion_matrix`, not with this function.
try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    plot_confusion_matrix = None
from imblearn.under_sampling import RandomUnderSampler

# Suppressing warnings (only FutureWarning is silenced)
from warnings import simplefilter
simplefilter(action = 'ignore', category = FutureWarning)

# Configuring the style of the plots with Seaborn
sns.set_style('dark')

# Loading the SHAP JavaScript support into the notebook
shap.initjs()

In [None]:
# Restoring the pre-trained network from its pickle file.
# SECURITY: unpickling can execute arbitrary code, so this file must come from
# a trusted source only.
with open('../../data/networks_pkl/mlp_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Loading the test inputs and the test outputs, in that order, from their CSV files
inputs_test_df, outputs_test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../data/refined/inputs_test_df.csv',
        '../../data/refined/outputs_test_df.csv',
    )
)

In [None]:
# Turning both test dataframes (inputs first, outputs second) into NumPy arrays
input_test_array, output_test_array = (
    np.array(frame) for frame in (inputs_test_df, outputs_test_df)
)

In [None]:
# Under-sampling is used here only to separate "Não Grave" and "Grave" accidents
# at opposite ends of the resampled arrays, which makes it easy to interpret only
# the "Grave" accidents later on.
# NOTE(review): this relies on the sampler returning the rows grouped by class;
# confirm for the installed imbalanced-learn version.
#
# A fixed random_state makes the random selection of rows (and therefore every
# result computed from the resampled data) reproducible across runs.
rus = RandomUnderSampler(sampling_strategy = 'not minority', random_state = 42)
x_res_test, y_res_test = rus.fit_resample(input_test_array, output_test_array)

# To see the balance and distribution of the classes
print(pd.Series(y_res_test).value_counts())

# Plotting the new class distribution.
# The labels are passed explicitly as `x`: recent seaborn versions interpret the
# first positional argument as `data`, which would not count the classes.
sns.countplot(x = y_res_test);

In [None]:
# Feeding the full (not resampled) test inputs to the network
raw_scores = model.predict(input_test_array)

# Thresholding: outputs above 0.5 become True, everything else becomes False
y_pred = raw_scores > 0.5

In [None]:
# Confusion matrix of the test set.
# With true labels on the y-axis and predicted labels on the x-axis
# (as labelled below), the quadrants read:
#   TN, FP
#   FN, TP
# 0 == "Não Grave" / 1 == "Grave"

labels = ['Não Grave', 'Grave']

# scikit-plot returns the Axes it drew on; it is customised directly below
cm_ax = skplt.metrics.plot_confusion_matrix(output_test_array, y_pred)

# Showing the class names instead of the numeric codes on both axes
cm_ax.set_xticks([0, 1])
cm_ax.set_xticklabels(labels, size = 12)
cm_ax.set_yticks([0, 1])
cm_ax.set_yticklabels(labels, size = 12)

cm_ax.set_ylabel('True label', size = 13, labelpad = 15)
cm_ax.set_xlabel('Predicted label', size = 13, labelpad = 15)

In [None]:
# Index one past the last row of the resampled array
end = len(y_res_test)

# Midpoint of the resampled array: the notebook treats the second half
# (from this index on) as the "Grave" accidents
start = end // 2

In [None]:
# Building the SHAP explainer around the network's predict function, using the
# resampled test inputs as reference data and the dataframe columns as feature names
explainer_shap = shap.Explainer(
    model.predict,
    x_res_test,
    feature_names = inputs_test_df.columns,
)

# Explaining only the rows between `start` and `end` (second half of the resampled array)
rows_to_explain = x_res_test[start : end]
shap_values = explainer_shap(rows_to_explain)

In [None]:
# Beeswarm plot of the SHAP values, showing at most 21 features,
# ordered by each feature's maximum absolute SHAP value
feature_order = shap_values.abs.max(0)
shap.plots.beeswarm(
    shap_values,
    max_display = 21,
    plot_size = (8, 8),
    order = feature_order,
)

In [None]:
# Optional variant of the beeswarm plot above: up to 88 features on a 30 x 30 canvas (kept disabled)
#shap.plots.beeswarm(shap_values, max_display = 88, plot_size = (30, 30), order = shap_values.abs.max(0))

In [None]:
# Taking the end timestamp and deriving the elapsed time since `start_time`
end_time = time.time()
execution_time = end_time - start_time

# Reporting the runtime, in seconds
print("Execution time:", execution_time, "seconds.")