In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils.class_weight import compute_class_weight
from matplotlib import pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers, callbacks
from keras.layers import Dense, Dropout, BatchNormalization

In [2]:
df = pd.read_csv('Modified_Preprocessed_Data_3_Mar_2021.csv', index_col = 0)
df.head(5)

Unnamed: 0_level_0,Latitude coordinate (degrees),Longitude coordinate (degrees),Altitude (meters),Course (degrees),Difcourse: course variation (degrees),Acceleration in X filtered by KF (Gs),Acceleration in Y filtered by KF (Gs),Acceleration in Z filtered by KF (Gs),Roll (degrees),Pitch (degrees),...,no speed limit,no lanes number,Type of road_motorway,Type of road_motorway_link,Type of road_primary,Type of road_primary_link,Type of road_tertiary_link,AGGRESSIVE,DROWSY,NORMAL
Speed (km/h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
64.5,40.512924,-3.404577,612.5,331.9,0.0,-0.03,-0.015,0.008,-1.535,0.029,...,0,0,0,0,0,0,0,0,0,1
63.6,40.513065,-3.40468,612.9,330.8,1.055,0.026,-0.012,-0.006,-1.523,0.022,...,0,0,0,0,0,0,0,0,0,1
62.2,40.51321,-3.404772,613.3,330.8,1.055,-0.021,-0.018,0.036,-1.518,0.035,...,0,0,0,0,0,0,0,0,0,1
60.9,40.513348,-3.404868,613.5,330.1,0.703,0.025,0.006,0.065,-1.533,0.018,...,0,0,0,0,0,0,0,0,0,1
61.2,40.513477,-3.404971,613.8,329.4,1.406,0.014,-0.031,0.042,-1.56,0.022,...,0,0,0,0,0,0,0,0,0,1


In [3]:
X = df.iloc[:,:-3]
Y = df.iloc[:,-3:]


In [4]:
# Function to create sequences of 5 time steps
def create_sequences(X, Y, window_length=5):
    Xs, Ys = [], []
    for i in range(len(X) - window_length + 1):
        Xs.append(X.iloc[i:(i + window_length)].values)
        Ys.append(Y.iloc[i + window_length - 1])
    return np.array(Xs), np.array(Ys)

In [5]:
# Create sequences for the entire dataset before the K-Fold split
X_seq, Y_seq = create_sequences(X, Y)

In [6]:
kf = KFold(n_splits=5, random_state=4, shuffle=True)

In [7]:
# K-Fold Cross Validation
count = 0
for train_index, test_index in kf.split(X):
    print(f'Fold: {count}')

   # Split the data using the indices generated for the sequenced data
    X_train_fold, X_test_fold = X_seq[train_index], X_seq[test_index]
    Y_train_fold, Y_test_fold = Y_seq[train_index], Y_seq[test_index]
    
    # Normalize features for each sequence
    scaler = MinMaxScaler()
    X_train_fold_scaled = np.array([scaler.fit_transform(x) for x in X_train_fold])
    X_test_fold_scaled = np.array([scaler.transform(x) for x in X_test_fold])

    # Compute class weights for the current fold
    y_integers = np.argmax(Y_train_fold, axis=1)
    class_weights = compute_class_weight('balanced', classes=np.unique(y_integers), y=y_integers)
    d_class_weights = dict(enumerate(class_weights))
    
    # Define the LSTM model
    model = keras.Sequential([
        layers.LSTM(units=70, input_shape=(5, X_train_fold.shape[2]), return_sequences=True, activation="tanh"),
        Dropout(0.1),
        BatchNormalization(),
        layers.LSTM(units=80, return_sequences=True, activation="tanh"),
        Dropout(0.05),
        BatchNormalization(),
        layers.LSTM(units=70, return_sequences=True, activation="tanh"),
        Dropout(0.005),
        BatchNormalization(),
        layers.LSTM(units=80, activation="tanh"),
        Dropout(0.001),
        BatchNormalization(),
        Dense(3, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')])
    model.summary()

    # Train model
    history = model.fit(X_train_fold, Y_train_fold, validation_data=(X_test_fold, Y_test_fold), epochs=600, class_weight=d_class_weights, callbacks=[callbacks.EarlyStopping(monitor='val_loss', patience=40)])
    
    count += 1

y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Convert true labels from one-hot encoding to class labels if necessary
y_true_classes = np.argmax(Y_test, axis=1)

# Calculate and display precision, recall, and F1-score for each class
print("Evaluation Metrics Per Class:")
print(classification_report(y_true_classes, y_pred_classes, target_names=['Class 1', 'Class 2', 'Class 3']))

Fold: 0


IndexError: index 30736 is out of bounds for axis 0 with size 30736

In [None]:
precision = history.history['val_precision'][-1]  
recall = history.history['val_recall'][-1] 
f1_score = 2 * (precision * recall) / (precision + recall)
print('Precision '+ precision)
print('Recall '+ recall)
print('F1 Score '+ f1_score)


In [None]:
final_result = pd.concat([history_fold_0[-1:],
                          history_fold_1[-1:],
                          history_fold_2[-1:],
                          history_fold_3[-1:],
                          history_fold_4[-1:],
                         ])
final_result.columns = ['Training Accuracy', 'Training Loss (Crossentropy)','Training Precision','Training Recall', 'Validation Accuracy', 'Validation Loss (Crossentropy)','Validation Precision','Validation Recall']
final_result = final_result.reset_index(drop = True)
final_result.index += 1 
final_result.index.name = 'Fold'
final_result['Last Training Accuracy'] *= 100
final_result['Last Validation Accuracy'] *= 100
final_result['Last Training Loss (Crossentropy)'] = final_result['Last Training Loss (Crossentropy)'].round(5)
final_result['Last Validation Loss (Crossentropy)'] = final_result['Last Validation Loss (Crossentropy)'].round(5)
final_result['Last Training Accuracy'] = final_result['Last Training Accuracy'].round(2)
final_result['Last Validation Accuracy'] = final_result['Last Validation Accuracy'].round(2)

final_result

In [None]:
plt.figure(figsize = [4,3])
plt.plot(final_result.loc[:, ['Training Loss (Crossentropy)', 'Validation Loss (Crossentropy)']],'o-')
plt.legend(['Training Loss', 'Validation Loss'])
plt.title('Long Short Term Memory Loss')
plt.xticks(np.arange(1,6,1))
plt.xlabel('Folds')
plt.ylabel('Categorical Crossentropy')
plt.show()

In [None]:
plt.figure(figsize = [4,3])
plt.plot(final_result.loc[:, ['Training Accuracy', 'Validation Accuracy']],'o-')
plt.legend(['Training Acc', 'Validation Acc'])
plt.title('Long Short Term Memory Accuracy')
plt.xticks(np.arange(1, 6, 1))
plt.xlabel('Folds')
plt.ylabel('Accuracy')
plt.show()

In [None]:
plt.figure(figsize = [4,3])
plt.plot(final_result.loc[:, ['Training Precision', 'Validation Precision']],'o-')
plt.legend(['Training Precision', 'Validation Precison'])
plt.title('Long Short Term Memory Precision')
plt.xticks(np.arange(1, 6, 1))
plt.xlabel('Folds')
plt.ylabel('Precision')
plt.show()

In [None]:
plt.figure(figsize = [4,3])
plt.plot(final_result.loc[:, ['Training Recall', 'Validation Recall']],'o-')
plt.legend(['Training Recall', 'Validation Recall'])
plt.title('Long Short Term Memory Recall')
plt.xticks(np.arange(1, 6, 1))
plt.xlabel('Folds')
plt.ylabel('Recall')
plt.show()

In [None]:
average_validation_accuracy = final_result['Last Validation Accuracy'].mean()

print(f"The average validation accuracy is: {average_validation_accuracy}%")