# Hyperparametric Tuning


## Imports

In [93]:
import pandas as pd
import posixpath
from data_mining_project import data, PROJECT_PATH, DATA_PATH, OUTPUT_PATH
import numpy as np
import matplotlib as plt
import ast
import plotly.express as px
from sklearn.metrics import *

## Load Data

In [94]:
filename = "preprocessed_data.csv"
filepath = posixpath.join(OUTPUT_PATH, filename)
data_df = data.load_data_csv(filepath)
data_df = data.reformat_data(data_df)
data_df

Unnamed: 0,incident_id,vehicles_sequence,events_sequence,seconds_to_incident_sequence,approx_lat,approx_lon,train_kph_sequence,dj_ac_state_sequence,dj_dc_state_sequence,ac_dc_prob_num,ac_dc_prob_timestamp,ac_dc_prob,incident_type
0,4432881,"[609, 609, 609, 609, 609, 609, 609, 609, 609, ...","[1132, 2970, 4082, 4092, 2982, 3236, 4100, 270...","[-5506, -3583, -3546, -3546, -3542, -3478, -34...",50.876601,4.718143,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",5,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1,4
1,4432955,"[592, 592, 592, 592, 592, 592, 592, 592, 592, ...","[4114, 4168, 4168, 4114, 4168, 4168, 4114, 416...","[-10932, -10932, -10913, -10472, -10472, -1045...",50.864083,4.162115,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, ...",83,"[0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, ...",1,14
2,4433021,"[576, 576, 576, 576, 576, 576, 576, 576]","[358, 4056, 4054, 2740, 3528, 3506, 3516, 3528]","[-596, 418, 595, 699, 1122, 1151, 1156, 1178]",51.183220,4.276025,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 0, 0, 0, 0, 0, 0, 0]","[1, 1, 1, 1, 1, 1, 1, 1]",0,"[0, 0, 0, 0, 0, 0, 0, 0]",0,2
3,4433129,"[634, 634, 634, 634]","[4140, 4168, 4140, 4168]","[-139, -105, 944, 953]",50.818727,3.253601,"[0.0, 0.0, 0.0, 0.0]","[0, 0, 0, 0]","[0, 0, 0, 0]",4,"[1, 1, 1, 1]",1,14
4,4433267,"[1025, 1025, 1025, 1025, 1025, 1025, 1025, 102...","[4028, 3620, 4028, 4076, 4028, 4076, 3620, 362...","[-2328, -1970, -1970, -1970, -1498, -1498, -29...",49.663705,5.698090,"[0.0, 22.8, 22.7, 22.8, 145.4, 145.1, 0.0, 26....","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",5,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",1,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,4611895,"[539, 544, 544, 544, 544, 544, 544, 544, 544, ...","[2456, 1620, 1620, 1620, 1620, 1620, 1620, 162...","[2101, -7670, -5337, -3986, -2678, -2016, -127...",50.656246,4.421481,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,9
641,4611931,"[1057, 1057, 1057, 1057, 1057, 1057, 1057, 105...","[3238, 3512, 4048, 4050, 2740, 3528, 4048, 273...","[-1463, -1463, -1463, -1456, -1174, -1165, -10...",50.886315,4.400089,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,2
642,4611953,"[1016, 1016, 1016, 1016, 1016, 1016, 1016, 101...","[4158, 4140, 4162, 4160, 4168, 4166, 2252, 411...","[-13835, -13829, -13826, -13822, -13813, -1381...",50.159057,5.972059,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",56,"[1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, ...",1,14
643,4611991,"[505, 505, 505, 505, 505, 505, 505, 505, 505, ...","[4054, 2736, 3240, 3532, 4056, 3540, 2740, 350...","[-24, -23, 33, 33, 36, 41, 107, 121, 146, 158,...",50.767118,4.424321,"[0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,2


In [96]:
X = data_df.to_numpy()
X.shape

(645, 13)

## 10-fold cross validation

In [101]:
def init_confusion_matrix(classes):
    class_count = {c:0 for c in classes}
    confusion_matrix = {}
    for c in classes:
        confusion_matrix[c] = class_count.copy()
    return confusion_matrix
    

def evaluate_model(Y, Yhat, classes):
    confusion_matrix = init_confusion_matrix(classes)    
    for i in range(Y.shape[0]):
        y = Y[i]
        yhat = Yhat[i]
        confusion_matrix[y][yhat] += 1 
    
    return confusion_matrix

def ten_fold_cross_val(X, param):
    model_evals = []
    N = X.shape[0]
    n = X.shape[1]
    classes = np.unique(X[:, -1])
    for i in range(1, 11):
        start_index = int((i-1)/10 * N)
        end_index = int((i%10)/10 * N)
        X_tr = X[start_index:end_index]
        X_ts = X[end_index:, :-1]  # Drop the incident type
        Y = X[end_index:, -1]
        
       # model = train_model(training_data, param)
       # Yhat = test_model(testing_data, model)

        Yhat = np.repeat(2, Y.shape[0])      
        model_evals.append(evaluate_model(Y, Yhat, classes))
    
    model_eval = []
    keys = model_evals[0].keys()
    for i, d in enumerate(model_evals):
        model_eval.append([])
        for j, key1 in enumerate(keys):
            model_eval[i].append([])
            for key2 in keys:
                model_eval[i][j].append(d[key1][key2])
                
    model_eval = np.array(model_eval).mean(axis=0)
    avg_model_eval = {}
    for key1, value in zip(keys, model_eval):
        avg_model_eval[key1] = {}
        for i, key2 in enumerate(keys):
            avg_model_eval[key1][key2] = value[i]
        
    return avg_model_eval
    
avg_model_eval = ten_fold_cross_val(X, param=None)
print(avg_model_eval)

{2: {2: 55.8, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 3: {2: 2.4, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 4: {2: 42.1, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 6: {2: 2.8, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 7: {2: 2.5, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 9: {2: 41.7, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 11: {2: 15.1, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 13: {2: 65.6, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 14: {2: 72.9, 3: 0.0, 4: 0.0, 6: 0.0, 7: 0.0, 9: 0.0, 11: 0.0, 13: 0.0, 14: 0.0, 16: 0.0, 17: 0.0, 99: 0.0}, 16: {2: 1.4, 3: 0.0, 4: 0.0