# Original Dataset - ML Test

In [24]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers

import sqlalchemy
from sqlalchemy import create_engine, inspect

import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from pprint import pprint

import os
import time
from datetime import datetime

import numpy as np
from joblib import dump, load
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score

from imblearn.over_sampling import SMOTE
import json

%run functions.ipynb

In [2]:
# Time the run
start_time = time.time()

In [3]:
# Check for untitled_project folder
if os.path.exists("untitled_project"):
    input("DELETE untitled_project")

## Import datasets

In [4]:
# Import the data
engine = create_engine("sqlite:///voice.sqlite")

# View all of the classes
inspector = inspect(engine)
table_names = inspector.get_table_names()
table_names

['alexaval',
 'alexbval',
 'alexgval',
 'alexrval',
 'aval',
 'bval',
 'chroma1',
 'chroma10',
 'chroma11',
 'chroma12',
 'chroma2',
 'chroma3',
 'chroma4',
 'chroma5',
 'chroma6',
 'chroma7',
 'chroma8',
 'chroma9',
 'chromastd',
 'deltachroma1',
 'deltachroma10',
 'deltachroma11',
 'deltachroma12',
 'deltachroma2',
 'deltachroma3',
 'deltachroma4',
 'deltachroma5',
 'deltachroma6',
 'deltachroma7',
 'deltachroma8',
 'deltachroma9',
 'deltachromastd',
 'deltaenergy',
 'deltaenergyentropy',
 'deltamfcc1',
 'deltamfcc10',
 'deltamfcc11',
 'deltamfcc12',
 'deltamfcc13',
 'deltamfcc2',
 'deltamfcc3',
 'deltamfcc4',
 'deltamfcc5',
 'deltamfcc6',
 'deltamfcc7',
 'deltamfcc8',
 'deltamfcc9',
 'deltaspectralcentroid',
 'deltaspectralentropy',
 'deltaspectralflux',
 'deltaspectralrolloff',
 'deltaspectralspread',
 'deltazcr',
 'demographic',
 'diagnosis',
 'energy',
 'energyentropy',
 'gval',
 'habits',
 'mfcc1',
 'mfcc10',
 'mfcc11',
 'mfcc12',
 'mfcc13',
 'mfcc2',
 'mfcc3',
 'mfcc4',
 'mfcc5

In [5]:
# Tables not to import
no_import_tables = [
    'rval', 'gval', 'bval', 'aval',
    'demographic', 'habits',
    'alexrval', 'alexgval', 'alexbval', 'alexaval'
]

In [6]:
# Initialise a dictionary to hold dataframes
dataframes = dict()

# Loop through each table
for table in table_names:
    
    if table not in no_import_tables:
        # Dataframe name
        df_name = f'{table}_df'

        # Create dataframe
        dataframes[df_name] = pd.read_sql(
            f'SELECT * FROM {table}',
            engine
        )

## Preprocessing

### Separate the target and feature variables

In [7]:
# Isolate the diagnosis column
diagnosis_df = dataframes['diagnosis_df'].sort_values(by='id').reset_index(drop=True)
y = diagnosis_df['diagnosis'].copy()

# Encode the target variable, ignore subtype
y = y.apply(encode_binary)
y

0      1
1      0
2      1
3      1
4      1
      ..
199    0
200    1
201    0
202    0
203    1
Name: diagnosis, Length: 204, dtype: int64

### Recombine the feature variables

In [8]:
# Define subset of only temporal features
temporal_tables = list(dataframes.keys())
temporal_tables.remove('diagnosis_df')

# Initialise a dictionary to hold all features
all_feats = dict()

# Loop through each temporal feature
for table in sorted(temporal_tables):
    
    # Get the dataframe
    df = dataframes[table]
    
    # Get the feature array and voice IDs
    feat_array = df.values[:, 1:]
    voice_list = df.values[:, 0]
    
    # Initialise a feature dictionary
    feat_dict = dict()
    
    # Use a for-loop to populate the dictionary
    for idx, feat in enumerate(feat_array):
        feat_dict[voice_list[idx]] = feat
        # if len(feat) != 192:
            # print(len(feat))
    
    # Append to the all feature dictionary
    feat_name = table.split("_")[0]
    all_feats[feat_name] = feat_dict

# Convert to a dataframe
X = pd.DataFrame(all_feats).sort_index().reset_index(drop=True)
# X = pd.DataFrame(all_feats)
X.head()

Unnamed: 0,chroma10,chroma11,chroma12,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,...,mfcc6,mfcc7,mfcc8,mfcc9,spectralcentroid,spectralentropy,spectralflux,spectralrolloff,spectralspread,zcr
0,"[8.160939658571425e-36, 8.160939658571425e-36,...","[6.259135778625184e-35, 6.259135778625184e-35,...","[6.521327966022236e-37, 6.521327966022236e-37,...","[1.981977565762179e-36, 1.981977565762179e-36,...","[1.3575144582002011e-36, 1.3575144582002011e-3...","[0.2, 0.2, 0.2, 0.0083557589856543, 0.00571074...","[4.104873703767106e-36, 4.104873703767106e-36,...","[2.155332585060541e-35, 2.155332585060541e-35,...","[5.559684454245396e-36, 5.559684454245396e-36,...","[1.3576749403691584e-33, 1.3576749403691584e-3...",...,"[1.1190399446969557e-07, 1.1190399446969557e-0...","[8.413179620042933e-08, 8.413179620042933e-08,...","[6.508014845220068e-08, 6.508014845220068e-08,...","[4.8439279586533656e-08, 4.8439279586533656e-0...","[0.005, 0.005, 0.005, 0.3465049654087774, 0.28...","[1.223008975714376e-10, 1.223008975714376e-10,...","[0.0, 0.0, 0.0, 0.9996124728099652, 0.00645907...","[0.0, 0.0, 0.0, 0.42, 0.325, 0.33, 0.325, 0.32...","[4.770690588753296e-09, 4.770690588753296e-09,...","[0.0, 0.0, 0.0, 0.0701754385964912, 0.20300751..."
1,"[7.013453879267504e-36, 7.013453879267504e-36,...","[0.0, 0.0, 0.0, 0.0166532558508933, 0.07869997...","[0.0, 0.0, 0.0, 0.0129537558999756, 0.00446014...","[0.0, 0.0, 0.0, 0.0030468925055301, 0.00093043...","[7.408793082486265e-37, 7.408793082486265e-37,...","[0.1999999999999999, 0.1999999999999999, 0.199...","[3.4822079082192796e-36, 3.4822079082192796e-3...","[3.341581545922013e-36, 3.341581545922013e-36,...","[0.0, 0.0, 0.0, 0.0251245180249349, 0.02791182...","[8.629140939612992e-35, 8.629140939612992e-35,...",...,"[5.570574598767376e-09, 5.570574598767376e-09,...","[2.3875995972436553e-09, 2.3875995972436553e-0...","[2.749349885292186e-10, 2.749349885292186e-10,...","[-1.6617109241744871e-09, -1.6617109241744871e...","[0.005, 0.005, 0.005, 0.2686992943604466, 0.24...","[1.6891478942563823e-09, 1.6891478942563823e-0...","[0.0, 0.0, 0.0, 1.015672065306574, 0.012061107...","[0.0, 0.0, 0.0, 0.305, 0.295, 0.295, 0.295, 0....","[3.0750895132596103e-09, 3.0750895132596103e-0...","[0.0, 0.0, 0.0, 0.0701754385964912, 0.18045112..."
2,"[0.0, 0.0, 0.0, 0.0254328104050869, 0.00097298...","[0.0, 0.0, 0.0, 0.0193505451629107, 0.00262743...","[0.0, 0.0, 0.0, 0.0083292562363225, 0.00160357...","[0.0, 0.0, 0.0, 0.0080703610497697, 0.01283563...","[0.0, 0.0, 0.0, 0.0070286755905986, 0.00614136...","[0.2, 0.2, 0.2, 0.0085329179281302, 0.00461212...","[0.0, 0.0, 0.0, 0.0052980532581862, 0.00109933...","[0.0, 0.0, 0.0, 0.00504925682982, 0.0052632757...","[0.0, 0.0, 0.0, 0.0059614712279919, 0.01369267...","[0.0, 0.0, 0.0, 0.0128160916733445, 0.01380721...",...,"[0.0, 0.0, 0.0, -0.2599778999084198, -0.290567...","[0.0, 0.0, 0.0, 0.1582810230304283, 0.22599124...","[0.0, 0.0, 0.0, -0.1396072459697239, -0.235683...","[0.0, 0.0, 0.0, -0.2101056367575636, -0.212658...","[0.0049999999999999, 0.0049999999999999, 0.004...","[7.084185505857156e-10, 7.084185505857156e-10,...","[0.0, 0.0, 0.0, 1.0014504084715852, 0.01066764...","[0.0, 0.0, 0.0, 0.33, 0.32, 0.32, 0.32, 0.32, ...","[8.88178419700125e-19, 8.88178419700125e-19, 8...","[0.0, 0.0, 0.0, 0.0651629072681704, 0.17293233..."
3,"[2.702574583604019e-36, 2.702574583604019e-36,...","[4.776934641543469e-37, 4.776934641543469e-37,...","[0.0, 0.0, 0.0, 0.0090083795612209, 0.00081537...","[1.1698336326834081e-34, 1.1698336326834081e-3...","[7.2024558647616085e-37, 7.2024558647616085e-3...","[0.2, 0.2, 0.2, 0.0356911864166217, 0.00370847...","[1.3418390923476396e-36, 1.3418390923476396e-3...","[1.2876499240617028e-36, 1.2876499240617028e-3...","[0.0, 0.0, 0.0, 0.0065263846329425, 0.00703412...","[7.234653246625582e-35, 7.234653246625582e-35,...",...,"[-9.094534884871259e-08, -9.094534884871259e-0...","[-5.831674649803382e-07, -5.831674649803382e-0...","[-8.326572895673644e-07, -8.326572895673644e-0...","[-6.521214770955914e-07, -6.521214770955914e-0...","[0.005, 0.005, 0.005, 0.2888106496790997, 0.29...","[1.0169918121625369e-11, 1.0169918121625369e-1...","[0.0, 0.0, 0.0, 0.9866939562783864, 0.00961743...","[0.0, 0.0, 0.0, 0.425, 0.425, 0.43, 0.385, 0.3...","[3.904414395649892e-09, 3.904414395649892e-09,...","[0.0, 0.0, 0.0, 0.025062656641604, 0.082706766..."
4,"[0.0, 0.0, 0.0, 0.0041882695402504, 0.02049149...","[0.0, 0.0, 0.0, 0.0040731803154477, 0.00261262...","[0.0, 0.0, 0.0, 0.0026445650113565, 0.00700864...","[0.0, 0.0, 0.0, 0.0029177614915496, 0.01259147...","[0.0, 0.0, 0.0, 0.0023704152160725, 0.00010023...","[0.2, 0.2, 0.2, 0.0093191724292075, 0.01320134...","[0.0, 0.0, 0.0, 0.0110916645774918, 0.00755240...","[0.0, 0.0, 0.0, 0.021437601576933, 0.009915651...","[0.0, 0.0, 0.0, 0.0344103741912122, 0.00904430...","[0.0, 0.0, 0.0, 0.0300217717324319, 0.06891515...",...,"[0.0, 0.0, 0.0, 0.1528818094724874, 0.05941509...","[0.0, 0.0, 0.0, 0.5976081964535674, 0.33569853...","[0.0, 0.0, 0.0, 0.1015572440453905, 0.04580434...","[0.0, 0.0, 0.0, -0.9055532969635782, -0.709582...","[0.0049999999999999, 0.0049999999999999, 0.004...","[9.842672086828249e-11, 9.842672086828249e-11,...","[0.0, 0.0, 0.0, 1.0073867109950227, 0.00740300...","[0.0, 0.0, 0.0, 0.305, 0.31, 0.285, 0.305, 0.3...","[8.88178419700125e-19, 8.88178419700125e-19, 8...","[0.0, 0.0, 0.0, 0.0275689223057644, 0.11528822..."


In [9]:
# Display the dataframe
X.info()
X.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 204 entries, 0 to 203
Data columns (total 68 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   chroma10               204 non-null    object
 1   chroma11               204 non-null    object
 2   chroma12               204 non-null    object
 3   chroma1                204 non-null    object
 4   chroma2                204 non-null    object
 5   chroma3                204 non-null    object
 6   chroma4                204 non-null    object
 7   chroma5                204 non-null    object
 8   chroma6                204 non-null    object
 9   chroma7                204 non-null    object
 10  chroma8                204 non-null    object
 11  chroma9                204 non-null    object
 12  chromastd              204 non-null    object
 13  deltachroma10          204 non-null    object
 14  deltachroma11          204 non-null    object
 15  deltachroma12          

Unnamed: 0,chroma10,chroma11,chroma12,chroma1,chroma2,chroma3,chroma4,chroma5,chroma6,chroma7,...,mfcc6,mfcc7,mfcc8,mfcc9,spectralcentroid,spectralentropy,spectralflux,spectralrolloff,spectralspread,zcr
0,"[8.160939658571425e-36, 8.160939658571425e-36,...","[6.259135778625184e-35, 6.259135778625184e-35,...","[6.521327966022236e-37, 6.521327966022236e-37,...","[1.981977565762179e-36, 1.981977565762179e-36,...","[1.3575144582002011e-36, 1.3575144582002011e-3...","[0.2, 0.2, 0.2, 0.0083557589856543, 0.00571074...","[4.104873703767106e-36, 4.104873703767106e-36,...","[2.155332585060541e-35, 2.155332585060541e-35,...","[5.559684454245396e-36, 5.559684454245396e-36,...","[1.3576749403691584e-33, 1.3576749403691584e-3...",...,"[1.1190399446969557e-07, 1.1190399446969557e-0...","[8.413179620042933e-08, 8.413179620042933e-08,...","[6.508014845220068e-08, 6.508014845220068e-08,...","[4.8439279586533656e-08, 4.8439279586533656e-0...","[0.005, 0.005, 0.005, 0.3465049654087774, 0.28...","[1.223008975714376e-10, 1.223008975714376e-10,...","[0.0, 0.0, 0.0, 0.9996124728099652, 0.00645907...","[0.0, 0.0, 0.0, 0.42, 0.325, 0.33, 0.325, 0.32...","[4.770690588753296e-09, 4.770690588753296e-09,...","[0.0, 0.0, 0.0, 0.0701754385964912, 0.20300751..."
1,"[7.013453879267504e-36, 7.013453879267504e-36,...","[0.0, 0.0, 0.0, 0.0166532558508933, 0.07869997...","[0.0, 0.0, 0.0, 0.0129537558999756, 0.00446014...","[0.0, 0.0, 0.0, 0.0030468925055301, 0.00093043...","[7.408793082486265e-37, 7.408793082486265e-37,...","[0.1999999999999999, 0.1999999999999999, 0.199...","[3.4822079082192796e-36, 3.4822079082192796e-3...","[3.341581545922013e-36, 3.341581545922013e-36,...","[0.0, 0.0, 0.0, 0.0251245180249349, 0.02791182...","[8.629140939612992e-35, 8.629140939612992e-35,...",...,"[5.570574598767376e-09, 5.570574598767376e-09,...","[2.3875995972436553e-09, 2.3875995972436553e-0...","[2.749349885292186e-10, 2.749349885292186e-10,...","[-1.6617109241744871e-09, -1.6617109241744871e...","[0.005, 0.005, 0.005, 0.2686992943604466, 0.24...","[1.6891478942563823e-09, 1.6891478942563823e-0...","[0.0, 0.0, 0.0, 1.015672065306574, 0.012061107...","[0.0, 0.0, 0.0, 0.305, 0.295, 0.295, 0.295, 0....","[3.0750895132596103e-09, 3.0750895132596103e-0...","[0.0, 0.0, 0.0, 0.0701754385964912, 0.18045112..."
2,"[0.0, 0.0, 0.0, 0.0254328104050869, 0.00097298...","[0.0, 0.0, 0.0, 0.0193505451629107, 0.00262743...","[0.0, 0.0, 0.0, 0.0083292562363225, 0.00160357...","[0.0, 0.0, 0.0, 0.0080703610497697, 0.01283563...","[0.0, 0.0, 0.0, 0.0070286755905986, 0.00614136...","[0.2, 0.2, 0.2, 0.0085329179281302, 0.00461212...","[0.0, 0.0, 0.0, 0.0052980532581862, 0.00109933...","[0.0, 0.0, 0.0, 0.00504925682982, 0.0052632757...","[0.0, 0.0, 0.0, 0.0059614712279919, 0.01369267...","[0.0, 0.0, 0.0, 0.0128160916733445, 0.01380721...",...,"[0.0, 0.0, 0.0, -0.2599778999084198, -0.290567...","[0.0, 0.0, 0.0, 0.1582810230304283, 0.22599124...","[0.0, 0.0, 0.0, -0.1396072459697239, -0.235683...","[0.0, 0.0, 0.0, -0.2101056367575636, -0.212658...","[0.0049999999999999, 0.0049999999999999, 0.004...","[7.084185505857156e-10, 7.084185505857156e-10,...","[0.0, 0.0, 0.0, 1.0014504084715852, 0.01066764...","[0.0, 0.0, 0.0, 0.33, 0.32, 0.32, 0.32, 0.32, ...","[8.88178419700125e-19, 8.88178419700125e-19, 8...","[0.0, 0.0, 0.0, 0.0651629072681704, 0.17293233..."
3,"[2.702574583604019e-36, 2.702574583604019e-36,...","[4.776934641543469e-37, 4.776934641543469e-37,...","[0.0, 0.0, 0.0, 0.0090083795612209, 0.00081537...","[1.1698336326834081e-34, 1.1698336326834081e-3...","[7.2024558647616085e-37, 7.2024558647616085e-3...","[0.2, 0.2, 0.2, 0.0356911864166217, 0.00370847...","[1.3418390923476396e-36, 1.3418390923476396e-3...","[1.2876499240617028e-36, 1.2876499240617028e-3...","[0.0, 0.0, 0.0, 0.0065263846329425, 0.00703412...","[7.234653246625582e-35, 7.234653246625582e-35,...",...,"[-9.094534884871259e-08, -9.094534884871259e-0...","[-5.831674649803382e-07, -5.831674649803382e-0...","[-8.326572895673644e-07, -8.326572895673644e-0...","[-6.521214770955914e-07, -6.521214770955914e-0...","[0.005, 0.005, 0.005, 0.2888106496790997, 0.29...","[1.0169918121625369e-11, 1.0169918121625369e-1...","[0.0, 0.0, 0.0, 0.9866939562783864, 0.00961743...","[0.0, 0.0, 0.0, 0.425, 0.425, 0.43, 0.385, 0.3...","[3.904414395649892e-09, 3.904414395649892e-09,...","[0.0, 0.0, 0.0, 0.025062656641604, 0.082706766..."
4,"[0.0, 0.0, 0.0, 0.0041882695402504, 0.02049149...","[0.0, 0.0, 0.0, 0.0040731803154477, 0.00261262...","[0.0, 0.0, 0.0, 0.0026445650113565, 0.00700864...","[0.0, 0.0, 0.0, 0.0029177614915496, 0.01259147...","[0.0, 0.0, 0.0, 0.0023704152160725, 0.00010023...","[0.2, 0.2, 0.2, 0.0093191724292075, 0.01320134...","[0.0, 0.0, 0.0, 0.0110916645774918, 0.00755240...","[0.0, 0.0, 0.0, 0.021437601576933, 0.009915651...","[0.0, 0.0, 0.0, 0.0344103741912122, 0.00904430...","[0.0, 0.0, 0.0, 0.0300217717324319, 0.06891515...",...,"[0.0, 0.0, 0.0, 0.1528818094724874, 0.05941509...","[0.0, 0.0, 0.0, 0.5976081964535674, 0.33569853...","[0.0, 0.0, 0.0, 0.1015572440453905, 0.04580434...","[0.0, 0.0, 0.0, -0.9055532969635782, -0.709582...","[0.0049999999999999, 0.0049999999999999, 0.004...","[9.842672086828249e-11, 9.842672086828249e-11,...","[0.0, 0.0, 0.0, 1.0073867109950227, 0.00740300...","[0.0, 0.0, 0.0, 0.305, 0.31, 0.285, 0.305, 0.3...","[8.88178419700125e-19, 8.88178419700125e-19, 8...","[0.0, 0.0, 0.0, 0.0275689223057644, 0.11528822..."


### Split and Scale

In [10]:
# Split the preprocessed data to training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [12]:
# Flatten the feature arrays
X_train_flat = np.array(
    [X[feature][i] for feature in X.keys() for i in range(len(X_train))])

X_test_flat = np.array(
    [X[feature][i] for feature in X.keys() for i in range(len(X_test))])

In [15]:
# Initialize the MinMaxScaler
scaler = MinMaxScaler()

In [16]:
# Use fit_transform on X_train
X_train_scaled = scaler.fit_transform(
    X_train_flat.reshape(
        len(X_train) * len(X.columns) * len(X['chroma10'][0]),
        -1
    )).reshape(
        len(X_train),
        len(X.columns),
        len(X['chroma10'][0])
    )

# Use transform on X_Test
X_test_scaled = scaler.transform(
    X_test_flat.reshape(
        len(X_test) * len(X.columns) * len(X['chroma10'][0]),
        -1
    )).reshape(
        len(X_test),
        len(X.columns),
        len(X['chroma10'][0])
    )

In [21]:
X_train_scaled.shape

(153, 68, 192)

In [20]:
# Save the scaler
# dump(X_scaler, '../voice_app/assets/scaler.joblib')

['../voice_app/assets/scaler.joblib']

## Hyperparameter Tuning

In [55]:
# Define the model parameters
# number_input_features = len(X_train.columns)
input_shape = (len(X.columns), len(X['chroma10'][0]))

# Maximum hidden layers (min. 2 for DL)
max_hidden_layers = 10

# Maximum neurons per hidden layer
max_num_neurons = len(X.columns) * 2 - 1

# Step count
step_count = 5

# Hidden layer activation functions
activation_functions = [
    'relu', 'leaky_relu', 'tanh',
    'elu', 'selu', 'exponential',
    'softmax', 'softplus'
]

# Define the output layer
output_layer_neurons = 1 # 4
output_layer_activation = 'sigmoid' # 'softmax'

# Model compilation
compile_loss = "binary_crossentropy"
# compile_loss = "sparse_categorical_crossentropy"
compile_opt = "adam"

# Number of max epochs
tuner_max_epochs = 20
search_max_epochs = 20

# Hyperband iterations
hp_iterations = 2

# Regularizers
reg_kernel = regularizers.L1(0.01)

In [56]:
# Extract class name and parameter value, for performance tracker
class_name = reg_kernel.__class__.__name__

if class_name == "L1L2":
    value_l1 = reg_kernel.get_config()['l1']
    value_l2 = reg_kernel.get_config()['l2']

    # Create string version
    reg_kernel_string = f"{class_name}(l1={value_l1:.3f})(l2={value_l2:.3f})"

else:
    param_value = reg_kernel.get_config()[f'{class_name.lower()}']

    # Create string version
    reg_kernel_string = f"{class_name}({param_value:.3f})"

print(reg_kernel_string)

L1(0.010)


In [57]:
# Initialise the Hyperband tuner
tuner = kt.Hyperband(
    create_dl_model,
    objective = "val_accuracy",
    max_epochs = tuner_max_epochs,
    hyperband_iterations = hp_iterations
)

In [58]:
# Find the best hyperparameters
tuner.search(
    X_train_scaled,
    y_train,
    epochs = search_max_epochs,
    validation_data = (X_test_scaled, y_test)
)
# tuner.search(
#     X_resampled,
#     y_resampled,
#     epochs = search_max_epochs,
#     validation_data = (X_test_scaled, y_test)
# )

Trial 60 Complete [00h 00m 02s]
val_accuracy: 0.7254902124404907

Best val_accuracy So Far: 0.7254902124404907
Total elapsed time: 00h 01m 11s


## Compile, Train, Evaluate the Best Model

In [25]:
# Get the top 3 model hyperparameters
top3_hyper = tuner.get_best_hyperparameters(3)

for model in top3_hyper:
    pprint(model.values)

{'activation_layer_0': 'softplus',
 'activation_layer_1': 'exponential',
 'activation_layer_2': 'elu',
 'activation_layer_3': 'elu',
 'activation_layer_4': 'tanh',
 'num_layers': 1,
 'tuner/bracket': 1,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/round': 1,
 'tuner/trial_id': '0049',
 'units_layer_0': 16,
 'units_layer_1': 36,
 'units_layer_2': 26,
 'units_layer_3': 36,
 'units_layer_4': 26}
{'activation_layer_0': 'tanh',
 'activation_layer_1': 'relu',
 'activation_layer_2': 'exponential',
 'activation_layer_3': 'softmax',
 'activation_layer_4': 'relu',
 'num_layers': 2,
 'tuner/bracket': 2,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/round': 0,
 'units_layer_0': 31,
 'units_layer_1': 41,
 'units_layer_2': 46,
 'units_layer_3': 31,
 'units_layer_4': 31}
{'activation_layer_0': 'selu',
 'activation_layer_1': 'leaky_relu',
 'activation_layer_2': 'selu',
 'activation_layer_3': 'relu',
 'activation_layer_4': 'exponential',
 'num_layers': 1,
 'tuner/bracket': 0,
 'tune

In [26]:
# Get the top model
best_hyper = top3_hyper[0]
best_hyper.values

{'activation_layer_0': 'softplus',
 'units_layer_0': 16,
 'num_layers': 1,
 'units_layer_1': 36,
 'activation_layer_1': 'exponential',
 'units_layer_2': 26,
 'activation_layer_2': 'elu',
 'units_layer_3': 36,
 'activation_layer_3': 'elu',
 'units_layer_4': 26,
 'activation_layer_4': 'tanh',
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0049'}

In [27]:
# Define the model parameters
number_input_features = len(X_train.columns)
hidden_layer0_neurons = best_hyper.values['units_layer_0']
hidden_layer0_activation = best_hyper.values['activation_layer_0']

# Return the number of hidden layers
total_hidden = best_hyper.values['num_layers'] + 1

# Train model
model_train_epochs = 100

In [28]:
# Initialise the sequential model
nn = Sequential()

# Create the input layer and first hidden layer
nn.add(Dense(
    units = hidden_layer0_neurons,
    activation = hidden_layer0_activation,
    kernel_regularizer = reg_kernel,
    input_dim = number_input_features
))
        
# Create additional hidden layers
for layer in range(1, total_hidden):
    nn.add(Dense(
        units = best_hyper.values[f'units_layer_{layer}'],
        activation = best_hyper.values[f'activation_layer_{layer}'],
        kernel_regularizer = reg_kernel
    ))

# Create the output layer
nn.add(Dense(
    units = output_layer_neurons,
    activation = output_layer_activation
))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 16)                544       
                                                                 
 dense_7 (Dense)             (None, 36)                612       
                                                                 
 dense_8 (Dense)             (None, 1)                 37        
                                                                 
Total params: 1193 (4.66 KB)
Trainable params: 1193 (4.66 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [29]:
# Compile the model
nn.compile(
    loss = compile_loss,
    optimizer = compile_opt,
    metrics = ["accuracy"]
)

In [30]:
# Train the model
fit_model = nn.fit(
    X_train_scaled,
    y_train,
    shuffle = True,
    epochs = model_train_epochs,
    verbose = 1
)
# fit_model = nn.fit(
#     X_resampled,
#     y_resampled,
#     shuffle = True,
#     epochs = model_train_epochs,
#     verbose = 1
# )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

## Evaluate the Model Results

In [31]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(
    X_test_scaled,
    y_test,
    verbose = 2
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 1.0219 - accuracy: 0.7255 - 43ms/epoch - 22ms/step
Loss: 1.0219465494155884, Accuracy: 0.7254902124404907


In [32]:
# Check the prediction's output probabilities
predicted_prob = nn.predict(X_test_scaled)
clean_prob = np.round(predicted_prob, 2)

# Round to the nearest integer and flatten
clean_predicted = np.round(predicted_prob).astype(int).flatten()

# Convert to a dataframe for readability
output_prob = pd.DataFrame({
    'Actual': y_test,
    'Predicted': clean_predicted,
    'Probability': clean_prob.flatten()
})

output_prob.head(10)



Unnamed: 0,Actual,Predicted,Probability
58,0,0,0.37
14,1,1,0.97
7,1,1,1.0
28,1,1,0.98
114,1,1,0.93
135,0,1,0.95
22,1,1,0.62
119,0,0,0.34
3,1,1,1.0
88,1,1,0.99


In [33]:
# Display the confusion matrix
cmatrix = confusion_matrix(y_test, clean_predicted)

# Convert the matrix to a DataFrame
cmatrix_df = pd.DataFrame(
    cmatrix,
    index = ["Actual 0", "Actual 1"],
    columns = ["Predicted 0", "Predicted 1"]
)

# Display the confusion matrix
print("Confusion Matrix:")
cmatrix_df

Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,6,8
Actual 1,6,31


In [34]:
# Print the classification report for the model
creport = classification_report(
    y_test,
    clean_predicted,
    target_names = ["Healthy (0)", "Pathological (1)"]
)

print("Classification Report:\n", creport)

Classification Report:
                   precision    recall  f1-score   support

     Healthy (0)       0.50      0.43      0.46        14
Pathological (1)       0.79      0.84      0.82        37

        accuracy                           0.73        51
       macro avg       0.65      0.63      0.64        51
    weighted avg       0.71      0.73      0.72        51



In [35]:
# Extract the precision and recall
precision = precision_score(y_test, clean_predicted, average=None)
recall = recall_score(y_test, clean_predicted, average=None)
print(precision, recall)

[0.5        0.79487179] [0.42857143 0.83783784]


## Save Results to Performance Tracker

In [36]:
# Create dictionary to save the results
results_dict = dict()

current_time = datetime.now()
results_dict['timestamp'] = current_time
results_dict['runtime'] = time.time() - start_time
results_dict['model_loss'] = model_loss
results_dict['model_accuracy'] = model_accuracy
results_dict['precision_0'] = precision[0]
results_dict['precision_1'] = precision[1]
results_dict['recall_0'] = recall[0]
results_dict['recall_1'] = recall[1]

In [37]:
# Print the model architecture
print(f"Input features: {number_input_features}")
print(f"Hidden Layer 0: {hidden_layer0_activation}, {hidden_layer0_neurons}")

# Populate results_dict with model details
results_dict['input_features'] = number_input_features
results_dict['num_layers'] = best_hyper.values['num_layers']
results_dict['hlayer_0_activation'] = hidden_layer0_activation
results_dict['hlayer_0_neurons'] = hidden_layer0_neurons
results_dict['kernel_regularizer'] = reg_kernel_string

for layer in range(1, total_hidden):
    activation = best_hyper.values[f'activation_layer_{layer}']
    neurons = best_hyper.values[f'units_layer_{layer}']
    print(f"Hidden Layer {layer}: {activation}, {neurons}")
    
    results_dict[f'hlayer_{layer}_activation'] = best_hyper.values[
        f'activation_layer_{layer}']
    results_dict[f'hlayer_{layer}_neurons'] = best_hyper.values[
        f'units_layer_{layer}']

results_dict['olayer_neurons'] = output_layer_neurons
results_dict['olayer_activation'] = output_layer_activation

# Populate results_dict with tuning details
results_dict['tuning_max_hidden'] = max_hidden_layers
results_dict['tuning_max_neurons'] = max_num_neurons
results_dict['tuning_step_count'] = step_count
results_dict['activation_functions'] = str(activation_functions)
results_dict['tuning_tuner_epochs'] = tuner_max_epochs
results_dict['tuning_search_epochs'] = search_max_epochs
results_dict['tuning_hp_iterations'] = hp_iterations

# Populate results_dict with compilation details
results_dict['compile_loss'] = compile_loss
results_dict['compile_optimizer'] = compile_opt

Input features: 33
Hidden Layer 0: softplus, 16
Hidden Layer 1: exponential, 36


In [38]:
# Change message
change_message = input("Changes from previous iteration: ")

# Append to results_dict
results_dict['change_message'] = change_message

Changes from previous iteration:  no change, run after notebook/database cleanup


In [39]:
# Display the dictionary
results_dict

{'timestamp': datetime.datetime(2024, 1, 7, 9, 27, 20, 380495),
 'runtime': 48.56920504570007,
 'model_loss': 1.0219465494155884,
 'model_accuracy': 0.7254902124404907,
 'precision_0': 0.5,
 'precision_1': 0.7948717948717948,
 'recall_0': 0.42857142857142855,
 'recall_1': 0.8378378378378378,
 'input_features': 33,
 'num_layers': 1,
 'hlayer_0_activation': 'softplus',
 'hlayer_0_neurons': 16,
 'kernel_regularizer': 'L1(0.010)',
 'hlayer_1_activation': 'exponential',
 'hlayer_1_neurons': 36,
 'olayer_neurons': 1,
 'olayer_activation': 'sigmoid',
 'tuning_max_hidden': 5,
 'tuning_max_neurons': 65,
 'tuning_step_count': 5,
 'activation_functions': "['relu', 'leaky_relu', 'tanh', 'elu', 'selu', 'exponential', 'softmax', 'softplus']",
 'tuning_tuner_epochs': 20,
 'tuning_search_epochs': 20,
 'tuning_hp_iterations': 2,
 'compile_loss': 'binary_crossentropy',
 'compile_optimizer': 'adam',
 'change_message': 'no change, run after notebook/database cleanup'}

In [40]:
# Convert the dictionary to a dataframe
results_df = pd.DataFrame(results_dict, index=[0])
results_df.head()

Unnamed: 0,timestamp,runtime,model_loss,model_accuracy,precision_0,precision_1,recall_0,recall_1,input_features,num_layers,...,tuning_max_hidden,tuning_max_neurons,tuning_step_count,activation_functions,tuning_tuner_epochs,tuning_search_epochs,tuning_hp_iterations,compile_loss,compile_optimizer,change_message
0,2024-01-07 09:27:20.380495,48.569205,1.021947,0.72549,0.5,0.794872,0.428571,0.837838,33,1,...,5,65,5,"['relu', 'leaky_relu', 'tanh', 'elu', 'selu', ...",20,20,2,binary_crossentropy,adam,"no change, run after notebook/database cleanup"


In [41]:
# Performance tracker
tracker_path = "../resources/tracker/dl_performance_tracker.csv"

# Model percentage
model_pct = round(model_accuracy, 3)

# Check if the CSV exists
if os.path.exists(tracker_path):
    
    # Read the existing CSV
    tracker_df = pd.read_csv(tracker_path)
    
    # Append the new row of data
    updated_df = pd.concat([tracker_df, results_df], ignore_index=True)
    
    # Update the CSV file
    updated_df.to_csv(tracker_path, index=False)
    
    # Export model to HDF5 file
    nn.save(f'../models/dl/run_{len(tracker_df)}_{model_pct}.h5', save_format='h5')

else:    
    # Export to CSV
    results_df.to_csv(tracker_path, index=False)
    
    # Export model to HDF5 file
    nn.save(f'../models/dl/run_0_{model_pct}.h5', save_format='h5')

  saving_api.save_model(


In [42]:
# Check the prediction's output probabilities
predicted_prob = nn.predict(X_test_scaled)
clean_prob = np.round(predicted_prob, 2)

# Round to the nearest integer and flatten
clean_predicted = np.round(predicted_prob).astype(int).flatten()

# Convert to a dataframe for readability
output_prob = pd.DataFrame({
    'Actual': y_test,
    'Predicted': clean_predicted,
    'Probability': clean_prob.flatten()
})
output_prob



Unnamed: 0,Actual,Predicted,Probability
58,0,0,0.37
14,1,1,0.97
7,1,1,1.0
28,1,1,0.98
114,1,1,0.93
135,0,1,0.95
22,1,1,0.62
119,0,0,0.34
3,1,1,1.0
88,1,1,0.99


In [43]:
output_prob.loc[output_prob['Actual'] != output_prob['Predicted']]

Unnamed: 0,Actual,Predicted,Probability
135,0,1,0.95
179,1,0,0.19
166,0,1,0.94
163,1,0,0.13
46,0,1,0.55
67,1,0,0.47
134,0,1,0.73
83,0,1,0.65
142,0,1,1.0
113,1,0,0.23
