# Spectrograms - CNN Test

In [115]:
# Import dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

import sqlalchemy
from sqlalchemy import create_engine, inspect

import math
import numpy as np
import pandas as pd
import tensorflow as tf
import keras_tuner as kt
from pprint import pprint

import os
import time
from datetime import datetime

%run functions.ipynb

In [5]:
# Time the run
start_time = time.time()

## Import datasets

In [6]:
# Import the data
engine = create_engine("sqlite:///voice.sqlite")

# View all of the classes
inspector = inspect(engine)
table_names = inspector.get_table_names()
table_names

['aval',
 'bval',
 'demographic',
 'diagnosis',
 'gval',
 'habits',
 'rval',
 'spectrogram']

In [7]:
# Initialise a dictionary to hold dataframes
dataframes = dict()

# Loop through each table
for table in table_names:
    
    # Dataframe name
    df_name = f'{table}_df'
    
    # Create dataframe
    dataframes[df_name] = pd.read_sql(
        f'SELECT * FROM {table}',
        engine
    )

## Preprocessing

### Define the target variable

In [22]:
# Isolate the diagnosis column
y = dataframes['diagnosis_df']['diagnosis'].copy()

# Encode the target variable, ignore subtype
y = y.apply(encode_binary)
y

0      0
1      0
2      1
3      1
4      1
      ..
199    0
200    1
201    1
202    0
203    0
Name: diagnosis, Length: 204, dtype: int64

### Reshape feature variables

In [56]:
# Input shape
width_px = 225
height_px = 166
num_channels = 4 # since RGBA

# Define inputs
input_shape = (height_px, width_px, num_channels)
input_reshape = (height_px, width_px)

In [57]:
# Rename the dataframes
df_r = dataframes['rval_df']
df_g = dataframes['gval_df']
df_b = dataframes['bval_df']
df_a = dataframes['aval_df']

# Reshape each RGBA dataframe to original dimensions
data_r = np.array([df_r[col].values.reshape(input_reshape) for col in df_r.columns])
data_g = np.array([df_g[col].values.reshape(input_reshape) for col in df_g.columns])
data_b = np.array([df_b[col].values.reshape(input_reshape) for col in df_b.columns])
data_a = np.array([df_a[col].values.reshape(input_reshape) for col in df_a.columns])

# Define the feature variables
X = np.stack([
    data_r,
    data_g,
    data_b,
    data_a],
    axis = -1
)

# Display the first 3 for confirmation
X[:3]

array([[[[ 47,  17,  99, 255],
         [ 47,  17,  99, 255],
         [ 43,  16,  93, 255],
         ...,
         [ 35,  11,  70, 255],
         [ 47,  16,  90, 255],
         [ 49,  17,  93, 255]],

        [[ 45,  17,  97, 255],
         [ 45,  17,  98, 255],
         [ 41,  16,  90, 255],
         ...,
         [ 49,  15,  93, 255],
         [ 59,  16, 108, 255],
         [ 60,  15, 111, 255]],

        [[ 46,  17,  98, 255],
         [ 46,  17,  99, 255],
         [ 43,  16,  92, 255],
         ...,
         [ 29,  13,  68, 255],
         [ 51,  16,  99, 255],
         [ 54,  16, 104, 255]],

        ...,

        [[215,  69, 107, 255],
         [215,  69, 107, 255],
         [211,  67, 109, 255],
         ...,
         [ 76,  18, 120, 255],
         [184,  56, 115, 255],
         [201,  62, 114, 255]],

        [[235,  90,  96, 255],
         [235,  90,  96, 255],
         [231,  87,  98, 255],
         ...,
         [ 91,  20, 125, 255],
         [206,  70, 105, 255],
         

### Split and Scale

In [58]:
# Split the preprocessed data to training and testing datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

In [59]:
# Reshape the data
X_train_reshaped = X_train.reshape((
    X_train.shape[0], # total number of samples
    height_px * width_px * num_channels # total number flattened
))

X_test_reshaped = X_test.reshape((
    X_test.shape[0],
    height_px * width_px * num_channels
    ))
X_train_reshaped

array([[ 47,  17,  99, ..., 161, 120, 255],
       [ 47,  17,  99, ..., 162, 120, 255],
       [ 47,  17,  99, ..., 162, 120, 255],
       ...,
       [ 46,  17,  98, ..., 162, 120, 255],
       [ 47,  17,  99, ..., 162, 120, 255],
       [ 47,  17,  99, ..., 160, 119, 255]])

In [60]:
# Normalize training data to be between 0 and 1
X_scaler = MinMaxScaler()

# Scale the data
X_train_scaled = X_scaler.fit_transform(X_train_reshaped)
X_test_scaled = X_scaler.fit_transform(X_test_reshaped)

# Reshape the data back to the original
X_train_scaled = X_train_scaled.reshape((
    X_train_scaled.shape[0],
    height_px,
    width_px,
    num_channels
))
X_test_scaled = X_test_scaled.reshape((
    X_test_scaled.shape[0],
    height_px,
    width_px,
    num_channels
))

## Hyperparameter Tuning

In [233]:
# Define the model parameters
min_conv_layers = 2
max_conv_layers = 5

choose_conv_layers = list(np.arange(
    min_conv_layers,
    max_conv_layers + 1
))

choose_conv_filters = list(np.arange(3, 9))

# Define the kernel size choices, should be odd
choose_kernel_size = list(np.arange(3, 9, 2))
print(choose_kernel_size)

# Convolutional layer activation functions
activation_functions = [
    'relu', 'leaky_relu', 'tanh',
    'elu', 'selu', 'exponential',
    'softmax', 'softplus'
]

# Define the max number of dense layers
max_dense_layers = 2
min_dense_neurons = 2 ** 3
max_dense_neurons = 2 ** 7

# Define tuner parameters
tuner_max_epochs = 10
search_max_epochs = 10
hp_iterations = 2

[3, 5, 7]


In [268]:
def create_cnn_model(hp):
    cnn_model = Sequential()
    
    # Choose the number of convolutional layers
    options_conv_layers = [int(value) for value in choose_conv_layers]
    num_conv_layers = hp.Choice(
        'num_conv_layers',
        values = options_conv_layers
    )
    print(f'Number of convolutional layers: ', num_conv_layers)
    
    # Choose the number of filters per layer
    options_conv_filters = [int(2 ** value) for value in choose_conv_filters]
    print(options_conv_filters)
    
    options_kernel = [int(value) for value in choose_kernel_size]
    
    
    
    # Create convolutional layers
    for i in range(1, num_conv_layers + 1):
        print(i)
        
        if (i == 1):
            filters = hp.Choice(
                'filters_layer_1',
                values = options_conv_filters
            )
            
            first_index = options_conv_filters.index(filters)
            print(f'Index is: {first_index}, {filters}')
            
        else:
            next_index = first_index + i - 1
            filters = hp.Choice(
                f'filters_layer_{i}',
                values = [options_conv_filters[next_index]]
            )
        
        # Choose the kernel size per layer
        kernel_size = hp.Choice(
            f'conv_kernel_size_{i}',
            values = options_kernel
        )
        
        # Choose the activation function per layer
        activation = hp.Choice(
            f'conv_activation_{i}',
            activation_functions
        )
        
        # Add the first convolutional layer
        if (i == 1):
            
        cnn_model.add(Conv2D(
            filters = filters,
            kernel_size = (kernel_size, kernel_size),
            activation = activation,
            input_shape = (
                height_px,
                width_px,
                num_channels) if i == 1 else None
        ))

In [269]:
# Initialise the Hyperband tuner
tuner = kt.Hyperband(
    create_cnn_model,
    objective = "val_accuracy",
    max_epochs = tuner_max_epochs,
    hyperband_iterations = hp_iterations
)

Number of convolutional layers:  2
[8, 16, 32, 64, 128, 256]
1
Index is: 0, 8
2


TypeError: 'NoneType' object is not iterable

In [131]:
# Find the best hyperparameters
tuner.search(
    X_train_scaled,
    y_train,
    epochs = search_max_epochs,
    validation_data = (X_test_scaled, y_test)
)


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
4                 |4                 |num_conv_layers
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

4


FatalTypeError: Expected the model-building function, or HyperModel.build() to return a valid Keras Model instance. Received: None of type <class 'NoneType'>.

In [177]:
# Define the CNN model
model = Sequential()

# Add convolutional layers
model.add(Conv2D(
    32,
    (3, 3),
    activation = 'relu',
    input_shape = (height_px, width_px, num_channels)
))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(
    64,
    (3, 3),
    activation = 'relu'
))

model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(
    128,
    (3, 3),
    activation='relu'
))

model.add(MaxPooling2D((2, 2)))

# Flatten the output before feeding into the fully connected layers
model.add(Flatten())

# Add dense layers for classification
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'])

# Train the model
model.fit(
    X_train_scaled,
    y_train,
    epochs = 15,
    shuffle = True, # reduce risk of overfitting
    verbose = 1
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x350848190>

In [178]:
# Evaluate the model using the test data
model_loss, model_accuracy = model.evaluate(
    X_test_scaled,
    y_test,
    verbose = 2
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 0.8180 - accuracy: 0.6667 - 163ms/epoch - 81ms/step
Loss: 0.8179723024368286, Accuracy: 0.6666666865348816
