## New CNN Notebook

In [1]:
import sys
import os
import zipfile
import numpy as np
import numpy.matlib as npm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import scipy.io as sio
import pandas as pd
import numpy as np
import warnings
import itertools

from scipy.signal import butter, filtfilt

In [2]:
from sklearn.model_selection import KFold 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.utils import shuffle

import tensorflow as tf
from tensorflow import keras

from keras.layers import Dense, LSTM, Input
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, Conv2D, BatchNormalization
from keras.layers import Input,Flatten, Dense
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core import Dropout, Activation
from keras.layers.pooling import GlobalAveragePooling2D
from keras.models import Model
from keras.models import load_model
from keras import optimizers
from keras.losses import categorical_crossentropy
from keras.layers import Dense, Activation, Flatten, Dropout, Conv2D, BatchNormalization
from keras.utils.np_utils import to_categorical
from keras import initializers, regularizers

## Helper Functions

In [3]:
def butter_bandpass_filter(data, lowcut, highcut, sample_rate, order):
    '''
    Returns bandpass filtered data between the frequency ranges specified in the input.
    Args:
        data (numpy.ndarray): array of samples. 
        lowcut (float): lower cutoff frequency (Hz).
        highcut (float): lower cutoff frequency (Hz).
        sample_rate (float): sampling rate (Hz).
        order (int): order of the bandpass filter.
    Returns:
        (numpy.ndarray): bandpass filtered data.
    '''
    
    nyq = 0.5 * sample_rate
    low = lowcut / nyq
    high = highcut / nyq
    b, a = butter(order, [low, high], btype='band')
    y = filtfilt(b, a, data)
    return y

In [4]:
def buffer(data, duration, data_overlap):
    '''
    Returns segmented data based on the provided input window duration and overlap.
    Args:
        data (numpy.ndarray): array of samples. 
        duration (int): window length (number of samples).
        data_overlap (int): number of samples of overlap.
    Returns:
        (numpy.ndarray): segmented data of shape (number_of_segments, duration).
    '''
    
    number_segments = int(np.ceil((len(data) - data_overlap)/(duration - data_overlap)))
    temp_buf = [data[i:i+duration] for i in range(0, len(data), (duration - int(data_overlap)))]
    temp_buf[number_segments-1] = np.pad(temp_buf[number_segments-1],
                                         (0, duration-temp_buf[number_segments-1].shape[0]),
                                         'constant')
    segmented_data = np.vstack(temp_buf[0:number_segments])
    
    return segmented_data

In [5]:
def ingest_eeg(csvname, flicker_freq):

    # Establishing what flicker frequencies are present
    flicker_freq_dict = dict()    

    path = os.path.split(os.getcwd())[0] + '/data/' + csvname + '.csv'

    df = pd.read_csv(path)

    #count value for zero cases
    count = 0

    # Adding row keys of relevant frequencies from dataframe 
    for i, freq_point in enumerate(df['Frequency']):
        if not np.isnan(freq_point): 
            
            #zero case
            if freq_point == 0 and count < (1 + num_stims):
                count +=1

            elif freq_point == 0 and count == (1 + num_stims):

                if freq_point not in flicker_freq_dict.keys():
                    flicker_freq_dict.update({freq_point: [i]})
                else:
                    flicker_freq_dict[freq_point].append(i)

                count = 1
            
            #normal case  
            elif freq_point not in flicker_freq_dict.keys():
                flicker_freq_dict.update({freq_point: [i]})
            else:
                flicker_freq_dict[freq_point].append(i)

    flicker_freq = np.array(list(flicker_freq_dict.keys()))
    flicker_freq.sort()

    # Formatting the eeg data -> making the appropriate matrix
    # Initializing the dimensions of the eeg matrix

    num_classes = len(flicker_freq) 
    n_ch = 8 
    total_trial_len = 1114 

    #scales to number of trials in csv for each freq
    num_trials = min(len(flicker_freq_dict[key]) for key in flicker_freq)

    #instantiates eeg data in 4 dimensional np array
    eeg = np.zeros((num_classes,n_ch,total_trial_len,num_trials))

    # Assigning the correct values to the matrix/object

    start_idx_list = []


    #grabs start and endpoints for each frequency flash
    for i, freq in enumerate(flicker_freq):
        for j in range(num_trials):
            start_idx = flicker_freq_dict[freq][j]
            start_idx_list.append(start_idx)
            end_idx = start_idx + total_trial_len

            #shaves off timestamps and markers and does a transpose, we transpose it back and cast as a np array
            eeg[i, :, :, j] = np.array(df.iloc[start_idx:end_idx, 1:9]).transpose((1,0))

    return eeg, flicker_freq


In [6]:
#no overlapping buffer, were keeping it simple for now (ask aravind later)
def get_filtered_eeg(eeg, lowcut, highcut, order, sample_rate):
    '''
    Returns bandpass filtered eeg for all channels and trials.
    Args:
        eeg (numpy.ndarray): raw eeg data of shape (num_classes, num_channels, num_samples, num_trials).
        lowcut (float): lower cutoff frequency (Hz).
        highcut (float): lower cutoff frequency (Hz).
        order (int): order of the bandpass filter.
        sample_rate (float): sampling rate (Hz).
    Returns:
        (numpy.ndarray): bandpass filtered eeg of shape (num_classes, num_channels, num_samples, num_trials).
    '''
    
    num_classes = eeg.shape[0]
    num_chan = eeg.shape[1]
    total_trial_len = eeg.shape[2]
    num_trials = eeg.shape[3]

    #instantiate object to be sent to BP filter
    filtered_data = np.zeros((eeg.shape[0], eeg.shape[1], total_trial_len, eeg.shape[3]))

    
    for target in range(0, num_classes):
        for channel in range(0, num_chan):
            for trial in range(0, num_trials):
                
                #data to be filtered
                signal_to_filter = np.squeeze( eeg[target, channel, 0:total_trial_len, trial] )
                
                #call to BP filter
                filtered_data[target, channel, :, trial] = butter_bandpass_filter(signal_to_filter, 
                                                                                  lowcut, highcut, 
                                                                                  sample_rate, order)
                
    return filtered_data

In [7]:
def get_segmented_epochs(data, window_len, shift_len, sample_rate):
    '''
    Returns epoched eeg data based on the window duration and step size.
    Args:
        data (numpy.ndarray): array of samples. 
        window_len (int): window length (seconds).
        shift_len (int): step size (seconds).
        sample_rate (float): sampling rate (Hz).
    Returns:
        (numpy.ndarray): epoched eeg data of shape. 
        (num_classes, num_channels, num_trials, number_of_segments, duration).
    '''
    
    num_classes = data.shape[0]
    num_chan = data.shape[1]
    num_trials = data.shape[3]
    
    duration = int(window_len*sample_rate)
    data_overlap = (window_len - shift_len)*sample_rate
    
    #number_of_segments = int(np.ceil((data.shape[2] - data_overlap)/
                                #       (duration - data_overlap)))

    number_of_segments = 4 

    print('number of segments')
    print(number_of_segments)
    
    segmented_data = np.zeros((data.shape[0], data.shape[1], 
                               data.shape[3], number_of_segments, duration))

    for target in range(0, num_classes):
        for channel in range(0, num_chan):
            for trial in range(0, num_trials):
                segmented_data[target, channel, trial, :, :] = buffer(data[target, channel, :, trial], 
                                                                      duration, data_overlap) 
    
    return segmented_data

In [8]:
def get_magnitude_spectrum_features(segmented_data, FFT_PARAMS):
    '''
    Returns magnitude spectrum features. Fast Fourier Transform computed based on
    the FFT parameters provided as input.

    Args:
        segmented_data (numpy.ndarray): epoched eeg data of shape 
        (num_classes, num_channels, num_trials, number_of_segments, num_samples).
        FFT_PARAMS (dict): dictionary of parameters used for feature extraction.
        FFT_PARAMS['resolution'] (float): frequency resolution per bin (Hz).
        FFT_PARAMS['start_frequency'] (float): start frequency component to pick from (Hz). 
        FFT_PARAMS['end_frequency'] (float): end frequency component to pick upto (Hz). 
        FFT_PARAMS['sampling_rate'] (float): sampling rate (Hz).

    Returns:
        (numpy.ndarray): magnitude spectrum features of the input EEG.
        (n_fc, num_channels, num_classes, num_trials, number_of_segments).
    '''
    
    num_classes = segmented_data.shape[0]
    num_chan = segmented_data.shape[1]
    num_trials = segmented_data.shape[2]
    number_of_segments = segmented_data.shape[3]
    fft_len = segmented_data[0, 0, 0, 0, :].shape[0]

    NFFT = round(FFT_PARAMS['sampling_rate']/FFT_PARAMS['resolution'])
    fft_index_start = int(round(FFT_PARAMS['start_frequency']/FFT_PARAMS['resolution']))
    fft_index_end = int(round(FFT_PARAMS['end_frequency']/FFT_PARAMS['resolution']))+1

    features_data = np.zeros(((fft_index_end - fft_index_start), 
                              segmented_data.shape[1], segmented_data.shape[0], 
                              segmented_data.shape[2], segmented_data.shape[3]))
    
    for target in range(0, num_classes):
        for channel in range(0, num_chan):
            for trial in range(0, num_trials):
                for segment in range(0, number_of_segments):
                    temp_FFT = np.fft.fft(segmented_data[target, channel, trial, segment, :], NFFT)/fft_len
                    magnitude_spectrum = 2*np.abs(temp_FFT)
                    features_data[:, channel, target, trial, segment] = magnitude_spectrum[fft_index_start:fft_index_end,]
    
    return features_data

In [9]:
def complex_spectrum_features(segmented_data, FFT_PARAMS):
    '''
    Returns complex spectrum features. Fast Fourier Transform computed based on
    the FFT parameters provided as input. The real and imaginary parts of the input
    signal are concatenated into a single feature vector.

    Args:
        segmented_data (numpy.ndarray): epoched eeg data of shape 
        (num_classes, num_channels, num_trials, number_of_segments, num_samples).
        FFT_PARAMS (dict): dictionary of parameters used for feature extraction.
        FFT_PARAMS['resolution'] (float): frequency resolution per bin (Hz).
        FFT_PARAMS['start_frequency'] (float): start frequency component to pick from (Hz). 
        FFT_PARAMS['end_frequency'] (float): end frequency component to pick upto (Hz). 
        FFT_PARAMS['sampling_rate'] (float): sampling rate (Hz).

    Returns:
        (numpy.ndarray): complex spectrum features of the input EEG.
        (2*n_fc, num_channels, num_classes, num_trials, number_of_segments)
    '''
    
    num_classes = segmented_data.shape[0]
    num_chan = segmented_data.shape[1]
    num_trials = segmented_data.shape[2]
    number_of_segments = segmented_data.shape[3]
    fft_len = segmented_data[0, 0, 0, 0, :].shape[0]

    NFFT = round(FFT_PARAMS['sampling_rate']/FFT_PARAMS['resolution'])
    fft_index_start = int(round(FFT_PARAMS['start_frequency']/FFT_PARAMS['resolution']))
    fft_index_end = int(round(FFT_PARAMS['end_frequency']/FFT_PARAMS['resolution']))+1

    features_data = np.zeros((2*(fft_index_end - fft_index_start), 
                              segmented_data.shape[1], segmented_data.shape[0], 
                              segmented_data.shape[2], segmented_data.shape[3]))
    
    for target in range(0, num_classes):
        for channel in range(0, num_chan):
            for trial in range(0, num_trials):
                for segment in range(0, number_of_segments):
                    temp_FFT = np.fft.fft(segmented_data[target, channel, trial, segment, :], NFFT)/fft_len
                    real_part = np.real(temp_FFT)
                    imag_part = np.imag(temp_FFT)
                    features_data[:, channel, target, trial, segment] = np.concatenate((
                        real_part[fft_index_start:fft_index_end,], 
                        imag_part[fft_index_start:fft_index_end,]), axis=0)
    
    return features_data

In [10]:
def CNN_model(input_shape, CNN_PARAMS):
    '''
    Returns the Concolutional Neural Network model for SSVEP classification.

    Args:
        input_shape (numpy.ndarray): shape of input training data 
        e.g. [num_training_examples, num_channels, n_fc] or [num_training_examples, num_channels, 2*n_fc].
        CNN_PARAMS (dict): dictionary of parameters used for feature extraction.        
        CNN_PARAMS['batch_size'] (int): training mini batch size.
        CNN_PARAMS['epochs'] (int): total number of training epochs/iterations.
        CNN_PARAMS['droprate'] (float): dropout ratio.
        CNN_PARAMS['learning_rate'] (float): model learning rate.
        CNN_PARAMS['lr_decay'] (float): learning rate decay ratio.
        CNN_PARAMS['l2_lambda'] (float): l2 regularization parameter.
        CNN_PARAMS['momentum'] (float): momentum term for stochastic gradient descent optimization.
        CNN_PARAMS['kernel_f'] (int): 1D kernel to operate on conv_1 layer for the SSVEP CNN. 
        CNN_PARAMS['n_ch'] (int): number of eeg channels
        CNN_PARAMS['num_classes'] (int): number of SSVEP targets/classes

    Returns:
        (keras.Sequential): CNN model.
    '''
    
    model = Sequential()
    model.add(Conv2D(2*CNN_PARAMS['n_ch'], kernel_size=(CNN_PARAMS['n_ch'], 1), 
                     input_shape=(input_shape[0], input_shape[1], input_shape[2]), 
                     padding="valid", kernel_regularizer=regularizers.l2(CNN_PARAMS['l2_lambda']), 
                     kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(CNN_PARAMS['droprate']))  
    model.add(Conv2D(2*CNN_PARAMS['n_ch'], kernel_size=(1, CNN_PARAMS['kernel_f']), 
                     kernel_regularizer=regularizers.l2(CNN_PARAMS['l2_lambda']), padding="valid", 
                     kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(CNN_PARAMS['droprate']))  
    model.add(Flatten())
    model.add(Dense(CNN_PARAMS['num_classes'], activation='softmax', 
                    kernel_regularizer=regularizers.l2(CNN_PARAMS['l2_lambda']), 
                    kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=None)))
    
    return model

## PARAMETERS

In [11]:
# define params
CNN_PARAMS = {
    'batch_size': 64,
    'epochs': 250,
    'droprate': 0.25,
    'learning_rate': 0.001,
    'lr_decay': 0.0,
    'l2_lambda': 0.0001,
    'momentum': 0.9,
    'kernel_f': 10,
    'n_ch': 8, 
    'num_classes': 5} # can be changed

all_acc = np.zeros((10, 1))


In [12]:
# Setting up some completely fixed parameters
FFT_PARAMS = {
    'resolution': 0.2930,
    'start_frequency': 3.0,
    'end_frequency': 35.0,
    'sampling_rate': 250
}

magnitude_spectrum_features = dict()

flicker_freq = []

#change this depending on the number of stimuli in the data
num_stims = 4

#window and shift in time (seconds)

#left at 4.456 seconds (total time of one trail for one class) by default for no overlapping segments, only one segment
# in mikas notebook these were both set to 1s
window_len = 4.456
shift_len = 4.456

sample_rate = FFT_PARAMS['sampling_rate']

## DATA Ingestion

In [13]:
#REPLACE CSV NAME WITH CSV OF INTEREST

#---> gets data from mind-speech-interface-ssvep/eeg_ai_layer/data/

csvnames = ['174_2022_159090', '174_2022_445753', '174_2022_538724'] #Bryan

#csvnames = ['174_2022_040508', '174_2022_123780', '174_2022_729377'] #Chris

#csvnames = ['173_2022_515272'] #Avery

eeg1, flicker_freq1 = ingest_eeg(csvnames[0], flicker_freq)
eeg2, flicker_freq2 = ingest_eeg(csvnames[1], flicker_freq)
eeg3, flicker_freq3 = ingest_eeg(csvnames[2], flicker_freq)

eeg = np.concatenate((eeg1, eeg2, eeg3), axis=3)

#combines epoched data across all csvs along trials axis
eeg.shape

(5, 8, 1114, 15)

## DATA Filtering

In [14]:
#wrapper function for EEG data filtering with 4th order BP
filtered_data = get_filtered_eeg(eeg, 9, 17, 4, sample_rate)
filtered_data.shape #(classes, channels, # of samples, # of trials)

(5, 8, 1114, 15)

# Feature Extraction 

In [15]:
print(flicker_freq1)
print(flicker_freq2)
print(flicker_freq3)

flicker_freq = flicker_freq1
 

[ 0.   10.25 11.75 12.75 14.75]
[ 0.   10.25 11.75 12.75 14.75]
[ 0.   10.25 11.75 12.75 14.75]


## DATA Manufacturing via WINDOW length & SHIFT length

In [16]:
# !!!!!!!!!!! set to full window size and no shift by default, change params to change this

segmented_data = get_segmented_epochs(filtered_data, window_len, shift_len, sample_rate)
segmented_data.shape #(classes, channels, trials, number of windowed segments, total number of samples)

number of segments
4


(5, 8, 15, 4, 1114)

In [17]:
#segmented_data = np.transpose(segmented_data, [3,0, 1, 4, 2])
#segmented_data.shape 

In [18]:
for i in segmented_data:
    print(i)

[[[[ 0.17261569  3.27745455  6.09409511 ...  1.57613305  1.29929393
     0.8893768 ]
   [ 0.17261569  3.27745455  6.09409511 ...  1.57613305  1.29929393
     0.8893768 ]
   [ 0.17261569  3.27745455  6.09409511 ...  1.57613305  1.29929393
     0.8893768 ]
   [ 0.17261569  3.27745455  6.09409511 ...  1.57613305  1.29929393
     0.8893768 ]]

  [[-0.28652598 -3.67091128 -6.80085691 ... -4.12446173 -3.18477436
    -2.04069937]
   [-0.28652598 -3.67091128 -6.80085691 ... -4.12446173 -3.18477436
    -2.04069937]
   [-0.28652598 -3.67091128 -6.80085691 ... -4.12446173 -3.18477436
    -2.04069937]
   [-0.28652598 -3.67091128 -6.80085691 ... -4.12446173 -3.18477436
    -2.04069937]]

  [[-0.2235469  -0.97714118 -1.65935977 ...  2.59495416  1.62835943
     0.5862773 ]
   [-0.2235469  -0.97714118 -1.65935977 ...  2.59495416  1.62835943
     0.5862773 ]
   [-0.2235469  -0.97714118 -1.65935977 ...  2.59495416  1.62835943
     0.5862773 ]
   [-0.2235469  -0.97714118 -1.65935977 ...  2.59495416  1.62

In [19]:
segmented_data[0].shape

(8, 15, 4, 1114)

In [20]:
#magnitude_spectrum_features= magnitude_spectrum_features(segmented_data, FFT_PARAMS)



for i in range(len(segmented_data)):
    magnitude_spectrum_features[i] = get_magnitude_spectrum_features(segmented_data, FFT_PARAMS)



In [21]:
print(magnitude_spectrum_features[i].shape)

(110, 8, 5, 15, 4)


# DATA Train/Validation Split 

In [22]:
mcnn_training_data = dict()
mcnn_results = dict()

In [23]:
#function to get training data

def get_training_data(features_data):
    print(features_data.shape[3])
    features_data = np.reshape(features_data, (features_data.shape[0], features_data.shape[1], 
                                               features_data.shape[2], 
                                               features_data.shape[3]*features_data.shape[4]))

    train_data = features_data[:, :, 0, :].T
    for target in range(1, features_data.shape[2]):
        train_data = np.vstack([train_data, np.squeeze(features_data[:, :, target, :]).T])

    train_data = np.reshape(train_data, (train_data.shape[0], train_data.shape[1], 
                                         train_data.shape[2], 1))
    print('train data shape:')
    print(train_data.shape)
    total_epochs_per_class = features_data.shape[3] 
    print('features data shape')
    print(features_data.shape)
    print('total epochs per class')
    print(total_epochs_per_class)
    features_data = []
    class_labels = np.arange(CNN_PARAMS['num_classes'])
    
    print(class_labels)
    labels = (npm.repmat(class_labels, total_epochs_per_class, 1).T).ravel()
    print('len labels')
    print(len(labels))
    labels = to_categorical(labels)
  
    print(labels)
    
    return train_data, labels

In [24]:
mcnn_training_data = dict()
mcnn_results = dict()

for i in range(len(magnitude_spectrum_features.keys())):
    mcnn_training_data[i] = dict()
    print(magnitude_spectrum_features[i].shape)
   
    train_data, labels = get_training_data(magnitude_spectrum_features[i])
    mcnn_training_data[i]['train_data'] = train_data
    #print(len(train_data))
    #print(len(labels))
    mcnn_training_data[i]['label'] = labels
    
   

(110, 8, 5, 15, 4)
15
train data shape:
(300, 8, 110, 1)
features data shape
(110, 8, 5, 60)
total epochs per class
60
[0 1 2 3 4]
len labels
300
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]]
(110, 8, 5, 15, 4)
15
train data shape:
(300, 8, 110, 1)
features data shape
(110, 8, 5, 60)
total epochs per class
60
[0 1 2 3 4]
len labels
300
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]]
(110, 8, 5, 15, 4)
15
train data shape:
(300, 8, 110, 1)
features data shape
(110, 8, 5, 60)
total epochs per class
60
[0 1 2 3 4]
len labels
300
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 1.]]
(110, 8, 5, 15, 4)
15
train data shape:
(300, 8, 110, 1)
features data shape
(110, 8, 5, 60)
total epochs per class
60
[0 1 2 3 4]
len labels
300
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [0. 0. 0. 0. 1

In [25]:
mcnn_training_data[0]['train_data'].shape

(300, 8, 110, 1)

In [26]:
mcnn_training_data[0]['label'].shape

(300, 5)

In [27]:

def split_train_test(data, labels):
   
   """Splits data into training and validation set"""
   n_data, n_labels = shuffle(data,labels, random_state = 0)

   split_index = int(np.round(len(n_data)*0.8) )
   print(split_index)
   

   train_data = n_data[:split_index]
   test_data = n_data[split_index:]

   train_labels = n_labels[:split_index]
   test_labels = n_labels[split_index:]

   return (train_data, test_data, train_labels, test_labels) 


In [28]:
train_data, test_data, train_labels, test_labels = split_train_test(mcnn_training_data[0]['train_data'], mcnn_training_data[0]['label'])

240


In [29]:
train_data.shape 

(240, 8, 110, 1)

In [30]:
train_labels.shape

(240, 5)

# Training CNN Model

In [31]:
model = load_model('./CNN_files_OLD/model.h5') #*make sure h5 model is named "model.h5" and in working directory

In [32]:
model.summary(line_length = 100)

Model: "sequential_100"
____________________________________________________________________________________________________
 Layer (type)                                Output Shape                            Param #        
 conv2d_200 (Conv2D)                         (None, 1, 110, 16)                      144            
                                                                                                    
 batch_normalization_200 (BatchNormalization  (None, 1, 110, 16)                     64             
 )                                                                                                  
                                                                                                    
 activation_200 (Activation)                 (None, 1, 110, 16)                      0              
                                                                                                    
 dropout_200 (Dropout)                       (None, 1, 110, 16)    

In [33]:
from tensorflow.keras import layers 

In [34]:
# Build new model
new_model = Sequential()


for layer in model.layers[:-1]: # go through until last layer
    print(layer)
    new_model.add(layer)
    
#new_model.add(Dense(13, activation='softmax'))


new_model.add(Dense(5, activation='softmax'))
new_model.summary()

<keras.layers.convolutional.conv2d.Conv2D object at 0x0000020168482020>
<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x0000020168483AF0>
<keras.layers.core.activation.Activation object at 0x0000020168482200>
<keras.layers.regularization.dropout.Dropout object at 0x00000201684836D0>
<keras.layers.convolutional.conv2d.Conv2D object at 0x0000020168483E80>
<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x0000020168483E50>
<keras.layers.core.activation.Activation object at 0x00000201687C4DC0>
<keras.layers.regularization.dropout.Dropout object at 0x00000201687C4DF0>
<keras.layers.reshaping.flatten.Flatten object at 0x00000201687C7550>
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_200 (Conv2D)         (None, 1, 110, 16)        144       
                                                                 
 batch_normaliza

In [35]:
for layer in new_model.layers[:-1]:
  layer.trainable = False 


## Compile The New Model

In [36]:
new_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [37]:
history = new_model.fit(train_data, train_labels, epochs = 100, validation_data = (test_data, test_labels))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [38]:
test_data.shape

(60, 8, 110, 1)

# Model Metrics and Evaluation

## Making Predictions

In [39]:
preds = new_model.predict(test_data)
preds.shape



(60, 5)

In [40]:
new_preds = []
for i in range(0, len(preds)):
  new_preds.append(np.argmax(preds[i]))

In [41]:
new_preds

[2,
 3,
 1,
 0,
 2,
 4,
 3,
 0,
 1,
 2,
 1,
 0,
 4,
 3,
 0,
 2,
 0,
 4,
 2,
 0,
 3,
 2,
 2,
 4,
 3,
 0,
 0,
 2,
 3,
 4,
 4,
 2,
 4,
 2,
 1,
 3,
 4,
 1,
 4,
 1,
 0,
 2,
 4,
 2,
 4,
 0,
 1,
 1,
 1,
 1,
 4,
 4,
 3,
 0,
 1,
 4,
 1,
 1,
 0,
 2]

In [42]:
g = np.argmax(test_labels, axis = -1)


In [44]:
def calculate_results(y_true, y_pred):
  """
  Calculates model accuracy, precision, recall and f1 score of a binary classification model.
  Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array
  Returns a dictionary of accuracy, precision, recall, f1-score.
  """
  # Calculate model accuracy
  model_accuracy = accuracy_score(y_true, y_pred) * 100
  # Calculate model precision, recall and f1 score using "weighted average
  model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
  model_results = {"accuracy": model_accuracy,
                  "precision": model_precision,
                  "recall": model_recall,
                  "f1": model_f1}
  return model_results

In [45]:
calculate_results(g, new_preds)

{'accuracy': 95.0,
 'precision': 0.9615384615384616,
 'recall': 0.95,
 'f1': 0.9493135011441648}

In [46]:
new_model.save('4_freq_model.h5')

## Evaluating performance and getting metrics

In [48]:
magnitude_spectrum_features.shape

AttributeError: 'dict' object has no attribute 'shape'

In [None]:
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15, norm=False, savefig=False): 
  """Makes a labelled confusion matrix comparing predictions and ground truth labels.
  If classes is passed, confusion matrix will be labelled, if not, integer class values
  will be used.
  Args:
    y_true: Array of truth labels (must be same shape as y_pred).
    y_pred: Array of predicted labels (must be same shape as y_true).
    classes: Array of class labels (e.g. string form). If `None`, integer labels are used.
    figsize: Size of output figure (default=(10, 10)).
    text_size: Size of output figure text (default=15).
    norm: normalize values or not (default=False).
    savefig: save confusion matrix to file (default=False).
  
  Returns:
    A labelled confusion matrix plot comparing y_true and y_pred.
  Example usage:
    make_confusion_matrix(y_true=test_labels, # ground truth test labels
                          y_pred=y_preds, # predicted labels
                          classes=class_names, # array of class label names
                          figsize=(15, 15),
                          text_size=10)
  """  
  # Create the confustion matrix
  cm = confusion_matrix(y_true, y_pred)
  cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis] # normalize it
  n_classes = cm.shape[0] # find the number of classes we're dealing with

  # Plot the figure and make it pretty
  fig, ax = plt.subplots(figsize=figsize)
  cax = ax.matshow(cm, cmap=plt.cm.Blues) # colors will represent how 'correct' a class is, darker == better
  fig.colorbar(cax)

  # Are there a list of classes?
  if classes:
    labels = classes
  else:
    labels = np.arange(cm.shape[0])
  
  # Label the axes
  ax.set(title="Confusion Matrix",
         xlabel="Predicted label",
         ylabel="True label",
         xticks=np.arange(n_classes), # create enough axis slots for each class
         yticks=np.arange(n_classes), 
         xticklabels=labels, # axes will labeled with class names (if they exist) or ints
         yticklabels=labels)
  
  # Make x-axis labels appear on bottom
  ax.xaxis.set_label_position("bottom")
  ax.xaxis.tick_bottom()

  # Set the threshold for different colors
  threshold = (cm.max() + cm.min()) / 2.

  # Plot the text on each cell
  for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    if norm:
      plt.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
              horizontalalignment="center",
              color="white" if cm[i, j] > threshold else "black",
              size=text_size)
    else:
      plt.text(j, i, f"{cm[i, j]}",
              horizontalalignment="center",
              color="white" if cm[i, j] > threshold else "black",
              size=text_size)

  # Save the figure to the current working directory
  if savefig:
    fig.savefig("confusion_matrix.png")

In [None]:
def calculate_results(y_true, y_pred):
  """
  Calculates model accuracy, precision, recall and f1 score of a binary classification model.
  Args:
      y_true: true labels in the form of a 1D array
      y_pred: predicted labels in the form of a 1D array
  Returns a dictionary of accuracy, precision, recall, f1-score.
  """
  # Calculate model accuracy
  model_accuracy = accuracy_score(y_true, y_pred) * 100
  # Calculate model precision, recall and f1 score using "weighted average
  model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(y_true, y_pred, average="weighted")
  model_results = {"accuracy": model_accuracy,
                  "precision": model_precision,
                  "recall": model_recall,
                  "f1": model_f1}
  return model_results