# AlexNet - All Features - With 10-Fold Cross-Validation

## Set up

In [1]:
### Load necessary libraries ###
import glob
import os
import librosa
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout

import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

from librosa import display
import librosa

2022-12-01 20:17:28.561526: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Read the pre-split feature and label CSVs. header=None makes pandas treat the
# first row of every file as data rather than as column names.
csv_paths = (
    "allFeaturesData/train_data.csv",
    "allFeaturesData/test_data.csv",
    "allFeaturesData/train_labels.csv",
    "allFeaturesData/test_labels.csv",
)
train_data, test_data, train_lab, test_lab = [
    pd.read_csv(csv_path, header=None) for csv_path in csv_paths
]

In [4]:
# Sanity check: total number of rows across the train and test splits.
train_data.shape[0] + test_data.shape[0]

8732

In [5]:
# Stack the train rows followed by the test rows into one feature frame (X) and
# one label frame (Y). The original row labels are kept (no re-indexing), so
# later cells must select rows by position (.iloc), not by label.
X = pd.concat((train_data, test_data), axis=0)
Y = pd.concat((train_lab, test_lab), axis=0)

In [7]:
# Fold assignments: a header-less CSV whose single column is named 'folds'
# and cast to integers so it can be compared against the fold numbers 1-10.
folds = pd.read_csv("allFeaturesData/All_folds.csv", header=None)
folds.columns = ['folds']
folds = folds.astype({'folds': 'int'})

In [8]:
### Train and evaluate via 10-Folds cross-validation ###
accuracies = []

# Hyper-parameters shared by every fold's model.
pool_size = (2, 2)
kernel_size = (3, 3)
input_shape = (40, 5, 1)
num_classes = 10

for i in range(1, 11):  # 1-10 to match values in folds df
    # Reset Keras' global state before building each fold's model so that
    # graph state from the previous nine models does not accumulate.
    keras.backend.clear_session()

    # Positional boolean mask of the rows that belong to fold i. Rows in the
    # mask form the test set; every other row is used for training.
    is_test = (folds['folds'] == i).to_numpy()
    test_idx = np.where(is_test)[0]
    train_idx = np.where(~is_test)[0]

    # Select rows by position and reshape each flat feature row to the
    # (height, width, channels) layout the network expects.
    x_train = X.iloc[train_idx].to_numpy().reshape(len(train_idx), *input_shape)
    y_train = Y.iloc[train_idx].to_numpy().reshape(len(train_idx),)
    x_test = X.iloc[test_idx].to_numpy().reshape(len(test_idx), *input_shape)
    y_test = Y.iloc[test_idx].to_numpy().reshape(len(test_idx),)

    # Model architecture (AlexNet-style reference network; swap this section
    # out when evaluating a different model).
    model = Sequential()
    model.add(Conv2D(64, kernel_size, padding="same", activation="tanh", input_shape=input_shape))
    model.add(MaxPool2D(pool_size=pool_size))
    model.add(Conv2D(128, kernel_size, padding="same", activation="tanh"))
    model.add(MaxPool2D(pool_size=pool_size))
    model.add(Dropout(0.1))
    model.add(Flatten())
    model.add(Dense(1024, activation="tanh"))
    model.add(Dense(num_classes, activation="softmax"))

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and triggers a warning (or an error on newer Keras releases).
    optimizer = keras.optimizers.Adam(learning_rate=1e-4)
    # Labels are integer class ids (not one-hot), hence the sparse loss.
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    # The held-out fold is passed as validation data only to monitor progress
    # per epoch; it is not used to update the weights.
    model.fit(x_train, y_train, epochs=50, batch_size=50, validation_data=(x_test, y_test))

    # model.evaluate returns [loss, accuracy] (the compiled loss followed by the
    # compiled metrics), measured on the held-out fold with the final weights.
    # Index 1 is therefore the accuracy. If other metrics are added to
    # compile(), they appear at later indices and need their own lists.
    accuracies.append(model.evaluate(x_test, y_test)[1])

2022-12-01 20:27:05.737662: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(name, **kwargs)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

Above is a model that mirrors AlexNet; it is used here to test how well the architecture performs with several features. In the other notebook, `categorical_crossentropy` is used, but in this notebook I did not convert the dependent variable into a one-hot-encoded matrix, so I am using `sparse_categorical_crossentropy`, which accounts for this difference in the data.

In future iterations, I may change the number of features in the filters, or perhaps use the tanh activation function at the end before `softmax`, as this seemed to perform well in our first model (in the other notebook), which used only one feature.

In [9]:
# Pair each fold number (1-10) with the accuracy recorded for it and show the
# result as a two-column table.
fold_rows = [
    {'Folds': fold_no, 'Validation Accuracy': fold_accuracy}
    for fold_no, fold_accuracy in zip(range(1, 11), accuracies)
]
fold_acc = pd.DataFrame(fold_rows, columns=['Folds', 'Validation Accuracy'])

fold_acc

Unnamed: 0,Folds,Validation Accuracy
0,1,0.561283
1,2,0.60473
2,3,0.527568
3,4,0.632323
4,5,0.648504
5,6,0.608748
6,7,0.689737
7,8,0.598015
8,9,0.670343
9,10,0.647551


In [13]:
# Mean of the per-fold accuracies listed in fold_acc.
fold_acc.loc[:, "Validation Accuracy"].mean()

0.6188802480697632

Below are a few helper functions in case we want to extract features over certain windows or implement 10-fold cross-validation.

Resources used:
 - https://github.com/AmritK10/Urban-Sound-Classification 
 - https://www.kaggle.com/code/vortexkol/alexnet-cnn-architecture-on-tensorflow-beginner?scriptVersionId=43150351&cellId=21

In [48]:
### Define convolutional network architecture ###
def get_network(input_shape=(60, 41, 2), num_classes=10, learning_rate=1e-4):
    """Build and compile the convolutional classifier.

    The network is a stack of Conv2D -> BatchNormalization -> ReLU blocks with
    24, 32, 64 and 128 filters. Every block except the last is followed by
    2x2 max pooling; the last one feeds a global max pooling layer, a
    128-unit ReLU dense layer and a softmax output layer.

    Calling this function also clears the Keras backend session, so state from
    previously built models is discarded.

    Args:
        input_shape: Shape of one input example, passed to the first Conv2D
            layer. Defaults to (60, 41, 2).
        num_classes: Number of units in the softmax output layer.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        A compiled `keras.models.Sequential` model using sparse categorical
        cross-entropy loss (integer class labels) and the accuracy metric.
    """
    num_filters = [24, 32, 64, 128]
    pool_size = (2, 2)
    kernel_size = (3, 3)
    keras.backend.clear_session()

    model = keras.models.Sequential()
    last_block = len(num_filters) - 1
    for block_idx, filters in enumerate(num_filters):
        # Only the first layer of the Sequential model declares the input shape.
        first_layer_kwargs = {"input_shape": input_shape} if block_idx == 0 else {}
        model.add(keras.layers.Conv2D(filters, kernel_size,
                                      padding="same", **first_layer_kwargs))
        model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Activation("relu"))
        # The final conv block is reduced by global max pooling below instead
        # of a 2x2 pooling layer.
        if block_idx != last_block:
            model.add(keras.layers.MaxPooling2D(pool_size=pool_size))

    model.add(keras.layers.GlobalMaxPooling2D())
    model.add(keras.layers.Dense(128, activation="relu"))
    model.add(keras.layers.Dense(num_classes, activation="softmax"))

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=["accuracy"])
    return model

In [None]:
### Train and evaluate via 10-Folds cross-validation ###
accuracies = []
# Kept under its own name so the `folds` DataFrame (and the `train_data` /
# `test_data` frames) used by the earlier cells are not overwritten when this
# cell runs.
fold_names = np.array(['fold1','fold2','fold3','fold4',
                       'fold5','fold6','fold7','fold8',
                       'fold9','fold10'])
load_dir = "UrbanSounds8K/processed/"


def _load_fold(fold_name):
    """Read one fold archive and return its (features, labels) arrays.

    NOTE(security): allow_pickle=True lets NumPy unpickle object arrays, which
    can execute arbitrary code. Only load archives you generated yourself.
    """
    fold_path = os.path.join(load_dir, "{0}.npz".format(fold_name))
    # np.load on an .npz keeps the file open until it is closed; the context
    # manager releases the handle once both arrays have been read.
    with np.load(fold_path, allow_pickle=True) as archive:
        return archive["features"], archive["labels"]


# With 10 fold names and 10 splits, each split holds out exactly one fold.
kf = KFold(n_splits=10)
for train_index, test_index in kf.split(fold_names):
    x_train, y_train = [], []
    for ind in train_index:
        # read features or segments of the audio files in this fold
        fold_features, fold_labels = _load_fold(fold_names[ind])
        # for training, stack all the segments so that each segment is
        # treated as its own example/instance
        x_train.append(np.concatenate(fold_features, axis=0))
        y_train.append(np.concatenate(fold_labels, axis=0))
    # stack x,y pairs of all training folds
    x_train = np.concatenate(x_train, axis = 0).astype(np.float32)
    y_train = np.concatenate(y_train, axis = 0).astype(np.float32)

    # for testing we make predictions on each segment and average them to
    # produce a single label for an entire sound clip, so the held-out fold is
    # kept grouped per clip instead of being flattened.
    x_test, y_test = _load_fold(fold_names[test_index][0])

    model = get_network()
    model.fit(x_train, y_train, epochs = 50, batch_size = 24, verbose = 0)

    # evaluate on test set/fold
    y_true, y_pred = [], []
    for x, y in zip(x_test, y_test):
        # average predictions over segments of a sound clip; verbose=0 keeps
        # the per-clip predict calls from flooding the cell output
        avg_p = np.argmax(np.mean(model.predict(x, verbose = 0), axis = 0))
        y_pred.append(avg_p)
        # pick single label via np.unique for a sound clip
        y_true.append(np.unique(y)[0])
    accuracies.append(accuracy_score(y_true, y_pred))
print("Average 10 Folds Accuracy: {0}".format(np.mean(accuracies)))