### File:           Project_Model  
  
### Authors:        Brooke McWilliams, James Birch  
  
### Date Created:   11/19/2023  
  
### Last Modified:  12/04/2023  
  
### Description:    Extract features from audio files using the librosa library and perform CNN modeling using the TensorFlow and Keras libraries  
<br>
<br>
<br>


# Audio Preprocessing

In [13]:
import os
import librosa
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow

Walk through the Crema directory and read each audio file.  
Extract a mel spectrogram from each file, resized to 128 x 128, to use as the training feature.  

In [14]:
path = "./Crema Dataset/"

# Time-stretch rate applied per emotion label, where the label is the third
# '_'-separated field of the file name. In librosa, rate > 1 speeds the clip up;
# the 'HAP' and 'NEU' clips are stretched by 1.3, all others by 1.
STRETCH_RATE_BY_LABEL = {
    'ANG': 1,
    'SAD': 1,
    'DIS': 1,
    'FEA': 1,
    'HAP': 1.3,
    'NEU': 1.3,
}

labels = []
wav_data = []        # one [file name, label, mel-spectrogram tensor] record per clip
skipped_files = []   # .wav files whose name does not carry a known label

# os.listdir() returns entries in arbitrary order; sorting keeps the sample
# order (and therefore the seeded train/test split later on) reproducible.
for file in sorted(os.listdir(path)):
    # Only audio files produce a record. Anything else in the directory is
    # ignored instead of re-appending the previous file's label and feature.
    if not file.endswith(".wav"):
        continue

    name_parts = file.split('_')
    label = name_parts[2] if len(name_parts) > 2 else None
    if label not in STRETCH_RATE_BY_LABEL:
        # No feature can be computed for an unknown label; skip the file rather
        # than pairing it with a stale spectrogram from an earlier iteration.
        skipped_files.append(file)
        continue

    file_path = os.path.join(path, file)
    data, sr = librosa.load(file_path, sr=None)  # sr=None keeps the native sample rate

    y_s = librosa.effects.time_stretch(data, rate=STRETCH_RATE_BY_LABEL[label])
    MEL_Feature = librosa.feature.melspectrogram(y=y_s, sr=sr)
    # Add a trailing channel axis so the spectrogram can be resized like an
    # image, giving every clip the same 128 x 128 x 1 shape.
    MEL_Feature = tensorflow.image.resize(np.expand_dims(MEL_Feature, axis=-1), (128, 128))

    wav_data.append([file, label, MEL_Feature])

if skipped_files:
    print(f"Skipped {len(skipped_files)} .wav file(s) with an unrecognized label")


# CNN Modeling

In [15]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv1D, BatchNormalization, Dropout, MaxPooling1D, Flatten, Dense
from keras.optimizers import Adam, RMSprop
from keras.utils import to_categorical
from keras.regularizers import l1, l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Build the CNN model for training  

In [27]:
def build_model(X_train, y_train, X_test, y_test, num_label):
    """Assemble and compile the 1-D convolutional classifier.

    The network is a stack of four Conv1D layers (256, 256, 128, 64 filters)
    with max-pooling and dropout in between, followed by a 64-unit dense layer
    and a softmax output. The model summary is printed as a side effect.

    Args:
        X_train, y_train, X_test, y_test: Accepted for call-site compatibility;
            the function body does not read them. The input shape is fixed at
            (128, 128).
        num_label: Number of output classes (width of the softmax layer).

    Returns:
        The compiled, untrained Sequential model (Adam optimizer with a
        learning rate of 0.001, categorical cross-entropy loss, accuracy metric).
    """
    # Settings shared by every convolution in the stack.
    conv_kwargs = dict(kernel_size=3, padding='same', activation='relu')

    network = Sequential([
        Conv1D(filters=256, input_shape=(128, 128), **conv_kwargs),

        Conv1D(filters=256, **conv_kwargs),
        MaxPooling1D(pool_size=2, padding='same'),
        Dropout(0.2),

        Conv1D(filters=128, **conv_kwargs),
        MaxPooling1D(pool_size=2, padding='same'),
        Dropout(0.2),

        Conv1D(filters=64, **conv_kwargs),
        MaxPooling1D(pool_size=2, padding='same'),

        # Classification head.
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(num_label, activation='softmax'),
    ])

    network.summary()

    network.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy'],
    )

    return network

Encode the labels and split the data into training and testing sets

In [28]:
# Each wav_data record is [file name, label, feature]: index 2 is the model
# input and index 1 is the emotion label.
X = np.array([item[2] for item in wav_data])
y = np.array([item[1] for item in wav_data])

# Map the string labels to integer ids, then one-hot encode them.
encoder = LabelEncoder()
y = encoder.fit_transform(y)
num_label = len(encoder.classes_)
# Size the one-hot targets from the labels actually present so they always
# match the width of the model's softmax layer, which is built from num_label.
y = to_categorical(y, num_classes=num_label)

# Hold out 20% of the samples; the fixed seed makes the split repeatable for a
# given sample order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Shape of X1_train: {X_train.shape}\n")
print(f"Shape of y_train: {y_train.shape}\n")
print(f"Shape of X1_test: {X_test.shape}\n")
print(f"Shape of y_test: {y_test.shape}\n")
print(f"Number of labels: {num_label}\n")

Shape of X1_train: (5953, 128, 128, 1)

Shape of y_train: (5953, 6)

Shape of X1_test: (1489, 128, 128, 1)

Shape of y_test: (1489, 6)

Number of labels: 6



Build the compiled CNN model, train it on the training data, and evaluate it on the test data


In [29]:
model1 = build_model(X_train, y_train, X_test, y_test, num_label)

# `use_multiprocessing` is not passed: it only applies to generator /
# keras.utils.Sequence input, has no effect on in-memory arrays, and is not an
# accepted argument of Model.fit in Keras 3.
# NOTE(review): the test split is also used as validation data during training,
# so the accuracy reported below is measured on data the training run monitored.
model1.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

# evaluate() returns [loss, accuracy] because the model is compiled with the
# 'accuracy' metric.
acc1 = model1.evaluate(X_test, y_test)

print(f"Model Accuracy: {acc1[1]:.4f}\n")

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_28 (Conv1D)          (None, 128, 256)          98560     
                                                                 
 conv1d_29 (Conv1D)          (None, 128, 256)          196864    
                                                                 
 max_pooling1d_21 (MaxPooli  (None, 64, 256)           0         
 ng1D)                                                           
                                                                 
 dropout_23 (Dropout)        (None, 64, 256)           0         
                                                                 
 conv1d_30 (Conv1D)          (None, 64, 128)           98432     
                                                                 
 max_pooling1d_22 (MaxPooli  (None, 32, 128)           0         
 ng1D)                                                

Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Model Accuracy: 0.6266

