### File:           Project_Model  
  
### Authors:        Brooke McWilliams, James Birch  
  
### Date Created:   11/19/2023  
  
### Last Modified:  12/04/2023  
  
### Description:    Extract features from audio files using the librosa library and train CNN models using the tensorflow and keras libraries  
<br>
<br>
<br>


# Audio Preprocessing

In [None]:
import os
import librosa
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow
import noisereduce as nr


Walk through the Crema directory and read each audio file  
Extract features from the file for training  

### AUDIO FINAL

In [None]:
# Directory that is scanned for .wav files.
path = "./Crema Dataset/"

# Time-stretch rate applied per emotion code. The code is the third
# "_"-separated field of the file name.
stretch_rates = {
    'ANG': 1,
    'SAD': 1,
    'DIS': 1,
    'FEA': 1,
    'HAP': 1.3,
    'NEU': 1.3,
}

# One [file name, label, mel-spectrogram feature] entry per processed file.
wav_data = []

for file in os.listdir(path):
    # Only .wav files yield an entry; anything else in the directory is ignored.
    if not file.endswith(".wav"):
        continue

    label = file.split('_')[2]
    # Skip labels with no configured rate instead of re-appending the
    # features computed for a previous file.
    if label not in stretch_rates:
        continue

    file_path = os.path.join(path, file)
    # sr=None: librosa keeps the file's own sampling rate rather than resampling.
    data, sr = librosa.load(file_path, sr=None)

    y_s = librosa.effects.time_stretch(data, rate=stretch_rates[label])
    MEL_Feature = librosa.feature.melspectrogram(y=y_s, sr=sr)
    # Add a trailing channel axis, then resize to a fixed 128x128 grid so
    # every file produces a feature of the same shape.
    MEL_Feature = tensorflow.image.resize(np.expand_dims(MEL_Feature, axis=-1), (128, 128))

    wav_data.append([file, label, MEL_Feature])


### AUDIO TESTING

In [None]:
def adjust_length(time_series_list, length):
    """Pad or truncate every series in ``time_series_list`` to ``length`` samples.

    The list is modified in place and nothing is returned. A series shorter
    than ``length`` gets integer zeros appended at its end; any other series
    is cut down to its first ``length`` elements. Each entry ends up as a
    NumPy array.
    """
    for idx, series in enumerate(time_series_list):
        shortfall = length - len(series)
        if shortfall > 0:
            # Too short: append zeros to reach the target length.
            time_series_list[idx] = np.append(series, np.zeros(shortfall, dtype=int))
        else:
            # Long enough: keep only the leading ``length`` samples.
            time_series_list[idx] = np.array(series[:length])

def check_for_nan(l):
    """Return True if any element of ``l`` is NaN, otherwise False.

    One-dimensional floating-point input (for example an audio signal held
    in a NumPy array) is tested with a single vectorized ``np.isnan`` call
    instead of converting every sample to a string. Any other input falls
    back to the element-wise check that compares ``str(x)`` with ``'nan'``.
    """
    try:
        arr = np.asarray(l)
    except (TypeError, ValueError):
        # Input that cannot be viewed as an array uses the generic check below.
        arr = None

    if arr is not None and arr.ndim == 1 and arr.dtype.kind == 'f':
        return bool(np.isnan(arr).any())

    return any(str(x) == 'nan' for x in l)

def adjust_length(time_series_list, length):
    """Force every series in ``time_series_list`` to exactly ``length`` samples.

    Works in place and returns nothing: short series are zero-padded at the
    end, the rest are truncated to their first ``length`` elements, and each
    entry is stored back as a NumPy array.

    NOTE: this redefines ``adjust_length``, which is already defined earlier
    in this cell with the same behavior; as the later definition, this is
    the one callers get.
    """
    for position, samples in enumerate(time_series_list):
        missing = length - len(samples)
        if missing > 0:
            padding = np.zeros(missing, dtype=int)
            time_series_list[position] = np.append(samples, padding)
        else:
            time_series_list[position] = np.array(samples[:length])

def feature_extraction_1D(data, sampling_rate):
    """Build one flat row of frame-level audio features for a single clip.

    The following librosa features are computed and joined along axis 1, in
    this order: zero-crossing rate, RMS energy, the mean over 13 MFCCs,
    spectral centroid, spectral bandwidth, spectral flatness, spectral
    rolloff (librosa's default roll percent) and spectral rolloff at
    ``roll_percent=0.01``.

    Parameters:
        data: audio time series passed to librosa as ``y``.
        sampling_rate: sampling rate of ``data``, passed to librosa as ``sr``.

    Returns:
        A 2-D NumPy array with a single row holding all features.
    """
    # Average across the 13 coefficients (axis 0). reshape(1, -1) keeps the
    # result a single row for any number of frames rather than assuming a
    # fixed frame count, so clips of other lengths or sample rates also work.
    mfcc_mean = np.mean(
        librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=13), axis=0
    ).reshape(1, -1)

    feature_rows = [
        # Zero crossing rate
        librosa.feature.zero_crossing_rate(y=data),
        # Energy
        librosa.feature.rms(y=data),
        # Mel-frequency cepstral coefficients (mean over coefficients)
        mfcc_mean,
        # Spectral centroid
        librosa.feature.spectral_centroid(y=data, sr=sampling_rate),
        # Spectral bandwidth
        librosa.feature.spectral_bandwidth(y=data, sr=sampling_rate),
        # Spectral flatness
        librosa.feature.spectral_flatness(y=data),
        # Spectral rolloff, maximum frequencies
        librosa.feature.spectral_rolloff(y=data, sr=sampling_rate),
        # Spectral rolloff, minimum frequencies
        librosa.feature.spectral_rolloff(y=data, sr=sampling_rate, roll_percent=0.01),
    ]

    # A single concatenate replaces the chain of np.append(..., axis=1) calls.
    return np.concatenate(feature_rows, axis=1)

In [None]:
# Directory that is scanned for .wav files.
path = "./Crema Dataset/"

wav_data = []          # one audio signal per file
labelList = []         # emotion code per file, same order as wav_data
sampling_rate = 18000  # every file is loaded at this rate

for file in os.listdir(path):
    if not file.endswith(".wav"):
        continue

    file_path = os.path.join(path, file)
    data, sr = librosa.load(file_path, sr=sampling_rate)
    label = file.split('_')[2]  # third "_"-separated field of the file name

    # Use the noise-reduced signal unless it contains NaN; in that case
    # keep the signal as loaded.
    denoised = nr.reduce_noise(y=data, sr=sampling_rate)
    data = data if check_for_nan(denoised) else denoised

    wav_data.append(data)
    labelList.append(label)

n = len(wav_data)
# Pad or truncate every signal to 3 * sampling_rate samples so they stack
# into one 2-D array.
adjust_length(wav_data, 3 * sampling_rate)
wav_data = np.array(wav_data)

# One row per clip: the extracted features followed by the clip's label.
data_features_extracted_1D = np.array([
    np.squeeze(np.append(feature_extraction_1D(clip, sampling_rate), clip_label))
    for clip, clip_label in zip(wav_data, labelList)
])
print(data_features_extracted_1D.shape)

# CNN Modeling

In [None]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv1D, Dropout, MaxPooling1D, Flatten, Dense, BatchNormalization
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

### Final Model Section
Build the CNN model for training  
Top val_accuracy = 63%  

In [None]:
def build_model_test(num_label, inputShape=(128, 128)):
    """Build and compile the 1-D convolutional classifier of the Final Model section.

    Parameters:
        num_label: number of output classes (units of the softmax layer).
        inputShape: per-sample input shape for the first Conv1D layer.
            Defaults to (128, 128), the value that used to be hard-coded
            while this argument was ignored.

    Returns:
        The compiled Sequential model (Adam with learning rate 0.001,
        categorical cross-entropy loss, accuracy metric).
    """
    model = Sequential()
    model.add(Conv1D(filters=256, kernel_size=3, padding='same', activation='relu', input_shape=tuple(inputShape)))

    model.add(Conv1D(filters=256, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2, padding='same', strides=2))

    model.add(Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2, padding='same', strides=2))

    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'))
    model.add(MaxPooling1D(pool_size=2, padding='same', strides=2))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(num_label, activation='softmax'))

    model.summary()

    opt = Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

Split the data into training and testing and encode the labels

In [None]:
# Each wav_data entry is [file name, label, feature]; split it into model
# inputs (features) and targets (labels).
X = np.array([item[2] for item in wav_data])
y = np.array([item[1] for item in wav_data])

# Map the string labels to integer ids, then one-hot encode them. The
# one-hot width follows the number of classes actually present, so it
# always matches the num_label passed to the model builder.
encoder = LabelEncoder()
y = encoder.fit_transform(y)
num_label = len(encoder.classes_)
y = to_categorical(y, num_classes=num_label)

# Hold out 20% of the samples; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Shape of X1_train: {X_train.shape}\n")
print(f"Shape of y_train: {y_train.shape}\n")
print(f"Shape of X1_test: {X_test.shape}\n")
print(f"Shape of y_test: {y_test.shape}\n")
print(f"Number of labels: {num_label}\n")

Build the compiled CNN model, train it on the training split, and evaluate it on the test split


In [None]:
# Build the Final Model network. No build_model function is defined in this
# notebook, so call the builder from the Final Model section; (128, 128) is
# the input shape that builder's first layer uses.
model1 = build_model_test(num_label, (128, 128))

# Train, reporting loss/accuracy on the held-out split after every epoch.
model1.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))
# evaluate returns [loss, accuracy] for the metrics the model was compiled with.
acc1 = model1.evaluate(X_test, y_test)

print(f"Model Accuracy: {acc1[1]:.4f}\n")

### Testing Model Section  

In [None]:
def build_model_test(num_label, train):
    """Build and compile the deeper 1-D CNN of the Testing Model section.

    Parameters:
        num_label: number of output classes (units of the softmax layer).
        train: training array; only ``train.shape[1]`` is read, and the
            network input shape is ``(train.shape[1], 1)``.

    Returns:
        The compiled Sequential model (Adam with learning rate 0.0001,
        categorical cross-entropy loss, accuracy metric).
    """
    feature_count = train.shape[1]

    model = Sequential()

    # Two 64-filter convolutions followed by one pooling step.
    model.add(Conv1D(input_shape=(feature_count, 1), filters=64, kernel_size=3, padding="same", activation="relu"))
    model.add(Conv1D(filters=64, kernel_size=3, padding="same", activation="relu"))
    model.add(MaxPooling1D(pool_size=2, strides=2))

    # Four further convolution + pooling stages with growing filter counts.
    for filter_count in (128, 256, 512, 512):
        model.add(Conv1D(filters=filter_count, kernel_size=3, padding="same", activation="relu"))
        model.add(MaxPooling1D(pool_size=2, strides=2))

    # Classification head.
    model.add(Flatten())
    model.add(Dense(units=4096, activation="relu"))
    model.add(Dense(num_label, activation="softmax"))

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate=0.0001),
        metrics=['accuracy'],
    )

    return model

In [None]:

# The last column of each feature row is the label that was appended during
# extraction; drop it and convert the remaining columns to floats.
X = data_features_extracted_1D[:, :-1].astype(float)
y = np.array(labelList)

# Map the string labels to integer ids, then one-hot encode them. The
# one-hot width follows the number of classes actually present, so it
# always matches the num_label passed to the model builder.
encoder = LabelEncoder()
y = encoder.fit_transform(y)
num_label = len(encoder.classes_)
y = to_categorical(y, num_classes=num_label)

# Hold out 10% of the samples; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

print(f"Shape of X1_train: {X_train.shape}\n")
print(f"Shape of y_train: {y_train.shape}\n")
print(f"Shape of X1_test: {X_test.shape}\n")
print(f"Shape of y_test: {y_test.shape}\n")
print(f"Number of labels: {num_label}\n")


In [None]:
# Build the Testing Model network sized to the training feature matrix.
model1 = build_model_test(num_label, X_train)

# use_multiprocessing is not passed: per the Keras docs it only applies to
# generator / Sequence input, not to in-memory arrays, and newer Keras
# versions no longer accept it in fit().
model1.fit(X_train, y_train, epochs=150, batch_size=64, validation_data=(X_test, y_test))
# evaluate returns [loss, accuracy] for the metrics the model was compiled with.
acc1 = model1.evaluate(X_test, y_test)

print(f"Model Accuracy: {acc1[1]:.4f}\n")