# Urban Sound Challenge

_Project designed to use signal processing and machine learning techniques to classify urban sounds into 10 different categories._

_December 2023, by Blake Andreou_

#### Imports and Initial Download

In [1]:
import os
import pickle

import librosa
import numpy as np
import soundata

_The cell below can be used to re-derive the features from the raw data. The raw data can be downloaded with the soundata Python package; replace the 'data_home' path with the location of your own copy._

In [2]:
#grabbing sound dataset
#the data location defaults to the original author's local copy; set the
#URBANSOUND8K_HOME environment variable to point at another copy instead of
#editing this cell
DATA_HOME = os.environ.get(
    'URBANSOUND8K_HOME',
    'C://Users/Blake/Documents/College_Stuff/JOBS/Projects/UrbanSoundChallenge/sound_datasets/urbansound8k',
)
dataset = soundata.initialize('urbansound8k', data_home=DATA_HOME)
#downloading if data not already downloaded
#dataset.download()
#validation that dataset was downloaded correctly (kept as the last expression so its result is displayed)
dataset.validate()

100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 331.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 8732/8732 [01:21<00:00, 107.65it/s]
INFO: Success: the dataset is complete and all files are valid.
INFO: --------------------


({'metadata': {}, 'clips': {}}, {'metadata': {}, 'clips': {}})

#### Potentially usable information for featurization

In [None]:
#basic clip data: take one clip from the dataset and print the fields
#that could be used as features or labels
sample_clip = dataset.choice_clip()
for field in (
    'audio',        #access audio
    'salience',
    'class_id',     #class label (0-9)
    'class_label',  #class label (string)
    'fold',         #which fold the clip is in
):
    print(getattr(sample_clip, field))

In [None]:
#playing a given sound
from IPython.display import Audio
#the clip audio is indexed as [0] = sample data, [1] = sample rate
sample_audio = sample_clip.audio
audio_data, audio_sample_rate = sample_audio[0], sample_audio[1]
#last expression of the cell, so the player widget is what gets displayed
Audio(data=audio_data, rate=audio_sample_rate)

In [None]:
#creating waveform of given sound
def printWave(clip,class_label=''):
    """Draw the waveform of one sound on a new 12x4 matplotlib figure.

    clip: indexable pair; clip[0] is passed to waveshow as the signal and
        clip[1] as the sample rate.
    class_label: text used as the figure title (empty string by default).
    """
    #imports are local to the function, as in the rest of this notebook
    import matplotlib.pyplot as plt
    from librosa import display

    signal, sample_rate = clip[0], clip[1]
    plt.figure(figsize=(12, 4))
    plt.title(class_label)
    display.waveshow(y=signal, sr=sample_rate)

#### Visualizing Waves

In [None]:
class_labels = ['air conditioner','car horn','children playing','dog bark','drilling',
                'engine idling','gun shot','jackhammer','siren','street music']
#one flag per class: has a waveform for that class been drawn yet?
class_accessed = [False] * len(class_labels)
#get all clips and ids
all_clips = dataset.load_clips()
all_ids = dataset.clip_ids
#draw the first clip encountered for each class
#(loop variable renamed from `id` so the builtin is not shadowed)
for clip_id in all_ids:
    #get clip
    clip = all_clips[clip_id]
    #if this class hasn't been drawn yet
    if not class_accessed[clip.class_id]:
        printWave(clip.audio,class_label=class_labels[clip.class_id])
        class_accessed[clip.class_id] = True
        #every class now has a plot, so the remaining clips need not be scanned
        if all(class_accessed):
            break

#### Feature Generation

_Skip this section if you are only interested in the modeling portion; unpickling the included files will provide the output._

In [None]:
#get relevant features (mfccs)
#features[i], folds[i] and labels[i] all describe the clip all_ids[i]
features = []
folds = []
labels = []
#get each clip via id (loop variable renamed from `id` so the builtin is not shadowed)
for clip_id in all_ids:
    clip = all_clips[clip_id]
    #read the audio once and reuse it; the original accessed clip.audio twice
    #NOTE(review): presumably each access re-reads the file from disk - confirm
    audio = clip.audio
    signal, sample_rate = audio[0], audio[1]
    #generate feature set: each MFCC coefficient averaged over all frames
    mfccs = np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate).T,axis=0)
    features.append(mfccs)
    folds.append(clip.fold)
    labels.append(clip.class_id)

In [None]:
#get additional features (mfccs + clip length + zero-crossing count)
moreFeatures = []
#get each clip via id (loop variable renamed from `id` so the builtin is not shadowed)
for clip_id in all_ids:
    clip = all_clips[clip_id]
    #read the audio once and reuse it; the original accessed clip.audio three times
    #NOTE(review): presumably each access re-reads the file from disk - confirm
    audio = clip.audio
    signal, sample_rate = audio[0], audio[1]
    #generate feature set
    data = np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate).T,axis=0) #mfcc
    data = np.append(data,clip.freesound_end_time-clip.freesound_start_time) #length of clip (relevant for shorter ones like horn/gunshot)
    #np.sum counts the True flags in one vectorised call instead of the builtin
    #sum walking the array element by element (assumes a 1-D mono signal - TODO confirm)
    data = np.append(data,np.sum(librosa.zero_crossings(signal))) #zero crossings (maybe relevant for higher freq. identifiers?)
    moreFeatures.append(data)

#### Pickling/Unpickling Features

In [None]:
#pickling of relevant data to avoid taxing cell above
def _dump_pickle(obj, path):
    """Write obj to path with pickle, replacing any existing file."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)

#written one after another, in the same order as the matching loads below
_dump_pickle(features, 'mfcss_only_features.pkl')
_dump_pickle(labels, 'labels.pkl')
_dump_pickle(folds, 'folds.pkl')
_dump_pickle(moreFeatures, 'all_features.pkl')

In [3]:
#unpickling as needed
def _load_pickle(path):
    """Return the object stored in the pickle file at path."""
    #pickle can execute code while loading; only open files you trust
    with open(path, 'rb') as handle:
        return pickle.load(handle)

features = _load_pickle('mfcss_only_features.pkl')
labels = _load_pickle('labels.pkl')
folds = _load_pickle('folds.pkl')
moreFeatures = _load_pickle('all_features.pkl')

#### Model Definitions

In [17]:
def FFModel(input_size):
    """Build and compile the feed-forward classifier.

    Architecture: three 512-unit ReLU layers, each followed by 20% dropout
    (the third layer also carries an L2 kernel penalty of 0.05), then a
    softmax output layer.

    input_size: number of features per example.
    Returns the compiled, untrained keras Sequential model.

    Reads the notebook-level `labels` list to size the output layer.
    """
    #standard model,nothing special
    from keras.models import Sequential
    from keras.layers import Input, Dense, Dropout
    from keras.regularizers import L2

    #one output unit per class id (largest id + 1, because the sparse loss uses
    #the integer label as an index into the output). The original used
    #len(labels), which is the number of examples, not the number of classes.
    num_labels = int(max(labels)) + 1

    # build model
    model = Sequential()

    model.add(Input(shape=(input_size,)))
    model.add(Dense(512,activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(512,activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(512,activation='relu',kernel_regularizer=L2(l2=0.05)))
    model.add(Dropout(0.2))
    model.add(Dense(num_labels,activation='softmax'))

    model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

In [16]:
def LSTM(input_size):
    """Build and compile the LSTM classifier.

    Architecture: a 256-unit LSTM layer, a 512-unit ReLU layer with an L2
    kernel penalty of 0.05, 50% dropout, then a softmax output layer.

    input_size: number of features per example; the LSTM layer is declared
        with input shape (input_size, 1).
    Returns the compiled, untrained keras Sequential model.

    Reads the notebook-level `labels` list to size the output layer.
    """
    #adding LSTM
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    #aliased so the layer class does not hide this function's own name
    from keras.layers import LSTM as LSTMLayer
    from keras.regularizers import L2

    #one output unit per class id (largest id + 1, because the sparse loss uses
    #the integer label as an index into the output). The original used
    #len(labels), which is the number of examples, not the number of classes.
    num_labels = int(max(labels)) + 1

    # build model
    model = Sequential()

    model.add(LSTMLayer(256, input_shape=(input_size,1)))
    model.add(Dense(512,activation='relu',kernel_regularizer=L2(l2=0.05)))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels,activation='softmax'))

    model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

    return model

In [18]:
def logisticModel(input_size):
    """Build and compile a single-layer softmax classifier.

    The input feeds straight into one softmax Dense layer, with no hidden layers.

    input_size: number of features per example.
    Returns the compiled, untrained keras Sequential model.

    Reads the notebook-level `labels` list to size the output layer.
    """
    from keras.models import Sequential
    from keras.layers import Input, Dense

    #one output unit per class id (largest id + 1, because the sparse loss uses
    #the integer label as an index into the output). The original used
    #len(labels), which is the number of examples, not the number of classes.
    num_labels = int(max(labels)) + 1

    # build model
    model = Sequential()
    model.add(Input(shape=(input_size,)))
    model.add(Dense(num_labels,activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

    return model

#### Model Run and Evaluation

In [7]:
def runModel(modelFunc,trainFeatures,trainLabels,testFeatures,testLabels,epochs,batch,input_size):
    """Build a fresh model, train it on one split and score it on the held-out split.

    modelFunc: callable taking input_size and returning a compiled model.
    trainFeatures / testFeatures: sequences of equal-length feature vectors.
    trainLabels / testLabels: sequences of labels, one per feature vector.
    epochs, batch: passed to model.fit as epochs and batch_size.
    input_size: passed to modelFunc.

    Returns whatever model.evaluate returns for the test split. The original
    discarded this value, so the scores could not be collected or compared.
    """
    model = modelFunc(input_size)
    #hand the model real 2-D arrays instead of nested Python lists, which some
    #Keras versions interpret as a list of separate model inputs
    x_train = np.asarray(trainFeatures)
    y_train = np.asarray(trainLabels)
    x_test = np.asarray(testFeatures)
    y_test = np.asarray(testLabels)
    model.fit(x_train, y_train, batch_size=batch, epochs=epochs,verbose=0)
    return model.evaluate(x_test, y_test)

In [19]:
def crossValRun(modelFunc,features,epochs=5,batchSize=32):
    """Leave-one-fold-out cross validation over the dataset's predefined folds.

    For every distinct fold id, the examples in that fold are held out as the
    test set, a fresh model is trained on all other examples and then scored
    via runModel.

    modelFunc: callable taking input_size and returning a compiled model.
    features: sequence of feature vectors, index-aligned with the
        notebook-level `folds` and `labels` lists.
    epochs, batchSize: forwarded to runModel.

    Returns a list holding runModel's result for each fold, in ascending
    fold order.
    Raises ValueError if features, labels and folds differ in length.
    """
    if not (len(features) == len(labels) == len(folds)):
        raise ValueError('features, labels and folds must all have the same length')
    input_size = len(features[0])
    results = []
    #fold ids are taken from the data instead of a hard-coded 1..10, which also
    #removes the need to special-case the last fold
    for fold in sorted(set(folds)):
        print('\nFOLD ' + str(fold) + ' RUN:\n')
        #data split by fold membership; the original sliced on the position of
        #the first clip of the next fold, which is only correct when the clips
        #are stored grouped in ascending fold order
        held_out = [f == fold for f in folds]
        testFeatures = [x for x, is_test in zip(features, held_out) if is_test]
        trainFeatures = [x for x, is_test in zip(features, held_out) if not is_test]
        testLabels = [y for y, is_test in zip(labels, held_out) if is_test]
        trainLabels = [y for y, is_test in zip(labels, held_out) if not is_test]
        #run model
        results.append(
            runModel(modelFunc,trainFeatures,trainLabels,testFeatures,testLabels,epochs,batchSize,input_size)
        )
    return results

In [20]:
#Cross-validate every model architecture on the full feature set
#(MFCC means plus clip length and zero-crossing count)
for model in (logisticModel, FFModel, LSTM):
    print(f"---------{model.__name__} with all features-----------")
    crossValRun(model, moreFeatures)

---------logisticModel with all features-----------

FOLD 1 RUN:


FOLD 2 RUN:


FOLD 3 RUN:


FOLD 4 RUN:


FOLD 5 RUN:


FOLD 6 RUN:


FOLD 7 RUN:


FOLD 8 RUN:


FOLD 9 RUN:


FOLD 10 RUN:

---------FFModel with all features-----------

FOLD 1 RUN:


FOLD 2 RUN:


FOLD 3 RUN:


FOLD 4 RUN:


FOLD 5 RUN:


FOLD 6 RUN:


FOLD 7 RUN:


FOLD 8 RUN:


FOLD 9 RUN:


FOLD 10 RUN:

---------LSTM with all features-----------

FOLD 1 RUN:


FOLD 2 RUN:


FOLD 3 RUN:


FOLD 4 RUN:


FOLD 5 RUN:


FOLD 6 RUN:


FOLD 7 RUN:


FOLD 8 RUN:


FOLD 9 RUN:


FOLD 10 RUN:



In [21]:
#Cross-validate every model architecture on the MFCC-only feature set
for model in (logisticModel, FFModel, LSTM):
    print(f"---------{model.__name__} with only MFCSS features-----------")
    crossValRun(model, features)

---------logisticModel with only MFCSS features-----------

FOLD 1 RUN:


FOLD 2 RUN:


FOLD 3 RUN:


FOLD 4 RUN:


FOLD 5 RUN:


FOLD 6 RUN:


FOLD 7 RUN:


FOLD 8 RUN:


FOLD 9 RUN:


FOLD 10 RUN:

---------FFModel with only MFCSS features-----------

FOLD 1 RUN:


FOLD 2 RUN:


FOLD 3 RUN:


FOLD 4 RUN:


FOLD 5 RUN:


FOLD 6 RUN:


FOLD 7 RUN:


FOLD 8 RUN:


FOLD 9 RUN:


FOLD 10 RUN:

---------LSTM with only MFCSS features-----------

FOLD 1 RUN:


FOLD 2 RUN:


FOLD 3 RUN:


FOLD 4 RUN:


FOLD 5 RUN:


FOLD 6 RUN:


FOLD 7 RUN:


FOLD 8 RUN:


FOLD 9 RUN:


FOLD 10 RUN:

