In [88]:
import os
import pickle

import librosa
import librosa.display
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

from src.wavhelp import WavHelper


In [89]:
# Load the UrbanSound8K metadata table (one row per audio slice) from the sample dataset folder.
metadata = pd.read_csv('UrbanSoundDatasetSample/metadata/UrbanSound8K.csv')

In [90]:
# Preview the first rows of the metadata and report its (rows, columns) size.
display(metadata.head())
print(metadata.shape)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class_name
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


(8732, 8)


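Only a few of the audio files are needed for this example. List the sample audio directory with `os.listdir` and keep only the metadata rows whose file is actually present.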

In [91]:
# Names of the audio clips that are actually shipped with the sample dataset
existing_files = os.listdir('UrbanSoundDatasetSample/audio')

# Restrict the metadata to those clips and renumber the rows from 0
is_present = metadata['slice_file_name'].isin(existing_files)
metasub = metadata.loc[is_present].reset_index(drop=True)
metasub

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class_name
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100648-1-0-0.wav,100648,4.823402,5.471927,2,10,1,car_horn
3,100852-0-0-0.wav,100852,0.0,4.0,1,5,0,air_conditioner
4,101848-9-0-0.wav,101848,0.0,4.0,1,7,9,street_music
5,102305-6-0-0.wav,102305,0.0,2.61161,1,1,6,gun_shot
6,102853-8-0-0.wav,102853,0.0,4.0,2,7,8,siren
7,102857-5-0-0.wav,102857,0.0,4.0,1,10,5,engine_idling
8,103074-7-0-0.wav,103074,3.341279,7.341279,1,1,7,jackhammer
9,103199-4-0-0.wav,103199,0.0,4.0,1,3,4,drilling


In [92]:
# Number of slices per class across the full metadata table
print(metadata['class_name'].value_counts())

dog_bark            1000
air_conditioner     1000
street_music        1000
jackhammer          1000
engine_idling       1000
drilling            1000
children_playing    1000
siren                929
car_horn             429
gun_shot             374
Name: class_name, dtype: int64


In [93]:
## Sample-dataset version: every clip sits directly in UrbanSoundDatasetSample/audio.
## For the entire dataset the path must be amended to include the fold sub-folder,
## i.e. <audio root>/fold<row["fold"]>/<row["slice_file_name"]>.

wavfilehelper = WavHelper()

# One get_file_props() result per clip listed in metasub, in row order
audiodata = [
    wavfilehelper.get_file_props(
        os.path.join('UrbanSoundDatasetSample/audio', str(slice_name))
    )
    for slice_name in metasub['slice_file_name']
]

# Tabulate the collected properties
audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])

In [94]:
# Show the channel count, sample rate and bit depth recorded for each sample clip.
audiodf

Unnamed: 0,num_channels,sample_rate,bit_depth
0,2,44100,16
1,2,44100,16
2,2,44100,16
3,2,44100,16
4,2,44100,16
5,2,44100,16
6,2,44100,16
7,2,44100,16
8,2,48000,24
9,2,44100,16


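The clips are primarily 44.1 kHz, 16-bit, but there is some variation (one clip here is 48 kHz, 24-bit). `librosa.load` accounts for this: by default it resamples every file to a common sample rate and mixes it down to mono as it loads.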

In [95]:
# Fraction of clips for each channel count
print(audiodf['num_channels'].value_counts(normalize=True))

2    1.0
Name: num_channels, dtype: float64


In [96]:
# Fraction of clips for each sample rate
print(audiodf['sample_rate'].value_counts(normalize=True))

44100    0.9
48000    0.1
Name: sample_rate, dtype: float64


In [97]:
# Fraction of clips for each bit depth
print(audiodf['bit_depth'].value_counts(normalize=True))

16    0.9
24    0.1
Name: bit_depth, dtype: float64


In [98]:
max_pad_len = 174


def _fit_to_width(vec, width):
    """Zero-pad or truncate ``vec`` along its second axis to exactly ``width`` columns."""
    n_frames = vec.shape[1]
    if n_frames >= width:
        # Too long: keep the leading columns instead of letting np.pad fail
        # on a negative pad width.
        return vec[:, :width]
    return np.pad(vec, pad_width=((0, 0), (0, width - n_frames)), mode='constant')


def extract_features(fname, features='mfccs', max_len=None):
    """Load an audio file and return a fixed-width feature matrix.

    Parameters
    ----------
    fname : str
        Path of the audio file to load with ``librosa.load``.
    features : {'mfccs', 'mels'}
        'mfccs' computes 40 MFCCs; 'mels' computes a 60-band mel spectrogram
        (left on the scale librosa returns; no dB conversion is applied).
    max_len : int, optional
        Number of columns in the returned matrix. Defaults to the module-level
        ``max_pad_len``, read at call time.

    Returns
    -------
    numpy.ndarray or None
        Matrix with ``max_len`` columns: zero-padded on the right when the clip
        is shorter, truncated when it is longer. ``None`` if the file could not
        be loaded or processed.

    Raises
    ------
    ValueError
        If ``features`` is not one of the supported names. Previously this was
        misreported as a file-parsing error.
    """
    if features not in ('mfccs', 'mels'):
        raise ValueError(
            "features must be 'mfccs' or 'mels', got {!r}".format(features)
        )
    if max_len is None:
        max_len = max_pad_len

    try:
        # kaiser_fast is a quicker resampler; chosen for speed over quality.
        signal, rate = librosa.load(fname, res_type='kaiser_fast')
        if features == 'mfccs':
            vec = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        else:
            vec = librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=60)
        return _fit_to_width(vec, max_len)

    except Exception as e:
        # Best effort: report the failure (with its cause) and let the caller
        # decide what to do with the missing clip.
        print("Error while parsing file: ", fname, "-", repr(e))
        return None

In [109]:
datasetpath = 'UrbanSoundDatasetSample/audio/'

# Build one [feature_matrix, class_label] record per clip listed in metasub.
features = []
for slice_name, label in zip(metasub['slice_file_name'], metasub['class_name']):

    file_name = os.path.join(datasetpath, str(slice_name))
    vector = extract_features(file_name, 'mfccs')

    if vector is None:
        # extract_features returns None when a clip cannot be processed. Keeping
        # that None would make the feature column ragged and break the later
        # conversion to a single numpy array, so skip the clip instead.
        print("Skipping file with no features: ", file_name)
        continue

    features.append([vector, label])

featuresdf = pd.DataFrame(features, columns=['feature', 'class_label'])

In [110]:
# Preview the extracted features and report the table's (rows, columns) size.
display(featuresdf.head())
print(featuresdf.shape)

Unnamed: 0,feature,class_label
0,"[[-306.77255, -177.59209, -99.13616, -65.97198...",dog_bark
1,"[[-457.69534, -451.0248, -450.68613, -445.0000...",children_playing
2,"[[-323.20044, -244.39201, -208.50298, -184.233...",car_horn
3,"[[-688.7444, -262.64093, -105.28191, -60.13772...",air_conditioner
4,"[[-205.1927, -215.90787, -209.7127, -184.89857...",street_music


(10, 2)


In [103]:
# Gather the per-clip feature matrices and their class names as numpy arrays
X = np.array(list(featuresdf['feature']))
y = np.array(list(featuresdf['class_label']))

# Class names -> integer ids -> one-hot rows
le = LabelEncoder()
y_int = le.fit_transform(y)
y_cat = to_categorical(y_int)

# Hold out 20% of the clips for testing, with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, random_state=42
)


In [104]:

### store the preprocessed data for use in the next notebook
# %store is an IPython line magic that saves each variable outside the running kernel;
# another notebook can restore them with `%store -r <name>`.

%store X_train 
%store X_test 
%store y_train 
%store y_test 
%store y_cat 
%store le

Stored 'X_train' (ndarray)
Stored 'X_test' (ndarray)
Stored 'y_train' (ndarray)
Stored 'y_test' (ndarray)
Stored 'y_cat' (ndarray)
Stored 'le' (LabelEncoder)


In [114]:
### or... we can pickle that:
# Write the train/test split to a pickle file as an alternative to %store.

vec_type = 'mfccs'
processed_data_split = (X_train, X_test, y_train, y_test)

pickle_dir = 'pickles'
# open(..., 'wb') does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it is already there).
os.makedirs(pickle_dir, exist_ok=True)
data_path = os.path.join(pickle_dir, 'urbansound_' + vec_type + '.p')

with open(data_path, 'wb') as handle:
    # protocol=2 is kept as in the original cell - presumably so older Python
    # readers can load the file; confirm before raising it.
    pickle.dump(processed_data_split, handle, protocol=2)