# Urban Sound Classification
## Data Description
The dataset contains 8732 labeled sound excerpts (<=4s) of urban sounds from 10 classes:
- Air Conditioner 
- Car Horn 
- Children Playing 
- Dog bark 
- Drilling 
- Engine Idling 
- Gun Shot 
- Jackhammer 
- Siren
- Street Music

In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are expected in the "urban-sound-classification/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in that directory

import os
import glob

print(os.listdir("urban-sound-classification"))

# Any results you write to the current directory are saved as output.

['img', 'test.csv', 'train.csv', 'train', 'test']


In [4]:
# Load the train and test index tables from the dataset directory.
df_train = pd.read_csv('urban-sound-classification/train.csv')
df_test = pd.read_csv('urban-sound-classification/test.csv')

# Show (rows, columns) of each table as a quick sanity check.
print('Train Size: ', df_train.shape)
print('Test Size: ', df_test.shape)

Train Size:  (5435, 2)
Test Size:  (3297, 1)


In [5]:
df_train.head()

Unnamed: 0,ID,Class
0,0,siren
1,1,street_music
2,2,drilling
3,3,siren
4,4,dog_bark


In [43]:
# Distinct values of the Class column; oneHotEncodeLabel uses the position of a
# name in this array as the index of the hot element.
LABELS = df_train.Class.unique()

In [20]:
import librosa,librosa.display
import matplotlib.pyplot as plt

# Loading audio file
def load_file(sound_file):
    """Decode ``sound_file`` with ``librosa.load`` (default arguments).

    Returns the ``(samples, sample_rate)`` pair produced by librosa.
    """
    samples, sample_rate = librosa.load(sound_file)
    return samples, sample_rate

In [11]:
def get_features(sound_file):
    """Summarise one audio file as five time-averaged feature vectors.

    Each librosa feature matrix is transposed and averaged over axis 0, so
    every returned array is one-dimensional (one value per feature bin).

    Parameters
    ----------
    sound_file : str
        Path handed to ``load_file``.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``. ``mfccs`` has 40
        entries (``n_mfcc=40``); the others use librosa's defaults.
        NOTE(review): the concatenation is presumably 193 values wide, the
        width ``parse_sound_files`` allocates - confirm against the
        installed librosa version.
    """
    data, sr = load_file(sound_file)
    # Cast once and reuse the same float32 signal for every extractor.
    signal = data.astype('float32')
    stft = np.abs(librosa.stft(signal))

    mfccs = np.mean(librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0)
    # The signal must be passed as `y=`: recent librosa releases made the
    # audio argument keyword-only, so the positional form raises TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=signal, sr=sr).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T, axis=0)
    # Tonnetz is computed on the harmonic component only.
    harmonic = librosa.effects.harmonic(signal)
    tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sr).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz

In [13]:
def parse_sound_files(sound_file_dir):
    """Extract a feature row and an integer label for every WAV under a prefix.

    Parameters
    ----------
    sound_file_dir : str
        Prefix that is concatenated directly with ``"*.wav"`` for globbing,
        so a directory must be given with its trailing separator.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``features`` of shape ``(n_files, 193)`` and ``labels`` of shape
        ``(n_files,)`` with an integer dtype. Each label is the part of the
        file's base name before the first ``'.'`` converted to ``int``.

    Raises
    ------
    ValueError
        If a file stem is not an integer, or a feature row is not 193 wide.
    """
    wav_paths = glob.glob(str(sound_file_dir) + "*.wav")
    rows = []
    labels = []
    for path in wav_paths:
        rows.append(np.hstack(get_features(path)))
        # Label comes from the file currently being processed (not the first
        # match), and from its base name so the directory depth is irrelevant.
        labels.append(int(os.path.basename(path).split('.')[0]))
    # A single stack replaces the per-iteration vstack; the empty (0, 193)
    # seed keeps the float64 dtype, the width check, and the empty-input shape.
    features = np.vstack([np.empty((0, 193))] + rows)
    # Builtin int replaces the np.int alias removed in NumPy 1.24.
    return features, np.array(labels, dtype=int)

In [39]:
def oneHotEncodeLabel(label: str) -> np.array:
    """Build a one-hot vector for ``label`` over the module-level ``LABELS``.

    The result has ``len(LABELS)`` entries, with 1 at every position where
    ``LABELS`` equals ``label`` and 0 everywhere else (all zeros when the
    label does not occur in ``LABELS``).
    """
    encoded = np.zeros(len(LABELS))
    matching_positions = np.where(LABELS == label)
    encoded[matching_positions] = 1
    return encoded

In [40]:
def featurization(filepath):
    """Worker task: compute the feature vectors and class name of one training clip.

    Parameters
    ----------
    filepath : str
        Path of a training audio file whose base name starts with the
        integer clip ID followed by ``'.'``.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz, label)`` where ``label`` is
        the ``Class`` value of the ``df_train`` row whose ``ID`` matches.

    Raises
    ------
    ValueError
        If the file stem is not an integer.
    IndexError
        If no ``df_train`` row has that ID.
    """
    mfccs, chroma, mel, contrast, tonnetz = get_features(filepath)
    # Use the base name so the lookup does not depend on how many directory
    # components precede the file.
    clip_id = int(os.path.basename(filepath).split('.')[0])
    label = df_train[df_train.ID == clip_id].Class.values[0]
    # No progress-bar call here: this runs inside a pool worker, where the
    # notebook's `pbar` may not exist; the parent-side callback reports progress.
    return mfccs, chroma, mel, contrast, tonnetz, label

In [47]:
def featurization_test(filepath):
    """Worker task: compute the feature vectors of one test clip.

    Parameters
    ----------
    filepath : str
        Path of a test audio file.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz, file_id)`` where
        ``file_id`` is the part of the base name before the first ``'.'``,
        kept as a string.
    """
    mfccs, chroma, mel, contrast, tonnetz = get_features(filepath)
    # Use the base name so the ID does not depend on the directory depth.
    file_id = os.path.basename(filepath).split('.')[0]
    # No progress-bar call here: this runs inside a pool worker, where the
    # notebook's `pbar` may not exist; the parent-side callback reports progress.
    return mfccs, chroma, mel, contrast, tonnetz, file_id

In [41]:
import logging
# Raise the threshold of the 'matplotlib' logger to WARNING so its lower-level
# records are not emitted once logging is configured at DEBUG.
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING)

import warnings
# Ignore warnings whose category is UserWarning.
warnings.filterwarnings("ignore", category=UserWarning)

In [21]:
from multiprocessing import Pool
from functools import reduce
from tqdm import tqdm_notebook as tqdm
import time

In [34]:
# Paths of all training (li) and test (li_te) WAV files.
li = glob.glob('urban-sound-classification/train/Train/'+"*.wav")
li_te = glob.glob('urban-sound-classification/test/Test/'+"*.wav")

## PREPROCESSING FOR MLP

In [44]:
logging.basicConfig(level=logging.DEBUG)
features = []
labels = []
def update(*a):
    """Pool callback: store one finished training sample and advance the bar.

    ``a[0]`` is the tuple returned by ``featurization``: five feature
    vectors followed by the class name.
    """
    X = np.hstack(a[0][:-1])
    y = oneHotEncodeLabel(a[0][-1])
    features.append(X)
    labels.append(y)
    pbar.update()

def _log_worker_error(exc):
    """Pool error callback: log an exception raised inside a worker task."""
    logging.error("worker task failed: %r", exc)

if __name__ == '__main__':
    num_processes = int(12)
    before = time.time()
    # Create the bar before the pool so it already exists when the worker
    # processes are started.
    pbar = tqdm(total=len(li[:10]))
    pool = Pool(processes=num_processes)
    # NOTE: only the first 10 files are processed here.
    for f in li[:10]:
        # Without error_callback a failing task is dropped silently and its
        # sample just goes missing from `features`.
        pool.apply_async(featurization, args=(f,), callback=update,
                         error_callback=_log_worker_error)
    # Wait for every task; otherwise later cells can read `features` and
    # `labels` while they are still being filled.
    pool.close()
    pool.join()

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

In [46]:
# Convert the per-sample lists collected by the pool callback into arrays.
features, labels = np.array(features), np.array(labels)

In [None]:
# Sanity check: both arrays should have the same number of rows.
print("features shape:",features.shape,"and labels shape:", labels.shape)

In [None]:
#np.save('train.npy',features)
#np.save('train_labels.npy',labels)

In [48]:
logging.basicConfig(level=logging.DEBUG)
features_te = []
file_id = []
def update(*a):
    """Pool callback: store one finished test sample and advance the bar.

    ``a[0]`` is the tuple returned by ``featurization_test``: five feature
    vectors followed by the file ID string.
    """
    X = np.hstack(a[0][:-1])
    features_te.append(X)
    file_id.append(a[0][-1])
    pbar.update()

def _log_worker_error(exc):
    """Pool error callback: log an exception raised inside a worker task."""
    logging.error("worker task failed: %r", exc)

# if __name__ == '__main__':
num_processes = int(14)
before = time.time()
# Create the bar before the pool so it already exists when the worker
# processes are started.
pbar = tqdm(total=len(li_te[:10]))
pool = Pool(processes=num_processes)
# NOTE: only the first 10 files are processed here.
for f in li_te[:10]:
    # Without error_callback a failing task is dropped silently and its
    # sample just goes missing from `features_te`.
    pool.apply_async(featurization_test, args=(f,), callback=update,
                     error_callback=_log_worker_error)
pool.close()
pool.join()

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

In [49]:
# Convert the per-sample list collected by the pool callback into an array.
features_te = np.array(features_te)

In [None]:
#np.save('test.npy',features_te)
#np.save('file_id.npy',file_id)

## FOR CREATING IMAGES

In [22]:
def save_image(filepath,file_dir):
    """Render the dB-scaled spectrogram of one audio file to a PNG.

    Parameters
    ----------
    filepath : str
        Path of the audio file. The part of its base name before the first
        ``'.'`` becomes the PNG's file name.
    file_dir : str
        Output prefix, concatenated as-is with the file name, so a
        directory must be given with its trailing separator.
    """
    data, sr = librosa.load(filepath)

    window_size = 1024
    window = np.hanning(window_size)
    # Public alias of the same STFT routine used elsewhere in this notebook.
    stft = librosa.stft(data, n_fft=window_size, hop_length=512, window=window)
    # Scale the magnitudes by 2 / sum(window).
    out = 2 * np.abs(stft) / np.sum(window)

    # For plotting headlessly
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    # Base name instead of a fixed path index, so any directory depth works.
    clip_id = os.path.basename(filepath).split('.')[0]

    fig = plt.Figure()
    # Instantiating the canvas is what binds an Agg backend to the figure.
    canvas = FigureCanvas(fig)
    ax = fig.add_subplot(111)
    # Hide axes and frame so the image contains only the spectrogram.
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    librosa.display.specshow(librosa.amplitude_to_db(out, ref=np.max), ax=ax, y_axis='log', x_axis='time')
    fig.savefig(file_dir+str(clip_id)+'.png',dpi=50, bbox_inches='tight', pad_inches=0)

In [23]:
## TRAIN
# Disabled cell: the spectrogram export for the training files is wrapped in a
# string literal, so none of the statements inside it are executed.
'''logging.basicConfig(level=logging.DEBUG)
def update(*a):
    pbar.update()

# if __name__ == '__main__':
num_processes = int(14)
before = time.time()
pool = Pool(processes=num_processes)
pbar = tqdm(total=len(li))
file_directory = 'train/'
for f in li:
    pool.apply_async(save_image, args=(f,file_directory), callback=update)
pool.close()
pool.join()'''

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

In [36]:
## TEST
# Disabled cell: the spectrogram export for the test files is wrapped in a
# string literal, so none of the statements inside it are executed.
'''logging.basicConfig(level=logging.DEBUG)
def update(*a):
    pbar.update()

# if __name__ == '__main__':
num_processes = int(14)
before = time.time()
pool = Pool(processes=num_processes)
pbar = tqdm(total=len(li_te))
file_directory = 'test/'
for f in li_te:
    pool.apply_async(save_image, args=(f,file_directory), callback=update)
pool.close()
pool.join()'''

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

In [26]:
# Paths of the PNG files found under train/.
img_tr = glob.glob('train/'+"*.png")

In [30]:
from matplotlib import image
def save_image_array(filename,file_dir):
    """Read one image file and store its pixel array as a ``.npy`` file.

    Parameters
    ----------
    filename : str
        Path of the image. The part of its base name before the first
        ``'.'`` becomes the name of the ``.npy`` file.
    file_dir : str
        Output prefix, concatenated as-is with the file name, so a
        directory must be given with its trailing separator.
    """
    img_data = image.imread(filename)
    # Base name instead of a fixed path index, so the function also works for
    # images that are not exactly one directory deep.
    stem = os.path.basename(filename).split('.')[0]
    np.save(file_dir+str(stem)+'.npy',img_data)

In [32]:
## TRAIN
logging.basicConfig(level=logging.DEBUG)
def update(*a):
    """Pool callback: advance the progress bar by one finished task."""
    pbar.update()

def _log_worker_error(exc):
    """Pool error callback: log an exception raised inside a worker task."""
    logging.error("worker task failed: %r", exc)

# if __name__ == '__main__':
num_processes = int(14)
before = time.time()
file = img_tr
file_directory = 'train_npy_img/'
# np.save does not create missing directories; without this every task would
# fail inside its worker when the output folder is absent.
os.makedirs(file_directory, exist_ok=True)
pbar = tqdm(total=len(file))
pool = Pool(processes=num_processes)
for f in file:
    # error_callback surfaces worker failures that apply_async would
    # otherwise drop, leaving an incomplete set of .npy files unnoticed.
    pool.apply_async(save_image_array, args=(f,file_directory), callback=update,
                     error_callback=_log_worker_error)
pool.close()
pool.join()

HBox(children=(IntProgress(value=0, max=5445), HTML(value='')))

In [33]:
# Paths of the PNG files found under test/.
img_te = glob.glob('test/'+"*.png")

In [None]:
## TEST
# Disabled cell: the PNG-to-.npy conversion for the test images is wrapped in a
# string literal, so none of the statements inside it are executed.
'''logging.basicConfig(level=logging.DEBUG)
def update(*a):
    pbar.update()

# if __name__ == '__main__':
num_processes = int(14)
before = time.time()
pool = Pool(processes=num_processes)
file = img_te
pbar = tqdm(total=len(file))
file_directory = 'test_npy/'
for f in file:
    pool.apply_async(save_image_array, args=(f,file_directory), callback=update)
pool.close()
pool.join()'''