In [0]:
#downloading and extracting the dataset on colab's server
import os
import tarfile
import urllib.request

DATASET_URL = "https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz"
ARCHIVE_PATH = "a.tar.gz"

# Re-running the cell should not fetch the archive again. The download goes to
# a temporary name first and is renamed only once complete, so an interrupted
# transfer never leaves a truncated file under ARCHIVE_PATH.
if not os.path.exists(ARCHIVE_PATH):
    partial_path = ARCHIVE_PATH + ".part"
    urllib.request.urlretrieve(DATASET_URL, partial_path)
    os.replace(partial_path, ARCHIVE_PATH)

# The context manager closes the archive even if extraction raises.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only use it on archives from a trusted source such as the URL above.
with tarfile.open(ARCHIVE_PATH) as tar:
    tar.extractall()

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

In [0]:
# Read the dataset's metadata CSV (one row per audio slice) into a pandas DataFrame.
data = pd.read_csv(filepath_or_buffer="UrbanSound8K/metadata/UrbanSound8K.csv")

In [0]:
# Preview the first rows of the metadata table (head() shows five by default).
data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [0]:
# Count how many metadata rows (audio slices) belong to each fold.
data['fold'].value_counts()

4     990
5     936
3     925
2     888
1     873
7     838
10    837
6     823
9     816
8     806
Name: fold, dtype: int64

In [0]:
import librosa
from librosa import display

In [0]:
# Load one example clip: y is the audio time series and sr is its sampling rate.
# No sr= argument is given, so librosa.load uses its default target rate.
y,sr=librosa.load("UrbanSound8K/audio/fold5/100263-2-0-137.wav") # where sr is the sampling rate

In [0]:
# sr is the value returned by librosa.load above, i.e. the rate the audio was
# loaded at; NOTE(review): this is presumably librosa's default target rate
# rather than the native rate stored in the file — confirm before relying on it.
print('sampling rrate used in files is:',sr)

sampling rrate used in files is: 22050


In [0]:
# creating a feature set of data
#
# Each librosa extractor below returns a (n_bins, n_frames) matrix. Transposing
# and averaging over axis 0 collapses the time axis, leaving one 40-value
# summary vector per feature type for the clip loaded above.

# y and sr are passed by keyword: newer librosa releases accept them as
# keyword-only arguments, so the positional form mfcc(y, sr, ...) raises
# TypeError there. The keyword form also works on older releases.
mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T, axis=0) #Mel-frequency cepstral coefficients

melspectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40, fmax=8000).T, axis=0) #Compute a mel-scaled spectrogram

chroma_stft = np.mean(librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=40).T, axis=0) #Compute a chromagram from a waveform or power spectrogram

# bins_per_octave is pinned to n_chroma for the two CQT-based features.
# NOTE(review): older librosa defaulted bins_per_octave to n_chroma, while newer
# releases default to 36, which is not a multiple of 40 — confirm against the
# installed version; passing it explicitly keeps the original behaviour on both.
chroma_cq = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=40, bins_per_octave=40).T, axis=0) #Constant-Q chromagram

chroma_cens = np.mean(librosa.feature.chroma_cens(y=y, sr=sr, n_chroma=40, bins_per_octave=40).T, axis=0) #Computes the chroma variant “Chroma Energy Normalized” (CENS)

In [0]:
# Sanity check: every per-clip summary vector should have the same length (40 values).
mfccs.shape, melspectrogram.shape, chroma_stft.shape,chroma_cq.shape,chroma_cens.shape

((40,), (40,), (40,), (40,), (40,))

In [0]:
# Stack the five 40-value summaries into a (5, 40) block, then reshape it to (40, 5).
# NOTE(review): reshape keeps the row-major element order, so this is not a
# transpose — each row of the result holds five consecutive values of a single
# feature type rather than one value from each of the five feature types.
stacked = np.vstack((mfccs, melspectrogram, chroma_stft, chroma_cq, chroma_cens))
features = stacked.reshape(40, 5)
features.shape

(40, 5)

In [0]:
#preprocessing using entire feature set
N_FEATURES = 40   # length of every per-clip summary vector
TEST_FOLD = 10    # this fold is held out for testing; all other folds are training data
path="UrbanSound8K/audio/fold"


def extract_features(y, sr, n_features=N_FEATURES):
    """Summarise one audio clip as an ``(n_features, 5)`` array.

    Five librosa features (MFCC, mel spectrogram, STFT chroma, constant-Q
    chroma, CENS chroma) are computed, each averaged over time into a vector
    of ``n_features`` values, stacked, and reshaped.

    Parameters
    ----------
    y : audio time series as returned by ``librosa.load``.
    sr : sampling rate of ``y``.
    n_features : number of coefficients/bins requested from every extractor.

    Returns
    -------
    numpy.ndarray of shape ``(n_features, 5)``.
    """
    # y and sr are passed by keyword everywhere: newer librosa releases accept
    # them as keyword-only arguments, so mfcc(y, sr, ...) raises TypeError there.
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_features).T, axis=0)
    melspectrogram = np.mean(
        librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_features, fmax=8000).T, axis=0)
    chroma_stft = np.mean(
        librosa.feature.chroma_stft(y=y, sr=sr, n_chroma=n_features).T, axis=0)
    # NOTE(review): bins_per_octave is pinned to n_chroma because older librosa
    # defaulted to that, while newer releases default to 36 (not a multiple of
    # 40) — confirm against the installed version.
    chroma_cq = np.mean(
        librosa.feature.chroma_cqt(
            y=y, sr=sr, n_chroma=n_features, bins_per_octave=n_features).T, axis=0)
    chroma_cens = np.mean(
        librosa.feature.chroma_cens(
            y=y, sr=sr, n_chroma=n_features, bins_per_octave=n_features).T, axis=0)

    # NOTE(review): vstack yields (5, n_features); the reshape keeps row-major
    # order, so it is not a transpose. The layout is kept as-is so that newly
    # generated data matches previously exported CSV files.
    stacked = np.vstack((mfccs, melspectrogram, chroma_stft, chroma_cq, chroma_cens))
    return np.reshape(stacked, (n_features, 5))


x_train, x_test = [], []
y_train, y_test = [], []

# Iterate over the three needed columns directly instead of materialising a
# full row Series three times per clip with data.iloc[i].
clips = zip(data['fold'], data['slice_file_name'], data['classID'])
for fold, file, label in tqdm(clips, total=len(data)):
    filename = path + str(fold) + '/' + file
    y, sr = librosa.load(filename)
    features = extract_features(y, sr)

    if int(fold) != TEST_FOLD:
        x_train.append(features)
        y_train.append(label)
    else:
        x_test.append(features)
        y_test.append(label)

100%|██████████| 8732/8732 [1:33:09<00:00,  1.81it/s]


In [0]:
# Total number of processed clips across both splits; should equal len(data).
len(x_train) + len(x_test)

8732

In [0]:
# Number of rows in the metadata table, for comparison with the split sizes.
len(data)

8732

In [0]:
import numpy as np

# Turn the four Python lists collected by the extraction loop into numpy arrays.
x_train, x_test, y_train, y_test = (
    np.array(part) for part in (x_train, x_test, y_train, y_test)
)

In [0]:
# Shapes of the feature arrays and label vectors for the train and test splits.
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((7895, 40, 5), (837, 40, 5), (7895,), (837,))

In [0]:
# Flatten every sample's 2-D feature grid into a single row so that each split
# becomes a plain 2-D table that can be written out as CSV.
x_train_2d = x_train.reshape(x_train.shape[0], x_train.shape[1] * x_train.shape[2])
x_test_2d = x_test.reshape(x_test.shape[0], x_test.shape[1] * x_test.shape[2])
x_train_2d.shape, x_test_2d.shape

((7895, 200), (837, 200))

In [0]:
# Write the flattened features and the labels to comma-separated text files,
# one file per array, using np.savetxt's default number format.
outputs = (
    ("urban_train_data.csv", x_train_2d),
    ("urban_test_data.csv", x_test_2d),
    ("urban_train_labels.csv", y_train),
    ("urban_test_labels.csv", y_test),
)
for csv_name, array in outputs:
    np.savetxt(csv_name, array, delimiter=",")

In [0]:
# Fetch the four exported CSV files from the Colab runtime to the local machine.
from google.colab import files

for csv_name in (
    "urban_train_data.csv",
    "urban_test_data.csv",
    "urban_train_labels.csv",
    "urban_test_labels.csv",
):
    files.download(csv_name)

In [0]:
# Number of labels in the held-out test split.
y_test.shape

(837,)