# Classification of musical rhythms of ecuador using Deep Learning
### 1 Data Exploration and Visualisation

In [1]:
# Load imports

import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt

In [7]:
import pandas as pd
metadata = pd.read_csv('audios/dataset.csv',delimiter=';',encoding = 'unicode_escape')
metadata.head()

Unnamed: 0,Id,Sonido,Clase,Clase_Id
0,1,1.wav,Pasillo,1
1,2,2.wav,Pasillo,1
2,3,3.wav,Pasillo,1
3,4,4.wav,Pasillo,1
4,5,5.wav,Pasillo,1


In [8]:
print(metadata.Clase.value_counts())

Capishca      60
Tonada        60
Danzante      60
Pasillo       60
Alza          60
SanJuanito    60
Yumbo         60
Albazo        60
Pasacalle     60
Name: Clase, dtype: int64


In [12]:
import struct

class WavFileHelper():
    
    def read_file_properties(self, filename):

        wave_file = open(filename,"rb")
        
        riff = wave_file.read(12)
        fmt = wave_file.read(36)
        
        num_channels_string = fmt[10:12]
        num_channels = struct.unpack('<H', num_channels_string)[0]

        sample_rate_string = fmt[12:16]
        sample_rate = struct.unpack("<I",sample_rate_string)[0]
        
        bit_depth_string = fmt[22:24]
        bit_depth = struct.unpack("<H",bit_depth_string)[0]

        return (num_channels, sample_rate, bit_depth)

In [13]:
# Load various imports 
import pandas as pd
import os
import librosa
import librosa.display

from wavfilehelper import WavFileHelper
wavfilehelper = WavFileHelper()
audiodata = []
for index, row in metadata.iterrows():
    
    file_name = os.path.join(os.path.abspath('audios/'),str(row["Sonido"]))
    data = wavfilehelper.read_file_properties(file_name)
    audiodata.append(data)

# Convert into a Panda dataframe
audiodf = pd.DataFrame(audiodata, columns=['num_channels','sample_rate','bit_depth'])

In [14]:
# num of channels 

print(audiodf.num_channels.value_counts(normalize=True))

2    0.97037
1    0.02963
Name: num_channels, dtype: float64


In [15]:
# sample rates 

print(audiodf.sample_rate.value_counts(normalize=True))

48000    0.998148
44100    0.001852
Name: sample_rate, dtype: float64


In [16]:
# bit depth

print(audiodf.bit_depth.value_counts(normalize=True))

16    1.0
Name: bit_depth, dtype: float64


### 2 Data Preprocessing

In [22]:
import librosa 
from scipy.io import wavfile as wav
import numpy as np

filename = 'audios/38.wav' 

librosa_audio, librosa_sample_rate = librosa.load(filename,mono=True) 
scipy_sample_rate, scipy_audio = wav.read(filename) 

print('Original sample rate:', scipy_sample_rate) 
print('Librosa sample rate:', librosa_sample_rate)

Original sample rate: 48000
Librosa sample rate: 22050


In [23]:
def extract_features(file_name):
   
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T,axis=0)
        
    except Exception as e:
        print("Error encountered while parsing file: ", file)
        return None 
     
    return mfccsscaled

In [24]:
# Load various imports 
import pandas as pd
import os
import librosa

# Set the path to the full UrbanSound dataset 

metadata = pd.read_csv('audios/dataset.csv',delimiter=';',encoding = 'unicode_escape')

features = []

# Iterate through each sound file and extract the features 
for index, row in metadata.iterrows():
    
    
    file_name = os.path.join(os.path.abspath('audios/'),str(row["Sonido"]))
    class_label = row["Clase"]
    data = extract_features(file_name)
    
    features.append([data, class_label])

# Convert into a Panda dataframe 
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(featuresdf), ' files')

Finished feature extraction from  540  files


In [26]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Convert features and corresponding classification labels into numpy arrays
X = np.array(featuresdf.feature.tolist())
y = np.array(featuresdf.class_label.tolist())

# Encode the classification labels
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

In [27]:
# split the dataset 
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)

In [28]:
### store the preprocessed data for use in the next notebook

%store x_train 
%store x_test 
%store y_train 
%store y_test 
%store yy 
%store le

Stored 'x_train' (ndarray)
Stored 'x_test' (ndarray)
Stored 'y_train' (ndarray)
Stored 'y_test' (ndarray)
Stored 'yy' (ndarray)
Stored 'le' (LabelEncoder)


### 3 Model Training and Evaluation

In [38]:
import numpy as np
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, Activation, Flatten
#from keras.models import Sequential
#from keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D
from tensorflow.python.keras.optimizers import Adam
from tensorflow.python.keras.utils import np_utils
from sklearn import metrics 





num_labels = yy.shape[1]
filter_size = 2

# Construct model 
model = Sequential()

model.add(Dense(256, input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [39]:
# Compile the model
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

In [40]:
# Display model architecture summary 
model.summary()

# Calculate pre-training accuracy 
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 256)               10496     
_________________________________________________________________
activation_6 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               65792     
_________________________________________________________________
activation_7 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 9)                

In [41]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 
num_epochs = 100
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/weights.best.basic_mlp.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

AttributeError: 'ModelCheckpoint' object has no attribute '_implements_train_batch_hooks'

In [37]:
# Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.09490741044282913
Testing Accuracy:  0.09259258955717087
