# Preparing Dataset

In [1]:
import os
import json
import librosa 
import math
import numpy as np
from sklearn import model_selection
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [2]:
# Input/output locations: the dataset root that is walked for audio files,
# and the JSON file the extracted features are written to / read back from.
data_path, json_path = "genres", "json_data.json"

In [3]:
# Audio constants used by the feature extraction below.
SR = 22050                  # sample rate handed to librosa when loading / analysing audio
dur = 30                    # expected clip length; multiplied by SR to get a sample count
total_samples = dur * SR    # number of samples a full-length clip is expected to contain

In [4]:
def reverse_str(s):
    """Return the characters of ``s`` in reverse order as a new string.

    Uses ``reversed`` + ``str.join`` instead of repeatedly prepending to a
    string, which avoids building a fresh intermediate string for every
    character. An empty input yields an empty string.
    """
    return "".join(reversed(s))

In [5]:
def prepare_data_mfcc(data_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=10):
    """Extract per-segment MFCCs for every audio file under ``data_path`` and save them as JSON.

    ``data_path`` is walked with ``os.walk``. The root itself is skipped; every
    other directory visited is treated as one genre whose name is the last
    component of its path. Each file in a genre directory is loaded with
    librosa at the module-level sample rate ``SR``, cut into ``num_segments``
    equal chunks of ``total_samples // num_segments`` samples (``total_samples``
    is also module-level), and an MFCC matrix is computed for each chunk.
    Only chunks that produce exactly ``ceil(samples_per_segment / hop_length)``
    frames are kept, so every stored matrix has the same number of rows.

    Parameters
    ----------
    data_path : str
        Root directory containing one sub-directory per genre.
    json_path : str
        Destination file; it is overwritten.
    n_mfcc : int
        Number of MFCC coefficients per frame.
    n_fft : int
        FFT window size passed to librosa.
    hop_length : int
        Hop length (in samples) passed to librosa.
    num_segments : int
        Number of equal chunks each track is divided into.

    Returns
    -------
    None
        The result is written to ``json_path`` as a dict with the keys
        ``"mapping"`` (genre name per label index), ``"mfcc"`` (one
        frames x n_mfcc nested list per kept segment) and ``"labels"``
        (index into ``"mapping"`` for each entry of ``"mfcc"``).
    """
    data = {
        "mapping": [],  # genre name for each label index
        "mfcc": [],
        "labels": [],
    }

    for dirpath, dirnames, filenames in os.walk(data_path):
        # Sorting in place makes os.walk descend in a stable order, so the
        # genre -> label mapping does not depend on filesystem listing order.
        dirnames.sort()

        if dirpath == data_path:
            continue

        # Genre name = last path component. os.path handles both "/" and "\\",
        # unlike scanning the path for a hard-coded backslash.
        genre = os.path.basename(os.path.normpath(dirpath))
        print(genre)
        data["mapping"].append(genre)
        label = len(data["mapping"]) - 1

        for file in sorted(filenames):
            file_path = os.path.join(dirpath, file)
            # sr / y are passed by keyword: this works on older librosa and is
            # required by newer releases, where these arguments are keyword-only.
            signal, _ = librosa.load(file_path, sr=SR)

            num_samples_per_segment = total_samples // num_segments
            num_mfcc_vec_seg = math.ceil(num_samples_per_segment / hop_length)

            for segment in range(num_segments):
                start_seg = num_samples_per_segment * segment
                end_seg = start_seg + num_samples_per_segment
                mfcc_seg = librosa.feature.mfcc(
                    y=signal[start_seg:end_seg],
                    sr=SR,
                    n_fft=n_fft,
                    n_mfcc=n_mfcc,
                    hop_length=hop_length,
                )
                mfcc_seg = mfcc_seg.T  # one row per frame

                # Store the segment only if it has the expected number of
                # frames (shorter-than-expected audio yields fewer).
                if len(mfcc_seg) == num_mfcc_vec_seg:
                    data["mfcc"].append(mfcc_seg.tolist())
                    data["labels"].append(label)

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

In [6]:
# Run the extraction with the default MFCC settings; prints each genre as it is processed.
prepare_data_mfcc(data_path=data_path, json_path=json_path)

blues
classical
country
disco
hiphop
jazz
metal
pop
reggae
rock


In [7]:
# Read the extracted features back from disk. The context manager closes the
# file handle once the JSON has been parsed (a bare open() left it open).
with open(json_path) as json_file:
    data=json.load(json_file)

In [8]:
# Sanity check of the loaded dataset: number of entries in each field,
# followed by the number of frames in the first MFCC matrix.

for field in ("mapping","mfcc","labels"):
    print(len(data[field]))
print(len(data["mfcc"][0]))

10
9996
9996
130


In [9]:
# Train/test split (80% / 20%).
# random_state is fixed so the same samples land in each split on every run.

X_Train,X_Test,Y_Train,Y_Test=model_selection.train_test_split(
    data["mfcc"],data["labels"],test_size=0.2,random_state=42
)
print("Size of Train_Data=",len(X_Train))
print("Size of Test_Data=",len(X_Test))

Size of Train_Data= 7996
Size of Test_Data= 2000


In [10]:
# The CNN expects a trailing depth (channel) axis, so each 3-D feature array
# gets one appended. The axis is added only when it is missing: no sample
# count or feature size is hard-coded, and re-running the cell is harmless.

np_arr_train=np.array(X_Train)
print(np_arr_train.shape)
if np_arr_train.ndim==3:
    np_arr_train=np_arr_train[...,np.newaxis]

np_arr_test=np.array(X_Test)
print(np_arr_test.shape)

if np_arr_test.ndim==3:
    np_arr_test=np_arr_test[...,np.newaxis]
print(np_arr_test.shape)

X_Train=np_arr_train
X_Test=np_arr_test

(7996, 130, 13)
(2000, 130, 13)
(2000, 130, 13, 1)


# Building the Architecture of the model

In [11]:
def build_architecture(data_shape, num_classes=10):
    """Build the (uncompiled) convolutional network used for classification.

    Layer stack, in order: two blocks of ``Conv2D(32, 3x3, relu)`` followed by
    ``MaxPool2D(3x3, stride 2, same padding)``, then ``Flatten``, a 32-unit
    relu ``Dense`` layer, ``Dropout(0.3)`` and a softmax output layer.

    Parameters
    ----------
    data_shape : tuple
        Shape of a single input sample; passed as ``input_shape`` to the first
        convolution, which is configured as ``channels_last``.
    num_classes : int, optional
        Number of units in the softmax output layer. Defaults to 10, the value
        that was previously hard-coded.

    Returns
    -------
    The assembled ``keras.Sequential`` model; compiling is left to the caller.
    """
    model=keras.Sequential()
    print(data_shape)

    # 1st convolutional block
    model.add(keras.layers.Conv2D(32,(3,3),activation="relu",input_shape=data_shape,data_format="channels_last"))
    model.add(keras.layers.MaxPool2D((3,3),strides=(2,2),padding="same"))

    # 2nd convolutional block
    model.add(keras.layers.Conv2D(32,(3,3),activation="relu",data_format="channels_last"))
    model.add(keras.layers.MaxPool2D((3,3),strides=(2,2),padding="same"))

    # Flatten the feature maps for the dense head
    model.add(keras.layers.Flatten())

    # Dense layer
    model.add(keras.layers.Dense(32,activation="relu"))

    # Dropout for regularisation
    model.add(keras.layers.Dropout(0.3))

    # Output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes,activation="softmax"))

    return model

In [12]:
# Take the per-sample input shape from the prepared training array (everything
# after the sample axis) instead of repeating its dimensions by hand.
model=build_architecture(X_Train.shape[1:])

(130, 13, 1)
Instructions for updating:
If using Keras pass *_constraint arguments to layers.



# Compiling the Model

In [13]:

optimizer = keras.optimizers.Adam(learning_rate=1e-4)

# Sparse categorical cross-entropy is used because it accepts integer class
# labels directly, so the labels do not have to be one-hot encoded.

model.compile(
    optimizer=optimizer,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [14]:
# Carve a validation set (20%) out of the training data.
# random_state is fixed so the split is the same on every run.
# NOTE: X_Train / Y_Train are overwritten here, so running this cell a second
# time splits the already-reduced training set again.

X_Train,X_Valid,Y_Train,Y_Valid=model_selection.train_test_split(
    X_Train,Y_Train,test_size=0.2,random_state=42
)

In [15]:
# Training the CNN. Concentrate on the validation accuracy while it runs.
# The label lists are converted to NumPy arrays: Keras documents targets as
# arrays/tensors, and newer releases may reject plain Python lists of ints.

model.fit(
    X_Train,
    np.asarray(Y_Train),
    validation_data=(X_Valid,np.asarray(Y_Valid)),
    batch_size=64,
    epochs=100,
)


Train on 6396 samples, validate on 1600 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100


Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x1c2b67a8518>

# Predicting on Test Data

In [16]:
Y_Pred_onehot=model.predict(X_Test)

# Each prediction is an array with one score per class, so the predicted label
# is the index of the largest score. np.argmax works for any number of classes
# and, like a strict ">" scan, returns the first index when scores tie.

Y_Pred=np.argmax(Y_Pred_onehot,axis=1).tolist()


In [22]:
# Spot-check one test sample: translate its true and predicted label indices
# back into genre names through the stored mapping.
sample_idx=500
actual_genre=data["mapping"][Y_Test[sample_idx]]
print("The actual genre is",actual_genre)
predicted_genre=data["mapping"][Y_Pred[sample_idx]]
print("The predicted genre is",predicted_genre)

The actual genre is disco
The predicted genre is disco


In [18]:
# Loss and accuracy on the held-out test set. The label list is passed as a
# NumPy array, matching the array/tensor targets Keras documents for evaluate().
test_error,test_accuracy=model.evaluate(X_Test,np.asarray(Y_Test))
print("The error and accuracy of the model on test set is",test_error,"and",test_accuracy)

The error and accuracy of the model on test set is 1.145840247631073 and 0.6315000057220459
