In [1]:
#File name: SmallDenseNetwork 
#Names of all members of the group: James Kaufman, Eamon Kostopulos, 
#Ahmed Mohammed, Sebastian Cortes, and Jonathan Goral 
#Project name and description: DL Music Classification - A neural network that classifies music genres
#Any special execution instruction: librosa, numpy, pydub, os, pickle, pylab, ffmpeg, glob, matplotlib.pyplot
#Date: 12/4/18

import librosa
import numpy as np

from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.utils.np_utils import to_categorical
from keras.preprocessing import sequence
from keras import initializers

import librosa.feature
import librosa.display
import os
import pickle

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#code taken from https://blog.manash.me/building-a-dead-simple-word-recognition-engine-using-convnet-in-keras-25e72c19c12b
def encodeGenres(genrelist):
    """One-hot encode a sequence of genre labels.

    Each distinct label is numbered by its position in the sorted array of
    unique labels returned by np.unique, and those integer ids are then
    expanded with to_categorical.

    genrelist: sequence of genre labels, one per sample.
    Returns: the to_categorical output, with one row per entry of genrelist
    and one column per distinct label.
    """
    distinct_genres, label_ids = np.unique(genrelist, return_inverse=True)
    #to_categorical is handed int32 class ids
    label_ids = np.asarray(label_ids, dtype=np.int32)
    class_count = len(distinct_genres)
    return to_categorical(label_ids, class_count)

In [3]:
#SpecPickle is the name of the folder that we stored all of our pickle files in.
#Only ".pkl" entries are used so that stray files in the folder are not counted
#as genres, and the names are sorted because os.listdir gives no ordering guarantee.
genre_files = sorted(name for name in os.listdir("SpecPickle") if name.endswith(".pkl"))
if not genre_files:
    raise FileNotFoundError("No .pkl genre files found in SpecPickle")
x_train = []
y_train = []
x_test = []
y_test = []

#load the data while evenly splitting each genre into test and train

train_ratio = .8
for file in genre_files:
    #take off the ".pkl" from the end of the file name
    genre = os.path.splitext(file)[0]
    #NOTE: pickle.load can execute arbitrary code, so only load pickle files
    #that were produced by our own preprocessing.
    with open(os.path.join("SpecPickle", file), 'rb') as f:
        temp = pickle.load(f)
    train_count = int(len(temp)*train_ratio)
    #split the data that we are adding in according to the training ratio we picked:
    #the first train_count snippets of each genre go to train, the rest to test
    for count, data in enumerate(temp):
        if count < train_count:
            x_train.append(data)
            y_train.append(genre)
        else:
            x_test.append(data)
            y_test.append(genre)

#convert the lists to numpy arrays
x_train = np.asarray(x_train)
x_test = np.asarray(x_test)

#fail early with a readable message if a split is empty or the snippets do not
#all share one 2D shape (the reshape below needs a 3D array)
for split_name, split_data in (("x_train", x_train), ("x_test", x_test)):
    if split_data.ndim != 3:
        raise ValueError(split_name + " must have shape (samples, rows, cols) but has shape "
                         + str(split_data.shape))

#reshape to allow it to be used on the 2D CNN (adds a trailing channel axis of size 1)
x_test = x_test.reshape(x_test.shape + (1,))
x_train = x_train.reshape(x_train.shape + (1,))

#This all takes care of making a mapping that shows the labels that the network has
#given each genre.  We need this later to be able to test the network.
#The label of a genre is its position in the sorted list of genre names, which is
#the same numbering np.unique would produce.  The mapping is derived from the data
#itself, so it does not depend on how many genres or snippets per genre there are.
genre_names = sorted(set(y_train) | set(y_test))
num_genre = len(genre_names)
genreMapping = {name: label for label, name in enumerate(genre_names)}

#inverse lookup (label -> genre name), kept for any later cell that uses listGenres
listGenres = {label: name for name, label in genreMapping.items()}

#encode train and test with the SAME mapping so that a given column means the same
#genre in both arrays, even if a genre is missing from one of the splits
y_train = to_categorical([genreMapping[name] for name in y_train], num_genre)
y_test = to_categorical([genreMapping[name] for name in y_test], num_genre)

#save this mapping
with open("mapping.pkl", 'wb') as f:
    pickle.dump(genreMapping, f)

print("Shape of x_test: " + str(x_test.shape))
print("Shape of y_test: " + str(y_test.shape))
print("Shape of x_train: " + str(x_train.shape))
print("Shape of y_train: " + str(y_train.shape))



Pop
Instrumental
Hip-Hop
Rock
International
Folk
Electronic
[0. 0. 0. 0. 0. 1. 0.]
[0. 0. 0. 1. 0. 0. 0.]
[0. 0. 1. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 1.]
[0. 0. 0. 0. 1. 0. 0.]
[0. 1. 0. 0. 0. 0. 0.]
[1. 0. 0. 0. 0. 0. 0.]
Shape of x_test: (8395, 128, 217, 1)
Shape of y_test: (8395, 7)
Shape of x_train: (33578, 128, 217, 1)
Shape of y_train: (33578, 7)


In [6]:
#Build the convolutional network: three Conv2D layers, each followed by max
#pooling, then a flattened dense layer with dropout and a softmax output with
#one unit per genre.
#Activation functions and weight initialization were inspired by other projects that had worked well
#NOTE(review): a fourth Conv2D(512) layer used to sit between the last two pooling
#layers and is currently disabled, so those two MaxPooling2D layers run back-to-back.
conv_options = dict(kernel_size=(2, 2), activation='elu', kernel_initializer='glorot_uniform')
model = Sequential([
    Conv2D(64, input_shape=x_train.shape[1:4], **conv_options),
    MaxPooling2D(pool_size=(2, 3)),
    Conv2D(128, **conv_options),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(256, **conv_options),
    MaxPooling2D(pool_size=(2, 2)),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='elu'),
    Dropout(0.4),
    Dense(num_genre, activation='softmax'),
])

In [7]:
#Configure training: RMSprop optimizer, categorical cross-entropy loss for the
#one-hot genre labels, and accuracy reported as a metric.
model.compile(optimizer = 'RMSprop', loss = 'categorical_crossentropy', metrics = ['accuracy'])
#summary() prints the layer table itself and returns None, so it is not wrapped
#in print() (that would add a stray "None" line to the output)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 127, 216, 64)      320       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 63, 72, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 62, 71, 128)       32896     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 31, 35, 128)       0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 30, 34, 256)       131328    
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 15, 17, 256)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 14, 16, 512)       524800    
__________

In [None]:
#Train for 8 epochs only: with this much data the network began to overfit
#after 6-8 epochs.  The test split is used as validation data during training
#and is then scored again below.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=128,
    epochs=8,
    verbose=2,
)
#scores holds the values returned by evaluate; index 1 is reported as accuracy
scores = model.evaluate(x_test, y_test, verbose=0)
accuracy_percent = scores[1]*100
print("Accuracy: %.2f%%" % accuracy_percent)

Train on 38373 samples, validate on 9594 samples
Epoch 1/30
 3584/38373 [=>............................] - ETA: 30:03 - loss: 13.7554 - acc: 0.1342

In [None]:
#Write the trained model to "5genreweights.h5" in the current working directory.
#NOTE(review): the file name says 5 genres, but the output layer size comes from
#num_genre -- rename the file if the number of genres differs.
model.save("5genreweights.h5")