In [20]:
import os, glob, sys
import numpy as np
import json

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import RMSprop
from keras.utils import np_utils

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

MFCC_FEATURES_N = 12
CHROMA_N = 12

GENRES = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal',
          'pop', 'reggae', 'rock']
nb_classes = 10

In [21]:
def read_file(filename):
    with open(filename) as data_file:    
        data = json.load(data_file)
    return data

In [22]:
def json_to_df(filename):
    json_data = read_file(filename)
    song_dataset = list()
    
    for song_data in json_data:
        data_dict = dict()
        del song_data['song_name']
        
        for feature in song_data:
            if isinstance(song_data[feature],list):
                values = song_data[feature]
                for i in range(len(values)):
                    data_dict[((feature+"{}").format(i))] = values[i]
            else:
                data_dict[feature] = song_data[feature]
    
        data_dict['genre'] = GENRES.index(data_dict['genre'])
        
        song_dataset.append(data_dict.values())
        
    return np.array(song_dataset)

In [23]:
# read the data
df = json_to_df("../feature_data.json")
print df.shape

# shuffle the data
np.random.shuffle(df)

# split into input and corresponding labels
x_data = df[:, 1:]

y_data = df[:, 0]

# convert class vectors to binary class matrices
y_data = np_utils.to_categorical(y_data, nb_classes)

print x_data.shape
print y_data.shape

(10000, 30)
(10000, 29)
(10000, 10)


In [24]:
# divide into test and train data set
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=42)

scaler = preprocessing.StandardScaler().fit(x_train)

x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

print x_train.shape
print x_test.shape

print y_train.shape
print y_test.shape

(7000, 29)
(3000, 29)
(7000, 10)
(3000, 10)


In [25]:
def get_keras_model():
    model = Sequential()
    model.add(Dense(output_dim=35, input_dim=29))
    model.add(Activation('relu'))
    model.add(Dense(output_dim=10))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=RMSprop(),metrics=['accuracy'])
    
    return model

In [None]:
model = get_keras_model()

history = model.fit(x_train, y_train, nb_epoch=50, batch_size=32)
loss_and_metrics = model.evaluate(x_test, y_test, batch_size=32)

print loss_and_metrics

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

clf = RandomForestClassifier(n_estimators=100, n_jobs=2)
clf = clf.fit(x_train, y_train)

y_pred = clf.predict(x_test)

score = accuracy_score(y_test, y_pred)

print score


0.329
