In [None]:
import json
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import load_model
from keras.callbacks import ModelCheckpoint   

# Song records for the notebook; starts empty and is replaced by loadSongList().
songs = []

# Genre classes. The position of an entry in this list is the class id used for
# the one-hot targets and model outputs; the first string of each entry is the
# label shown for that class (see getGenre). The remaining strings are
# presumably alias keywords for the same class - confirm against the code that
# built the dataset.
full_genres = [
    ['soundtrack'],
    ['jazz'],
    ['classical'],
    ['metal'],
    ['indietronica', 'wave', 'synth'],
    ['downtempo', 'trip hop'],
    ['edm', 'electronica', 'idm', 'dubstep', 'techno'],
    ['house'],
    ['r&b', 'rnb', 'soul'],
    ['rock'],
    ['hip hop', 'rap', 'trap', 'hiphop'],
    ['pop'],
]

def loadSongList(filename):
    """Load a JSON document from ``filename`` into the module-level ``songs``.

    The file is decoded explicitly as UTF-8 rather than with the platform
    default encoding, so non-ASCII text loads the same way on every OS.

    Args:
        filename: Path of a text file holding a single JSON document.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        ValueError: If the content is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
    """
    global songs
    with open(filename, 'r', encoding='utf-8') as myfile:
        # Line breaks are removed before parsing, exactly as before, so files
        # this loader previously accepted keep parsing to the same value.
        data = myfile.read().replace('\n', '')
    songs = json.loads(data)

def loadDataFrame(filename):
    """Unpickle the object stored in ``filename`` into the module-level ``df``.

    Loading a pickle can execute arbitrary code, so only use files that you
    created yourself or otherwise trust.
    """
    global df
    frame = pd.read_pickle(filename)
    df = frame

def loadModel(filename):
    """Restore a saved model from ``filename`` into the module-level ``model``.

    Whatever ``model`` referred to before the call is replaced.
    """
    global model
    restored = load_model(filename)
    model = restored

def saveModel(filename):
    """Write the module-level ``model`` to ``filename`` using its ``save`` method."""
    current_model = model
    current_model.save(filename)


In [None]:
loadSongList('sample_song_list.txt')
loadDataFrame('sample_dataframe.pkl')

# Preview the first songs. Slicing (instead of indexing positions 0..9) also
# works when the list holds fewer than ten entries.
for song in songs[:10]:
    print(song)

# Sizes of both sources, to compare the song list against the feature table.
print(len(songs))
print(len(df))

# Kept as the last expression of the cell so the notebook actually renders the
# preview; a bare `df[:10]` in the middle of a cell displays nothing.
df[:10]

In [None]:
# Divisors applied to the raw columns. NOTE(review): presumably chosen to bring
# loudness and tempo close to a 0-1 range like the slider inputs used later in
# the notebook - confirm against the dataset.
LOUDNESS_DIVISOR = -20
TEMPO_DIVISOR = 200

# assign() returns a new frame with the two columns replaced in their original
# positions. The previous approach assigned columns on the slice `df[:]`, which
# triggers SettingWithCopyWarning and, depending on the pandas version, may
# write through to `df` so that re-running the cell rescales the data again.
processed_data = df.assign(
    loudness=df['loudness'] / LOUDNESS_DIVISOR,
    tempo=df['tempo'] / TEMPO_DIVISOR,
)
processed_data[:10]

In [None]:
# Share of rows used for training and the seed that makes the split repeatable
# across kernel restarts.
TRAIN_FRACTION = 0.9
SPLIT_SEED = 42

# A private generator keeps the split reproducible without touching numpy's
# global random state.
split_rng = np.random.RandomState(SPLIT_SEED)
sample = split_rng.choice(processed_data.index,
                          size=int(len(processed_data) * TRAIN_FRACTION),
                          replace=False)

# `sample` holds index *labels*, so both halves are selected by label. The
# earlier `.iloc[sample]` was positional and only agreed with the label-based
# `.drop(sample)` when the index happened to be 0..n-1.
train_data = processed_data.loc[sample]
test_data = processed_data.drop(sample)

print("Number of training samples is", len(train_data))
print("Number of testing samples is", len(test_data))

In [None]:
# Split each frame into model inputs and one-hot encoded genre targets.
# Both are converted to numpy arrays, which is what the Keras model is fed.
def _inputs_and_targets(frame):
    """Return ``(inputs, one_hot_targets)`` for a frame that has a 'genre' column."""
    inputs = np.array(frame.drop('genre', axis=1))
    one_hot = np.array(keras.utils.to_categorical(frame['genre'], len(full_genres)))
    return inputs, one_hot


features, targets = _inputs_and_targets(train_data)
features_test, targets_test = _inputs_and_targets(test_data)

print(features[:10])
print(targets[:10])

In [None]:
# Building the model: five Dense(1000, relu) + Dropout(0.2) stages followed by
# a softmax layer with one unit per genre class.
model = Sequential()

# Only the first layer declares the input width (9 values per sample).
model.add(Dense(1000, activation='relu', input_shape=(9,)))
model.add(Dropout(.2))

# The remaining four hidden stages are identical.
for hidden_stage in range(4):
    model.add(Dense(1000, activation='relu'))
    model.add(Dropout(.2))

model.add(Dense(len(full_genres), activation='softmax'))

# Compiling the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Training the model

# Checkpoint callback that would keep only the best model in 'model.best.h5'.
# It is created here but not passed to fit(); add callbacks=[checkpointer] to
# the options below to activate it.
checkpointer = ModelCheckpoint(filepath='model.best.h5', verbose=1, save_best_only=True)

# 20% of the training rows are held back by Keras for validation.
fit_options = dict(
    batch_size=3000,
    epochs=200,
    validation_split=0.2,
    verbose=1,
    shuffle=True,
)
model.fit(features, targets, **fit_options)

In [None]:
# Uncomment the next line (and comment out the load) to persist the current
# model instead of restoring a saved one.
# saveModel('sample_model.h5')
# loadModel rebinds the global `model`, so the network built in the earlier
# cell is replaced by the one stored in the file.
loadModel('sample_model.h5')

In [None]:
# Evaluating the model on the training and testing set
evaluation_runs = (
    ("\n Training Accuracy:", features, targets),
    ("\n Testing Accuracy:", features_test, targets_test),
)
for heading, eval_inputs, eval_targets in evaluation_runs:
    score = model.evaluate(eval_inputs, eval_targets)
    # score[0] is the loss; score[1] is the first compiled metric
    # (accuracy for the model compiled in this notebook).
    print(heading, score[1])

In [None]:
# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)
# Collects one row per test song for the results table.
result_data=[]

def getGenre(index):
    """Map a class id to its display label.

    Returns the first name of ``full_genres[index]``; the id equal to
    ``len(full_genres)`` (one past the last known class) maps to ``'other'``.
    """
    return 'other' if index == len(full_genres) else full_genres[index][0]

# Score the whole test set with one batched call instead of invoking
# model.predict() once per row.
predictions = model.predict(features_test)

result_data = []
for row_features, frame_label, probabilities in zip(
        features_test, test_data.index.values, predictions):
    # NOTE(review): the +1 offset between the DataFrame label and the position
    # in `songs` is carried over unchanged - presumably `songs` has one extra
    # leading entry; confirm against the file that produced both.
    song = songs[frame_label + 1]

    # Two most probable classes. The sort is stable on the negated scores, so
    # ties resolve to the lowest class id, matching a left-to-right scan that
    # only replaces a guess on a strictly greater probability.
    ranked = np.argsort(-probabilities, kind='mergesort')
    first, second = int(ranked[0]), int(ranked[1])

    result_data.append(
        [song[0], song[2]]
        + [round(value, 3) for value in row_features]
        + [getGenre(first), getGenre(second), getGenre(song[-1][0])]
    )

column_names_results = ['name', 'artist', 'danceability', 'energy', 'loudness', 'speechiness',
                        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
                        '1st guess', '2nd guess', 'actual']

# Array form of the same rows, still defined for any code that reads it.
result_array = np.array(result_data)

# Built from the row list rather than from `result_array`: converting mixed
# text/number rows to a numpy array turns every value into a string, which
# would leave the feature columns non-numeric.
rf = pd.DataFrame(data=result_data, columns=column_names_results)
rf[:50]

In [None]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

def f(danceability, energy,
      loudness, speechiness,
      acousticness, instrumentalness,
      liveness, valence,
      tempo):
    """Predict the genre for one set of feature values and show the top two guesses.

    The nine arguments are passed to the global ``model`` as a single-row
    batch, in the order of the signature. Both guesses are shown with
    ``display``; the label of the most probable class is returned.
    """
    feature_row = [danceability, energy, loudness, speechiness, acousticness,
                   instrumentalness, liveness, valence, tempo]
    probabilities = model.predict(np.array([feature_row]))[0]

    # Single pass keeping the best and second-best class seen so far. The
    # starting scores are negative, so any non-negative probability replaces them.
    best_class, best_score = -1, -1
    runner_class, runner_score = -2, -2
    for class_id, score in enumerate(probabilities):
        if score > best_score:
            # The previous leader drops to second place.
            runner_class, runner_score = best_class, best_score
            best_class, best_score = class_id, score
        elif score > runner_score:
            runner_class, runner_score = class_id, score

    first_guess = getGenre(best_class)
    second_guess = getGenre(runner_class)

    display(first_guess, second_guess)
    return first_guess

# One slider per argument of f, each running from 0 to 1 in steps of 0.1.
slider_names = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
                'instrumentalness', 'liveness', 'valence', 'tempo']
w = interactive(f, **{name: (0, 1, 0.1) for name in slider_names})
display(w)
