In [185]:
from __future__ import print_function
%matplotlib inline
import copy
import pandas as pd
import numpy as np
import librosa
import seaborn as sb
import matplotlib.pyplot as plt
import itertools
import re
import random
import gc
from os import listdir
from os.path import isfile, join
from numpy import median, diff
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, BatchNormalization
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

In [186]:
# Number of consecutive beat rows (the current row plus the rows before it) whose
# features are concatenated into a single sample in get_features_for_song.
samples_back_included = 8
# Width of the model's softmax output layer and of the one-hot encoded labels.
num_classes = 5
# Length of the zero vector substituted for beat rows that fall before the start of a song.
num_features = 40
# Windowed beat features plus the 4 position values appended to every sample.
num_features_total = (num_features * samples_back_included) + 4
# Names of the files present in data/ at the time this cell runs; used to check
# whether a song's saved CSVs exist before trying to read them.
save_files = listdir('data')

def get_features_for_index(beat_features, notes, index):
    """Return the feature row at `index`, or zero padding for a negative index.

    beat_features: indexable collection of per-beat feature rows.
    notes: unused here; kept so the signature stays the same for callers.
    index: row position; negative values mean "before the start of the song".

    Returns beat_features[index], or a list of num_features zeros when index < 0.
    """
    # Negative positions must not wrap around to the end of the song, so they are
    # answered with an all-zero row instead of being passed to the indexer.
    return [0] * num_features if index < 0 else beat_features[index]

def get_class_for_index(notes, index):
    """Return the class label for the note row at `index`.

    The label is the number of '1' characters in the first column of the row.

    notes: indexable collection of rows whose first element is a string.
    index: row position; negative values mean "before the start of the song".

    Returns an int. A negative index yields 0, the same label as a row
    containing no '1' characters, so both branches return the same type.
    """
    if index < 0:
        return 0
    return notes[index][0].count('1')
    
# Divisors checked in order; the position of the first one that divides a beat
# index evenly is that beat's importance rank (0 is the first entry).
importance_rankings = [48, 24, 12, 16, 6, 8, 3, 4, 2, 1]
def get_beat_importance(index):
    """Return the position of the first entry in importance_rankings that divides `index`.

    Returns None if no entry divides `index` evenly. For integer input this
    cannot happen, because the list ends with 1.
    """
    matching_ranks = (
        rank
        for rank, divisor in enumerate(importance_rankings)
        if index % divisor == 0
    )
    return next(matching_ranks, None)

def get_features_for_song(key, is_full):
    """Build the feature matrix and class labels for one song.

    key: song identifier used to build the CSV file names under data/.
    is_full: when true, every row is kept; otherwise rows labelled 0 or 1 are
        randomly subsampled (see below).

    Returns (X, y) as numpy arrays. Both are empty when either of the song's
    CSV files is not listed in save_files.

    Each sample holds the features of the current row and of the previous
    samples_back_included - 1 rows (zero padded before the start of the song),
    followed by four position values: i % 48, get_beat_importance(i), i / 48
    and (num_notes - i) / 48.
    NOTE(review): 48 looks like the number of rows per measure - confirm.
    """
    X = []
    y = []
    features_file = '{0}_beat_features.csv'.format(key)
    notes_file = '{0}_notes.csv'.format(key)
    if features_file in save_files and notes_file in save_files:
        beat_features_rotated = pd.read_csv('data/{0}'.format(features_file)).values
        # Column '0' is read as a string so that the '1' characters can be counted.
        notes = pd.read_csv('data/{0}'.format(notes_file), converters={'0': lambda x: str(x)}).values
        # rot90 followed by flipud is a transpose: rows of the result are columns of the CSV.
        beat_features = np.flipud(np.rot90(np.array(beat_features_rotated)))
        num_notes = min(len(notes), len(beat_features))
        for i in range(num_notes):
            row_y = get_class_for_index(notes, i)
            # Subsampling when not is_full: a class 0 row is kept only when
            # randint(0, 20) draws 0, a class 1 row only when randint(0, 3)
            # draws 0. All other classes are always kept.
            if is_full or (not (row_y == 0 and random.randint(0, 20) != 0) and not (row_y == 1 and random.randint(0, 3) != 0)):
                features = [feature for j in range(samples_back_included) for feature in get_features_for_index(beat_features, notes, i - j)]
                features.append(i % 48)
                features.append(get_beat_importance(i))
                features.append(i / 48)
                # Remaining rows, in the same units as the previous feature. The
                # parentheses matter: without them only i is divided by 48.
                features.append((num_notes - i) / 48)
                X.append(features)
                y.append(row_y)
    return np.array(X), np.array(y)

def build_batch_generator(is_full=False):
    """Yield the (X, y) arrays of each song listed in data/songs_to_use.csv, one song at a time.

    is_full: forwarded to get_features_for_song; when false (the default, the
        same default build_training_data uses) rows are randomly subsampled.

    Songs whose CSV files are missing yield a pair of empty arrays.
    """
    songs_to_use = pd.read_csv('data/songs_to_use.csv').values
    for song_data in songs_to_use:
        # get_features_for_song requires is_full; omitting it raises TypeError.
        yield get_features_for_song(song_data[0], is_full)

# Total 243 songs
def build_training_data(songs_start, songs_end, is_full = False):
    """Collect samples and labels for a slice of the song list.

    songs_start, songs_end: slice bounds into the rows of data/songs_to_use.csv.
    is_full: forwarded to get_features_for_song; false means subsampled rows.

    Returns (X, y) as plain Python lists, one entry per sample, with the songs'
    samples concatenated in list order.
    """
    selected_songs = pd.read_csv('data/songs_to_use.csv').values[songs_start:songs_end]
    all_samples, all_labels = [], []
    for entry in selected_songs:
        # The first column of each row is the song key.
        samples, labels = get_features_for_song(entry[0], is_full)
        all_samples.extend(samples)
        all_labels.extend(labels)
    return all_samples, all_labels

In [133]:
# start with each input section mapping to one note
# train model for that (just a list comprehension on notes: contains a 1 maps to true)

# then move to bars eg section of 4 bars maps to output for each note
# error = probability of note being true vs was it really

# try bar + prev notes (home use weird dimensioned data?) to predict next notes

# try feeding in non structured data (bpm, position of time in song, song length, 
# things about feel of song (generated features))
# try using keras merge layer to add extra features

In [134]:
# make two separate classifiers, one for note importance, then another to take importance and previous notes/importance and 
# output the notes

# can either go back and include prev samples to get importance or actual notes

In [135]:
# Try building one network to output importance of beats, another to output song from that and prev notes
# Also start building sequentially not off of stepfile and test on new songs

In [136]:
# Subsampled data (is_full defaults to False): songs 0-199 of the song list for
# training, songs 200-242 for testing. Sampling is random, so the row counts
# vary between runs.
X_train, y_train = build_training_data(0, 200)
X_test, y_test = build_training_data(200, 243)

In [114]:
# Same song split as the subsampled sets, but with is_full=True so every row is kept.
X_train_full, y_train_full = build_training_data(0, 200, True)
X_test_full, y_test_full = build_training_data(200, 243, True)

In [115]:
# Compare the number of training samples after subsampling with the full count.
print (len(X_train))
print (len(X_train_full))

43491
540031


In [21]:
#clf_rf = RandomForestClassifier(n_estimators=20)
#clf_rf.fit(X, y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [137]:
# Classifier over the windowed beat features: three identical hidden stages
# (Dense 500 -> BatchNormalization -> tanh -> Dropout 0.5) followed by a
# num_classes-wide softmax output. Only the first Dense layer declares the
# input width.
beat_feature_model = Sequential([
    # Hidden stage 1
    Dense(500, input_dim=num_features_total, init='uniform'),
    BatchNormalization(),
    Activation('tanh'),
    Dropout(0.5),

    # Hidden stage 2
    Dense(500, init='uniform'),
    BatchNormalization(),
    Activation('tanh'),
    Dropout(0.5),

    # Hidden stage 3
    Dense(500, init='uniform'),
    BatchNormalization(),
    Activation('tanh'),
    Dropout(0.5),

    # Output stage (earlier alternative kept for reference: Dense(1, init='uniform'))
    Dense(num_classes, init='uniform'),
    BatchNormalization(),
    Activation('softmax'),
])

# Earlier alternative kept for reference: loss='mean_squared_error'
beat_feature_model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [138]:
# One-hot encode the integer labels: row i of the identity-matrix lookup is all
# zeros except for a 1 in column y_train[i].
y_one_hot = np.eye(num_classes)[np.asarray(y_train, dtype=int)]

beat_feature_model.fit(
    np.array(X_train),
    y_one_hot,
    nb_epoch=10,
    batch_size=64,
) #, class_weight=class_weight)

#y_one_hot = np.zeros((len(y_test_full), num_classes))
#y_one_hot[np.arange(len(y_test_full)), y_test_full] = 1

#beat_feature_model.fit(np.array(X_test_full), np.array(y_one_hot), nb_epoch=10, batch_size=64) #, class_weight=class_weight)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x12d2beba8>

In [187]:
def calculate_importance(row):
    """Collapse one five-element prediction row into a single importance score.

    row: indexable with at least five numeric entries. In this notebook the
        rows come from clf.predict in step_song.

    Returns (1 - row[0]) multiplied by a weighted sum of entries 1 to 4, with
    weights 1, 2, 30 and 40.
    """
    weighted_sum = row[1] + row[2] * 2 + row[3] * 30 + row[4] * 40
    complement_of_first = 1 - row[0]
    return complement_of_first * weighted_sum

In [190]:
def step_song(key, clf):
    """Predict per-row importance for one song and write it to a CSV file.

    key: song identifier, used for both the input CSVs and the output file name.
    clf: object with a predict(X) method returning one five-element row per sample.

    Writes generated_data/<key>_importance_generated.csv with one importance
    value per row of the song.

    Raises ValueError when no feature rows are available for the song, which is
    what get_features_for_song returns when the song's CSV files are missing.
    """
    # Only the features are needed for prediction; the labels are discarded.
    song_X, _ = get_features_for_song(key, True)
    if len(song_X) == 0:
        # Fail with a clear message instead of an opaque shape error from predict.
        raise ValueError('No feature rows found for song: {0}'.format(key))
    new_song_y = clf.predict(song_X)
    beat_importance = [calculate_importance(row) for row in new_song_y]
    
    #print ('Length: ' + str(len(new_song_y)))
    #plt.plot([new_song_y[i] for i in range(len(new_song_y)) if i % 12 == 0])
    #plt.show()
    
    pd.DataFrame(beat_importance).to_csv('generated_data/{0}_importance_generated.csv'.format(key), index=False)

In [193]:
# Generate an importance file for every song in the song list.
songs_to_use = pd.read_csv('data/songs_to_use.csv').values
for song_data in songs_to_use:
    try:
        step_song(song_data[0], beat_feature_model)
    except Exception as error:
        # A failure for one song should not stop the others, but the reason is
        # reported. Catching Exception rather than using a bare except lets
        # KeyboardInterrupt stop the loop.
        print ('\nError loading song')
        print (song_data[0])
        print (repr(error))


Error loading song
In The Groove~I Think I Like That Sound

Error loading song
In The Groove~Remember December

Error loading song
In The Groove~Torn

Error loading song
In The Groove~Walking on Fire

Error loading song
In The Groove 2~!

Error loading song
In The Groove 2~Fleadh Uncut

Error loading song
In The Groove 2~Hardcore Symphony

Error loading song
In The Groove 2~Holy Guacamole

Error loading song
In The Groove 2~Reactor

Error loading song
In The Groove 3~DJ Superstar

Error loading song
In The Groove 3~Land of Imagination

Error loading song
In The Groove 3~Online

Error loading song
In The Groove 3~Partyman

Error loading song
In The Groove Rebirth~Space Space Shooter

Error loading song
In The Groove Rebirth +~Roppongi Carillon

Error loading song
In The Groove Rebirth 2 (BETA)~Death From Above

Error loading song
In The Groove Rebirth 2 (BETA)~Elder God Shrine

Error loading song
In The Groove Rebirth 2 (BETA)~La Samba de la Vida

Error loading song
In The Groove Rebir