In [29]:
from __future__ import print_function
%matplotlib inline
import copy
import pandas as pd
import numpy as np
import librosa
import seaborn as sb
import matplotlib.pyplot as plt
import itertools
import re
import random
import gc
from os import listdir
from os.path import isfile, join
from numpy import median, diff
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, BatchNormalization
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

In [30]:
from __future__ import print_function
import pandas as pd
import os

class SongFile:
    """Everything needed to write one song's generated chart to a .sm stepfile.

    Construction reads two CSVs from ``data/``:

    * ``{key}_misc.csv`` -- first data row is the song offset, second is the BPM.
    * ``{key}_notes_generated.csv`` -- column ``'0'`` holds one note row per
      line, read as strings so leading zeros (e.g. ``'0010'``) survive.

    Attributes:
        folder: song folder, as passed in.
        name: the part of ``key`` after ``'~'``.
        music_name: path of the audio file, as passed in.
        stepfile_name: path of the .sm file to write, as passed in.
        offset: offset value from the misc CSV.
        bpm: beats per minute from the misc CSV.
        beat_length: ``60 / bpm``, i.e. seconds per beat.
        notes: list of note-row strings.
        extension: audio file extension without the leading dot (``'ogg'``);
            empty string when the file has no extension.
    """

    def __init__(self, key, folder, stepfile, music_file):
        misc = pd.read_csv('data/{0}_misc.csv'.format(key)).values
        raw_notes = pd.read_csv('data/{0}_notes_generated.csv'.format(key), converters={'0': lambda x: str(x)}).values
        notes = [row[0] for row in raw_notes]
        self.folder = folder
        self.name = key.split('~')[1]
        self.music_name = music_file
        self.stepfile_name = stepfile
        self.offset = misc[0][0]
        self.beat_length = 60. / misc[1][0]
        self.bpm = misc[1][0]
        self.notes = notes
        # splitext only looks at the final path component, so dots in folder
        # names ("Vol. 2/") or earlier in the file name ("Song.v2.ogg") no
        # longer corrupt the extension the way split('.')[1] did.
        self.extension = os.path.splitext(music_file)[1][1:]

def write_song_header(output_stepfile, song):
    """Write the song-level ``#TAG:value;`` lines of a stepfile.

    Args:
        output_stepfile: writable text file object.
        song: object exposing ``name``, ``extension``, ``offset``,
            ``beat_length`` and ``bpm``.

    The sample preview starts 32 beats after the offset and lasts 32 beats.
    """
    preview_length = 32 * song.beat_length
    header_fields = (
        ('TITLE', song.name),
        ('MUSIC', '{0}.{1}'.format(song.name, song.extension)),
        ('OFFSET', -song.offset),
        ('SAMPLESTART', song.offset + preview_length),
        ('SAMPLELENGTH', preview_length),
        ('SELECTABLE', 'YES'),
        ('BPMS', '0.000={:.3f}'.format(song.bpm)),
    )

    for tag, value in header_fields:
        print("#{0}:{1};".format(tag, str(value)), file=output_stepfile)
        
def write_step_header(output_stepfile, song):
    """Write the ``#NOTES:`` section of a stepfile: chart details, then note rows.

    Args:
        output_stepfile: writable text file object.
        song: object whose ``notes`` attribute is a sequence of note-row strings.

    A ``,`` line is written after every 48th note row, and the section is
    closed with a final ``0000;`` row.
    """
    print("\n//---------------dance-single - J. Zukewich----------------", file=output_stepfile)
    print("#NOTES:", file=output_stepfile)
    chart_details = ('dance-single', 'J. Zukewich', 'Expert', '9', '0.242,0.312,0.204,0.000,0.000')
    for detail in chart_details:
        print('\t{0}:'.format(detail), file=output_stepfile)

    # Count rows from 1 so "every 48th row" is a plain divisibility check.
    for rows_written, row in enumerate(song.notes, 1):
        print(row, file=output_stepfile)
        if rows_written % 48 == 0:
            print(",", file=output_stepfile)

    print("0000;", file=output_stepfile)
    
def write_song_steps(song):
    """Write ``song``'s generated chart to ``song.stepfile_name``.

    If ``<name>.sm`` exists in ``song.folder`` and ``<name>.sm.backup`` does
    not, the existing stepfile is first renamed to ``<stepfile_name>.backup``
    so the original chart is preserved. When a backup already exists the
    current stepfile is simply overwritten.

    Args:
        song: object exposing ``name``, ``folder``, ``stepfile_name`` and the
            attributes read by ``write_song_header`` / ``write_step_header``.
    """
    # List the folder once; both membership checks use the same snapshot.
    existing_files = os.listdir(song.folder)
    has_stepfile = song.name + '.sm' in existing_files
    has_backup = song.name + '.sm.backup' in existing_files
    if has_stepfile and not has_backup:
        os.rename(song.stepfile_name, song.stepfile_name + '.backup')

    # The context manager closes the file even if one of the writers raises.
    with open(song.stepfile_name, 'w') as output_stepfile:
        write_song_header(output_stepfile, song)
        write_step_header(output_stepfile, song)

def write_song_steps_by_key(key):
    """Write the generated chart for the song identified by ``key``.

    Args:
        key: ``'<pack>~<song>'``; the song folder is
            ``StepMania/Songs/<pack>/<song>/``.

    The first ``.ogg`` or ``.mp3`` file listed in that folder is used as the
    music file; an ``IndexError`` is raised when there is none.
    """
    pack, song = key.split('~')
    folder = 'StepMania/Songs/{0}/{1}/'.format(pack, song)
    stepfile = folder + '/{0}.sm'.format(song)
    audio_files = [name for name in listdir(folder) if name.endswith(('.ogg', '.mp3'))]
    music = folder + audio_files[0]

    song_file = SongFile(key, folder, stepfile, music)
    write_song_steps(song_file)

In [31]:
# Number of rows (the current row plus earlier ones) whose features are
# concatenated into each sample.
samples_back_included = 8
# Number of target classes; get_class_for_index produces labels 0-4.
num_classes = 5
# Per-row feature count. The 2 is the [step count, mine count] pair that
# get_features_for_index appends; the 40 is presumably the width of one
# beat_features row -- TODO confirm against the feature-extraction code.
num_features = 40 + 2
# Full sample width: the look-back window plus the 3 positional values
# (i % 48, beat importance, i / 48) appended to every sample.
num_features_total = (num_features * samples_back_included) + 3
# Snapshot of the data/ directory listing taken when this cell runs; used to
# check which songs have saved feature and note CSVs.
save_files = listdir('data')

def get_features_for_index(beat_features, notes, index):
    """Return the feature vector for one row.

    For ``index >= 0`` this is ``beat_features[index]`` followed by the
    ``[step count, mine count]`` of the *previous* note row (``index - 1``).
    For a negative index (before the start of the song) it is a list of
    ``num_features`` zeros.
    """
    if index >= 0:
        previous_steps = get_steps_for_index(notes, index - 1)
        return np.concatenate((beat_features[index], previous_steps))
    return [0] * num_features

def get_steps_for_index(notes, index):
    """Count the taps and mines in note row ``index``.

    Args:
        notes: indexable of rows whose first element is the note string
            (e.g. ``'1001'``).
        index: row index; a negative index means "before the song starts".

    Returns:
        ``[number of '1' characters, number of 'M' characters]`` as a list.
        A negative index yields ``[0, 0]`` -- also a list, so every call
        returns the same type.
    """
    if index < 0:
        return [0, 0]
    row = notes[index][0]
    return [row.count('1'), row.count('M')]

def get_class_for_index(notes, index):
    """Return the class label for note row ``index``.

    Any row containing a mine is class 4; otherwise the label is the number
    of taps in the row, capped at 3.
    """
    steps, mines = get_steps_for_index(notes, index)
    return 4 if mines > 0 else min(steps, 3)
    
# Divisors checked in order; the position of the first one that divides a row
# index is that row's importance rank (0 is the most important).
importance_rankings = [48, 24, 12, 16, 6, 8, 3, 4, 2, 1]
def get_beat_importance(index):
    """Return the rank of the first entry in ``importance_rankings`` dividing ``index``."""
    for rank, divisor in enumerate(importance_rankings):
        if index % divisor == 0:
            return rank

def _load_song_arrays(key):
    """Load the saved audio features and note rows for ``key``.

    Returns ``(beat_features, notes)``, or ``None`` when either
    ``{key}_beat_features.csv`` or ``{key}_notes.csv`` is absent from the
    ``save_files`` listing of ``data/``.
    """
    features_file = '{0}_beat_features.csv'.format(key)
    notes_file = '{0}_notes.csv'.format(key)
    if features_file not in save_files or notes_file not in save_files:
        return None
    beat_features_rotated = pd.read_csv('data/{0}'.format(features_file)).values
    # Read column '0' as strings so note rows keep their leading zeros.
    notes = pd.read_csv('data/{0}'.format(notes_file), converters={'0': lambda x: str(x)}).values
    # rot90 followed by flipud transposes a 2-D array, so that
    # beat_features[i] lines up with notes[i].
    beat_features = np.flipud(np.rot90(np.array(beat_features_rotated)))
    return beat_features, notes


def _build_feature_row(beat_features, notes, i, lookback_stride):
    """Assemble the model input for row ``i``.

    Concatenates the features of ``samples_back_included`` rows
    (``i``, ``i - stride``, ``i - 2*stride``, ...) and appends three
    positional values: ``i % 48``, the beat importance rank and ``i / 48``.
    """
    row = [feature
           for j in range(samples_back_included)
           for feature in get_features_for_index(beat_features, notes, i - j * lookback_stride)]
    row.append(i % 48)
    row.append(get_beat_importance(i))
    row.append(i / 48)
    return row


def _keep_training_row(row_y):
    """Randomly down-sample the two most frequent classes.

    Class 0 rows are kept with probability 1/36, class 1 rows with
    probability 1/6, every other class always. Uses the global ``random``
    module, so results vary between runs unless it is seeded.
    """
    if row_y == 0:
        return random.randint(0, 35) == 0
    if row_y == 1:
        return random.randint(0, 5) == 0
    return True


def get_full_features_for_song(key, lookback_stride=1):
    """Build features and labels for *every* row of a song (used for prediction).

    Args:
        key: song key used in the ``data/{key}_*.csv`` file names.
        lookback_stride: spacing, in rows, between the look-back samples. It
            must equal the stride used to build the training data; the
            default matches ``get_features_for_song`` so a model is fed the
            same feature layout at prediction time as during training.

    Returns:
        ``(X, y)`` as numpy arrays; both are empty when the song's CSVs are
        not present in ``data/``.
    """
    X = []
    y = []
    loaded = _load_song_arrays(key)
    if loaded is not None:
        beat_features, notes = loaded
        for i in range(min(len(notes), len(beat_features))):
            X.append(_build_feature_row(beat_features, notes, i, lookback_stride))
            y.append(get_class_for_index(notes, i))
    return np.array(X), np.array(y)


def get_features_for_song(key, lookback_stride=1):
    """Build a class-balanced random subsample of a song's rows (used for training).

    Args:
        key: song key used in the ``data/{key}_*.csv`` file names.
        lookback_stride: spacing, in rows, between the look-back samples; see
            ``get_full_features_for_song``.

    Returns:
        ``(X, y)`` as numpy arrays containing only the rows that survived
        down-sampling; both are empty when the song's CSVs are not present.
    """
    X = []
    y = []
    loaded = _load_song_arrays(key)
    if loaded is not None:
        beat_features, notes = loaded
        for i in range(min(len(notes), len(beat_features))):
            row_y = get_class_for_index(notes, i)
            if _keep_training_row(row_y):
                X.append(_build_feature_row(beat_features, notes, i, lookback_stride))
                y.append(row_y)
    return np.array(X), np.array(y)

def build_batch_generator():
    """Yield one ``(X, y)`` training batch per song listed in ``data/songs_to_use.csv``.

    The song key is taken from the first column of each CSV row.
    """
    song_table = pd.read_csv('data/songs_to_use.csv').values
    for entry in song_table:
        song_key = entry[0]
        yield get_features_for_song(song_key)

# Total 243 songs
def build_training_data(songs_start, songs_end):
    """Collect training rows for a slice of the songs in ``data/songs_to_use.csv``.

    Args:
        songs_start: index of the first song to include.
        songs_end: index one past the last song to include.

    Returns:
        ``(X, y)`` as Python lists: the feature rows and labels of all
        selected songs, concatenated in file order.
    """
    song_table = pd.read_csv('data/songs_to_use.csv').values
    X, y = [], []
    for entry in song_table[songs_start:songs_end]:
        features, labels = get_features_for_song(entry[0])
        X.extend(features)
        y.extend(labels)
    return X, y

In [32]:
# Note row written for each predicted class, indexed by class label 0-4.
outputs = [
    '0000',
    '1000',
    '1100',
    '1111',
    'MMMM',
]


def prediction_to_output_class(row):
    """Map an integer class label to its note-row string."""
    note_row = outputs[row]
    return note_row


def prediction_to_output_one_hot(row):
    """Map a vector of per-class scores to the note row of the highest score."""
    best_class = np.argmax(row)
    return outputs[best_class]

def step_song(key, clf, prediction_to_output):
    """Generate a chart for ``key`` with a trained model and write its stepfile.

    Args:
        key: ``'<pack>~<song>'`` song key.
        clf: fitted model exposing ``predict(X)``.
        prediction_to_output: callable turning one prediction (a class label
            or a score vector, depending on the model) into a note-row string.

    The predicted rows are saved to ``data/{key}_notes_generated.csv`` and
    then written into the song's .sm file.
    """
    song_X, _ = get_full_features_for_song(key)
    predictions = clf.predict(song_X)
    generated_rows = [prediction_to_output(prediction) for prediction in predictions]

    generated_path = 'data/{0}_notes_generated.csv'.format(key)
    pd.DataFrame(generated_rows).to_csv(generated_path, index=False)
    write_song_steps_by_key(key)

In [33]:
# start with each input section mapping to one note
# train a model for that (just a list comprehension on notes: "contains 1" maps to True)

# then move to bars eg section of 4 bars maps to output for each note
# error = probability of note being true vs was it really

# try bar + prev notes (somehow use oddly dimensioned data?) to predict next notes

# try feeding in non structured data (bpm, position of time in song, song length, 
# things about feel of song (generated features))
# try using keras merge layer to add extra features

In [34]:
# Split by song position in data/songs_to_use.csv: entries [0, 200) are used
# for training and [200, 243) for evaluation.
# NOTE(review): get_features_for_song subsamples rows with the unseeded
# `random` module, so these arrays differ from run to run.
X_train, y_train = build_training_data(0, 200)
X_test, y_test = build_training_data(200, 243)

In [21]:
# Fit the random-forest baseline on the training split produced by
# build_training_data. X_train / y_train are the only feature/label arrays
# defined at notebook level, so this cell also runs on a fresh kernel.
clf_rf = RandomForestClassifier(n_estimators=20)
clf_rf.fit(X_train, y_train)

#for song in ['Anubis', 'Bend Your Mind', 'Boogie Down', 'Bouff', 'Bubble Dancer']:
#    step_song('In The Groove~{0}'.format(song), clf_rf, prediction_to_output_class)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=20, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [36]:
# Fully connected classifier over the flattened look-back window:
# two hidden blocks of Dense(500) -> BatchNorm -> tanh -> Dropout(0.3),
# then a num_classes-wide output block ending in softmax.
beat_feature_model = Sequential([
    # Hidden block 1
    Dense(500, input_dim=num_features_total, init='uniform'),
    BatchNormalization(),
    Activation('tanh'),
    Dropout(0.3),

    # Hidden block 2
    Dense(500, init='uniform'),
    BatchNormalization(),
    Activation('tanh'),
    Dropout(0.3),

    # Output block: one score per class
    Dense(num_classes, init='uniform'),
    BatchNormalization(),
    Activation('softmax'),
])

# Targets are one-hot vectors, hence categorical cross-entropy.
beat_feature_model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [37]:
# One-hot encode the integer training labels: row k of the identity matrix is
# the one-hot vector for class k, so indexing it with the labels builds the
# whole target matrix at once.
y_one_hot = np.eye(num_classes)[y_train]

# A class_weight argument was tried here and is left disabled.
beat_feature_model.fit(np.array(X_train), y_one_hot, nb_epoch=10, batch_size=64)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11b9a5b38>

In [39]:
# One-hot encode the integer test labels (row k of the identity matrix is the
# one-hot vector for class k). This rebinds y_one_hot to the test targets.
y_one_hot = np.eye(num_classes)[y_test]

# Displays the loss followed by the metrics given to compile() (accuracy).
beat_feature_model.evaluate(np.array(X_test), y_one_hot, batch_size=16)



[0.92511076703810102, 0.63001912045889097]

In [40]:
# Generate and write a chart for one song with the trained network; each
# softmax output row is converted to a note row by taking its argmax.
step_song('In The Groove~Anubis', beat_feature_model, prediction_to_output_one_hot)