In [1]:
from kp_data_loader import KPDataLoader, sequence
from sklearn.model_selection import KFold, LeaveOneOut
import logging
import sys
import numpy as np

# Seed NumPy's global RNG so the song permutation drawn later in this cell is
# repeatable from run to run.
np.random.seed(42)

# getLogger() without a name returns the root logger.
# NOTE(review): no handler or level is configured in this cell; the captured
# output shows debug/info lines, so logging is presumably set up elsewhere.
logger = logging.getLogger()
# Label that the tonic baselines (baseline_model_tonic, guess_tonic) emit for
# every position.
TONIC = 1
# Only referenced by the commented-out train/test split at the end of the cell.
TRAIN_TEST_RATIO = 0.5

def cross_val(XX, Y, n=10):
    """
    Score the first-note and tonic baselines with n-fold cross-validation.

    The songs are split with ``KFold(n_splits=n)``. For every fold the
    held-out songs are passed through ``sequence`` and both baselines are
    scored on the result with ``get_accuracy``.

    XX : indexable collection of per-song inputs (each item must support
         ``[:]``)
    Y  : indexable collection of per-song labels, parallel to XX
    n  : number of folds

    Returns a tuple ``(tonic_scores, first_note_scores)``; each is a list
    holding one accuracy per fold, in fold order.
    """
    kf = KFold(n_splits=n)

    first_note_scores = []
    tonic_scores = []

    for c, (train_indexes, val_indexes) in enumerate(kf.split(XX)):

        logger.debug("On Fold %s", c)

        # Copy each song ([:]) so downstream helpers never alias the caller's
        # data.
        xx_train = [XX[i][:] for i in train_indexes]
        y_train = [Y[i][:] for i in train_indexes]
        # NOTE(review): neither baseline is fitted, so the sequenced training
        # split is never read. The call is kept because `sequence` is defined
        # outside this file and may validate its input or have side effects.
        sequence(xx_train, y_train)

        xx_val = [XX[j][:] for j in val_indexes]
        y_val = [Y[j][:] for j in val_indexes]
        # presumably: flattened inputs, flattened labels, per-song lengths --
        # TODO confirm against kp_data_loader.sequence
        X_val, Y_val, L_val = sequence(xx_val, y_val)

        logger.info("First Note Baseline ...")
        first_note_baseline_prediction = guess_first_note(X_val, Y_val, L_val)
        # Internal sanity check: one prediction per label.
        assert len(first_note_baseline_prediction) == len(Y_val)
        first_note_baseline_accuracy = get_accuracy(
            first_note_baseline_prediction, Y_val)
        logger.info("Accuracy: %s", first_note_baseline_accuracy)
        first_note_scores.append(first_note_baseline_accuracy)

        logger.info("Tonic Baseline ...")
        tonic_baseline_prediction = guess_tonic(Y_val)
        assert len(tonic_baseline_prediction) == len(Y_val)
        tonic_baseline_accuracy = get_accuracy(tonic_baseline_prediction, Y_val)
        logger.info("Accuracy: %s", tonic_baseline_accuracy)
        tonic_scores.append(tonic_baseline_accuracy)

    return tonic_scores, first_note_scores

def make_chords_per_note(XX, Y):
    """
    Expand per-frame labels into per-note labels.

    For song ``i``, the label ``Y[i][j]`` of frame ``j`` is repeated once
    for every note in ``XX[i][j]``. Returns one flat label list per entry
    of ``Y``.
    """
    return [
        [labels[frame_no]
         for frame_no, frame in enumerate(XX[song_no])
         for _ in frame]
        for song_no, labels in enumerate(Y)
    ]

def get_rid_of_XX_frames(XX):
    """
    Drop the frame level of nesting from every song.

    Each song in ``XX`` is a sequence of frames; the result holds, per song,
    a single flat list with the frames' items concatenated in order.
    """
    return [[item for frame in song for item in frame] for song in XX]

def guess_first_note(X, Y, L):
    """
    Baseline: label every position of a song with (first note of song) + 1.

    X : flat sequence of notes for all songs, concatenated
    Y : unused; kept so the signature matches the caller
    L : lengths of the consecutive songs inside X

    Returns a flat list with one guess per position covered by L.
    """
    # NOTE(review): the "+ 1" presumably maps a 0-based note value onto the
    # 1-based chord labels (cf. TONIC = 1) -- confirm against the data loader.
    guesses = []
    start = 0
    for length in L:
        if length > 0:
            first_guess = X[start] + 1
            guesses.extend(first_guess for _ in range(length))
        start += length
    return guesses

def baseline_model_tonic(XX):
    """
    Baseline on nested input: answer TONIC for every item of every song.

    Returns a list parallel to ``XX`` whose i-th entry is a list of TONIC
    values, one per item in ``XX[i]``.
    """
    return [[TONIC for _ in song] for song in XX]

def guess_tonic(Y):
    """
    Baseline on flat input: answer TONIC once for every element of ``Y``.

    Only the number of elements in ``Y`` matters; their values are ignored.
    """
    return [TONIC for _ in Y]

def get_accuracy(prediction, Y):
    """
    Return the fraction of positions where ``prediction`` matches ``Y``.

    prediction : indexable sequence of guesses, at least as long as Y
    Y          : iterable of true labels

    Returns a float in [0, 1]. An empty ``Y`` yields 0.0 instead of raising
    ZeroDivisionError. Raises IndexError if ``prediction`` is shorter than
    ``Y``.
    """
    total_count = 0
    num_correct = 0
    # Count while iterating (rather than calling len) so any iterable works.
    for i, y in enumerate(Y):
        total_count += 1
        if prediction[i] == y:
            num_correct += 1
    if total_count == 0:
        # Nothing to score: accuracy is undefined, report 0.0 by convention.
        return 0.0
    # float() keeps this a true division on Python 2 as well.
    return float(num_correct) / float(total_count)

logger.info("Getting Data ... ")
# Reload the corpus from scratch so this cell does not depend on whatever
# state earlier cells left behind.
loader = KPDataLoader()
# Files ex1a.mid.csv through ex45a.mid.csv.
for i in range(1, 46):
    loader.load_file('ex{0}a.mid.csv'.format(i))
XX, Y = loader.get_XX_and_Y()
# Expand the per-frame labels to per-note labels first (this needs the frame
# structure), then drop the frame nesting from the inputs.
YY = make_chords_per_note(XX, Y)
XX = get_rid_of_XX_frames(XX)

# Shuffle the songs with one shared permutation so XX and YY stay aligned.
# KFold in cross_val is created without shuffle, so this is the only shuffle.
n = len(XX)
perm = np.random.permutation(n)
# NOTE(review): if the songs have different lengths these are ragged; recent
# NumPy releases refuse to build an array from ragged input unless
# dtype=object is given -- confirm the NumPy version this runs on.
XX = np.asarray(XX)
YY = np.asarray(YY)
XX = XX[perm]
YY = YY[perm]

tonic_scores, first_note_scores = cross_val(XX, YY, n=10)
# Single-argument print(...) parses on both Python 2 and Python 3; the double
# spaces reproduce the separators the original Python 2 print statement
# inserted between its comma-separated items.
print("TONIC: Mean:  {0}  std:  {1}".format(
    np.mean(tonic_scores), np.std(tonic_scores)))
print("FIRST NOTE: Mean:  {0}  std:  {1}".format(
    np.mean(first_note_scores), np.std(first_note_scores)))

# -----------------------------------------------

# n = len(XX)
# j = int(n - (float(n) * TRAIN_TEST_RATIO))

# XX_train = XX[0:j]
# YY_train = YY[0:j]

# XX_test = XX[j:n]
# YY_test = YY[j:n]

# -----------------------------------------------


Getting Data ... 
On Fold 0
First Note Baseline ...
Accuracy: 0.265323992995
Tonic Baseline ...
Accuracy: 0.344133099825
On Fold 1
First Note Baseline ...
Accuracy: 0.120822622108
Tonic Baseline ...
Accuracy: 0.368894601542
On Fold 2
First Note Baseline ...
Accuracy: 0.0528523489933
Tonic Baseline ...
Accuracy: 0.240771812081
On Fold 3
First Note Baseline ...
Accuracy: 0.144873000941
Tonic Baseline ...
Accuracy: 0.355597365945
On Fold 4
First Note Baseline ...
Accuracy: 0.144648023144
Tonic Baseline ...
Accuracy: 0.239151398264
On Fold 5
First Note Baseline ...
Accuracy: 0.199696279423
Tonic Baseline ...
Accuracy: 0.297646165528
On Fold 6
First Note Baseline ...
Accuracy: 0.188235294118
Tonic Baseline ...
Accuracy: 0.478823529412
On Fold 7
First Note Baseline ...
Accuracy: 0.111344537815
Tonic Baseline ...
Accuracy: 0.300420168067
On Fold 8
First Note Baseline ...
Accuracy: 0.207877461707
Tonic Baseline ...
Accuracy: 0.253829321663
On Fold 9
First Note Baseline ...
Accuracy: 0.40977443