In [1]:
from data_loader_no_t import DataLoader
import math
import numpy as np
from seqlearn import hmm, perceptron, evaluation
import logging
import sys

# Fixed seed so that anything drawing from this RandomState is reproducible.
rs = np.random.RandomState(42)

# Send every record (DEBUG and above) that reaches the root logger to stdout.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Re-running this code in the same interpreter (e.g. re-executing a notebook
# cell) must not stack a second stdout handler on the root logger, otherwise
# every message is printed once per run. Reuse an existing one when present.
ch = next(
    (handler for handler in logger.handlers
     if isinstance(handler, logging.StreamHandler)
     and getattr(handler, 'stream', None) is sys.stdout),
    None,
)
if ch is None:
    ch = logging.StreamHandler(sys.stdout)
    logger.addHandler(ch)
ch.setLevel(logging.DEBUG)

# Fraction of songs assigned to the training side of a simple hold-out split.
# NOTE(review): the k-fold evaluation does not appear to consume the split
# derived from this value - confirm before relying on it.
TRAINING_RATIO = 0.5

# Load the dataset: X and Y are indexed per sample further down, and L is
# handed to SequenceKFold as the per-sequence lengths.
# NOTE(review): exact shapes/dtypes come from DataLoader - confirm there.
logger.info('Getting data ...')
dl = DataLoader()
X, Y, L = dl.load()
logger.info('Data Loaded.')

# Shuffled 10-fold split over whole sequences, seeded through `rs`.
kf = evaluation.SequenceKFold(
    L,
    n_folds=10,
    shuffle=True,
    random_state=rs,
)

# One score per cross-validation fold; the mean is reported after the loop.
accuracies = []

# Each fold yields sample indices into X/Y plus the matching sequence lengths
# for the training part and for the held-out part.
for train_indices, train_lengths, test_indices, test_lengths in kf:

    X_train = X[train_indices]
    Y_train = Y[train_indices]
    L_train = train_lengths

    # Sanity check: the training lengths must account for every selected
    # sample, otherwise sequence boundaries would be misaligned.
    len_train = np.sum(L_train)
    len_X_train = len(X_train)
    len_Y_train = len(Y_train)
    assert len_train == len_X_train, "sum of training lengths: {0} not equal to len(X_train): {1}".format(len_train, len_X_train)
    assert len_train == len_Y_train, "sum of training lengths: {0} not equal to len(Y_train): {1}".format(len_train, len_Y_train)

    X_test = X[test_indices]
    Y_test = Y[test_indices]
    L_test = test_lengths

    # Same consistency check for the held-out part of the fold.
    len_test = np.sum(L_test)
    len_X_test = len(X_test)
    len_Y_test = len(Y_test)
    assert len_test == len_X_test, "sum of testing lengths: {0} not equal to len(X_test): {1}".format(len_test, len_X_test)
    assert len_test == len_Y_test, "sum of testing lengths: {0} not equal to len(Y_test): {1}".format(len_test, len_Y_test)

    # A fresh model per fold so nothing learned leaks between folds.
    # To evaluate a structured perceptron instead, build
    # perceptron.StructuredPerceptron() here; fit/score are called the same way.
    logger.info('Building HMM model ...')
    model = hmm.MultinomialHMM()
    logger.info('Training model ...')
    model.fit(X_train, Y_train, L_train)
    logger.info('Testing model ...')
    accuracy = model.score(X_test, Y_test, L_test)
    logger.info("Accuracy: {0}".format(accuracy))

    accuracies.append(accuracy)

# Mean score across all folds; as the last expression of the cell it is what
# the notebook displays as the cell result.
np.mean(accuracies)

Getting data ...
Data Loaded.
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.268112760165
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.343388284812
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.311880746229
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.465938360423
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.454481173767
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.405800673733
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.295277679519
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.315352697095
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.36066424494
Building HMM model ...
Training model ...
Testing model ...
Accuracy: 0.392165531877


0.36130621525605139