In [105]:
from MultinomialHMM import MultinomialHMM
from pickle import load
from data import get_meta_path, get_track_path
from meta import Meta
import pandas as pd
import numpy as np
from seqlearn.evaluation import SequenceKFold, whole_sequence_accuracy

In [40]:
output_dir = '../dataset'
meta = Meta.load(output_dir)
track_ids = meta.track_ids
np.shape(track_ids)

(100,)

In [39]:
def get_analysis(output_dir, track_id):
    path = get_track_path(output_dir, track_id)
    with open(path, 'rb') as f:
        return load(f)
analyses = [get_analysis(output_dir, track_id) for track_id in track_ids]
np.shape(analyses)

(100,)

In [61]:
print(analyses[0].keys())
analyses[0]['segments'][0]

dict_keys(['id', 'segments', 'duration', 'key'])


{'start': 0.0,
 'duration': 0.11279,
 'confidence': 0.0,
 'pitches': [1.0,
  0.707,
  0.374,
  0.391,
  0.39,
  0.403,
  0.306,
  0.27,
  0.138,
  0.116,
  0.309,
  0.101]}

In [117]:
map_segments = lambda analysis: [
    np.roll(segment['pitches'], -analysis['key']) for segment in analysis['segments']
]
mapped_segments = [map_segments(analysis) for analysis in analyses]
np.shape(mapped_segments)

(100,)

In [118]:
segments_per_track = [len(segment) for segment in mapped_segments]
np.shape(segments_per_track)

(100,)

In [119]:
arrays = [[[0.2, 0.3], [0.4, 0.5]], [[0.55, 0.11], [0.23, 0.11]]]
np.concatenate(arrays)

array([[0.2 , 0.3 ],
       [0.4 , 0.5 ],
       [0.55, 0.11],
       [0.23, 0.11]])

In [120]:
segments = np.concatenate(mapped_segments)
np.shape(segments)

(94196, 12)

In [121]:
keys = [analysis['key'] for analysis in analyses]
np.shape(keys)

(100,)

### Fit and predict using `MultinomialHMM`

In [122]:
hmm = MultinomialHMM(decode="viterbi", alpha=.01) # default params
hmm

MultinomialHMM(alpha=0.01, decode='viterbi')

In [123]:
X = segments
y = np.repeat(keys, lengths)
lengths = segments_per_track

In [124]:
kfold = SequenceKFold(
    lengths,
    n_folds=3, shuffle=True
)
kfold

<seqlearn.evaluation.SequenceKFold at 0x7fe8176f7f98>

In [134]:
folds = list(iter(kfold))
for train, lengths_train, test, lengths_test in folds:

    model = hmm.fit(X[train], y[train], lengths_train)
    y_pred = model.predict(X[test], lengths=lengths_test)
    y_true = y[test]
    acc = whole_sequence_accuracy(y_true, y_pred, lengths_test)
    print(f'3-fold accuracy: {acc}')

3-fold accuracy: 0.12903225806451613
3-fold accuracy: 0.11764705882352944
3-fold accuracy: 0.05714285714285716
