In [7]:
from hmmlearn import hmm
import pandas as pd
import os
import numpy as np
import pickle

In [84]:
def get_data(filename):
    """Read one CSV file and collect its runs of rows sharing ``pm`` and ``level``.

    The CSV is read without a header row; the sixteen column names are
    assigned here. Rows are scanned in order. Whenever a row has the same
    ``pm`` and ``level`` as the row before it, ``get_sequence`` is asked to
    consume the whole run, starting at that previous row.

    Consequences of the scan that are visible in the code:
      * rows with index 0, 1 and 2 never become part of a run
        (presumably header/metadata rows -- TODO confirm);
      * a row whose (pm, level) pair differs from both neighbours is dropped,
        i.e. runs of length one are never emitted.

    Parameters
    ----------
    filename : path of the CSV file, passed straight to ``pd.read_csv``.

    Returns
    -------
    (X, lengths)
        ``X`` is a flat list with one feature list per consumed row and
        ``lengths`` holds the number of rows of each run, in order.
    """
    column_names = [
        "score1", "score2", "pm", "level",
        "pitch_right", "note_hold_time_right", "timing_right",
        "n_missing_notes_right", "n_extra_notes_right", "summed_right",
        "pitch_left", "note_hold_time_left", "timing_left",
        "n_missing_notes_left", "n_extra_notes_left", "summed_left",
    ]
    frame = pd.read_csv(filename, names=column_names)

    observations, run_lengths = [], []
    prev_pm, prev_level = "Nothin", None  # sentinels: nothing seen yet
    last_consumed = -1                    # index of the last row already in a run

    for idx, record in frame.iterrows():
        # Skip rows that a previous run already swallowed, and the first three rows.
        if idx <= last_consumed or idx < 3:
            continue

        if prev_pm == record["pm"] and prev_level == record["level"]:
            # The previous row opened this run, so the run starts at idx - 1.
            observations, run_lengths, last_consumed = get_sequence(
                frame, idx - 1, observations, run_lengths, prev_pm, prev_level
            )
            continue

        # New (pm, level) pair: remember it and wait for a matching successor.
        prev_pm, prev_level = record["pm"], record["level"]

    return observations, run_lengths

In [81]:
def get_sequence(file, index, X, lengths, pm, level):
    """Consume the run of consecutive rows that match ``pm`` and ``level``.

    Starting at row label ``index``, rows are read with ``file.loc`` while
    both their ``pm`` and ``level`` values equal the given ones, or until the
    end of the frame is reached. For every matching row the values from the
    fifth column onwards are appended to ``X`` as a list.

    ``X`` and ``lengths`` are modified in place and also returned.

    Parameters
    ----------
    file    : DataFrame whose row labels are 0..len(file)-1 (rows are
              addressed with ``.loc`` and bounded by ``len(file)``).
    index   : label of the first row of the run.
    X       : list receiving one feature list per matching row.
    lengths : list receiving the number of rows in this run.
    pm, level : values a row must have to belong to the run.

    Returns
    -------
    (X, lengths, last)
        ``last`` is the label of the final row of the run
        (``index - 1`` when the row at ``index`` does not match).
    """
    total_rows = len(file)
    cursor = index
    current = file.loc[cursor]

    while pm == current["pm"] and level == current["level"]:
        # Everything after score1, score2, pm, level is the feature vector.
        X.append(list(current[4:]))
        cursor += 1
        if cursor == total_rows:
            # Ran off the end of the frame: the run ends with the last row.
            break
        current = file.loc[cursor]

    lengths.append(cursor - index)
    return X, lengths, cursor - 1

In [64]:
def findBestHMM(filename):
    """Fit the HMM ten times and keep the converged model with the best score.

    Each attempt calls ``fitHMM(X, lengths)``.
    NOTE(review): ``X`` and ``lengths`` are read from the notebook's global
    namespace, not passed in -- make sure the intended data is loaded into
    those globals before calling this function.

    The best model is pickled to ``filename + ".pkl"``. Nothing is written if
    no attempt produced a converged model.

    Original author's note on the selection rule, kept verbatim because its
    meaning is unclear: "wrong do not use -> all not just highest score".

    Parameters
    ----------
    filename : path prefix of the pickle file (``".pkl"`` is appended).

    Returns
    -------
    (modelBest, logProbMax, PBest)
        The best model, its log-likelihood and its transition matrix.
        If no attempt converged this is ``(None, -inf, None)``.
    """
    # Log-likelihoods may be negative, so start below every possible score
    # (starting at 0 would silently reject all models scoring <= 0).
    logProbMax = float("-inf")
    modelBest = None
    PBest = None

    for attempt in range(10):
        try:
            model, _mus, _sigmas, P, logProb, convergence = fitHMM(X, lengths)
        except Exception as exc:
            # Best effort: a failed fit is reported and skipped, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print("fit attempt", attempt, "failed:", exc)
            continue

        if convergence.converged and logProb > logProbMax:
            logProbMax = logProb
            modelBest = model
            PBest = P

    if modelBest is not None:
        with open(filename + ".pkl", "wb") as file:
            pickle.dump(modelBest, file)

    return modelBest, logProbMax, PBest

In [66]:
def fitHMM(X, lengths, random_state=None):
    """Fit a 3-state Gaussian HMM with a fixed start state and a constrained
    initial transition matrix.

    Parameters
    ----------
    X            : observations of all sequences stacked one after another,
                   one feature vector per row.
    lengths      : number of rows of each individual sequence in ``X``.
    random_state : optional seed (or RandomState) forwarded to
                   ``hmm.GaussianHMM`` so that a fit can be reproduced.
                   ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    (model, mus, sigmas, P, logProb, convergence)
        model       : the fitted ``GaussianHMM``.
        mus         : state means (``model.means_``).
        sigmas      : per-state standard deviations, i.e. the square roots of
                      the diagonals of ``model.covars_`` -- one row per state.
        P           : fitted transition matrix (``model.transmat_``).
        logProb     : ``model.score(X, lengths)`` on the training data.
        convergence : the model's ``monitor_`` (a ConvergenceMonitor).
    """
    # init_params="cm": only covariances and means are initialised by fit();
    # the start probabilities and transition matrix set below are kept as the
    # starting point of the EM iterations.
    model = hmm.GaussianHMM(n_components=3, n_iter=1000, init_params="cm",
                            random_state=random_state)

    model.startprob_ = np.array([0.0, 1.0, 0.0])  # every sequence starts in state 1
    # Initial transition matrix. The zeros matter: no direct 0 -> 2 move and
    # state 2 can only stay in state 2 (1-based entries (1,3), (3,1), (3,2)).
    model.transmat_ = np.array([[0.8, 0.2, 0.0],
                                [0.05, 0.8, 0.15],
                                [0.0, 0.0, 1.0]])

    model.fit(X, lengths)

    convergence = model.monitor_
    mus = np.array(model.means_)
    # Standard deviations for ALL states (previously only states 0 and 1
    # were included although the model has three components).
    sigmas = np.sqrt(np.array([np.diag(cov) for cov in model.covars_]))
    P = np.array(model.transmat_)

    # Log-likelihood of the training data under the fitted model.
    logProb = model.score(X, lengths)

    return model, mus, sigmas, P, logProb, convergence

In [91]:
# Fit one HMM per data file and print its transition matrix, log-likelihood,
# convergence monitor and the decoded state path of every sequence.
lengths = []
X = []
# "data4" is not in the list (it only appeared as a commented-out entry).
for filename in ["data1", "data2", "data3", "data5", "data6", "data7", "data8", "data9", "data10", "data11", "data12"]:
    X, lengths = get_data("Data_Anonym" + "/" + filename + ".csv")
    model, mus, sigmas, P, logProb, convergence = fitHMM(X, lengths)
    # Alternative with restarts: model, logProb, P = findBestHMM(<pickle prefix>)

    # Was "/n", which printed the two characters literally instead of a newline.
    print("\n", filename)
    print("P", P)
    print("logProb", logProb)
    print("convergences", convergence)
    print("")

    # Decode the whole stacked data set once, then print it sequence by sequence.
    hidden_state = model.predict(X, lengths)
    index = 0
    for length in lengths:
        print(hidden_state[index:index+length])
        index += length
    # To keep each fitted model, pickle `model` here (e.g. to filename + "_1.pkl").

/n data1
P [[1.00000000e+00 4.55201231e-31 0.00000000e+00]
 [1.71428571e-01 6.57142857e-01 1.71428571e-01]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
logProb 876.6674940642266
convergences ConvergenceMonitor(
    history=[47.10721970628415, 797.0604737983817, 876.6674936644526, 876.6674940642268],
    iter=4,
    n_iter=1000,
    tol=0.01,
    verbose=False,
)

[1 2 2 2 2]
[1 2 2 2 2 2 2 2]
[1 0 0 0 0 0]
[1 0 0 0 0 0]
[1 0 0]
[1 2]
[1 2 2]
[1 2 2 2]
[1 0 0 0]
[1 0 0]
[1 2 2 2 2 2]
[1 0 0]
[1 1 1 1 1 1 1 1 1 1 1 1]
[1 1 1 1 1 1 1 1 1 1 1 1 1]
/n data2
P [[1.08618810e-55 1.00000000e+00 0.00000000e+00]
 [4.13658917e-62 8.78787879e-01 1.21212121e-01]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
logProb 1149.2116505795314
convergences ConvergenceMonitor(
    history=[515.429776990216, 1117.3807582606964, 1149.2116505792392, 1149.2116505795314],
    iter=4,
    n_iter=1000,
    tol=0.01,
    verbose=False,
)

[1 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
[1 2 2 2 2 2 2 2]
[1 2 2 2 2 2 2 2 2]
[1 

In [334]:
# Save whatever `model` currently refers to in the notebook namespace.
with open("identity5.pkl", "wb") as file:
    pickle.dump(model, file)
    

In [25]:
# Load a stored 3-state model, rank its states by their mean feature values
# and, if needed, relabel the states so that the "lowest" state becomes state 2.
directory = "left hand"
model_path = "/".join(["BestThreeStates", directory, directory + "4.pkl"])
# NOTE: pickle.load can execute arbitrary code -- only load trusted model files.
with open(model_path, "rb") as file:
    model = pickle.load(file)

print("Transition matrix")
print(model.transmat_)

mus = model.means_

# Per state: mean of feature 5 plus mean of feature 11.
# Presumably these are summed_right and summed_left, if the model was trained
# on the feature layout produced by get_data -- TODO confirm.
summed_means = [mus[k][5] + mus[k][11] for k in range(3)]
state0, state1, state2 = summed_means
print(summed_means)

max_error = np.argmax(summed_means)
min_error = np.argmin(summed_means)

# Second ranking: vote over every feature (see new_error_comp).
new_max, new_min = new_error_comp(mus)

print("state with min_error:", min_error)
print("state with max error:", max_error)

print("state with new min_error:", new_min)
print("state with new max error:", new_max)

# Relabelling touches the means and the transition matrix.
# NOTE(review): covars_ and startprob_ are NOT permuted here, so after a swap
# they still refer to the old state labels -- confirm this is acceptable.
indizes = []
if new_min == 0:
    # Exchange state 0 and state 2 (columns first, then rows, for transmat_).
    indizes = [2, 1, 0]
    columns_swapped = swap_column(model.transmat_, indizes)
    model.transmat_ = swap_row(columns_swapped, indizes)
    model.means_ = swap_row(model.means_, indizes)
elif new_min == 1:
    # German: "Wow, something is going wrong here! The model should be discarded."
    print("Wow, hier läuft was schief! Das Model sollte raus")
# An earlier variant of this cell applied the [2, 1, 0] swap unconditionally.
# NOTE(review): the output stored with this cell shows a swapped matrix
# although new_min was 1, so it was presumably produced by that variant.

print("Transition matrix after")
print(model.transmat_)

with open("left hand4_new.pkl", "wb") as file:
    pickle.dump(model, file)


Transition matrix
[[7.52262820e-33 1.00000000e+00 0.00000000e+00]
 [1.87501749e-01 7.49995673e-01 6.25025781e-02]
 [0.00000000e+00 0.00000000e+00 1.00000000e+00]]
[1.9520265746498267, 1.0598242347352753, 1.1825672926018316]
min 3 -2 -1
state with min_error: 1
state with max error: 0
state with new min_error: 1
state with new max error: 0
Transition matrix after
[[1.00000000e+00 0.00000000e+00 0.00000000e+00]
 [6.25025781e-02 7.49995673e-01 1.87501749e-01]
 [0.00000000e+00 1.00000000e+00 7.52262820e-33]]


In [12]:
def new_error_comp(mus):
    """Rank states by a per-feature vote over their means.

    For every feature (column of ``mus``) the state with the largest mean
    gets +1 and the state with the smallest mean gets -1; on ties the first
    such state is used. The state with the highest total is returned as
    ``new_max`` and the one with the lowest total as ``new_min``.

    Works for any number of states (rows); the previous version only looked
    at the first three rows. The per-state totals are printed with the
    label ``"min"``.

    Parameters
    ----------
    mus : 2-D array-like of shape (n_states, n_features), e.g. ``model.means_``.

    Returns
    -------
    (new_max, new_min)
        Indices of the states with the highest and the lowest vote total.
    """
    means = np.asarray(mus)
    n_states = means.shape[0]

    # Winner / loser state of every feature column.
    highest = np.argmax(means, axis=0)
    lowest = np.argmin(means, axis=0)

    # +1 per column won, -1 per column lost, counted per state.
    votes = (np.bincount(highest, minlength=n_states)
             - np.bincount(lowest, minlength=n_states))

    new_max = np.argmax(votes)
    new_min = np.argmin(votes)
    print("min", *votes)
    return new_max, new_min

In [6]:
def swap_column(arr, indizes):
    """Reorder the leading columns of ``arr`` in place and return ``arr``.

    Column ``k`` receives the old column ``indizes[k]`` for every
    ``k < len(indizes)``; any further columns are left untouched. The number
    of reordered columns follows ``len(indizes)``, where it used to be fixed
    at three.

    Parameters
    ----------
    arr     : 2-D numpy array; it is modified in place.
    indizes : sequence of source column indices.

    Returns
    -------
    The same array object that was passed in.
    """
    targets = list(range(len(indizes)))
    # The right-hand side is a copy (fancy indexing), so overlapping
    # source/target columns are safe.
    arr[:, targets] = arr[:, indizes]
    return arr

In [7]:
def swap_row(arr, indizes):
    """Reorder the leading rows of ``arr`` in place and return ``arr``.

    Row ``k`` receives the old row ``indizes[k]`` for every
    ``k < len(indizes)``; any further rows are left untouched. The number of
    reordered rows follows ``len(indizes)``, where it used to be fixed at
    three.

    Parameters
    ----------
    arr     : numpy array; it is modified in place.
    indizes : sequence of source row indices.

    Returns
    -------
    The same array object that was passed in.
    """
    targets = list(range(len(indizes)))
    # The right-hand side is a copy (fancy indexing), so overlapping
    # source/target rows are safe.
    arr[targets] = arr[indizes]
    return arr

In [None]:
"""
if new_min == 0 and new_max == 2:
    # 0 -> 2, 2-> 0
    indizes = [2,1,0]
elif new_min == 0 and new_max == 1:
    # 0 -> 2, 1-> 0, 2 -> 1
    indizes = [2,0,1]
elif new_min == 1 and new_max == 2:
    # 0 -> 1, 1 -> 2, 2 -> 0
    indizes = [1,2,0]
elif new_min == 1 and new_max == 0:
    # 1 -> 2, 2 -> 1
    indizes = [0,2,1]
elif new_min == 2 and new_max == 1:
    # 1 -> 0, 0 -> 1
    indizes = [1,0,2]
else:
    indizes = [0,1,2]
model.transmat_ = swap_row(swap_column(model.transmat_, indizes), indizes)
model.means_ = swap_row(model.means_, indizes)
#model.covars_ = swap_row(model.covars_, indizes)

print(model.transmat_)
#wit