In [1]:
import os
import pickle
import pandas as pd
import numpy as np

from dataprep import dataprep
from modeltrain import modeltrain
from hmm_test import hmm_test

In [2]:
# Load in master_list data from saved run and choose HMM models correspondingly

# master_list = [cv_train_acc] + [cv_acc] + [hmm_list] + [train_list] + [test_list]

filename = 'master_list50'

master_list = pd.read_pickle(r'master_list50')

# Choose metric to extract the best HMM model based on
metric = master_list[0]
 

best = np.zeros([2,10]) - 50  # make sure that it finds the extreme values
itr = len(metric)
for i in range(itr):
    for k in range(10):
        if metric[i][k] > best[0,k]:
            best[0,k] = metric[i][k]  # best value
            best[1,k] = i  # index of currently best model

# Choose the hmm models corresponding to the metric
hmm_list = master_list[2]
hmm_best = []

for k in range(10):
    hmm_best += [hmm_list[int(best[1,2])][k]]
    
train_acc = best[0,:]
# In this case we are using training accuracy, so save out the chosen parameters s.t. they can be plotted with the results

In [3]:
print(best)



[[-4.02032571 -8.83562523 -5.39754642 -4.49541909 -3.30549836 -2.98376423
  -3.20041036 -7.04788349 -3.11322116 -3.96053459]
 [12.         29.         39.         34.         36.         13.
   9.         13.          9.         12.        ]]


In [4]:
# Load training and testing data:
useprint = True
train_data, test_data, labels = dataprep("database_inc_sampchar", useprint=useprint, shuffle=True, max_labels=0, max_samples=0, nr_test=15)

# Evaluate trained model:
acc, res_labl_list = hmm_test(hmm_best, test_data, labels)


Database read is  database_inc_sampchar
Labels used are  ['A', 'C', 'K', 'P', 'X', 'T', '+', 'N', 'V', '4']
Total training samples are  5  and testing samples are  15 

************* CLASSIFICATION RESULTS ************* 
Classification accuracy of test samples of character A is: 0.0%
Classification accuracy of test samples of character C is: 0.0%
Classification accuracy of test samples of character K is: 73.33333333333333%
Classification accuracy of test samples of character P is: 40.0%
Classification accuracy of test samples of character X is: 0.0%
Classification accuracy of test samples of character T is: 60.0%
Classification accuracy of test samples of character + is: 66.66666666666666%
Classification accuracy of test samples of character N is: 0.0%
Classification accuracy of test samples of character V is: 100.0%
Classification accuracy of test samples of character 4 is: 20.0%


In [5]:
print("---------------- RESULTS: ---------------- ")
for k in range(len(labels)):
    print("\n Character: ", labels[k])
    print("Number of states: ", len(hmm_best[k].q))
    print("Test accuracy: ",acc[k], "and resulting labels: ",res_labl_list[k])
    print("Training accuracy: (log)", train_acc[k])

---------------- RESULTS: ---------------- 

 Character:  A
Number of states:  3
Test accuracy:  0.0 and resulting labels:  ['K', 'K', 'K', '4', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K']
Training accuracy: (log) -4.0203257125453

 Character:  C
Number of states:  3
Test accuracy:  0.0 and resulting labels:  ['K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K', 'K']
Training accuracy: (log) -8.835625232092639

 Character:  K
Number of states:  3
Test accuracy:  0.7333333333333333 and resulting labels:  ['K', 'K', 'P', 'K', 'K', 'K', 'K', 'K', 'V', 'K', 'K', 'P', 'K', 'K', 'P']
Training accuracy: (log) -5.397546417128217

 Character:  P
Number of states:  4
Test accuracy:  0.4 and resulting labels:  ['K', 'K', 'K', 'P', 'K', 'P', 'K', 'P', 'P', 'K', 'K', 'P', 'K', 'K', 'P']
Training accuracy: (log) -4.4954190897610244

 Character:  X
Number of states:  4
Test accuracy:  0.0 and resulting labels:  ['V', 'V', 'V', 'V', 'T', '+', 'V', 'V', 'V', 'V', 'V', 'V', '