# Demo for dataprep, modeltrain, classifier, and hmm_test

In [1]:
import numpy as np
import pandas as pd
import scipy.stats
from matplotlib import pyplot as plt

from CharacterFeatureExtractor import featureExtractor
from dataprep import *
from modeltrain import *
from classifier import *
from hmm_test import *
from DrawCharacter import DrawCharacter
from PattRecClasses import HMM_TA
from hmm_gen import hmm_gen


In [2]:
class multigaussD:
    """Multivariate Gaussian distribution.

    Holds a mean vector and a covariance matrix and offers sampling plus
    (log-)density evaluation built on numpy / scipy.

    If the constructor rejects its arguments it prints an error and returns
    early; the instance then falls back to the class-level ``mean`` / ``cov``
    placeholders defined below.
    """

    # Class-level placeholders used when __init__ bails out early.
    mean = np.array([0])
    cov = np.array([[0]])

    def __init__(self, mu, C):
        """Store the mean and a regularised copy of the covariance.

        Parameters
        ----------
        mu : numpy.ndarray
            Mean vector; ``mu.shape[0]`` must equal ``C.shape[0]``.
        C : numpy.ndarray
            Square covariance matrix. It is copied, never modified in place.
        """
        # Compare sizes by value. `is not` tests object identity, which
        # reports equal dimensions above CPython's small-int cache as
        # different and wrongly rejects valid input.
        if C.shape[0] != C.shape[1]:
            print("error, non-square covariance matrix supplied")
            return
        if mu.shape[0] != C.shape[0]:
            print("error, mismatched mean vector and covariance matrix dimensions")
            return
        self.mean = mu
        # Work on a float copy: the caller's array stays untouched and an
        # integer-typed matrix can take the fractional diagonal term.
        cov = np.array(C, dtype=float)
        # Any exactly-zero variance: add 1e-4 to the whole diagonal.
        if np.any(np.diag(cov) == 0):
            cov += np.diagflat(np.ones(cov.shape[0]) / 10000)
        # NaN entries are replaced by 1 (done after the zero check, as before).
        cov[np.isnan(cov)] = 1
        self.cov = cov

    def _frozen(self):
        """Return the scipy frozen distribution for the stored parameters."""
        return scipy.stats.multivariate_normal(
            self.mean, self.cov, allow_singular=True
        )

    def random(self, num):
        """Draw ``num`` samples; returns an array with one sample per row."""
        return np.random.multivariate_normal(self.mean, self.cov, num)

    def rand(self):
        """Draw a single sample and return it as a 1-D array."""
        return np.random.multivariate_normal(self.mean, self.cov, 1)[0]

    def likelihood(self, X):
        """Return the probability density evaluated at ``X``."""
        return self._frozen().pdf(X)

    def loghood(self, X):
        """Return the log probability density evaluated at ``X``.

        Uses scipy's ``logpdf`` directly so that very small densities do not
        underflow to 0 before the logarithm is taken.
        """
        return self._frozen().logpdf(X)

    def getmean(self):
        """Return the stored mean vector."""
        return self.mean

    def getcov(self):
        """Return the stored covariance matrix."""
        return self.cov
    
def prob(x, B):
    """Evaluate every state's output density on every observation.

    Parameters
    ----------
    x : numpy.ndarray
        Observations, indexed along the first axis (``x[i]`` is one sample).
    B : numpy.ndarray
        Array of distribution objects, each exposing ``likelihood(sample)``.

    Returns
    -------
    res : numpy.ndarray, shape (T, N)
        ``res[i, j]`` is ``B[j].likelihood(x[i])``.
    scaled : numpy.ndarray, shape (T, N)
        ``res`` with each row divided by that row's maximum.
    """
    T = x.shape[0]
    N = B.shape[0]
    res = np.zeros((T, N))
    for i in range(T):
        for j in range(N):
            res[i, j] = B[j].likelihood(x[i])
    # Nothing to normalise; also avoids reducing over a zero-length axis.
    if res.size == 0:
        return res, np.zeros(res.shape)
    # Compute each row maximum once instead of once per element.
    row_max = res.max(axis=1, keepdims=True)
    scaled = res / row_max
    return res, scaled


def logprob(x, B):
    """Return the element-wise natural log of both arrays produced by ``prob``.

    The first returned array is the log of the raw likelihoods, the second
    the log of the row-scaled likelihoods.
    """
    likelihoods, normalised = prob(x, B)
    log_likelihoods = np.log(likelihoods)
    log_normalised = np.log(normalised)
    return log_likelihoods, log_normalised

## Training Model

In [3]:
### Data prep, model training, and evaluation
# Base name shared by the pickled feature/label files under data/.
db_name = "database_inc_sampchar"
# NOTE(review): read_pickle unpickles arbitrary objects; only load files from a trusted source.
# data_features is not referenced again in this cell, and data_labels is rebound
# by the dataprep() call below — these two loads may be redundant; confirm before removing.
data_features = pd.read_pickle(r'data/' + db_name + '_features.cdb')
data_labels = pd.read_pickle(r'data/' + db_name + '_labels.cdb')

# dataprep, modeltrain and hmm_test come from the wildcard project imports at the top.
# NOTE(review): the meaning of the literal arguments 3 and 15 is not visible here —
# TODO confirm and replace with named constants.
train_data, test_data, data_labels = dataprep(db_name, 3)
hm_learn, train_acc = modeltrain(train_data, data_labels, 15, useprint=False)
accuracies, result_labels_list = hmm_test(hm_learn,test_data, data_labels)
print(result_labels_list)

Database read is  database_inc_sampchar
Labels used are  ['A', 'C', 'K', 'P', 'X', 'T', '+', 'N', 'V', '4']

 ------------ CHARACTER  A ------------
Estimated q:
[1. 0.]

Estimated A:
[[9.53804348e-01 4.61956522e-02]
 [4.45416669e-81 1.00000000e+00]]

Estimated means:
[[ 9.91496477  0.86449336]
 [38.11081994 -8.91460582]]

Estimated covariances:
[[[2.52530691e+00 1.88225160e+01]
  [1.88225160e+01 4.03542983e+03]]

 [[2.33121006e+01 1.22562973e+01]
  [1.22562973e+01 2.27714332e+02]]]
Avg probability over test samples is 0.017397867405393678

 ------------ CHARACTER  C ------------
Estimated q:
[0.0583938  0.00429714 0.93730906]

Estimated A:
[[2.85104195e-01 2.24533551e-01 4.90362253e-01]
 [7.74904428e-37 8.43649172e-01 1.56350828e-01]
 [1.71444318e-01 7.04397023e-02 7.58115980e-01]]

Estimated means:
[[  9.72974291  -1.84707193]
 [  9.70084982  60.38625694]
 [  9.88151498 -24.44799365]]

Estimated covariances:
[[[1.84108406e-01 1.23710763e+00]
  [1.23710763e+00 8.31304052e+00]]

 [[1.3

In [4]:
# Print the second value returned by modeltrain() in the training cell.
# NOTE(review): presumably one training score per character label — confirm against modeltrain.
print(train_acc)

[-4.05140764318151, -12.597559496959414, -6.333355178918753, -6.874496387306048, -3.3621443234257455, -12.096625414194245, -3.316778915659423, -11.2003590447638, -8.629638330384225, -9.044346176121985]
(2, 15)
