# Import Libraries

In [24]:
import numpy as np
import pandas as pd
from hmmlearn import hmm
import warnings
from constants import *
import math
import random
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# Helper Methods

In [25]:
def generate_random_sample(X, size):
    '''
    Given a list X, 
    generate random samples of given size
    '''
    Z_temp = random.sample(list(X), size)
    
    #Concatenation
    Z = [Z_temp[0]]
    for val in Z_temp[1:]:
        Z = np.concatenate([Z,[val]])
    
    return Z

def convert_values_to_list(list_val):
    '''
    Given a list X = [1 2 3] , 
    return X = [[1],[2],[3]]
    '''
    X = []
    for i in list_val:
        X.append([i])
    return X

def conversion_list_of_list(X, DIMENSION):
    '''
    Given a list X with values in lists,
    X = [[ 3  2  2],[4 8 10]]
    Convert each value to a list
    Return list of lists, array of lengths of each sequence
    X = [[3] [2] [2] [4] [8] [10]]
    ''' 
    X_new = []
    length = len(X)
    for idx, val_list in enumerate(X):
        Y = []
        for val in val_list:
            Y.append([val])
        X_new.append(Y)

    #Concatenation
    Z = X_new[0]
    for val_list in X_new[1:]:
        Z = np.concatenate([Z,val_list])

    # assign array of lengths for HMM
    lengths = [DIMENSION]*length
    
    return Z,lengths

#Calculate likelihood for given sequence according to given HMMs and return HMM
def likelihood_sequence(sequence, HMM_array):
    '''
    Given list of K HMMs and sequence,
    determines likelihood of sequence under all HMM models
    Returns index of HMM which has max likelihood
    ''' 
    scores = []
    length = [len(sequence)]
    for i, HMM in enumerate(HMM_array):
        calculated_score = HMM.score(sequence, length)
        scores.append(calculated_score)
    idx = scores.index(max(scores))
    return idx

def HMM_model_stats(model):
    '''
    Details of HMM model
    ''' 
    print("*************************************")
    print("Transition matrix")
    print(model.transmat_)
    print("*************************************")
    print("Means and stds of each hidden state")
    for i in range(model.n_components):
        print("Hidden state {0}".format(i))
        print("mean = ", model.means_[i])
        print("std = ", [math.sqrt(model.covars_[i])])
        print()

def BIC(HMM,X):
    LogLikelihood = model.score(X)
    num_hidden_states = model.n_components
    # D counts transition matrix, emission matrix, sequences estimated (Z), covariance matrix
    D = (num_hidden_states) + 2*(num_hidden_states**2) + len(X)*DIMENSION 
    BIC = LogLikelihood - (D/2)*np.log(len(X))
    return BIC
    
def BIC_array(HMM_array,X_i):
    BIC_total = 0
    for i in range(len(X_i)):
        model = HMM_array[i]
        X, lengths = conversion_list_of_list(X_i[i],DIMENSION)
        LogLikelihood = model.score(X,lengths)
        num_hidden_states = model.n_components
        # D counts transition matrix, emission matrix, sequences estimated (Z), covariance matrix
        D = num_hidden_states + 2*(num_hidden_states**2) + len(X)*DIMENSION 
        BIC = LogLikelihood - (D/2)*np.log(len(X))
        BIC_total+= BIC
    return BIC_total

def likelihood_array(HMM_array,X_i):
    likelihood_total = 0
    for i in range(len(X_i)):
        model = HMM_array[i]
        X, lengths = conversion_list_of_list(X_i[i],DIMENSION)
        LogLikelihood = model.score(X,lengths)
        likelihood = LogLikelihood 
        likelihood_total+= likelihood
    return likelihood_total

def plot_BIC(list_k, BIC_score):
    fig = plt.subplot(111)
    plt.plot(list_k, BIC_score, marker='o')  
    plt.xlabel('Value of K')
    plt.ylabel('Objective')
    plt.title('BIC')
    plt.show() 

# Load Data and Clean

In [26]:
df = pd.read_csv('Data/merged.txt', sep=",", na_values=['-'])
df = df.dropna()
df = df[['cdReads0','cdReads1','cdReads2','cdRPKM0','cdRPKM1','cdRPKM2']]


# Filter cdReads

In [27]:
df = df[(df['cdReads0'] >= 50) & (df['cdReads1'] >= 50) & (df['cdReads2'] >= 50)]

In [28]:
#Dataset
df_main = df[['cdReads0','cdReads1','cdReads2']]
LENGTH,DIMENSION = df_main.shape
print("Dataset size is",LENGTH)
print("Features are", DIMENSION)
print(df_main.head(5))
X = np.log2(df_main.values)
print("****************************")
print("First 5 log2 values\n",X[:5])

Dataset size is 2072
Features are 3
    cdReads0  cdReads1  cdReads2
6      113.0     180.0     292.0
19     455.0     340.0     326.0
34     353.0     397.0     499.0
57     113.0     150.0     137.0
81     126.0     175.0     202.0
****************************
First 5 log2 values
 [[ 6.82017896  7.4918531   8.18982456]
 [ 8.82972274  8.40939094  8.34872815]
 [ 8.46352437  8.6329952   8.96289601]
 [ 6.82017896  7.22881869  7.09803208]
 [ 6.97727992  7.45121111  7.65821148]]


# Arrays with HMM models for 1<=K<=25

In [29]:
HMM_K_ARRAYS = []
X_i_K_ARRAYS = []

# Check likelihood and do assignments

In [30]:
K_values = range(2,25)

In [31]:
for K in K_values:
    HMM_array = []
    X_i = []
    print("**************** K = ",K,"************************")
    for i in range(K):
        X_i.append([])

    # Sequences for initial HMM estimation
    for i in range(LENGTH):
        for j in range(K):
            if(i%K==j):
                X_i[j].append(list(X[i]))
    
    NUM_ITERATIONS = 0
    NUM_CLUSTER_PREV = {}
    NUM_CLUSTER_NOW = {}
                
    for i in range(K):
        model = hmm.GaussianHMM(n_components=3,covariance_type='spherical')
        X_temp, lengths = conversion_list_of_list(X_i[i],DIMENSION)
        model.fit(X_temp, lengths)
        HMM_array.append(model)
    
    likelihood_prev = likelihood_array(HMM_array,X_i)
    print("Likelihood for iteration",NUM_ITERATIONS,"is",likelihood_prev)
    NUM_ITERATIONS+=1
    while (True):
        # Assign all sequences to HMM models

        print("************ Check likelihood of sequence in HMM  *********")
        NUM_CLUSTER_NOW = {}
        for x in X:
            sequence = convert_values_to_list(x)
            hmm_index = likelihood_sequence(sequence, HMM_array)
            X_i[hmm_index].append(list(x))
            if (hmm_index not in NUM_CLUSTER_NOW):
                NUM_CLUSTER_NOW[hmm_index] = 1
            else:
                NUM_CLUSTER_NOW[hmm_index] += 1
        print("************ Checking likelihood done  *********")


        # Re-estimate parameters for new HMMs
        print("************ Re-estimating HMM *********")
        HMM_array = []
        for i in range(K):
            model = hmm.GaussianHMM(n_components=3,covariance_type='spherical')
            X_temp, lengths = conversion_list_of_list(X_i[i], DIMENSION)
            model.fit(X_temp, lengths)
            HMM_array.append(model)
        print("************ Re-estimation done *********")
        print("Previous assignments",NUM_CLUSTER_PREV)
        print("Current assignments",NUM_CLUSTER_NOW)
        likelihood_curr = likelihood_array(HMM_array,X_i)
        print("Likelihood for iteration",NUM_ITERATIONS,"is",likelihood_curr)
        print("*****************************************")

        # if no reassignments, then break
        if ((NUM_CLUSTER_PREV == NUM_CLUSTER_NOW)):
            HMM_K_ARRAYS.append(HMM_array)
            X_i_K_ARRAYS.append(X_i)
            break
        else:
            # initialize empty subsets of data for next iteration
            X_i = []
            for i in range(K):
                X_i.append([])

            NUM_CLUSTER_PREV = NUM_CLUSTER_NOW
            print("Num iterations is:", NUM_ITERATIONS)
            NUM_ITERATIONS += 1
            likelihood_prev = likelihood_curr
    print("**********************************************************\n\n")

**************** K =  2 ************************
Likelihood for iteration 0 is -5654.61810203
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {}
Current assignments {0: 1206, 1: 866}
Likelihood for iteration 1 is -10553.2642738
*****************************************
Num iterations is: 1
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 1206, 1: 866}
Current assignments {0: 1140, 1: 932}
Likelihood for iteration 2 is -4130.14019064
*****************************************
Num iterations is: 2
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-e

************ Re-estimation done *********
Previous assignments {1: 1058, 0: 1014}
Current assignments {1: 1059, 0: 1013}
Likelihood for iteration 22 is -3729.2479998
*****************************************
Num iterations is: 22
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 1059, 0: 1013}
Current assignments {1: 1059, 0: 1013}
Likelihood for iteration 23 is -3729.2479998
*****************************************
**********************************************************


**************** K =  3 ************************
Likelihood for iteration 0 is -5655.19163873
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {}
Current assignments {0: 1236, 2: 460, 1: 

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 796, 0: 602, 2: 674}
Current assignments {1: 796, 0: 603, 2: 673}
Likelihood for iteration 20 is -2806.17741204
*****************************************
Num iterations is: 20
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 796, 0: 603, 2: 673}
Current assignments {1: 796, 0: 606, 2: 670}
Likelihood for iteration 21 is -2806.52844545
*****************************************
Num iterations is: 21
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 796, 0: 606, 2: 670}
Current assignments {1: 797, 0: 607,

************ Re-estimation done *********
Previous assignments {0: 472, 2: 473, 3: 497, 1: 630}
Current assignments {0: 478, 2: 472, 3: 496, 1: 626}
Likelihood for iteration 15 is -2276.86445852
*****************************************
Num iterations is: 15
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 478, 2: 472, 3: 496, 1: 626}
Current assignments {0: 480, 2: 470, 3: 501, 1: 621}
Likelihood for iteration 16 is -2272.60396799
*****************************************
Num iterations is: 16
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 480, 2: 470, 3: 501, 1: 621}
Current assignments {0: 483, 2: 468, 3: 506, 1: 615}
Likelihood for iteration 17 is -2

Likelihood for iteration 35 is -2151.33824928
*****************************************
Num iterations is: 35
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 484, 2: 566, 3: 481, 1: 541}
Current assignments {0: 468, 2: 585, 3: 479, 1: 540}
Likelihood for iteration 36 is -2143.56429692
*****************************************
Num iterations is: 36
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 468, 2: 585, 3: 479, 1: 540}
Current assignments {0: 458, 2: 597, 3: 477, 1: 540}
Likelihood for iteration 37 is -2137.43874453
*****************************************
Num iterations is: 37
************ Check likelihood of sequence in HMM  *********
************

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {4: 474, 0: 347, 3: 513, 1: 470, 2: 268}
Current assignments {4: 472, 0: 348, 3: 513, 1: 471, 2: 268}
Likelihood for iteration 16 is -1935.46812116
*****************************************
Num iterations is: 16
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {4: 472, 0: 348, 3: 513, 1: 471, 2: 268}
Current assignments {4: 470, 0: 349, 3: 513, 1: 472, 2: 268}
Likelihood for iteration 17 is -1934.86826813
*****************************************
Num iterations is: 17
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignmen

Likelihood for iteration 35 is -1935.99777473
*****************************************
**********************************************************


**************** K =  6 ************************
Likelihood for iteration 0 is -5629.20583045
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {}
Current assignments {0: 303, 2: 338, 1: 512, 4: 433, 3: 158, 5: 328}
Likelihood for iteration 1 is -9343.32836661
*****************************************
Num iterations is: 1
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 303, 2: 338, 1: 512, 4: 433, 3: 158, 5: 328}
Current assignments {0: 280, 2: 380, 1: 469, 4: 410, 3: 220, 5: 313}
Likelihood for iteration 2 is -19

************ Re-estimation done *********
Previous assignments {0: 430, 2: 304, 3: 348, 1: 244, 4: 398, 5: 348}
Current assignments {0: 429, 2: 306, 3: 346, 1: 245, 4: 398, 5: 348}
Likelihood for iteration 19 is -1392.4121647
*****************************************
Num iterations is: 19
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 429, 2: 306, 3: 346, 1: 245, 4: 398, 5: 348}
Current assignments {0: 429, 2: 307, 3: 345, 1: 245, 4: 399, 5: 347}
Likelihood for iteration 20 is -1391.42063835
*****************************************
Num iterations is: 20
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 429, 2: 307, 3: 345, 1: 245, 4: 399, 5: 347}
Current

Likelihood for iteration 9 is -1018.23372458
*****************************************
Num iterations is: 9
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 298, 2: 248, 5: 312, 0: 273, 6: 300, 3: 358, 4: 283}
Current assignments {1: 297, 2: 251, 5: 313, 0: 276, 6: 296, 3: 354, 4: 285}
Likelihood for iteration 10 is -1014.7288034
*****************************************
Num iterations is: 10
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 297, 2: 251, 5: 313, 0: 276, 6: 296, 3: 354, 4: 285}
Current assignments {1: 297, 2: 252, 5: 312, 0: 277, 6: 301, 3: 352, 4: 281}
Likelihood for iteration 11 is -1014.07521412
*****************************************
N

Likelihood for iteration 27 is -1014.14698888
*****************************************
Num iterations is: 27
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 297, 2: 236, 5: 325, 0: 318, 6: 303, 3: 342, 4: 251}
Current assignments {1: 296, 2: 234, 5: 328, 0: 318, 6: 305, 3: 340, 4: 251}
Likelihood for iteration 28 is -1012.73097604
*****************************************
Num iterations is: 28
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {1: 296, 2: 234, 5: 328, 0: 318, 6: 305, 3: 340, 4: 251}
Current assignments {1: 297, 2: 231, 5: 330, 0: 320, 6: 304, 3: 339, 4: 251}
Likelihood for iteration 29 is -1012.98785237
****************************************

************ Re-estimation done *********
Previous assignments {0: 251, 2: 242, 3: 316, 7: 248, 5: 271, 4: 259, 1: 301, 6: 184}
Current assignments {0: 257, 2: 246, 3: 316, 7: 245, 5: 274, 4: 252, 1: 300, 6: 182}
Likelihood for iteration 13 is -897.726669499
*****************************************
Num iterations is: 13
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 257, 2: 246, 3: 316, 7: 245, 5: 274, 4: 252, 1: 300, 6: 182}
Current assignments {0: 256, 2: 248, 3: 317, 7: 248, 5: 275, 4: 246, 1: 300, 6: 182}
Likelihood for iteration 14 is -889.664156669
*****************************************
Num iterations is: 14
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous ass

Likelihood for iteration 7 is -658.469249254
*****************************************
Num iterations is: 7
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {5: 196, 4: 237, 6: 117, 0: 262, 3: 237, 7: 271, 1: 222, 8: 273, 2: 257}
Current assignments {5: 210, 4: 230, 6: 129, 0: 263, 3: 236, 7: 266, 1: 220, 8: 271, 2: 247}
Likelihood for iteration 8 is -644.761876677
*****************************************
Num iterations is: 8
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {5: 210, 4: 230, 6: 129, 0: 263, 3: 236, 7: 266, 1: 220, 8: 271, 2: 247}
Current assignments {5: 222, 6: 146, 0: 264, 3: 230, 7: 263, 1: 217, 8: 268, 2: 238, 4: 224}
Likelihood for iteration 9

Likelihood for iteration 24 is -490.466921617
*****************************************
Num iterations is: 24
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {5: 275, 6: 194, 2: 220, 3: 217, 7: 243, 8: 270, 0: 266, 4: 212, 1: 175}
Current assignments {5: 276, 6: 194, 2: 218, 3: 215, 7: 244, 8: 271, 0: 267, 4: 212, 1: 175}
Likelihood for iteration 25 is -489.020166589
*****************************************
Num iterations is: 25
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {5: 276, 6: 194, 2: 218, 3: 215, 7: 244, 8: 271, 0: 267, 4: 212, 1: 175}
Current assignments {5: 276, 6: 194, 2: 218, 3: 215, 7: 245, 8: 271, 0: 266, 4: 212, 1: 175}
Likelihood for iterati

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {9: 127, 2: 192, 3: 256, 7: 202, 0: 181, 1: 225, 4: 218, 5: 225, 8: 208, 6: 238}
Current assignments {9: 128, 2: 192, 3: 257, 7: 200, 0: 181, 1: 229, 4: 213, 5: 224, 8: 209, 6: 239}
Likelihood for iteration 13 is -395.736619573
*****************************************
Num iterations is: 13
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {9: 128, 2: 192, 3: 257, 7: 200, 0: 181, 1: 229, 4: 213, 5: 224, 8: 209, 6: 239}
Current assignments {9: 129, 2: 193, 3: 253, 7: 198, 0: 181, 1: 232, 4: 216, 5: 223, 8: 208, 6: 239}
Likelihood for iteration 14 is -396.219278547
*****************************************
Num iterations is: 14
************ Check likelihood of sequence in HMM  

Likelihood for iteration 29 is -363.755317949
*****************************************
Num iterations is: 29
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {9: 174, 2: 204, 3: 228, 7: 183, 0: 158, 1: 231, 4: 211, 5: 229, 8: 221, 6: 233}
Current assignments {9: 177, 2: 204, 3: 227, 7: 182, 0: 158, 1: 231, 4: 211, 5: 229, 8: 220, 6: 233}
Likelihood for iteration 30 is -363.566358664
*****************************************
Num iterations is: 30
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {9: 177, 2: 204, 3: 227, 7: 182, 0: 158, 1: 231, 4: 211, 5: 229, 8: 220, 6: 233}
Current assignments {9: 179, 2: 203, 3: 226, 7: 182, 0: 158, 1: 231, 4: 211, 5: 229, 8: 220

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {9: 219, 2: 206, 3: 223, 7: 169, 0: 132, 1: 233, 4: 211, 5: 231, 8: 215, 6: 233}
Current assignments {9: 220, 2: 205, 3: 223, 7: 169, 0: 132, 1: 232, 4: 212, 5: 231, 8: 215, 6: 233}
Likelihood for iteration 46 is -363.347880348
*****************************************
Num iterations is: 46
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {9: 220, 2: 205, 3: 223, 7: 169, 0: 132, 1: 232, 4: 212, 5: 231, 8: 215, 6: 233}
Current assignments {9: 221, 2: 205, 3: 223, 7: 168, 0: 132, 1: 232, 4: 212, 5: 232, 8: 215, 6: 232}
Likelihood for iteration 47 is -360.102144635
*****************************************
Num iterations is: 47
************ Check likelihood of sequence in HMM  

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {2: 183, 8: 122, 4: 217, 3: 185, 10: 185, 9: 144, 7: 217, 1: 144, 0: 282, 5: 175, 6: 218}
Current assignments {2: 182, 8: 123, 4: 216, 3: 186, 10: 180, 9: 147, 7: 215, 1: 148, 0: 276, 5: 178, 6: 221}
Likelihood for iteration 11 is -2.59394311411
*****************************************
Num iterations is: 11
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {2: 182, 8: 123, 4: 216, 3: 186, 10: 180, 9: 147, 7: 215, 1: 148, 0: 276, 5: 178, 6: 221}
Current assignments {2: 180, 8: 125, 4: 212, 3: 186, 10: 180, 9: 147, 7: 212, 1: 154, 0: 272, 5: 180, 6: 224}
Likelihood for iteration 12 is 25.3735439883
*****************************************
Num iterations is: 12
************ Ch

Likelihood for iteration 3 is -570.352096152
*****************************************
Num iterations is: 3
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {3: 129, 8: 271, 4: 258, 10: 204, 5: 170, 7: 196, 0: 286, 1: 152, 9: 209, 6: 62, 11: 105, 2: 30}
Current assignments {3: 170, 8: 244, 6: 77, 4: 265, 10: 207, 5: 168, 7: 184, 0: 237, 1: 150, 9: 214, 11: 125, 2: 31}
Likelihood for iteration 4 is -364.506887051
*****************************************
Num iterations is: 4
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {3: 170, 8: 244, 6: 77, 4: 265, 10: 207, 5: 168, 7: 184, 0: 237, 1: 150, 9: 214, 11: 125, 2: 31}
Current assignments {3: 175, 8: 227, 6: 87, 4: 

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {3: 168, 6: 119, 8: 177, 4: 230, 10: 222, 5: 186, 1: 189, 7: 184, 0: 160, 9: 182, 2: 70, 11: 185}
Current assignments {3: 168, 6: 121, 8: 178, 4: 227, 10: 223, 5: 185, 1: 189, 7: 185, 0: 159, 9: 182, 2: 70, 11: 185}
Likelihood for iteration 19 is 8.25391680792
*****************************************
Num iterations is: 19
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {3: 168, 6: 121, 8: 178, 4: 227, 10: 223, 5: 185, 1: 189, 7: 185, 0: 159, 9: 182, 2: 70, 11: 185}
Current assignments {3: 168, 6: 123, 8: 176, 4: 225, 10: 224, 5: 184, 1: 189, 7: 188, 0: 158, 9: 182, 2: 70, 11: 185}
Likelihood for iteration 20 is 13.3354363669
*****************************************
Num it

************ Re-estimation done *********
Previous assignments {11: 168, 5: 107, 8: 116, 9: 169, 2: 191, 3: 200, 7: 180, 4: 196, 1: 94, 6: 204, 0: 93, 12: 214, 10: 140}
Current assignments {11: 170, 5: 105, 8: 114, 9: 173, 2: 189, 3: 189, 7: 177, 4: 198, 1: 101, 6: 194, 0: 108, 12: 217, 10: 137}
Likelihood for iteration 8 is 167.649863219
*****************************************
Num iterations is: 8
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {11: 170, 5: 105, 8: 114, 9: 173, 2: 189, 3: 189, 7: 177, 4: 198, 1: 101, 6: 194, 0: 108, 12: 217, 10: 137}
Current assignments {11: 171, 5: 105, 8: 115, 9: 176, 2: 187, 3: 176, 7: 179, 4: 198, 1: 105, 6: 187, 0: 124, 12: 217, 10: 132}
Likelihood for iteration 9 is 198.823921852
*****************************************
Num iterations is: 9
************ Check likelihood of sequenc

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {11: 171, 5: 105, 8: 114, 9: 182, 2: 194, 3: 143, 0: 177, 4: 185, 1: 123, 10: 136, 6: 179, 12: 206, 7: 157}
Current assignments {11: 171, 5: 105, 8: 114, 9: 183, 2: 194, 3: 142, 0: 177, 4: 183, 1: 123, 10: 135, 6: 180, 12: 206, 7: 159}
Likelihood for iteration 23 is 323.233126614
*****************************************
Num iterations is: 23
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {11: 171, 5: 105, 8: 114, 9: 183, 2: 194, 3: 142, 0: 177, 4: 183, 1: 123, 10: 135, 6: 180, 12: 206, 7: 159}
Current assignments {11: 171, 5: 105, 8: 114, 9: 186, 2: 194, 3: 141, 0: 178, 4: 184, 1: 121, 10: 133, 6: 183, 12: 203, 7: 159}
Likelihood for iteration 24 is 325.594538766
********

Likelihood for iteration 7 is 246.146259294
*****************************************
Num iterations is: 7
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {7: 117, 2: 153, 12: 157, 4: 114, 6: 168, 0: 117, 13: 191, 11: 147, 10: 249, 3: 203, 5: 134, 8: 139, 1: 117, 9: 66}
Current assignments {7: 118, 2: 157, 12: 154, 4: 115, 6: 161, 0: 119, 13: 191, 11: 144, 10: 244, 3: 208, 5: 136, 8: 141, 1: 118, 9: 66}
Likelihood for iteration 8 is 259.926034766
*****************************************
Num iterations is: 8
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {7: 118, 2: 157, 12: 154, 4: 115, 6: 161, 0: 119, 13: 191, 11: 144, 10: 244, 3: 208, 5: 136, 8: 141, 1: 118,

************ Re-estimation done *********
Previous assignments {7: 115, 2: 183, 12: 132, 4: 99, 6: 176, 0: 140, 13: 178, 11: 167, 10: 201, 8: 120, 3: 209, 5: 161, 1: 120, 9: 71}
Current assignments {7: 114, 2: 185, 12: 132, 4: 100, 6: 177, 0: 140, 13: 178, 11: 167, 10: 201, 8: 119, 3: 209, 5: 161, 1: 118, 9: 71}
Likelihood for iteration 22 is 355.126813119
*****************************************
Num iterations is: 22
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {7: 114, 2: 185, 12: 132, 4: 100, 6: 177, 0: 140, 13: 178, 11: 167, 10: 201, 8: 119, 3: 209, 5: 161, 1: 118, 9: 71}
Current assignments {7: 114, 2: 186, 12: 132, 4: 100, 6: 179, 0: 140, 13: 178, 11: 167, 10: 201, 8: 117, 3: 209, 5: 161, 1: 117, 9: 71}
Likelihood for iteration 23 is 356.221048739
*****************************************
Num iterations is: 23
***

Likelihood for iteration 10 is 610.035710634
*****************************************
Num iterations is: 10
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {11: 101, 4: 116, 13: 134, 0: 104, 10: 125, 1: 157, 6: 198, 5: 122, 3: 156, 2: 194, 8: 129, 9: 157, 7: 148, 12: 150, 14: 81}
Current assignments {11: 103, 4: 115, 13: 134, 0: 104, 10: 126, 1: 155, 6: 196, 5: 121, 3: 154, 2: 193, 8: 131, 9: 160, 7: 148, 12: 150, 14: 82}
Likelihood for iteration 11 is 615.824599721
*****************************************
Num iterations is: 11
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {11: 103, 4: 115, 13: 134, 0: 104, 10: 126, 1: 155, 6: 196, 5: 121, 3: 154, 2: 193, 8:

Likelihood for iteration 24 is 700.930194068
*****************************************
Num iterations is: 24
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {3: 160, 4: 116, 13: 131, 11: 111, 0: 100, 10: 114, 9: 172, 6: 187, 5: 135, 2: 190, 1: 150, 8: 148, 7: 126, 12: 141, 14: 91}
Current assignments {3: 160, 4: 116, 13: 131, 11: 113, 0: 101, 10: 114, 9: 172, 6: 187, 5: 135, 2: 190, 1: 150, 8: 148, 7: 125, 12: 141, 14: 89}
Likelihood for iteration 25 is 701.503962883
*****************************************
Num iterations is: 25
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {3: 160, 4: 116, 13: 131, 11: 113, 0: 101, 10: 114, 9: 172, 6: 187, 5: 135, 2: 190, 1:

Likelihood for iteration 9 is 618.909868753
*****************************************
Num iterations is: 9
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 88, 13: 80, 11: 164, 3: 137, 10: 95, 7: 157, 2: 172, 8: 80, 1: 109, 4: 282, 5: 111, 14: 63, 12: 103, 15: 184, 9: 128, 6: 119}
Current assignments {0: 89, 13: 79, 11: 155, 3: 139, 10: 103, 7: 148, 2: 182, 8: 80, 1: 115, 4: 263, 5: 113, 14: 63, 12: 103, 15: 194, 9: 127, 6: 119}
Likelihood for iteration 10 is 645.594131029
*****************************************
Num iterations is: 10
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 89, 13: 79, 11: 155, 3: 139, 10: 103, 7: 148, 2: 182, 8: 80, 1: 115, 4: 2

Likelihood for iteration 23 is 747.649559219
*****************************************
Num iterations is: 23
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 97, 13: 79, 11: 129, 3: 129, 10: 138, 1: 131, 2: 185, 4: 212, 7: 141, 5: 124, 14: 63, 12: 97, 15: 177, 9: 162, 8: 80, 6: 128}
Current assignments {0: 97, 13: 79, 11: 129, 3: 129, 10: 138, 1: 131, 2: 188, 4: 212, 7: 139, 5: 124, 14: 63, 12: 97, 15: 176, 9: 162, 8: 80, 6: 128}
Likelihood for iteration 24 is 749.369733161
*****************************************
Num iterations is: 24
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {0: 97, 13: 79, 11: 129, 3: 129, 10: 138, 1: 131, 2: 188, 4: 212, 7: 139, 5:

Likelihood for iteration 10 is 679.717398003
*****************************************
Num iterations is: 10
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {10: 163, 13: 98, 1: 66, 14: 156, 5: 128, 0: 192, 12: 102, 2: 133, 15: 135, 6: 158, 4: 153, 8: 134, 16: 118, 11: 41, 9: 88, 7: 133, 3: 74}
Current assignments {10: 165, 13: 97, 1: 68, 14: 157, 5: 125, 0: 191, 12: 102, 2: 143, 15: 120, 6: 154, 4: 153, 8: 140, 16: 121, 11: 43, 9: 89, 7: 130, 3: 74}
Likelihood for iteration 11 is 642.946599882
*****************************************
Num iterations is: 11
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {10: 165, 13: 97, 1: 68, 14: 157, 5: 125, 0: 191, 12: 102,

************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {10: 174, 1: 69, 14: 153, 5: 100, 0: 171, 12: 108, 2: 150, 15: 103, 6: 151, 16: 128, 4: 154, 8: 140, 11: 65, 9: 87, 13: 94, 7: 139, 3: 86}
Current assignments {10: 174, 1: 69, 14: 154, 5: 100, 0: 171, 12: 108, 2: 150, 15: 103, 6: 152, 16: 128, 4: 153, 8: 140, 11: 65, 9: 87, 13: 94, 7: 140, 3: 84}
Likelihood for iteration 24 is 912.246761813
*****************************************
Num iterations is: 24
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {10: 174, 1: 69, 14: 154, 5: 100, 0: 171, 12: 108, 2: 150, 15: 103, 6: 152, 16: 128, 4: 153, 8: 140, 11: 65, 9: 87, 13: 94, 7: 140, 3: 84}
Current assignments {10: 173, 1: 69, 14: 154, 5: 100, 0: 172, 12: 108, 2: 150, 15: 103, 

Likelihood for iteration 11 is 654.506672187
*****************************************
Num iterations is: 11
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 116, 6: 101, 2: 95, 14: 159, 10: 173, 5: 126, 16: 168, 0: 175, 3: 103, 7: 147, 4: 221, 17: 179, 1: 113, 11: 64, 8: 2, 9: 127, 13: 2, 15: 1}
Current assignments {12: 112, 6: 101, 2: 96, 14: 160, 10: 170, 5: 126, 16: 169, 0: 175, 3: 103, 7: 145, 4: 222, 17: 178, 1: 115, 11: 69, 8: 2, 9: 126, 13: 2, 15: 1}
Likelihood for iteration 12 is 656.927079519
*****************************************
Num iterations is: 12
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 112, 6: 101, 2: 96, 14: 160, 10: 170, 5: 

Likelihood for iteration 24 is 698.588840705
*****************************************
Num iterations is: 24
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 145, 10: 166, 5: 122, 16: 161, 0: 171, 9: 120, 7: 132, 4: 222, 17: 175, 1: 114, 11: 92, 8: 2, 3: 110, 13: 2, 15: 1}
Current assignments {12: 122, 6: 108, 2: 107, 14: 145, 10: 167, 5: 122, 16: 161, 0: 171, 9: 120, 7: 132, 4: 222, 17: 175, 1: 114, 11: 92, 8: 2, 3: 109, 13: 2, 15: 1}
Likelihood for iteration 25 is 698.36929681
*****************************************
Num iterations is: 25
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 145, 10: 167, 5

Likelihood for iteration 37 is 700.454720226
*****************************************
Num iterations is: 37
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 147, 10: 168, 5: 126, 16: 158, 0: 153, 9: 123, 7: 131, 4: 217, 17: 181, 1: 114, 11: 98, 8: 2, 3: 114, 13: 2, 15: 1}
Current assignments {12: 122, 6: 108, 2: 107, 14: 147, 10: 169, 5: 127, 16: 160, 0: 153, 9: 123, 7: 131, 4: 213, 17: 181, 1: 113, 11: 99, 8: 2, 3: 114, 13: 2, 15: 1}
Likelihood for iteration 38 is 701.249859528
*****************************************
Num iterations is: 38
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 147, 10: 169, 

Likelihood for iteration 50 is 713.542720655
*****************************************
Num iterations is: 50
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 157, 10: 164, 5: 124, 16: 165, 0: 147, 9: 127, 7: 137, 4: 182, 17: 181, 1: 115, 11: 103, 8: 2, 3: 128, 13: 2, 15: 1}
Current assignments {12: 122, 6: 108, 2: 107, 14: 160, 10: 164, 5: 124, 16: 165, 0: 145, 9: 128, 7: 138, 4: 182, 17: 180, 1: 112, 11: 103, 8: 2, 3: 129, 13: 2, 15: 1}
Likelihood for iteration 51 is 715.556659297
*****************************************
Num iterations is: 51
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 160, 10: 164

Likelihood for iteration 63 is 718.972876866
*****************************************
Num iterations is: 63
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 163, 10: 165, 5: 128, 16: 167, 0: 147, 9: 129, 7: 143, 4: 176, 17: 169, 1: 112, 11: 103, 8: 2, 3: 128, 13: 2, 15: 1}
Current assignments {12: 122, 6: 108, 2: 107, 14: 163, 10: 165, 5: 128, 16: 168, 0: 148, 9: 129, 7: 143, 4: 176, 17: 168, 1: 112, 11: 103, 8: 2, 3: 127, 13: 2, 15: 1}
Likelihood for iteration 64 is 718.664100763
*****************************************
Num iterations is: 64
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 163, 10: 165

Likelihood for iteration 76 is 763.211330187
*****************************************
Num iterations is: 76
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 164, 10: 165, 5: 117, 16: 178, 0: 147, 9: 129, 7: 120, 4: 182, 17: 183, 1: 112, 11: 103, 8: 2, 3: 130, 13: 2, 15: 1}
Current assignments {12: 122, 6: 108, 2: 107, 14: 164, 10: 165, 5: 116, 16: 179, 0: 147, 9: 129, 7: 119, 4: 185, 17: 183, 1: 112, 11: 103, 8: 2, 3: 128, 13: 2, 15: 1}
Likelihood for iteration 77 is 767.440218567
*****************************************
Num iterations is: 77
************ Check likelihood of sequence in HMM  *********
************ Checking likelihood done  *********
************ Re-estimating HMM *********
************ Re-estimation done *********
Previous assignments {12: 122, 6: 108, 2: 107, 14: 164, 10: 165

IndexError: list index out of range

In [33]:
import dill
dill.dump_session('notebook_env.db')

# Calculate BIC for these assignments

In [None]:
print(BIC_array(HMM_array,X_i))

In [None]:
print(BIC_array(HMM_array,X_i))

# Noise Cluster

In [None]:
# Get list of lists to fit
Z, lengths = conversion_list_of_list(X,DIMENSION)

In [None]:
# Noise cluster 

model_noise = hmm.GaussianHMM(n_components=3,covariance_type='spherical')
model_noise.fit(Z, lengths)

In [None]:
HMM_model_stats(model_noise)

# Generate subsets of data for K HMMs

In [8]:
K = 3
HMM_array = []
X_i = []

for i in range(K):
    X_i.append([])

# Sequences for initial HMM estimation
for i in range(LENGTH):
    for j in range(K):
        if(i%K==j):
            X_i[j].append(list(X[i]))

In [None]:
for i in range(K):
    model = hmm.GaussianHMM(n_components=3,covariance_type='spherical')
    X_temp, lengths = conversion_list_of_list(X_i[i],DIMENSION)
    model.fit(X_temp, lengths)
    HMM_array.append(model)

In [None]:
likelihood_prev = likelihood_array(HMM_array,X_i)
print("Likelihood for iteration",0,"is",likelihood_prev)

## Statistics for K HMMs

In [None]:
for i in range(K):
    print("Statistics for HMM Model", i)
    HMM_model_stats(HMM_array[i])
    print('\n\n')

In [None]:
NUM_ITERATIONS = 1
NUM_CLUSTER_PREV = {}
NUM_CLUSTER_NOW = {}

In [None]:
#initialize empty subsets of data
X_i = []

for i in range(K):
    X_i.append([])