In [188]:
import sys, os
import pandas as pd
import numpy as np
import ast
from hmmlearn import hmm
from sklearn.model_selection import ParameterGrid
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

import preprocess

# 1. Data Load

In [189]:
orig_df = pd.read_csv('./data/exhibition_behavior_preprocessed.csv', delimiter=',', index_col=False)

In [190]:
orig_df

Unnamed: 0,uid,date,filename,start,duration,A/C,behavior,code,M/F,appearance
0,0,11월 19일,01_20221119085958_part2,56240,2720,Child,물리적거리,Approach,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
1,0,11월 19일,01_20221119085958_part2,59360,2960,Child,상호작용시도,,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
2,0,11월 19일,01_20221119085958_part2,71420,5520,Child,물리적거리,Approach,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
3,0,11월 19일,01_20221119085958_part2,77400,6320,Child,상호작용시도,,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
4,1,11월 19일,01_20221119085958_part2,48240,9240,Child,물리적거리,Approach,Male,"검정색 점퍼, 청바지, 검정/흰색 운동화"
...,...,...,...,...,...,...,...,...,...,...
2682,371,12월 4일,01_20221204165959_part1,2567280,17280,Child,물리적거리,Pass,Female,"하늘색 롱패딩, 흰색 바지"
2683,371,12월 4일,01_20221204165959_part1,2893760,14160,Child,물리적거리,Pass,Female,"하늘색 롱패딩, 흰색 바지"
2684,372,12월 4일,01_20221204165959_part1,2567280,18080,Adult,물리적거리,Pass,Male,"검은색 롱패딩, 검은색 바지"
2685,373,12월 4일,01_20221204165959_part2,15760,13840,Child,물리적거리,Pass,Female,"하늘색 롱패딩, 흰색 바지"


Remove 'Pass' tag

In [191]:
orig_df = orig_df.drop(orig_df[orig_df['code'] == 'Pass'].index)

In [192]:
orig_df = orig_df.reset_index(drop=True)

# 2. Indexing
Avoid, Follow를 제거하지 않고 Approach와 결합하여 indexing

In [193]:
orig_df = preprocess.index_with_start(orig_df)
orig_df = preprocess.reindex(orig_df, ['uid', 'pid', 'date', 'filename', 'start', 'duration', 'A/C', 'M/F', 'behavior', 'code', 'appearance'])

In [194]:
orig_df

Unnamed: 0,uid,pid,date,filename,start,duration,A/C,M/F,behavior,code,appearance
0,0,0,11월 19일,01_20221119085958_part2,56240,2720,Child,Female,물리적거리,Approach,"포니테일, 아이보리색 점퍼, 검정색 바지"
1,0,0,11월 19일,01_20221119085958_part2,59360,2960,Child,Female,상호작용시도,,"포니테일, 아이보리색 점퍼, 검정색 바지"
2,0,1,11월 19일,01_20221119085958_part2,71420,5520,Child,Female,물리적거리,Approach,"포니테일, 아이보리색 점퍼, 검정색 바지"
3,0,1,11월 19일,01_20221119085958_part2,77400,6320,Child,Female,상호작용시도,,"포니테일, 아이보리색 점퍼, 검정색 바지"
4,1,2,11월 19일,01_20221119085958_part2,48240,9240,Child,Male,물리적거리,Approach,"검정색 점퍼, 청바지, 검정/흰색 운동화"
...,...,...,...,...,...,...,...,...,...,...,...
2362,367,563,12월 4일,01_20221204160000_part1,1746960,11680,Child,Male,상호작용시도,,"검은색 롱패딩, 검은색 바지"
2363,367,563,12월 4일,01_20221204160000_part1,1758640,4800,Child,Male,물리적거리,Approach,"검은색 롱패딩, 검은색 바지"
2364,367,563,12월 4일,01_20221204160000_part1,1763440,16480,Child,Male,상호작용시도,,"검은색 롱패딩, 검은색 바지"
2365,369,564,12월 4일,01_20221204165959_part1,2730640,12640,Child,Male,물리적거리,Approach,"갈색 후리스, 검은색 바지"


# 3. Make Data

In [195]:
def make_data(df):
    df = df.groupby('pid').agg({'code': list, 'start': list, 'duration': list, 'A/C': 'first', 'M/F': 'first', 'appearance': list})
    df['appearance'] = [set(data) for data in df['appearance']]
#     df = df.drop(df[df['code'].apply(lambda x: len(x) <= 1)].index)
    df.reset_index(inplace=True, drop=True)
    return df

In [196]:
df = orig_df[['pid', 'code', 'start', 'duration', 'A/C', 'M/F', 'appearance']]

In [197]:
df = make_data(df)

In [198]:
df

Unnamed: 0,code,start,duration,A/C,M/F,appearance
0,"[Approach, None]","[56240, 59360]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
1,"[Approach, None]","[71420, 77400]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
2,"[Approach, None, Approach, None]","[48240, 56640, 71200, 77400]","[9240, 15600, 5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}"
3,"[Approach, None]","[238160, 242320]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}"
4,"[Approach, None, Approach, None]","[47920, 56480, 71200, 77200]","[8640, 16480, 6000, 6640]",Adult,Female,"{검정 핸드백, 흰색 점퍼, 아이보리색 바지}"
...,...,...,...,...,...,...
560,"[None, Approach, None, Approach, None]","[1716560, 1729280, 1735360, 1768880, 1771040]","[12720, 6080, 33520, 2160, 10000]",Adult,Female,"{검은색 패딩, 회색 상의, 회색 바지}"
561,"[Approach, None]","[1573200, 1582960]","[9760, 135360]",Child,Female,"{회색 롱패딩, 흰색 바지}"
562,"[Approach, None, Approach, None, Approach, None]","[1727920, 1738320, 1744880, 1750160, 1755360, ...","[10400, 6560, 5280, 5200, 4560, 20800]",Child,Female,"{회색 롱패딩, 흰색 바지}"
563,"[Approach, None, Gesture, None, Approach, None...","[1566960, 1574640, 1579840, 1582640, 1585040, ...","[7680, 5200, 2800, 2400, 5360, 45360, 1840, 36...",Child,Male,"{검은색 롱패딩, 검은색 바지}"


In [199]:
df.reset_index(inplace=True, drop=True)
df.index

RangeIndex(start=0, stop=565, step=1)

## 3-1) 이상치 제거

In [200]:
print("1. code list가 상호작용시도 code로 시작하고 list 내에 approach가 없는 code list 제거")
count = 0

for i in df.index:
    try:
        if (df['code'][i][0] == 'None' or df['code'][i][0] == 'Touch' or df['code'][i][0] == 'Gesture'):
            if 'Approach' not in df['code'][i]:
                print(df['code'][i])
                df.drop(i, inplace=True)
                count += 1
    except: 
        break
        
print("drop한 데이터 개수:", count)

1. code list가 상호작용시도 code로 시작하고 list 내에 approach가 없는 code list 제거
['None']
['None', 'Touch', 'None', 'Touch']
['Gesture']
['None', 'Avoid']
['Touch', 'Avoid']
['None']
['Gesture']
['Touch', 'None']
['None']
['Gesture', 'None', 'Touch', 'None']
['Touch']
['Touch']
['Touch', 'None']
['Touch']
['Touch']
['Touch']
['Touch']
['Touch']
['Touch', 'Touch']
['None']
['None']
['None']
['None']
['None', 'Touch', 'None', 'Touch', 'None']
['None']
['Gesture', 'None', 'Gesture']
['Gesture']
['None']
['None']
['None']
['Gesture']
['None']
['None']
['None']
['None']
['None']
['None']
['None']
['None', 'Avoid']
['None']
['None']
['None', 'Touch', 'None', 'Touch', 'None']
['None']
['None']
drop한 데이터 개수: 44


In [201]:
print("2. code list가 상호작용시도 코드로 시작하지만 Approach 코드가 있는 경우 Approach 이전의 상호작용시도 code들을 제거")
index = 0
count = 0

for i in df.index:
    try:
        index = 0
        if (df['code'][i][0] == 'None' or df['code'][i][0] == 'Touch' or df['code'][i][0] == 'Gesture') \
                and ('Approach' in df['code'][i]):
            for code in df['code'][i]:
                if code != 'Approach' and code != 'Avoid' and code != 'Follow':
                    index += 1
                else:
                    break
            if index != 0:
                print("before:", df['code'][i])
#                 print("before:", df['start'][i])
#                 print("before:", df['duration'][i])
                
                print("after: ", df['code'][i][index:])
#                 print("after: ", df['start'][i][index:])
#                 print("after: ", df['duration'][i][index:])
                print("---------------------------------------")
                
                df['code'][i] = df['code'][i][index:]
                df['start'][i] = df['start'][i][index:]
                df['duration'][i] = df['duration'][i][index:]
                
                count += 1
    except: 
        break

print("값을 바꾼 데이터 개수:", count)

2. code list가 상호작용시도 코드로 시작하지만 Approach 코드가 있는 경우 Approach 이전의 상호작용시도 code들을 제거
before: ['Touch', 'Approach']
after:  ['Approach']
---------------------------------------
before: ['Gesture', 'Avoid', 'Approach', 'Touch', 'None', 'Touch', 'Avoid', 'Follow', 'Approach', 'Approach', 'Touch', 'None', 'Touch', 'None', 'Touch', 'None']
after:  ['Avoid', 'Approach', 'Touch', 'None', 'Touch', 'Avoid', 'Follow', 'Approach', 'Approach', 'Touch', 'None', 'Touch', 'None', 'Touch', 'None']
---------------------------------------
before: ['None', 'Touch', 'None', 'Touch', 'None', 'Approach', 'None', 'Touch', 'None', 'Touch']
after:  ['Approach', 'None', 'Touch', 'None', 'Touch']
---------------------------------------
before: ['Touch', 'None', 'Approach', 'None', 'Touch', 'None', 'Approach', 'None']
after:  ['Approach', 'None', 'Touch', 'None', 'Approach', 'None']
---------------------------------------
before: ['None', 'Approach', 'Touch', 'None', 'Gesture', 'Touch', 'None', 'Gesture', 'Touch', 'No

In [202]:
print("3. code list에 Avoid 또는 Follow와 상호작용시도 code로만 이루어진 code list 제거")
count = 0

for i in df.index:
    try:
        if (len(df['code'][i]) > 1) and (df['code'][i][0] == 'Avoid' or df['code'][i][0] == 'Follow'):
            if 'Approach' not in df['code'][i]:
                print(df['code'][i])
                df.drop(i, inplace=True)
                count += 1
    except:
        break

print("drop한 데이터 개수:", count)

3. code list에 Avoid 또는 Follow와 상호작용시도 code로만 이루어진 code list 제거
['Avoid', 'None']
['Follow', 'None']
['Avoid', 'None']
['Avoid', 'None', 'Touch']
['Follow', 'Avoid', 'Follow']
['Avoid', 'None']
['Follow', 'Avoid']
['Avoid', 'Follow']
['Avoid', 'Follow']
['Avoid', 'Follow']
['Follow', 'Avoid']
drop한 데이터 개수: 11


In [203]:
print("4. code list의 길이가 1 이하인 code list를 제거")
count = 0

for i in df.index:
    try:
        if len(df['code'][i]) <= 1: 
#             or 'Approach' not in df['code'][i]) \
#                 or (len(df['code'][i]) <= 1 and 'Approach' in df['code'][i]):
            print(df['code'][i])
            df.drop(i, inplace=True)
            count += 1
    except:
        break

print("drop한 데이터 개수:", count)
df.reset_index(inplace=True, drop=True)

4. code list의 길이가 1 이하인 code list를 제거
['Approach']
['Follow']
['Follow']
['Follow']
['Follow']
['Approach']
['Follow']
['Approach']
['Follow']
['Approach']
['Approach']
['Approach']
['Approach']
['Approach']
['Approach']
['Approach']
['Approach']
['Approach']
['Avoid']
['Follow']
['Approach']
['Follow']
['Approach']
['Approach']
['Approach']
drop한 데이터 개수: 25


In [204]:
df.reset_index(inplace=True, drop=True)
df.index

RangeIndex(start=0, stop=485, step=1)

In [205]:
df

Unnamed: 0,code,start,duration,A/C,M/F,appearance
0,"[Approach, None]","[56240, 59360]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
1,"[Approach, None]","[71420, 77400]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
2,"[Approach, None, Approach, None]","[48240, 56640, 71200, 77400]","[9240, 15600, 5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}"
3,"[Approach, None]","[238160, 242320]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}"
4,"[Approach, None, Approach, None]","[47920, 56480, 71200, 77200]","[8640, 16480, 6000, 6640]",Adult,Female,"{검정 핸드백, 흰색 점퍼, 아이보리색 바지}"
...,...,...,...,...,...,...
480,"[Approach, None, Approach, None]","[1729280, 1735360, 1768880, 1771040]","[6080, 33520, 2160, 10000]",Adult,Female,"{검은색 패딩, 회색 상의, 회색 바지}"
481,"[Approach, None]","[1573200, 1582960]","[9760, 135360]",Child,Female,"{회색 롱패딩, 흰색 바지}"
482,"[Approach, None, Approach, None, Approach, None]","[1727920, 1738320, 1744880, 1750160, 1755360, ...","[10400, 6560, 5280, 5200, 4560, 20800]",Child,Female,"{회색 롱패딩, 흰색 바지}"
483,"[Approach, None, Gesture, None, Approach, None...","[1566960, 1574640, 1579840, 1582640, 1585040, ...","[7680, 5200, 2800, 2400, 5360, 45360, 1840, 36...",Child,Male,"{검은색 롱패딩, 검은색 바지}"


In [130]:
df.to_csv('./data/behavior_hmm_data.csv')

## 3-2) One-Hot Encoding (code, duration)

In [54]:
def code_one_hot_encoding(df):
    # 행동 코드 리스트
    actions = ['Avoid', 'Follow', 'Approach', 'None', 'Touch', 'Gesture']

    # 각각의 sublist를 one-hot encoding하여 다차원 리스트로 생성
    one_hot_data = []
    for sublist in df['code']:
        one_hot_sublist = np.zeros((len(sublist), len(actions)))
        for i, code in enumerate(sublist):
            index = actions.index(code)
            one_hot_sublist[i][index] = 1
        one_hot_data.append(one_hot_sublist)
    
    df['encoded_code'] = one_hot_data
    df['encoded_code'] = df[['encoded_code']].apply(lambda x: [np.array(item).astype(int) for item in x])

    return df

In [55]:
def duration_one_hot_encoding(df):
    # 행동 코드 리스트
    actions = ['Avoid', 'Follow', 'Approach', 'None', 'Touch', 'Gesture']

    # 각각의 sublist를 one-hot encoding하여 다차원 리스트로 생성
    one_hot_data = []
    for i, sublist in enumerate(df['code']):
        one_hot_sublist = np.zeros((len(sublist), len(actions)))
        for j, code in enumerate(sublist):
            index = actions.index(code)
            one_hot_sublist[j][index] = df['duration'][i][j]
        one_hot_data.append(one_hot_sublist)
    
    df['encoded_duration'] = one_hot_data
    df['encoded_duration'] = df[['encoded_duration']].apply(lambda x: [np.array(item).astype(int) for item in x])

    return df

In [56]:
df = code_one_hot_encoding(df)
df = duration_one_hot_encoding(df)

In [57]:
df

Unnamed: 0,code,start,duration,A/C,M/F,appearance,encoded_code,encoded_duration
0,"[Approach, None]","[56240, 59360]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 2720, 0, 0, 0], [0, 0, 0, 2960, 0, 0]]"
1,"[Approach, None]","[71420, 77400]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 5520, 0, 0, 0], [0, 0, 0, 6320, 0, 0]]"
2,"[Approach, None, Approach, None]","[48240, 56640, 71200, 77400]","[9240, 15600, 5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 9240, 0, 0, 0], [0, 0, 0, 15600, 0, 0]..."
3,"[Approach, None]","[238160, 242320]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 4160, 0, 0, 0], [0, 0, 0, 7760, 0, 0]]"
4,"[Approach, None, Approach, None]","[47920, 56480, 71200, 77200]","[8640, 16480, 6000, 6640]",Adult,Female,"{검정 핸드백, 흰색 점퍼, 아이보리색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 8640, 0, 0, 0], [0, 0, 0, 16480, 0, 0]..."
...,...,...,...,...,...,...,...,...
480,"[Approach, None, Approach, None]","[1729280, 1735360, 1768880, 1771040]","[6080, 33520, 2160, 10000]",Adult,Female,"{검은색 패딩, 회색 상의, 회색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 6080, 0, 0, 0], [0, 0, 0, 33520, 0, 0]..."
481,"[Approach, None]","[1573200, 1582960]","[9760, 135360]",Child,Female,"{회색 롱패딩, 흰색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 9760, 0, 0, 0], [0, 0, 0, 135360, 0, 0]]"
482,"[Approach, None, Approach, None, Approach, None]","[1727920, 1738320, 1744880, 1750160, 1755360, ...","[10400, 6560, 5280, 5200, 4560, 20800]",Child,Female,"{회색 롱패딩, 흰색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 10400, 0, 0, 0], [0, 0, 0, 6560, 0, 0]..."
483,"[Approach, None, Gesture, None, Approach, None...","[1566960, 1574640, 1579840, 1582640, 1585040, ...","[7680, 5200, 2800, 2400, 5360, 45360, 1840, 36...",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 7680, 0, 0, 0], [0, 0, 0, 5200, 0, 0],..."


## 3-3) Add Zero Padding
sequence가 가장 긴 data에 맞춰 zero padding을 더함

In [58]:
def padding_encoding_data(df, col_name, num):
    max_length = max(map(len, df[col_name]))  # 가장 긴 sequence의 길이 구하기
    padded_data = []
    
    for data in df[col_name]:
        if max_length - len(data) == 0:
            padded_data.append(data)
        else:
            # 가장 긴 sequence 길이를 기준으로 padding 적용
            padding_list = [[0 for i in range(num)]] * (max_length - len(data))
            result = np.concatenate((data, padding_list), axis=0)
            padded_data.append(result)
        
    df[col_name] = padded_data

    return df

In [59]:
df = padding_encoding_data(df, "encoded_code", 6)
df = padding_encoding_data(df, "encoded_duration", 6)

In [60]:
df

Unnamed: 0,code,start,duration,A/C,M/F,appearance,encoded_code,encoded_duration
0,"[Approach, None]","[56240, 59360]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 2720, 0, 0, 0], [0, 0, 0, 2960, 0, 0],..."
1,"[Approach, None]","[71420, 77400]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 5520, 0, 0, 0], [0, 0, 0, 6320, 0, 0],..."
2,"[Approach, None, Approach, None]","[48240, 56640, 71200, 77400]","[9240, 15600, 5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 9240, 0, 0, 0], [0, 0, 0, 15600, 0, 0]..."
3,"[Approach, None]","[238160, 242320]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 4160, 0, 0, 0], [0, 0, 0, 7760, 0, 0],..."
4,"[Approach, None, Approach, None]","[47920, 56480, 71200, 77200]","[8640, 16480, 6000, 6640]",Adult,Female,"{검정 핸드백, 흰색 점퍼, 아이보리색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 8640, 0, 0, 0], [0, 0, 0, 16480, 0, 0]..."
...,...,...,...,...,...,...,...,...
480,"[Approach, None, Approach, None]","[1729280, 1735360, 1768880, 1771040]","[6080, 33520, 2160, 10000]",Adult,Female,"{검은색 패딩, 회색 상의, 회색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 6080, 0, 0, 0], [0, 0, 0, 33520, 0, 0]..."
481,"[Approach, None]","[1573200, 1582960]","[9760, 135360]",Child,Female,"{회색 롱패딩, 흰색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 9760, 0, 0, 0], [0, 0, 0, 135360, 0, 0..."
482,"[Approach, None, Approach, None, Approach, None]","[1727920, 1738320, 1744880, 1750160, 1755360, ...","[10400, 6560, 5280, 5200, 4560, 20800]",Child,Female,"{회색 롱패딩, 흰색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 10400, 0, 0, 0], [0, 0, 0, 6560, 0, 0]..."
483,"[Approach, None, Gesture, None, Approach, None...","[1566960, 1574640, 1579840, 1582640, 1585040, ...","[7680, 5200, 2800, 2400, 5360, 45360, 1840, 36...",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 7680, 0, 0, 0], [0, 0, 0, 5200, 0, 0],..."


## 3-4) Make Time Series Data
code list와 duration list를 병합하여 시계열 데이터로 변환

In [61]:
def make_data_for_hmm(df):
    time_series_data = []

    for encoded_code, encoded_duration in zip(df['encoded_code'], df['encoded_duration']):
        encoded_code = np.squeeze(encoded_code)
        encoded_duration = np.squeeze(encoded_duration)

        # encoded_code와 encoded_duration을 수평으로 결합합니다.
        time_series_data.append(np.hstack([encoded_code, encoded_duration]))

    df['time_series_data'] = time_series_data
    
    return df

In [62]:
df = make_data_for_hmm(df)

In [63]:
df

Unnamed: 0,code,start,duration,A/C,M/F,appearance,encoded_code,encoded_duration,time_series_data
0,"[Approach, None]","[56240, 59360]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 2720, 0, 0, 0], [0, 0, 0, 2960, 0, 0],...","[[0, 0, 1, 0, 0, 0, 0, 0, 2720, 0, 0, 0], [0, ..."
1,"[Approach, None]","[71420, 77400]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 5520, 0, 0, 0], [0, 0, 0, 6320, 0, 0],...","[[0, 0, 1, 0, 0, 0, 0, 0, 5520, 0, 0, 0], [0, ..."
2,"[Approach, None, Approach, None]","[48240, 56640, 71200, 77400]","[9240, 15600, 5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 9240, 0, 0, 0], [0, 0, 0, 15600, 0, 0]...","[[0, 0, 1, 0, 0, 0, 0, 0, 9240, 0, 0, 0], [0, ..."
3,"[Approach, None]","[238160, 242320]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 4160, 0, 0, 0], [0, 0, 0, 7760, 0, 0],...","[[0, 0, 1, 0, 0, 0, 0, 0, 4160, 0, 0, 0], [0, ..."
4,"[Approach, None, Approach, None]","[47920, 56480, 71200, 77200]","[8640, 16480, 6000, 6640]",Adult,Female,"{검정 핸드백, 흰색 점퍼, 아이보리색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 8640, 0, 0, 0], [0, 0, 0, 16480, 0, 0]...","[[0, 0, 1, 0, 0, 0, 0, 0, 8640, 0, 0, 0], [0, ..."
...,...,...,...,...,...,...,...,...,...
480,"[Approach, None, Approach, None]","[1729280, 1735360, 1768880, 1771040]","[6080, 33520, 2160, 10000]",Adult,Female,"{검은색 패딩, 회색 상의, 회색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 6080, 0, 0, 0], [0, 0, 0, 33520, 0, 0]...","[[0, 0, 1, 0, 0, 0, 0, 0, 6080, 0, 0, 0], [0, ..."
481,"[Approach, None]","[1573200, 1582960]","[9760, 135360]",Child,Female,"{회색 롱패딩, 흰색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 9760, 0, 0, 0], [0, 0, 0, 135360, 0, 0...","[[0, 0, 1, 0, 0, 0, 0, 0, 9760, 0, 0, 0], [0, ..."
482,"[Approach, None, Approach, None, Approach, None]","[1727920, 1738320, 1744880, 1750160, 1755360, ...","[10400, 6560, 5280, 5200, 4560, 20800]",Child,Female,"{회색 롱패딩, 흰색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 10400, 0, 0, 0], [0, 0, 0, 6560, 0, 0]...","[[0, 0, 1, 0, 0, 0, 0, 0, 10400, 0, 0, 0], [0,..."
483,"[Approach, None, Gesture, None, Approach, None...","[1566960, 1574640, 1579840, 1582640, 1585040, ...","[7680, 5200, 2800, 2400, 5360, 45360, 1840, 36...",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 7680, 0, 0, 0], [0, 0, 0, 5200, 0, 0],...","[[0, 0, 1, 0, 0, 0, 0, 0, 7680, 0, 0, 0], [0, ..."


In [64]:
df.to_csv("./data/behavior_hmm_preprocessed.csv")

# HMM

In [65]:
time_series_data = []

for encoded_code, encoded_duration in zip(df['encoded_code'], df['encoded_duration']):
    encoded_code = np.squeeze(encoded_code)
    encoded_duration = np.squeeze(encoded_duration)

    # encoded_code와 encoded_duration을 수평으로 결합합니다.
    time_series_data.append(np.hstack([encoded_code, encoded_duration]))

time_series_data = np.array(time_series_data)
n_samples, n_timesteps, n_features = time_series_data.shape

In [66]:
X = time_series_data.reshape((n_samples, n_timesteps * n_features))

In [223]:
def store_results(df, predicted_results, filename):
    # Decode the predicted states back to engagement levels
    results = np.array(['state0' if s == 2 else 'state1' if s == 1 else 'state2' for s in predicted_results])

    # Calculate the score (you can use any evaluation metric you want)
    # score = model.score(X)
    
    behavior = df[['code', 'duration']]
    behavior['predicted_engagement_levels'] = results
    
    # 저장할 파일 경로
    file_path = './data/grid/' + filename
    # ExcelWriter 객체 생성
    writer = pd.ExcelWriter(file_path)

    state0_df = behavior[behavior['predicted_engagement_levels'] == 'state0']
    state1_df = behavior[behavior['predicted_engagement_levels'] == 'state1']
    state2_df = behavior[behavior['predicted_engagement_levels'] == 'state2']
    
    # 각 DataFrame을 다른 시트에 저장
    state0_df.to_excel(writer, sheet_name='state0', index=False)
    state1_df.to_excel(writer, sheet_name='state1', index=False)
    state2_df.to_excel(writer, sheet_name='state2', index=False)

    # 저장 및 파일 닫기
    writer.save()
    writer.close()

In [68]:
def print_trained_params(model):
    # Print trained parameters and plot
    print(">> Transition matrix")
    print(np.round(model.transmat_, 2))
    print()

    print(">> Initial Probability")
    print(np.round(model.startprob_, 2))

## Simple Grid Search   

In [77]:
# Grid search parameters
n_states = [3, 4, 5]  # Possible number of states
covariance_type = ['spherical', 'tied', 'diag', 'full']  # Possible covariance types
min_covar = [0.001, 0.01, 0.1]  # Minimum covariance value
n_iter = [10, 50, 100]  # Number of iterations
algorithm = ['viterbi', 'map']  # Decoding algorithm

# Create parameter grid
param_grid = ParameterGrid({
    'n_states': n_states,
    'covariance_type': covariance_type,
    'min_covar': min_covar,
    'n_iter': n_iter,
    'algorithm': algorithm
})

# Perform grid search
for params in param_grid:
    print("n_states : {0}\n"
      "covariance_type: {1}\n"
      "min_covar: {2}\n"
      "n_iter: {3}\n"
      "algorithm: {4}\n".format(params['n_states'], params['covariance_type'], params['min_covar'], params['n_iter'], params['algorithm']))

    # Create GaussianHMM model with current parameters
    model = hmm.GaussianHMM(n_components=params['n_states'], covariance_type=params['covariance_type'],
                        min_covar=params['min_covar'], n_iter=params['n_iter'], algorithm=params['algorithm'])
    
    # Fit the model to your data
    # Replace 'X' with your input data
    model.fit(X)
    
    # Predict the sequence of states for each observed sequence
    predicted_states = model.predict(X)
    
    store_results(df, predicted_states, \
                  'behavior_hmm_result_'+str(params['n_states'])+'_'+str(params['covariance_type'])+'_'+ \
                    str(params['min_covar'])+'_'+str(params['n_iter'])+'_'+str(params['algorithm'])+'.xlsx')
    
    print_trained_params(model)
    
    print("-------------------------------------------------------------")

n_states : 3
covariance_type: spherical
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.   0.51 0.49]
 [0.18 0.38 0.44]
 [0.1  0.33 0.57]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: spherical
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.56 0.01 0.3  0.13]
 [0.17 0.   0.   0.83]
 [0.42 0.02 0.38 0.18]
 [0.51 0.01 0.32 0.16]]

>> Initial Probability
[1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: spherical
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.36 0.46 0.   0.   0.18]
 [0.12 0.54 0.23 0.01 0.1 ]
 [0.   0.38 0.44 0.04 0.14]
 [0.57 0.14 0.   0.   0.29]
 [0.16 0.5  0.17 0.02 0.15]]

>> Initial Probability
[0. 1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: spherical
min_covar: 0.001
n_iter: 50
algorithm: viter

Model is not converging.  Current: 504294.65756207565 is not greater than 504299.55402342655. Delta is -4.896461350901518


>> Transition matrix
[[0.   0.67 0.33]
 [0.   0.   1.  ]
 [0.01 0.02 0.98]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.001
n_iter: 10
algorithm: viterbi



Model is not converging.  Current: 504956.11854912434 is not greater than 504960.67208202457. Delta is -4.553532900230493


>> Transition matrix
[[0.03 0.23 0.   0.74]
 [0.38 0.   0.06 0.56]
 [0.55 0.   0.   0.45]
 [0.06 0.02 0.01 0.91]]

>> Initial Probability
[0. 0. 0. 1.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.   0.91 0.04 0.   0.05]
 [0.18 0.75 0.02 0.02 0.03]
 [0.6  0.3  0.   0.1  0.  ]
 [0.   0.82 0.09 0.   0.09]
 [0.18 0.71 0.   0.06 0.06]]

>> Initial Probability
[0. 1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.001
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 504027.70289006655 is not greater than 504032.5722080464. Delta is -4.86931797984289


>> Transition matrix
[[0.96 0.02 0.02]
 [0.88 0.   0.12]
 [0.85 0.08 0.08]]

>> Initial Probability
[1. 0. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.001
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 504513.0908583217 is not greater than 504516.91738121264. Delta is -3.826522890944034


>> Transition matrix
[[0.05 0.89 0.05 0.  ]
 [0.04 0.9  0.05 0.02]
 [0.08 0.81 0.08 0.04]
 [0.   0.64 0.09 0.27]]

>> Initial Probability
[0. 1. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.001
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 511086.338139013 is not greater than 511087.85922417126. Delta is -1.521085158281494


>> Transition matrix
[[0.95 0.02 0.02 0.   0.  ]
 [0.64 0.   0.36 0.   0.  ]
 [0.82 0.06 0.06 0.   0.06]
 [1.   0.   0.   0.   0.  ]
 [0.5  0.   0.5  0.   0.  ]]

>> Initial Probability
[1. 0. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.001
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 504103.32912325434 is not greater than 504104.6093092591. Delta is -1.2801860047620721


>> Transition matrix
[[0.04 0.04 0.91]
 [0.08 0.   0.92]
 [0.04 0.07 0.88]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.001
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 504264.60037358286 is not greater than 504268.3438718152. Delta is -3.7434982323320583


>> Transition matrix
[[0.93 0.01 0.04 0.02]
 [1.   0.   0.   0.  ]
 [0.82 0.18 0.   0.  ]
 [0.73 0.   0.16 0.11]]

>> Initial Probability
[1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.001
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 511753.7120635987 is not greater than 511754.71321180655. Delta is -1.0011482078698464


>> Transition matrix
[[0.07 0.07 0.57 0.29 0.  ]
 [0.05 0.   0.57 0.38 0.  ]
 [0.03 0.04 0.9  0.03 0.  ]
 [0.04 0.   0.96 0.   0.  ]
 [0.   0.   1.   0.   0.  ]]

>> Initial Probability
[0. 0. 1. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.01
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.88 0.08 0.04]
 [0.97 0.   0.03]
 [0.73 0.15 0.12]]

>> Initial Probability
[1. 0. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.01
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.43 0.52 0.   0.04]
 [0.83 0.09 0.08 0.  ]
 [0.93 0.   0.   0.07]
 [0.86 0.   0.07 0.07]]

>> Initial Probability
[1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.01
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.   0.   0.   0.   1.  ]
 [0.01 0.77 0.05 0.01 0.16]
 [0.   0.15 0.04 0.04 0.7

Model is not converging.  Current: 504034.8211796254 is not greater than 504038.9961147234. Delta is -4.174935097980779


>> Transition matrix
[[0.91 0.03 0.05]
 [0.88 0.06 0.06]
 [0.96 0.04 0.  ]]

>> Initial Probability
[1. 0. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.01
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 504775.4188469092 is not greater than 504779.88943194645. Delta is -4.47058503725566


>> Transition matrix
[[0.   0.5  0.2  0.3 ]
 [0.   0.   0.05 0.95]
 [0.08 0.08 0.   0.83]
 [0.02 0.03 0.02 0.93]]

>> Initial Probability
[0. 0. 0. 1.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.01
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 504953.54420568043 is not greater than 504959.01391716534. Delta is -5.469711484911386


>> Transition matrix
[[0.9  0.03 0.03 0.02 0.02]
 [0.33 0.   0.17 0.28 0.22]
 [0.77 0.17 0.06 0.   0.  ]
 [0.67 0.2  0.   0.07 0.07]
 [0.93 0.   0.   0.07 0.  ]]

>> Initial Probability
[1. 0. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.01
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 504067.22799182247 is not greater than 504068.6546844203. Delta is -1.4266925978590734


>> Transition matrix
[[0.17 0.82 0.01]
 [0.99 0.01 0.  ]
 [0.   1.   0.  ]]

>> Initial Probability
[1. 0. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.01
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 504686.3188246947 is not greater than 504687.31789629575. Delta is -0.9990716010215692


>> Transition matrix
[[0.95 0.04 0.01 0.  ]
 [0.95 0.   0.05 0.  ]
 [0.5  0.13 0.25 0.13]
 [0.67 0.   0.33 0.  ]]

>> Initial Probability
[1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.01
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 505078.27154407714 is not greater than 505080.90246968094. Delta is -2.6309256037930027


>> Transition matrix
[[0.   0.94 0.   0.06 0.  ]
 [0.03 0.92 0.01 0.02 0.02]
 [0.   0.8  0.1  0.1  0.  ]
 [0.17 0.33 0.25 0.17 0.08]
 [0.   0.78 0.11 0.   0.11]]

>> Initial Probability
[0. 1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.1
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.08 0.42 0.5 ]
 [0.18 0.   0.82]
 [0.03 0.1  0.87]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.1
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.   1.   0.   0.  ]
 [0.   0.86 0.01 0.13]
 [0.   0.28 0.   0.72]
 [0.   0.85 0.05 0.1 ]]

>> Initial Probability
[0. 1. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.1
n_iter: 10
algorithm: viterbi



Model is not converging.  Current: 505014.2659961533 is not greater than 505015.99045969715. Delta is -1.7244635438546538


>> Transition matrix
[[0.91 0.01 0.04 0.02 0.03]
 [0.   0.   0.   0.5  0.5 ]
 [0.83 0.06 0.   0.12 0.  ]
 [0.79 0.07 0.07 0.07 0.  ]
 [0.76 0.06 0.06 0.   0.12]]

>> Initial Probability
[1. 0. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.1
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 504127.46879483166 is not greater than 504127.52272592654. Delta is -0.053931094880681485


>> Transition matrix
[[0.31 0.15 0.53]
 [0.21 0.   0.79]
 [0.07 0.02 0.92]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.1
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 504291.5193235786 is not greater than 504294.90412866336. Delta is -3.384805084730033


>> Transition matrix
[[0.9  0.02 0.06 0.01]
 [0.9  0.   0.1  0.  ]
 [0.9  0.03 0.03 0.03]
 [0.88 0.   0.   0.12]]

>> Initial Probability
[1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.1
n_iter: 50
algorithm: viterbi



Model is not converging.  Current: 510815.2248351346 is not greater than 510818.461152118. Delta is -3.236316983413417


>> Transition matrix
[[0.02 0.95 0.01 0.02 0.  ]
 [0.86 0.   0.06 0.08 0.  ]
 [0.33 0.57 0.05 0.05 0.  ]
 [0.   0.83 0.17 0.   0.  ]
 [1.   0.   0.   0.   0.  ]]

>> Initial Probability
[1. 0. 0. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: tied
min_covar: 0.1
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 504128.22201004945 is not greater than 504130.2277176784. Delta is -2.005707628966775


>> Transition matrix
[[0.   0.87 0.13]
 [0.03 0.9  0.06]
 [0.03 0.97 0.  ]]

>> Initial Probability
[0. 1. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: tied
min_covar: 0.1
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 504589.4857616298 is not greater than 504594.8145500317. Delta is -5.3287884018500336


>> Transition matrix
[[0.95 0.04 0.01 0.  ]
 [0.95 0.   0.05 0.  ]
 [0.43 0.14 0.14 0.29]
 [1.   0.   0.   0.  ]]

>> Initial Probability
[1. 0. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: tied
min_covar: 0.1
n_iter: 100
algorithm: viterbi



Model is not converging.  Current: 505370.4667237035 is not greater than 505375.0934923777. Delta is -4.626768674235791


>> Transition matrix
[[0.07 0.   0.07 0.   0.86]
 [0.   0.   0.08 0.25 0.67]
 [0.1  0.1  0.   0.39 0.42]
 [0.   0.   0.05 0.05 0.9 ]
 [0.03 0.07 0.01 0.02 0.87]]

>> Initial Probability
[0. 0. 0. 0. 1.]
-------------------------------------------------------------
n_states : 3
covariance_type: diag
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.   0.5  0.5 ]
 [0.04 0.31 0.65]
 [0.03 0.3  0.67]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: diag
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.08 0.32 0.08 0.52]
 [0.05 0.35 0.06 0.53]
 [0.09 0.26 0.   0.65]
 [0.05 0.26 0.08 0.61]]

>> Initial Probability
[0. 0. 0. 1.]
-------------------------------------------------------------
n_states : 5
covariance_type: diag
min_covar: 0.001
n_iter: 10
algorithm: viterbi

>> Transition matrix
[[0.14 0.12 0.37 0.27 0.1 ]
 [0.09 0.13 0.52 0.22 0.04]
 [0.1  0.13 0.47 0.27 

Model is not converging.  Current: 417139.3277707573 is not greater than 417139.32777085336. Delta is -9.604264050722122e-08


>> Transition matrix
[[0.14 0.14 0.67 0.   0.05]
 [0.07 0.14 0.77 0.   0.02]
 [0.03 0.1  0.79 0.02 0.06]
 [0.29 0.   0.71 0.   0.  ]
 [0.03 0.28 0.59 0.   0.1 ]]

>> Initial Probability
[0. 0. 1. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: diag
min_covar: 0.001
n_iter: 100
algorithm: viterbi

>> Transition matrix
[[0.15 0.46 0.38]
 [0.04 0.6  0.36]
 [0.05 0.51 0.44]]

>> Initial Probability
[0. 1. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: diag
min_covar: 0.001
n_iter: 100
algorithm: viterbi

>> Transition matrix
[[0.36 0.04 0.09 0.5 ]
 [0.23 0.18 0.14 0.45]
 [0.35 0.06 0.16 0.43]
 [0.28 0.03 0.1  0.58]]

>> Initial Probability
[0. 0. 0. 1.]
-------------------------------------------------------------
n_states : 5
covariance_type: diag
min_covar: 0.001
n_iter: 100
algorithm: viterbi

>> Transition matrix
[[0.08 0.59 0.03 0.18 0.13]
 [0.09 0.58 0.04 0.2  0.1 ]
 [0.04 0.44 0.2  0.

Fitting a model with 196028 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.   0.24 0.   0.71 0.06]
 [0.02 0.05 0.   0.91 0.02]
 [0.   0.   0.   1.   0.  ]
 [0.04 0.09 0.   0.85 0.02]
 [0.08 0.17 0.   0.75 0.  ]]

>> Initial Probability
[0. 0. 0. 1. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: full
min_covar: 0.001
n_iter: 10
algorithm: viterbi



Model is not converging.  Current: 877331.052664455 is not greater than 877332.889764742. Delta is -1.8371002869680524
Fitting a model with 261375 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.2  0.51 0.28]
 [0.16 0.57 0.27]
 [0.11 0.5  0.39]]

>> Initial Probability
[0. 1. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: full
min_covar: 0.001
n_iter: 10
algorithm: viterbi



Fitting a model with 326724 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.   0.27 0.4  0.33]
 [0.1  0.36 0.11 0.43]
 [0.29 0.18 0.05 0.48]
 [0.09 0.23 0.17 0.51]]

>> Initial Probability
[0. 0. 0. 1.]
-------------------------------------------------------------
n_states : 5
covariance_type: full
min_covar: 0.001
n_iter: 10
algorithm: viterbi



Fitting a model with 196028 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.13 0.28 0.2  0.34 0.06]
 [0.12 0.33 0.13 0.37 0.03]
 [0.15 0.14 0.32 0.35 0.03]
 [0.14 0.24 0.17 0.4  0.05]
 [0.4  0.25 0.1  0.25 0.  ]]

>> Initial Probability
[0. 0. 0. 1. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: full
min_covar: 0.001
n_iter: 50
algorithm: viterbi



Fitting a model with 261375 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.1  0.44 0.45]
 [0.22 0.4  0.38]
 [0.23 0.31 0.46]]

>> Initial Probability
[0. 1. 0.]
-------------------------------------------------------------
n_states : 4
covariance_type: full
min_covar: 0.001
n_iter: 50
algorithm: viterbi



Fitting a model with 326724 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.   0.86 0.   0.14]
 [0.02 0.55 0.06 0.37]
 [0.   0.52 0.15 0.33]
 [0.01 0.46 0.07 0.46]]

>> Initial Probability
[0. 1. 0. 0.]
-------------------------------------------------------------
n_states : 5
covariance_type: full
min_covar: 0.001
n_iter: 50
algorithm: viterbi



Fitting a model with 196028 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.27 0.18 0.39 0.14 0.02]
 [0.19 0.43 0.33 0.05 0.  ]
 [0.17 0.34 0.41 0.08 0.01]
 [0.24 0.19 0.45 0.12 0.  ]
 [0.33 0.33 0.33 0.   0.  ]]

>> Initial Probability
[0. 0. 1. 0. 0.]
-------------------------------------------------------------
n_states : 3
covariance_type: full
min_covar: 0.001
n_iter: 100
algorithm: viterbi



Fitting a model with 261375 free scalar parameters with only 174600 data points will result in a degenerate solution.


>> Transition matrix
[[0.21 0.26 0.53]
 [0.09 0.44 0.47]
 [0.08 0.38 0.53]]

>> Initial Probability
[0. 0. 1.]
-------------------------------------------------------------
n_states : 4
covariance_type: full
min_covar: 0.001
n_iter: 100
algorithm: viterbi



ValueError: 'covars' must be symmetric, positive-definite

## Grid Search with K-Fold Cross Validation

In [209]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from hmmlearn import hmm

# Grid search parameters
n_states = [3, 4, 5]  # Possible number of states
covariance_type = ['diag', 'full']  # Possible covariance types
min_covar = [0.001, 0.01, 0.1]  # Minimum covariance value
n_iter = [10, 50, 100]  # Number of iterations
algorithm = ['viterbi', 'map']  # Decoding algorithm

parameters = {'n_components': n_states, 'covariance_type': covariance_type, 'min_covar': min_covar, 'n_iter': n_iter, 'algorithm': algorithm}

# HMM 모델 생성
model = hmm.GaussianHMM()

# GridSearchCV를 사용하여 최적의 매개변수 탐색
cv = KFold(n_splits=5)  # 5-fold cross validation
grid_search = GridSearchCV(model, parameters, cv=cv)
grid_search.fit(X)

Model is not converging.  Current: 273077.2874283442 is not greater than 273077.2877801424. Delta is -0.00035179819678887725
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Model is not converging.  Current: 231561.75505558305 is not greater than 231561.75505642578. Delta is -8.427305147051811e-07
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some rows of transmat_ have zero sum because no transition from the state was ever observed.
Some 

Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Model is not converging.  Current: 692758.1285264656 is not greater than 692758.5279996427. Delta is -0.3994731770362705
Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Model is not converging.  Current: 772077.1659216045 is not greater than 772079.5753214111. Delta is -2.4093998066382483
Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 196028 free scalar parameters

Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 196028 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Model is not converging.  Current: 753759.2223013869 is not greater than 753760.0965074557. Delta is -0.8742060688091442
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters wi

Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Model is not converging.  Current: 751377.5723683259 is not greater than 751378.9547310176. Delta is -1.3823626916855574
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 261375 free scalar parameters wi

Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Model is not converging.  Current: 773501.3154879325 is not greater than 773501.4342006866. Delta is -0.11871275410521775
Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 326724 free scalar parameters with only 139680 data points will result in a degenerate solution.
Fitting a model with 326724 free scalar parameters w

In [210]:
# 최적의 매개변수 확인
best_params = grid_search.best_params_

In [211]:
best_params

{'algorithm': 'viterbi',
 'covariance_type': 'diag',
 'min_covar': 0.001,
 'n_components': 3,
 'n_iter': 10}

In [212]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from hmmlearn import hmm

# Grid search parameters
n_states = [3]  # Possible number of states
n_iter = [10, 20, 30, 40]  # Number of iterations

parameters = {'n_components': n_states, 'n_iter': n_iter}

# HMM 모델 생성
model = hmm.GaussianHMM()

# GridSearchCV를 사용하여 최적의 매개변수 탐색
cv = KFold(n_splits=5)  # 5-fold cross validation
grid_search = GridSearchCV(model, parameters, cv=cv)
grid_search.fit(X)

Model is not converging.  Current: 200332.97314676983 is not greater than 200332.98869153962. Delta is -0.015544769790722057
Model is not converging.  Current: 296066.1893739133 is not greater than 296066.1894131821. Delta is -3.926880890503526e-05


In [213]:
# 최적의 매개변수 확인
best_params = grid_search.best_params_

In [220]:
best_params

{'n_components': 3, 'n_iter': 30}

In [224]:
# 최적의 매개변수로 모델 재학습
best_model = hmm.GaussianHMM(**best_params)
best_model.fit(X)

predicted_states = best_model.predict(X)
    
store_results(df, predicted_states, \
              'behavior_hmm_result_'+str(params['n_states'])+'_'+'diag'+'_'+ \
                str(0.001)+'_'+str(30)+'_'+str('viterbi')+'.xlsx')

# 교차 검증 결과 확인
cv_scores = grid_search.cv_results_['mean_test_score']
cv_scores

array([-3.56474291e+12, -3.72826568e+12, -1.62937318e+12, -2.44840047e+12])