In [2]:
import sys, os
sys.path.append('..')

In [247]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import preprocess

# 1. Data Load

In [442]:
# Load the pre-processed behaviour annotations (comma-separated, no column used as the index).
orig_df = pd.read_csv(
    '../data/exhibition_behavior_preprocessed.csv',
    sep=',',
    index_col=False,
)

In [443]:
orig_df

Unnamed: 0,uid,date,filename,start,duration,A/C,behavior,code,M/F,appearance
0,0,11월 19일,01_20221119085958_part2,56240,2720,Child,물리적거리,Approach,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
1,0,11월 19일,01_20221119085958_part2,59360,2960,Child,상호작용시도,,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
2,0,11월 19일,01_20221119085958_part2,71420,5520,Child,물리적거리,Approach,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
3,0,11월 19일,01_20221119085958_part2,77400,6320,Child,상호작용시도,,Female,"포니테일, 아이보리색 점퍼, 검정색 바지"
4,1,11월 19일,01_20221119085958_part2,48240,9240,Child,물리적거리,Approach,Male,"검정색 점퍼, 청바지, 검정/흰색 운동화"
...,...,...,...,...,...,...,...,...,...,...
2682,371,12월 4일,01_20221204165959_part1,2567280,17280,Child,물리적거리,Pass,Female,"하늘색 롱패딩, 흰색 바지"
2683,371,12월 4일,01_20221204165959_part1,2893760,14160,Child,물리적거리,Pass,Female,"하늘색 롱패딩, 흰색 바지"
2684,372,12월 4일,01_20221204165959_part1,2567280,18080,Adult,물리적거리,Pass,Male,"검은색 롱패딩, 검은색 바지"
2685,373,12월 4일,01_20221204165959_part2,15760,13840,Child,물리적거리,Pass,Female,"하늘색 롱패딩, 흰색 바지"


In [444]:
# Keep every row whose code is not 'Pass' (rows with a missing code are kept as well).
orig_df = orig_df[orig_df['code'] != 'Pass']

In [445]:
# Renumber the rows 0..n-1 so the labels are contiguous again after the 'Pass' rows were removed.
orig_df = orig_df.reset_index(drop=True)

# 2. Indexing
Avoid, Follow를 제거하지 않고 Approach와 결합하여 indexing

In [446]:
def indexing(df):
    """Assign a sequence id ('pid') to every row of ``df``.

    Rows are scanned in their current order. The id starts at 0 and is
    incremented whenever a new sequence begins, i.e. when

    * the ``uid`` differs from the previous row, or
    * the row is 'Avoid'/'Follow' and the previous row is 'Approach', or
    * the row is 'Avoid'/'Follow'/'Approach' and the previous row is none of
      these three codes.

    In every other case the row keeps the id of the previous row, so a run of
    'Avoid'/'Follow' rows followed by 'Approach' stays in one sequence.

    Rows are compared by position, so the result does not depend on the index
    labels of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'uid' and 'code'.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with a new 'pid' column
        (``df`` is modified in place).
    """
    movement_codes = ('Avoid', 'Follow', 'Approach')

    # Plain Python lists give positional access that is independent of the index.
    uids = df['uid'].tolist()
    codes = df['code'].tolist()

    pid = 0
    pid_list = []
    for pos in range(len(codes)):
        if pos > 0:
            code, prev_code = codes[pos], codes[pos - 1]

            if uids[pos] != uids[pos - 1]:
                # A different person always starts a new sequence.
                pid += 1
            elif code in movement_codes:
                if code != 'Approach' and prev_code == 'Approach':
                    # Avoid/Follow right after an Approach opens the next sequence.
                    pid += 1
                elif prev_code not in movement_codes:
                    # A movement code after an interaction code opens the next sequence.
                    pid += 1
                # Otherwise (movement code after movement code): same sequence.

        pid_list.append(pid)

    # Add the 'pid' column to the DataFrame.
    df['pid'] = pid_list

    return df

In [447]:
# Give every row a sequence id (pid).
orig_df = indexing(orig_df)
# NOTE(review): preprocess.reindex is defined outside this notebook; presumably it adjusts the pid numbering — confirm.
orig_df = preprocess.reindex(orig_df)
# Dumps the intermediate table into the current working directory.
# NOTE(review): looks like a debugging artefact — confirm it is still needed.
orig_df.to_csv('test.csv', index=False)

# 3. Make Data

In [448]:
def make_data(df):
    """Collapse the event rows of ``df`` into one row per 'pid'.

    'code', 'duration' and 'appearance' are collected per pid (the first two
    as lists in row order, 'appearance' as a set of its distinct values);
    'A/C' and 'M/F' take the first value of each group. The returned frame has
    a fresh 0..n-1 index and no 'pid' column.
    """
    aggregation = {
        'code': list,
        'duration': list,
        'A/C': 'first',
        'M/F': 'first',
        'appearance': list,
    }
    grouped = df.groupby('pid').agg(aggregation)

    # Repeated appearance descriptions within a sequence are reduced to a set.
    grouped['appearance'] = [set(descriptions) for descriptions in grouped['appearance']]

    return grouped.reset_index(drop=True)

In [449]:
# Keep only the grouping key (pid) and the columns that make_data aggregates.
df = orig_df[['pid', 'code', 'duration', 'A/C', 'M/F', 'appearance']]

In [450]:
# Collapse the event rows into one row per pid.
df = make_data(df)

In [451]:
df

Unnamed: 0,code,duration,A/C,M/F,appearance
0,"[Approach, None]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
1,"[Approach, None]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
2,"[Approach, None]","[9240, 15600]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}"
3,"[Approach, None]","[5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}"
4,"[Approach, None]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}"
...,...,...,...,...,...
748,"[Approach, None, Gesture, None]","[7680, 5200, 2800, 2400]",Child,Male,"{검은색 롱패딩, 검은색 바지}"
749,"[Approach, None, Gesture, None, Gesture, None]","[5360, 45360, 1840, 36400, 6320, 62800]",Child,Male,"{검은색 롱패딩, 검은색 바지}"
750,"[Approach, None]","[3840, 11680]",Child,Male,"{검은색 롱패딩, 검은색 바지}"
751,"[Approach, None]","[4800, 16480]",Child,Male,"{검은색 롱패딩, 검은색 바지}"


## 3-1) 이상치 제거
1) code list가 상호작용시도로 이루어져 있는 경우
2) code list에 Approach가 없는 경우

In [452]:
# Drop every sequence that consists of a single interaction-attempt code
# ('None', 'Touch' or 'Gesture').
# The rows to drop are determined first and removed in one step afterwards, so the
# neighbour printout can never look up a missing label (first row, last row, or a
# row that was already dropped).
print("code list의 길이가 1 이하이고 상호작용시도의 code data를 drop")
interaction_codes = ('None', 'Touch', 'Gesture')
is_lone_interaction = df['code'].apply(
    lambda codes: len(codes) == 1 and codes[0] in interaction_codes
).astype(bool)

# Show each row that will be dropped together with the rows directly before and after it.
for pos in np.flatnonzero(is_lone_interaction.to_numpy()):
    for neighbor in (pos - 1, pos, pos + 1):
        if 0 <= neighbor < len(df):
            print(df['code'].iloc[neighbor], df['appearance'].iloc[neighbor])
    print("--------------------------------------")

count = int(is_lone_interaction.sum())
print("drop한 데이터 개수:", count)
df = df[~is_lone_interaction].reset_index(drop=True)

code list의 길이가 1 이하이고 상호작용시도의 code data를 drop
['Avoid', 'Follow', 'Avoid', 'Approach', 'None', 'Touch', 'None'] {'빨간 리본, 양갈래 머리, 무지개색 상의'}
['None'] {'셔츠 위 조끼, 검정색 바지, 흰색 운동화'}
['Avoid', 'Follow', 'None'] {'셔츠 위 조끼, 검정색 바지, 흰색 운동화'}
--------------------------------------
['Avoid', 'None'] {'셔츠 위 조끼, 검정색 바지, 흰색 운동화'}
['Touch'] {'빨간 리본, 양갈래 머리, 무지개색 상의'}
['Avoid', 'Follow', 'Approach', 'None', 'Touch', 'None'] {'빨간 리본, 양갈래 머리, 무지개색 상의'}
--------------------------------------
['Follow'] {'검정색 후드(팔에 줄무늬), 검은 바지, 검정색 운동화'}
['Gesture'] {'하늘색 상의, 검은색 바지'}
['Approach', 'None'] {'회색 상의, 청바지, 흰색 운동화'}
--------------------------------------
['Avoid', 'Follow'] {'검정 모자, 파란색 상의, 검정 조끼, 파란색 바지'}
['Touch'] {'베이지색 상의, 청바지, 흰색 운동화'}
['Approach', 'None'] {'베이지색 상의, 청바지, 흰색 운동화'}
--------------------------------------
['Approach', 'None'] {'묶은 머리, 검정 상의, 흰색 조끼, 검정 바지'}
['Touch'] {'검정 상의, 흰색 조끼, 회색 바지'}
['Approach'] {'검정 상의, 흰색 조끼, 회색 바지'}
--------------------------------------
['Approach', 'None'] {'회색 맨투

In [453]:
df

Unnamed: 0,code,duration,A/C,M/F,appearance
0,"[Approach, None]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
1,"[Approach, None]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}"
2,"[Approach, None]","[9240, 15600]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}"
3,"[Approach, None]","[5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}"
4,"[Approach, None]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}"
...,...,...,...,...,...
725,"[Approach, None, Gesture, None]","[7680, 5200, 2800, 2400]",Child,Male,"{검은색 롱패딩, 검은색 바지}"
726,"[Approach, None, Gesture, None, Gesture, None]","[5360, 45360, 1840, 36400, 6320, 62800]",Child,Male,"{검은색 롱패딩, 검은색 바지}"
727,"[Approach, None]","[3840, 11680]",Child,Male,"{검은색 롱패딩, 검은색 바지}"
728,"[Approach, None]","[4800, 16480]",Child,Male,"{검은색 롱패딩, 검은색 바지}"


In [457]:
# Drop every sequence whose first code is an interaction attempt
# ('None', 'Touch' or 'Gesture').
# The rows are selected with a mask and removed in one step instead of dropping
# them one by one while iterating; empty code lists are simply kept.
print("상호작용시도로만 이루어져있는 code list drop")
starts_with_interaction = df['code'].apply(
    lambda codes: len(codes) > 0 and codes[0] in ('None', 'Touch', 'Gesture')
).astype(bool)

for dropped_codes in df.loc[starts_with_interaction, 'code']:
    print(dropped_codes)

count = int(starts_with_interaction.sum())
print("drop한 데이터 개수:", count)
df = df[~starts_with_interaction].reset_index(drop=True)

상호작용시도로만 이루어져있는 code list drop
['None', 'Touch', 'None', 'Touch']
['Touch', 'Touch', 'Touch', 'None', 'Touch', 'Touch', 'Touch']
['Touch', 'Touch']
['Gesture', 'None', 'Gesture']
drop한 데이터 개수: 4


In [455]:
# Drop every sequence that has more than one code, starts with 'Avoid' or 'Follow'
# and never contains 'Approach'. Single-code sequences are not touched here.
# The rows are selected with a mask and removed in one step instead of dropping
# them one by one while iterating.
print("code list에 Aproach가 없는 data drop")
lacks_approach = df['code'].apply(
    lambda codes: len(codes) > 1
    and codes[0] in ('Avoid', 'Follow')
    and 'Approach' not in codes
).astype(bool)

for dropped_codes in df.loc[lacks_approach, 'code']:
    print(dropped_codes)

count = int(lacks_approach.sum())
print("drop한 데이터 개수:", count)
df = df[~lacks_approach].reset_index(drop=True)

code list에 Aproach가 없는 data drop
['Follow', 'Avoid', 'Follow', 'Avoid', 'None']
['Follow', 'None']
['Follow', 'Avoid', 'None']
['Follow', 'Avoid', 'None', 'Touch']
['Follow', 'Avoid', 'Follow', 'Avoid', 'None']
['Avoid', 'Follow', 'None']
['Avoid', 'None']
['Follow', 'Avoid']
['Avoid', 'Follow']
['Follow', 'None']
['Avoid', 'Follow']
['Avoid', 'Follow']
['Avoid', 'None']
['Avoid', 'Follow']
['Avoid', 'Follow']
['Avoid', 'None']
['Avoid', 'None']
['Follow', 'Touch', 'None']
['Avoid', 'None']
['Avoid', 'Follow', 'Gesture', 'Touch']
['Avoid', 'Follow', 'None']
['Avoid', 'None']
['Follow', 'Avoid', 'None']
['Avoid', 'None']
['Avoid', 'None']
['Follow', 'Avoid']
['Avoid', 'None']
drop한 데이터 개수: 27


In [459]:
# Persist the cleaned sequences (the row index is written as the first column).
# The list and set cells are stored as their text representation, so a reader
# has to parse them back into Python objects.
df.to_csv('../data/behavior_hmm_data.csv')

## 3-2) One-Hot Encoding (code, duration)

In [464]:
def code_one_hot_encoding(df):
    """Add an 'encoded_code' column holding a one-hot matrix per sequence.

    Every cell of df['code'] is a list of behaviour codes. It is turned into
    an integer array with one row per code and one column per entry of
    ``actions``; the column that matches the code is 1, all others are 0.
    A code that is not in ``actions`` raises ValueError.

    ``df`` is modified in place and returned.
    """
    # Behaviour codes; the position in this list is the one-hot column.
    actions = ['Avoid', 'Follow', 'Approach', 'None', 'Touch', 'Gesture']

    encoded_sequences = []
    for codes in df['code']:
        hot_columns = [actions.index(code) for code in codes]
        matrix = np.zeros((len(codes), len(actions)), dtype=int)
        # Set exactly one entry per row: (row k, column of the k-th code).
        matrix[np.arange(len(codes)), hot_columns] = 1
        encoded_sequences.append(matrix)

    df['encoded_code'] = encoded_sequences

    return df

In [501]:
def duration_one_hot_encoding(df):
    """Add an 'encoded_duration' column holding a duration matrix per sequence.

    Every row of ``df`` carries a list of behaviour codes (df['code']) and a
    list of durations of the same length (df['duration']). For each row an
    integer array is built with one row per code and one column per entry of
    ``actions``; the column that matches the code holds the duration of that
    step (converted to int), all other entries are 0.

    The code and duration lists are paired row by row in positional order, so
    the result does not depend on the index labels of ``df``.

    ``df`` is modified in place and returned.

    Raises
    ------
    ValueError
        If a code is not contained in ``actions``.
    IndexError
        If a duration list is shorter than its code list.
    """
    # Behaviour codes; the position in this list is the matrix column.
    actions = ['Avoid', 'Follow', 'Approach', 'None', 'Touch', 'Gesture']

    encoded_sequences = []
    for codes, durations in zip(df['code'], df['duration']):
        matrix = np.zeros((len(codes), len(actions)))
        for step, code in enumerate(codes):
            matrix[step, actions.index(code)] = durations[step]
        encoded_sequences.append(matrix.astype(int))

    df['encoded_duration'] = encoded_sequences

    return df

In [504]:
# Add the 'encoded_code' and 'encoded_duration' columns (both helpers also modify df in place).
df = code_one_hot_encoding(df)
df = duration_one_hot_encoding(df)

In [505]:
df

Unnamed: 0,code,duration,A/C,M/F,appearance,encoded_code,encoded_duration
0,"[Approach, None]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 2720, 0, 0, 0], [0, 0, 0, 2960, 0, 0]]"
1,"[Approach, None]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 5520, 0, 0, 0], [0, 0, 0, 6320, 0, 0]]"
2,"[Approach, None]","[9240, 15600]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 9240, 0, 0, 0], [0, 0, 0, 15600, 0, 0]]"
3,"[Approach, None]","[5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 5920, 0, 0, 0], [0, 0, 0, 6360, 0, 0]]"
4,"[Approach, None]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 4160, 0, 0, 0], [0, 0, 0, 7760, 0, 0]]"
...,...,...,...,...,...,...,...
694,"[Approach, None, Gesture, None]","[7680, 5200, 2800, 2400]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 7680, 0, 0, 0], [0, 0, 0, 5200, 0, 0],..."
695,"[Approach, None, Gesture, None, Gesture, None]","[5360, 45360, 1840, 36400, 6320, 62800]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 5360, 0, 0, 0], [0, 0, 0, 45360, 0, 0]..."
696,"[Approach, None]","[3840, 11680]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 3840, 0, 0, 0], [0, 0, 0, 11680, 0, 0]]"
697,"[Approach, None]","[4800, 16480]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0]]","[[0, 0, 4800, 0, 0, 0], [0, 0, 0, 16480, 0, 0]]"


## 3-3) Add Zero Padding
sequence가 가장 긴 data에 맞춰 zero padding을 더함

In [517]:
def padding_encoding_data(df, col_name, num):
    """Zero-pad every sequence in df[col_name] to the length of the longest one.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column ``col_name`` holds one 2-D array per row.
    col_name : str
        Name of the column to pad; it is overwritten with the padded arrays.
    num : int
        Width of one padding row, i.e. the number of columns of each array.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (``df`` is modified in place).
        Sequences that already have the maximum length are kept unchanged;
        shorter ones get all-zero rows appended at the end. An empty column
        is left empty.
    """
    # Length of the longest sequence; 0 when the column has no rows.
    max_length = max(map(len, df[col_name]), default=0)
    padded_data = []

    for data in df[col_name]:
        missing = max_length - len(data)
        if missing == 0:
            padded_data.append(data)
        else:
            # Append `missing` rows of zeros so the sequence reaches max_length.
            padding = np.zeros((missing, num), dtype=int)
            padded_data.append(np.concatenate((data, padding), axis=0))

    df[col_name] = padded_data

    return df

In [518]:
# Pad both encodings to the longest sequence.
# 6 is the width of one encoded row, i.e. the number of entries in the `actions` list used by the encoders.
df = padding_encoding_data(df, "encoded_code", 6)
df = padding_encoding_data(df, "encoded_duration", 6)

In [519]:
df

Unnamed: 0,code,duration,A/C,M/F,appearance,encoded_code,encoded_duration
0,"[Approach, None]","[2720, 2960]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 2720, 0, 0, 0], [0, 0, 0, 2960, 0, 0],..."
1,"[Approach, None]","[5520, 6320]",Child,Female,"{포니테일, 아이보리색 점퍼, 검정색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 5520, 0, 0, 0], [0, 0, 0, 6320, 0, 0],..."
2,"[Approach, None]","[9240, 15600]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 9240, 0, 0, 0], [0, 0, 0, 15600, 0, 0]..."
3,"[Approach, None]","[5920, 6360]",Child,Male,"{검정색 점퍼, 청바지, 검정/흰색 운동화}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 5920, 0, 0, 0], [0, 0, 0, 6360, 0, 0],..."
4,"[Approach, None]","[4160, 7760]",Child,Male,"{검정 마스크, 흰색 후리스, 회색 트레이닝 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 4160, 0, 0, 0], [0, 0, 0, 7760, 0, 0],..."
...,...,...,...,...,...,...,...
694,"[Approach, None, Gesture, None]","[7680, 5200, 2800, 2400]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 7680, 0, 0, 0], [0, 0, 0, 5200, 0, 0],..."
695,"[Approach, None, Gesture, None, Gesture, None]","[5360, 45360, 1840, 36400, 6320, 62800]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 5360, 0, 0, 0], [0, 0, 0, 45360, 0, 0]..."
696,"[Approach, None]","[3840, 11680]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 3840, 0, 0, 0], [0, 0, 0, 11680, 0, 0]..."
697,"[Approach, None]","[4800, 16480]",Child,Male,"{검은색 롱패딩, 검은색 바지}","[[0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0...","[[0, 0, 4800, 0, 0, 0], [0, 0, 0, 16480, 0, 0]..."


In [555]:
def make_data_for_hmm(df):
    """Add a 'time_series_data' column that joins both encodings per sequence.

    For every row the arrays in 'encoded_code' and 'encoded_duration' are
    squeezed (axes of length one are removed) and concatenated horizontally,
    so each time step holds the code columns followed by the duration columns.

    ``df`` is modified in place and returned.
    """
    df['time_series_data'] = [
        np.hstack([np.squeeze(code_matrix), np.squeeze(duration_matrix)])
        for code_matrix, duration_matrix in zip(df['encoded_code'], df['encoded_duration'])
    ]

    return df

In [556]:
# Add the 'time_series_data' column (code columns followed by duration columns per time step).
df = make_data_for_hmm(df)

In [571]:
print(df['time_series_data'])

0      [[0, 0, 1, 0, 0, 0, 0, 0, 2720, 0, 0, 0], [0, ...
1      [[0, 0, 1, 0, 0, 0, 0, 0, 5520, 0, 0, 0], [0, ...
2      [[0, 0, 1, 0, 0, 0, 0, 0, 9240, 0, 0, 0], [0, ...
3      [[0, 0, 1, 0, 0, 0, 0, 0, 5920, 0, 0, 0], [0, ...
4      [[0, 0, 1, 0, 0, 0, 0, 0, 4160, 0, 0, 0], [0, ...
                             ...                        
694    [[0, 0, 1, 0, 0, 0, 0, 0, 7680, 0, 0, 0], [0, ...
695    [[0, 0, 1, 0, 0, 0, 0, 0, 5360, 0, 0, 0], [0, ...
696    [[0, 0, 1, 0, 0, 0, 0, 0, 3840, 0, 0, 0], [0, ...
697    [[0, 0, 1, 0, 0, 0, 0, 0, 4800, 0, 0, 0], [0, ...
698    [[0, 0, 1, 0, 0, 0, 0, 0, 12640, 0, 0, 0], [0,...
Name: time_series_data, Length: 699, dtype: object


In [561]:
# NOTE(review): the column is already reported as dtype object when printed, so this cast appears to be a no-op — confirm it can be removed.
df['time_series_data'] = df['time_series_data'].astype('object')

In [565]:
# Persist the encoded table. The array cells are written as their printed text form,
# not as numeric columns.
# NOTE(review): reading this file back needs custom parsing of those cells — confirm what the consumers expect.
df.to_csv("../data/behavior_hmm_preprocessed.csv")

# 4. HMM

In [566]:
import pandas as pd
import numpy as np
import ast
from hmmlearn import hmm

In [633]:
# Remove every sequence that holds at most one code, then renumber the rows 0..n-1.
short_sequence_labels = df.index[(df['code'].apply(len) <= 1).to_numpy()]
df.drop(index=short_sequence_labels, inplace=True)
df.reset_index(drop=True, inplace=True)

In [634]:
# Rebuild the per-sequence feature matrices for the remaining rows:
# the code encoding and the duration encoding are joined horizontally.
time_series_data = [
    np.hstack([np.squeeze(code_matrix), np.squeeze(duration_matrix)])
    for code_matrix, duration_matrix in zip(df['encoded_code'], df['encoded_duration'])
]

In [637]:
# Stack the per-sequence matrices into one array. The three-way unpacking below
# requires a 3-D result, i.e. every matrix must have the same shape.
time_series_data = np.array(time_series_data)
n_samples, n_timesteps, n_features = time_series_data.shape

In [638]:
# Flatten every sequence into a single row vector: X has one row per sequence
# and n_timesteps * n_features columns.
X = time_series_data.reshape((n_samples, n_timesteps * n_features))

In [651]:
# Number of hidden states of the model.
n_states = 2

# Define the HMM model.
# random_state fixes the random parameter initialisation, so re-running the
# notebook reproduces the same fitted model and the same state assignment.
model = hmm.GaussianHMM(n_components=n_states, random_state=42)

In [652]:
# Train the model
# NOTE(review): fit() is called without `lengths`, so hmmlearn treats all rows of X as ONE
# observation sequence whose time steps are the flattened per-visitor sequences in row
# order — confirm this is the intended formulation.
model.fit(X)

In [653]:
# Predict one hidden state per row of X
predicted_states = model.predict(X)

# Map the hidden state to an engagement label: state 1 -> 'High', any other state -> 'Low'.
# NOTE(review): the numbering of HMM states carries no meaning by itself, so which state
# corresponds to high engagement should be checked against the fitted parameters — confirm.
predicted_engagement_levels = np.array(['High' if s == 1 else 'Low' for s in predicted_states])

# Print the predicted engagement levels
print(predicted_engagement_levels)

['High' 'High' 'High' 'High' 'High' 'High' 'High' 'High' 'High' 'High'
 'High' 'High' 'High' 'High' 'High' 'Low' 'High' 'High' 'Low' 'High'
 'High' 'High' 'High' 'High' 'Low' 'Low' 'High' 'High' 'High' 'Low' 'Low'
 'High' 'Low' 'Low' 'High' 'High' 'Low' 'Low' 'Low' 'Low' 'High' 'High'
 'High' 'High' 'High' 'High' 'High' 'High' 'High' 'High' 'High' 'High'
 'Low' 'Low' 'High' 'Low' 'Low' 'High' 'Low' 'High' 'High' 'Low' 'High'
 'Low' 'Low' 'High' 'Low' 'High' 'High' 'High' 'High' 'Low' 'Low' 'High'
 'High' 'Low' 'High' 'High' 'Low' 'High' 'High' 'Low' 'High' 'Low' 'High'
 'Low' 'High' 'Low' 'Low' 'High' 'Low' 'High' 'Low' 'Low' 'Low' 'Low'
 'Low' 'High' 'High' 'High' 'High' 'Low' 'High' 'High' 'High' 'Low' 'High'
 'Low' 'High' 'Low' 'Low' 'Low' 'Low' 'Low' 'Low' 'High' 'Low' 'Low'
 'High' 'Low' 'High' 'High' 'High' 'High' 'High' 'Low' 'Low' 'High' 'Low'
 'Low' 'High' 'Low' 'Low' 'High' 'High' 'Low' 'High' 'Low' 'High' 'High'
 'High' 'Low' 'Low' 'Low' 'Low' 'Low' 'High' 'High' 'Low' 'Low'

In [654]:
# Attach the labels to df; the order matches because time_series_data was built by iterating over the rows of df.
df['predicted_engagement_level'] = predicted_engagement_levels

In [655]:
df[['code', 'predicted_engagement_level']]

Unnamed: 0,code,predicted_engagement_level
0,"[Approach, None]",High
1,"[Approach, None]",High
2,"[Approach, None]",High
3,"[Approach, None]",High
4,"[Approach, None]",High
...,...,...
659,"[Approach, None, Gesture, None]",High
660,"[Approach, None, Gesture, None, Gesture, None]",High
661,"[Approach, None]",High
662,"[Approach, None]",High


In [656]:
# Export the codes, durations and predicted label of every sequence.
# The list cells are written as their text representation, and the row index becomes the first column.
result = df[['code', 'duration', 'predicted_engagement_level']]
result.to_csv('../data/behavior_hmm_result.csv')

In [657]:
result

Unnamed: 0,code,duration,predicted_engagement_level
0,"[Approach, None]","[2720, 2960]",High
1,"[Approach, None]","[5520, 6320]",High
2,"[Approach, None]","[9240, 15600]",High
3,"[Approach, None]","[5920, 6360]",High
4,"[Approach, None]","[4160, 7760]",High
...,...,...,...
659,"[Approach, None, Gesture, None]","[7680, 5200, 2800, 2400]",High
660,"[Approach, None, Gesture, None, Gesture, None]","[5360, 45360, 1840, 36400, 6320, 62800]",High
661,"[Approach, None]","[3840, 11680]",High
662,"[Approach, None]","[4800, 16480]",High
