# 기본 패키지 불러오기

In [3]:
import os
import numpy as np

try:
    import pandas as pd
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas"])
finally:
    import pandas as pd

try:
    import pyarrow
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "pyarrow"])
finally:
    import pyarrow

try:
    from tqdm import tqdm
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tqdm"])
finally:
    from tqdm import tqdm

try:
    from scipy.spatial.transform import Rotation
    from scipy.stats import gaussian_kde, entropy
except ImportError:
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "scipy"])
finally:
    from scipy.spatial.transform import Rotation
    from scipy.stats import gaussian_kde, entropy
    
try:
    import antropy
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "antropy"])
finally:
    import antropy

try:
    import heartpy
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "heartpy"])
finally:
    import heartpy

try:
    import mne
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "mne"])
finally:
    import mne

# 특징 추출

In [4]:
try:
    import matplotlib.pyplot as plt
except ImportError:
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"])
finally:
    import matplotlib.pyplot as plt

In [5]:
from IPython.display import display

window_duration = 30

df_filtered_data = pd.read_parquet("/home/shared_home/2026-NRF-360Videos/data/total_filtered_data.parquet")
df_filtered_data.set_index(["ID", "Lv"], inplace=True)

df_filtered_data['window'] = df_filtered_data.groupby(['ID', 'Lv'])['Elapsed'].transform(
    lambda x: ((x - x.min()) // window_duration).astype(int)
)
display(df_filtered_data)

Unnamed: 0_level_0,Unnamed: 1_level_0,frame,prev_Elapsed,Elapsed,gaze-azimuth,gaze-elevation,head-speed,head-ang_vel,score,window
ID,Lv,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
P01,Lv1,0,0.00,0.12,89.270157,19.633359,,,1.0,0
P01,Lv1,1,0.12,0.24,87.378063,19.876874,0.000000,0.032917,1.0,0
P01,Lv1,2,0.24,0.36,90.060824,19.572538,0.008333,0.020576,1.0,0
P01,Lv1,3,0.36,0.48,90.458815,2.120428,0.008333,0.019626,1.0,0
P01,Lv1,4,0.48,0.60,90.744803,0.401074,0.018634,0.201511,1.0,0
...,...,...,...,...,...,...,...,...,...,...
P10,Lv2,4994,599.27,599.39,67.569929,-3.267630,0.008333,0.009542,3.0,19
P10,Lv2,4995,599.39,599.51,70.423274,0.630266,0.000000,0.010146,3.0,19
P10,Lv2,4996,599.51,599.63,70.060952,0.000000,0.000000,0.019442,3.0,19
P10,Lv2,4997,599.63,599.75,69.826055,0.114592,0.000000,0.021400,3.0,19


In [6]:
df_optical_flow: pd.DataFrame = pd.read_parquet("/home/shared_home/2026-NRF-360Videos/data/optical_flow_results.parquet")
# ID와 Lv는 df_filtered_data와 동일하다.
# df_filtered_data의 (ID, Lv, Elapsed)는 df_optical_flow의 (ID, Lv, elapsed_time)와 고유한 값을 가진다.
# 이를 통해, df_filtered_data에 df_optical_flow의 열 OF_magnitude와 OF_ang_entropy를 추가한다.

df_combined: pd.DataFrame = pd.merge(
    df_filtered_data.reset_index(),
    df_optical_flow,
    left_on=['ID', 'Lv', 'Elapsed'],
    right_on=['ID', 'Lv', 'elapsed_time']
)
df_combined.drop(columns=['elapsed_time'], inplace=True)
df_combined.set_index(['ID', 'Lv'], inplace=True)
display(df_combined)

Unnamed: 0_level_0,Unnamed: 1_level_0,frame,prev_Elapsed,Elapsed,gaze-azimuth,gaze-elevation,head-speed,head-ang_vel,score,window,OF_magnitude,OF_ang_entropy
ID,Lv,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
P01,Lv1,0,0.00,0.12,89.270157,19.633359,,,1.0,0,,
P01,Lv1,1,0.12,0.24,87.378063,19.876874,0.000000,0.032917,1.0,0,0.195014,3.088382
P01,Lv1,2,0.24,0.36,90.060824,19.572538,0.008333,0.020576,1.0,0,0.188509,3.336485
P01,Lv1,3,0.36,0.48,90.458815,2.120428,0.008333,0.019626,1.0,0,0.121048,4.551047
P01,Lv1,4,0.48,0.60,90.744803,0.401074,0.018634,0.201511,1.0,0,1.366558,2.101424
...,...,...,...,...,...,...,...,...,...,...,...,...
P10,Lv2,4994,599.27,599.39,67.569929,-3.267630,0.008333,0.009542,3.0,19,1.958745,4.039436
P10,Lv2,4995,599.39,599.51,70.423274,0.630266,0.000000,0.010146,3.0,19,2.066639,3.037501
P10,Lv2,4996,599.51,599.63,70.060952,0.000000,0.000000,0.019442,3.0,19,0.744923,2.767198
P10,Lv2,4997,599.63,599.75,69.826055,0.114592,0.000000,0.021400,3.0,19,1.658318,4.445325


In [None]:
import warnings

groups = df_combined.groupby(['ID', 'Lv', 'window'])

draw = False
fig_w_inch = 180 / 25.4
fig_h_inch = 60 / 25.4

features = []

pbar = tqdm(total=len(groups))

for group_idx, df_group in groups:
    if draw:
        display(df_group)
    pbar.update(1)

    if group_idx[2] == 0:
        continue
    
    gaze_data = df_group[["gaze-azimuth", "gaze-elevation"]].dropna()
    heatmap, _, _ = np.histogram2d(gaze_data["gaze-azimuth"], gaze_data["gaze-elevation"], bins=5, range=[[-90, 90], [-90, 90]])
    heatmap = heatmap / (heatmap.sum() + 1e-9) # 정규화
    nonzero_heatmap = heatmap[heatmap > 0]
    gaze_entropy = -np.sum(nonzero_heatmap * np.log(nonzero_heatmap))

    head_pos_ent = antropy.sample_entropy(df_group["head-speed"].dropna())
    head_rot_ent = antropy.sample_entropy(df_group["head-ang_vel"].dropna())

    if draw:
        display(gaze_entropy, head_pos_ent, head_rot_ent)

    start_time = df_group['prev_Elapsed'].min()
    end_time = df_group['Elapsed'].max()
    
    ecg_data = pd.read_csv(f"/Volumes/LocalHDD/2024-NRF-360Videos-H.T.Kim/raw/{group_idx[0]:00}_0704/{group_idx[1]}/ecg.csv")
    ecg_data['timestamp'] = pd.to_datetime(ecg_data["curr_time"], format='mixed')
    ecg_data['timestamp'] = (ecg_data['timestamp'] - ecg_data['timestamp'].values[0]).dt.total_seconds()

    if group_idx[0] == "P10" and group_idx[1] == "Lv1":
        ecg_data.loc[25500:27000, "value"] = np.nan

    ecg_window = ecg_data[
        (ecg_data['timestamp'] >= start_time) & 
        (ecg_data['timestamp'] <= end_time)]


    if group_idx[0] == "P10" and group_idx[1] == "Lv1": # 특이 케이스
        if group_idx[2] == 12:
            ecg_window = ecg_window.loc[27001:, :]
        elif group_idx[2] == 11:
            ecg_window = ecg_window.loc[:25000, :]
    
    if len(ecg_window) < 10: #약 1초보다 작으면
        hrv = np.nan

    else:
        try:
            fs = 1 / ecg_window['timestamp'].diff().mean()
            info = mne.create_info(ch_names=["ecg_ch"], sfreq=fs, ch_types=["ecg"], verbose='ERROR')
            raw  = mne.io.RawArray(ecg_window.loc[:, "value"].values.reshape(1, -1), info, verbose='ERROR')
            # A Real-Time QRSDetection Algorithm (5-15 Hz 필터)
            raw_smooth = raw.filter(picks=['ecg_ch'], l_freq=5, h_freq=15,  fir_window='blackman', verbose='ERROR').get_data(picks=['ecg_ch'])[0]
    
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=UserWarning)
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                working_data, measure = heartpy.process(raw_smooth, sample_rate=fs, bpmmax=200)
    
            hr    = measure['bpm']
            ibi   = measure['ibi']
            sdnn  = measure['sdnn']
            rmssd = measure['rmssd']
            hrv = rmssd
    
            if draw:
                ax = heartpy.plotter(working_data, measure, show=False, figsize=(fig_w_inch, fig_h_inch))
                fig = ax.get_figure()
                fig.set_dpi(600)
                plt.xlim(10, 15)
                plt.title(f"RMSSD = {rmssd:.3f} ms")
                plt.show()
        except Exception as e:
            print(e)
            print(group_idx)
            print(ecg_window.shape)
            break

    if hrv > 250:
        hrv = None

    of_avg_mag = df_group["OF_magnitude"].dropna().mean()
    of_avg_ent = df_group["OF_ang_entropy"].dropna().mean()
    fms_score  = df_group["score"].dropna().mean().round()

    features.append({
        "ID": group_idx[0],
        "Lv": group_idx[1],
        "Window": group_idx[2],
        "Gaze (shannon entropy)": gaze_entropy,
        "Head velocity (sample entropy)": head_pos_ent,
        "Head angular velocity (sample entropy)": head_rot_ent,
        "HRV": hrv,
        "OF magnitude (avg)": of_avg_mag,
        "OF Sample entropy (avg)": of_avg_ent,
        "FMS": fms_score
    })
    if draw:
        print(features)
        break
pbar.close()
df_features:pd.DataFrame = pd.DataFrame(features)
df_features.set_index(["ID", "Lv", "Window"], inplace=True)
df_features.to_parquet("/home/shared_home/2026-NRF-360Videos/data/features.parquet")
display(df_features)

 90%|█████████ | 362/400 [04:19<00:27,  1.40it/s]
 62%|██████▏   | 247/400 [00:11<00:06, 23.85it/s]