In [None]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... paths used below resolve.
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import datetime
from tqdm import tqdm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from collections import Counter
# !pip install koreanize-matplotlib
# import koreanize_matplotlib

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, TensorDataset
import torch.utils.data as data
import torch.optim as optim
import torchvision
from torch.autograd import Variable

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from PIL import Image

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Merge the per-category MAR/EAR and yaw/pitch CSV files into one CSV
def merge_csv_files(mar_ear_path, yaw_pitch_path, output_file, csv_files=None):
    """
    Merge MAR/EAR feature CSVs with yaw/pitch feature CSVs and save one CSV.

    For every file name in ``csv_files`` the file of that name is read from both
    directories, the two tables are inner-joined on
    ``['video_name', 'frame', 'label']``, and the per-file results are stacked
    row-wise and written to ``output_file`` (index not written).
    File names missing from either directory are reported and skipped.

    Args:
        mar_ear_path: Directory containing the MAR/EAR CSV files.
        yaw_pitch_path: Directory containing the yaw/pitch CSV files.
        output_file: Path of the merged CSV to write.
        csv_files: File names expected in both directories (a single name may
            be passed as a string). When omitted, a module-level ``csv_files``
            is used if one is defined (the previous implicit behaviour);
            otherwise every ``*.csv`` name found in ``mar_ear_path`` is used,
            in sorted order.

    Raises:
        ValueError: If no file pair could be merged, i.e. there is nothing to save.
    """
    # Resolve the list of file names explicitly instead of silently depending on
    # a notebook global that may not exist on a fresh kernel.
    if csv_files is None:
        csv_files = globals().get("csv_files")
    if csv_files is None:
        csv_files = sorted(
            name for name in os.listdir(mar_ear_path) if name.lower().endswith(".csv")
        )
    if isinstance(csv_files, str):
        csv_files = [csv_files]

    merged_data = []  # one merged frame per file pair

    for csv_file in csv_files:
        # Same file name is expected in both feature directories
        mar_ear_file = os.path.join(mar_ear_path, csv_file)
        yaw_pitch_file = os.path.join(yaw_pitch_path, csv_file)

        if not os.path.exists(mar_ear_file) or not os.path.exists(yaw_pitch_file):
            print(f"⚠️ 파일 없음: {csv_file}, 건너뜀")
            continue  # skip pairs that are incomplete

        # Load both feature tables
        mar_ear_df = pd.read_csv(mar_ear_file)
        yaw_pitch_df = pd.read_csv(yaw_pitch_file)
        # Inner join: only frames present in both tables are kept
        merged_df = pd.merge(mar_ear_df, yaw_pitch_df, on=['video_name', 'frame', 'label'], how='inner')
        merged_data.append(merged_df)
        print(f"✅ 병합 완료: {csv_file}")

    if not merged_data:
        # pd.concat([]) would fail with an opaque "No objects to concatenate"
        raise ValueError(
            f"No CSV pair could be merged from {mar_ear_path!r} and {yaw_pitch_path!r}; "
            "nothing to save."
        )

    # Stack all merged files into the final table
    final_df = pd.concat(merged_data, ignore_index=True)
    print(f"📌 최종 데이터 크기: {final_df.shape}")

    # Write the result
    final_df.to_csv(output_file, index=False)
    print(f"✅ 저장 완료: {output_file}")

In [None]:
# mar_ear_train_path = "/content/drive/MyDrive/DMS/data/EDA/mar_ear_results_interpolated"
# yaw_pitch_train_path = "/content/drive/MyDrive/DMS/data/EDA/yaw_pitch_interpolated_results"
# mar_ear_test_path = "/content/drive/MyDrive/DMS/data/EDA/mar_ear_results_interpolated_test"
# yaw_pitch_test_path = "/content/drive/MyDrive/DMS/data/EDA/yaw_pitch_interpolated_results_test"

# csv_files = ['normal_DMD.csv', 'normal_yawDD.csv', 'phonecall_DMD.csv', 'text_DMD.csv', 'yawn_yawDD.csv']

# # train data
# train_output_file = "/content/drive/MyDrive/DMS/data/EDA/train_features.csv"
# merge_csv_files(mar_ear_train_path, yaw_pitch_train_path, train_output_file)
# # test data
# test_output_file = "/content/drive/MyDrive/DMS/data/EDA/test_features.csv"
# merge_csv_files(mar_ear_test_path, yaw_pitch_test_path, test_output_file)

✅ 병합 완료: normal_DMD.csv
✅ 병합 완료: normal_yawDD.csv
✅ 병합 완료: phonecall_DMD.csv
✅ 병합 완료: text_DMD.csv
✅ 병합 완료: yawn_yawDD.csv
📌 최종 데이터 크기: (104940, 16)
✅ 저장 완료: /content/drive/MyDrive/DMS/data/EDA/train_features.csv
✅ 병합 완료: normal_DMD.csv
✅ 병합 완료: normal_yawDD.csv
✅ 병합 완료: phonecall_DMD.csv
✅ 병합 완료: text_DMD.csv
✅ 병합 완료: yawn_yawDD.csv
📌 최종 데이터 크기: (23568, 16)
✅ 저장 완료: /content/drive/MyDrive/DMS/data/EDA/test_features.csv


## phone_yn

In [None]:
test.shape

(23568, 16)

In [None]:
detection_train_path = "/content/drive/MyDrive/DMS/phone_detection_results/detection_finetuning_2_train"
detection_test_path = "/content/drive/MyDrive/DMS/phone_detection_results/detection_finetuning_2_test"


def _load_detection_results(directory):
    """
    Read every Excel file in ``directory`` and stack them row-wise.

    The files are read in sorted name order so the resulting row order is
    reproducible (``os.listdir`` order is arbitrary). All frames are
    concatenated once, instead of re-concatenating inside the loop, which
    copied the accumulated data on every iteration.

    Args:
        directory: Folder that contains the per-category detection workbooks.

    Returns:
        Tuple ``(file_names, frame)``: the sorted file names and the stacked
        DataFrame (an empty DataFrame when the folder has no files). Each
        file's own index is kept, as before.
    """
    file_names = sorted(os.listdir(directory))
    frames = [pd.read_excel(os.path.join(directory, file_name)) for file_name in file_names]
    if not frames:
        return file_names, pd.DataFrame()
    return file_names, pd.concat(frames, axis=0)


train_list, train_df = _load_detection_results(detection_train_path)
test_list, test_df = _load_detection_results(detection_test_path)

In [None]:
train_df.shape

(104940, 4)

In [None]:
test_df.shape

(23568, 4)

In [None]:
# Attach the phone-detection results (test_df) to the test feature table.
# Inner join on (video_name, frame, label): rows without a match on both sides are dropped.
# NOTE(review): in the visible notebook `test` is only assigned in a later cell
# (pd.read_csv of test_features.csv), so this cell depends on out-of-order execution.
merged_df = pd.merge(test, test_df, on=['video_name', 'frame', 'label'], how='inner')
merged_df

Unnamed: 0,label,video_name,frame,MAR,avg_EAR,mouth_landmarks,eye_landmarks,dataset_x,label_dataset_x,pitch,yaw,bbox,landmarks,score,dataset_y,label_dataset_y,Phone_Detected
0,normal,gA-1(File1)_s1_face_2019-03-08-09_31_15_1_0.av...,frame000000.jpg,0.070551,0.135765,"[(0.4438210427761078, 0.5858988165855408), (0....","[(0.4684283137321472, 0.45571446418762207), (0...",DMD,normal_DMD,-0.552230,0.222770,[ 73.838455 67.83162 127.82963 157.31305 ],[[ 90.874275 99.22951 ]\n [114.23147 96.989...,0.993374,DMD,normal_DMD,1
1,normal,gA-1(File1)_s1_face_2019-03-08-09_31_15_1_0.av...,frame000001.jpg,0.059466,0.123495,"[(0.42648833990097046, 0.5904693603515625), (0...","[(0.4488443434238434, 0.4576459228992462), (0....",DMD,normal_DMD,-0.478634,0.214337,[ 71.57822 67.70796 128.09468 160.94812],[[ 88.3256 100.67707 ]\n [111.10382 97.990...,0.985109,DMD,normal_DMD,0
2,normal,gA-1(File1)_s1_face_2019-03-08-09_31_15_1_0.av...,frame000002.jpg,0.039986,0.136494,"[(0.42261967062950134, 0.5853195190429688), (0...","[(0.4430800676345825, 0.46035414934158325), (0...",DMD,normal_DMD,-0.456675,0.258366,[ 70.33193 70.14677 124.85025 158.7283 ],[[ 85.989395 101.48747 ]\n [109.06062 98.659...,0.976770,DMD,normal_DMD,0
3,normal,gA-1(File1)_s1_face_2019-03-08-09_31_15_1_0.av...,frame000003.jpg,0.032200,0.125553,"[(0.4287946820259094, 0.5709034204483032), (0....","[(0.4521143436431885, 0.45798560976982117), (0...",DMD,normal_DMD,-0.397540,0.204536,[ 72.99715 67.587524 127.4203 157.53531 ],[[ 89.35186 100.2081 ]\n [112.25297 97.982...,0.983055,DMD,normal_DMD,0
4,normal,gA-1(File1)_s1_face_2019-03-08-09_31_15_1_0.av...,frame000004.jpg,0.058267,0.128621,"[(0.43164488673210144, 0.6094807386398315), (0...","[(0.4580366611480713, 0.4731653332710266), (0....",DMD,normal_DMD,-0.272046,-0.064560,[ 73.80947 70.99065 125.698524 158.69888 ],[[ 88.44041 104.01303 ]\n [110.65662 101.411...,0.975263,DMD,normal_DMD,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23563,yawn,mirror9-MaleNoGlasses-Yawning-clip-3_part3,frame000031.jpg,0.031393,0.134569,"[(0.5683697462081909, 0.4406026303768158), (0....","[(0.6237543821334839, 0.20145469903945923), (0...",yawDD,yawn_yawDD,-0.631788,0.059873,[ 77.124504 1.7488117 162.4057 124.55851 ],[[113.36151 48.33448 ]\n [149.5169 45.120...,0.998918,yawDD,yawn_yawDD,0
23564,yawn,mirror9-MaleNoGlasses-Yawning-clip-3_part3,frame000032.jpg,0.030392,0.133561,"[(0.5623183846473694, 0.43609729409217834), (0...","[(0.6254308223724365, 0.20248718559741974), (0...",yawDD,yawn_yawDD,-0.629613,0.084375,[ 77.33943 3.187276 162.16576 124.21048 ],[[113.033615 47.847115]\n [149.78268 45.269...,0.998938,yawDD,yawn_yawDD,0
23565,yawn,mirror9-MaleNoGlasses-Yawning-clip-3_part3,frame000033.jpg,0.024457,0.128737,"[(0.5627889633178711, 0.4427017569541931), (0....","[(0.6296990513801575, 0.20662644505500793), (0...",yawDD,yawn_yawDD,-0.602725,0.048562,[ 77.43277 3.1372948 161.81201 124.62359 ],[[114.08974 48.92663 ]\n [150.32463 46.292...,0.999184,yawDD,yawn_yawDD,0
23566,yawn,mirror9-MaleNoGlasses-Yawning-clip-3_part3,frame000034.jpg,0.031771,0.129128,"[(0.5624699592590332, 0.4468122720718384), (0....","[(0.6305323243141174, 0.20895424485206604), (0...",yawDD,yawn_yawDD,-0.576298,0.050301,[ 78.08217 3.1839714 161.67291 126.23559 ],[[114.27586 48.915222]\n [150.2045 46.654...,0.999166,yawDD,yawn_yawDD,0


In [None]:
# Write the merged table back to test_features.csv, i.e. the same file `test` is read from below.
# NOTE(review): because input and output share one path, re-running the merge on the
# already-merged file would produce Phone_Detected_x / Phone_Detected_y (pandas' default
# suffixes for overlapping non-key columns) — consider a separate output file.
merged_df.to_csv("/content/drive/MyDrive/DMS/data/EDA/test_features.csv",index=False)

In [None]:
# Load the review spreadsheet listing videos whose label must change.
# pd.read_excel() needs the workbook path; called with no arguments it raises TypeError.
deleted_df = pd.read_excel("/content/drive/MyDrive/DMS/deleted_list.xlsx")
deleted_df

In [None]:
# Key columns and numeric feature columns.
# Both lists are only referenced by the commented-out column selection at the end of this cell.
required_cols = ['label', 'video_name', 'frame']
feature_cols = ["pitch", "yaw", "avg_EAR", "MAR", 'Phone_Detected']

# Load the merged per-frame feature tables written by the earlier cells.
train = pd.read_csv("/content/drive/MyDrive/DMS/data/EDA/train_features.csv")
test = pd.read_csv("/content/drive/MyDrive/DMS/data/EDA/test_features.csv")

# Disabled: restrict both tables to key + feature columns.
# train = train[required_cols + feature_cols]
# test = test[required_cols + feature_cols]

In [None]:
# Report the table sizes. The dropna calls are commented out, so rows with missing values are kept.
# train.dropna(inplace=True)
# test.dropna(inplace=True)
print("학습 데이터: ", train.shape)
print("테스트 데이터: ", test.shape)

학습 데이터:  (104940, 17)
테스트 데이터:  (23568, 17)


In [None]:
train.columns

Index(['label', 'video_name', 'frame', 'MAR', 'avg_EAR', 'mouth_landmarks',
       'eye_landmarks', 'dataset_x', 'label_dataset_x', 'pitch', 'yaw', 'bbox',
       'landmarks', 'score', 'dataset_y', 'label_dataset_y', 'Phone_Detected'],
      dtype='object')

In [None]:
test.columns

Index(['label', 'video_name', 'frame', 'MAR', 'avg_EAR', 'mouth_landmarks',
       'eye_landmarks', 'dataset_x', 'label_dataset_x', 'pitch', 'yaw', 'bbox',
       'landmarks', 'score', 'dataset_y', 'label_dataset_y', 'Phone_Detected'],
      dtype='object')

In [None]:
# Columns kept from the merged feature table (the *_x duplicates are left out).
need_column = [
    'label', 'video_name', 'frame',
    'MAR', 'avg_EAR', 'mouth_landmarks', 'eye_landmarks',
    'pitch', 'yaw',
    'bbox', 'landmarks', 'score',
    'Phone_Detected',
    'dataset_y', 'label_dataset_y',
]

# Select the columns and give them their final names in one chained step.
train = train[need_column].rename(columns={
    'avg_EAR': 'EAR',                    # mean over both eyes, i.e. the EAR
    'pitch': 'gaze_pitch',               # make explicit that these are gaze angles
    'yaw': 'gaze_yaw',
    'bbox': 'face_bbox',                 # face bounding box
    'landmarks': 'face_landmarks',
    'score': 'detection_score',          # represents the object-detection accuracy
    'Phone_Detected': 'phone_detected',  # lower case for consistency
    'dataset_y': 'dataset',
    'label_dataset_y': 'label_dataset',
})

In [None]:
# Same column selection and renaming as for the training table.
test = test[need_column].rename(columns={
    'avg_EAR': 'EAR',                    # mean over both eyes, i.e. the EAR
    'pitch': 'gaze_pitch',               # make explicit that these are gaze angles
    'yaw': 'gaze_yaw',
    'bbox': 'face_bbox',                 # face bounding box
    'landmarks': 'face_landmarks',
    'score': 'detection_score',          # represents the object-detection accuracy
    'Phone_Detected': 'phone_detected',  # lower case for consistency
    'dataset_y': 'dataset',
    'label_dataset_y': 'label_dataset',
})

In [None]:
test.columns

Index(['label', 'video_name', 'frame', 'MAR', 'EAR', 'mouth_landmarks',
       'eye_landmarks', 'gaze_pitch', 'gaze_yaw', 'face_bbox',
       'face_landmarks', 'detection_score', 'phone_detected', 'dataset',
       'label_dataset'],
      dtype='object')

In [None]:
train.describe()

Unnamed: 0,MAR,EAR,gaze_pitch,gaze_yaw,detection_score,phone_detected
count,104652.0,104652.0,104940.0,104940.0,104833.0,104940.0
mean,0.102117,0.143321,-0.094581,0.073541,0.97862,0.308824
std,0.094685,0.020693,0.385575,0.228067,0.030572,0.462011
min,0.005048,0.029377,-2.675441,-1.369263,0.501035,0.0
25%,0.040269,0.131703,-0.268634,0.03627,0.976255,0.0
50%,0.066355,0.143965,-0.095511,0.100966,0.986785,0.0
75%,0.123901,0.15584,0.072385,0.177552,0.992385,1.0
max,0.700324,0.40618,2.666869,0.741876,0.999704,1.0


In [None]:
train['dataset'].value_counts()

Unnamed: 0_level_0,count
dataset,Unnamed: 1_level_1
DMD,97548
yawDD,7392


In [None]:
train['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
normal,65196
phonecall,25296
text,10044
yawn,4404


In [None]:
# Load the review spreadsheet of problematic videos and preview its first rows.
# Its columns (see output): video_path, 이유 (reason), 지금 label (current label),
# 바뀌어야할 label (label it should be changed to).
deleted_df = pd.read_excel("/content/drive/MyDrive/DMS/deleted_list.xlsx")
deleted_df.head()

Unnamed: 0,video_path,이유,지금 label,바뀌어야할 label
0,/normal_DMD/gZ-37(File31)_s2_face_2019-04-08-1...,text 가 포함되어있음,normal,text
1,/text_DMD/gZ-33(File27)_s4_face_2019-04-03-09_...,text 행동이 포함되어있지 않음,text,normal
2,/text_DMD/gZ-33(File27)_s2_face_2019-04-08-16_...,text 행동이 포함되어있지 않음,text,normal
3,/text_DMD/gZ-33(File27)_s2_face_2019-04-08-16_...,text 행동이 포함되어있지 않음,text,normal
4,/text_DMD/gZ-33(File27)_s4_face_2019-04-03-09_...,text 행동이 포함되어있지 않음,text,normal


In [None]:
# Rows of the review sheet that specify a replacement label.
change_data = deleted_df[deleted_df['바뀌어야할 label'].notna()]

# Keep only the last path component of each listed video.
normal_list = [path.split('/')[-1] for path in change_data['video_path']]

# NOTE(review): purely positional — the first listed video is set aside to be dropped and
# every remaining one is treated as "relabel to normal". This presumably matches the
# current sheet order (first row is normal -> text); verify if the spreadsheet changes.
drop_video, normal_list = normal_list[0], normal_list[1:]

In [None]:
# Work on a copy so `train` itself stays untouched.
droped_train = train.copy()

# Frames of the listed videos that are currently labelled 'text' get relabelled 'normal'.
is_listed_text = droped_train["video_name"].isin(normal_list) & droped_train["label"].eq("text")
tonormal_idx = droped_train.index[is_listed_text]
droped_train.loc[tonormal_idx, "label"] = "normal"

# Number of relabelled frames
len(tonormal_idx)

60

In [None]:
# Remove every frame that belongs to the video set aside in `drop_video`.
drop_idx = droped_train.index[droped_train["video_name"].eq(drop_video)]
droped_train = droped_train.drop(index=drop_idx)

In [None]:
len(drop_idx)

12

In [None]:
64932-64980

-48

In [None]:
droped_train['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
normal,65244
phonecall,25296
text,9984
yawn,4404


In [None]:
train.shape

(104940, 15)

In [None]:
droped_train.shape

(104928, 15)

In [None]:
train.isna().sum()

Unnamed: 0,0
label,0
video_name,0
frame,0
MAR,288
EAR,288
mouth_landmarks,0
eye_landmarks,3687
gaze_pitch,0
gaze_yaw,0
face_bbox,107


In [None]:
# Persist the cleaned training set.
# The label corrections and the video removal were applied to `droped_train` (a copy),
# so that frame must be the one saved; writing the untouched `train` silently discarded
# the clean-up and left the removed rows in train_processed.csv.
train = droped_train.reset_index(drop=True)
train.to_csv("/content/drive/MyDrive/DMS/data/EDA/train_processed.csv", index=False)

In [None]:
# Persist the renamed/selected test table (index not written).
test.to_csv("/content/drive/MyDrive/DMS/data/EDA/test_processed.csv", index=False)

In [None]:
train

Unnamed: 0,label,video_name,frame,pitch,yaw,avg_EAR,MAR
0,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000000.jpg,-0.802380,-0.178544,0.188577,0.152621
1,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000001.jpg,-0.258022,-0.444779,0.115461,0.098821
2,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000002.jpg,0.173530,-0.746606,0.101308,0.080448
3,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000003.jpg,-0.022168,-0.725515,0.101270,0.099019
4,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000004.jpg,-0.187156,-0.672688,0.110530,0.118148
...,...,...,...,...,...,...,...
104647,yawn,mirror47-MaleNoGlasses-Yawning-clip-1_part2,frame000019.jpg,-0.576143,0.053985,0.105928,0.092374
104648,yawn,mirror47-MaleNoGlasses-Yawning-clip-1_part2,frame000020.jpg,-0.582594,0.045665,0.096795,0.101725
104649,yawn,mirror47-MaleNoGlasses-Yawning-clip-1_part2,frame000021.jpg,-0.575541,-0.049977,0.102856,0.186143
104650,yawn,mirror47-MaleNoGlasses-Yawning-clip-1_part2,frame000022.jpg,-0.617350,-0.017204,0.136577,0.261266


In [None]:
droped_train.shape

(104928, 15)

In [None]:
test.shape

(23568, 15)

## video_clip 변수 추가

In [None]:
# Reload the processed tables from disk; this rebinds `train` / `test` to whatever the CSVs currently hold.
train = pd.read_csv("/content/drive/MyDrive/DMS/data/EDA/train_processed.csv")
test = pd.read_csv("/content/drive/MyDrive/DMS/data/EDA/test_processed.csv")

In [None]:
train.shape

(104940, 17)

In [None]:
# video name -> derive the clip name and the segment (part) id
def create_clip_name(df):
    """
    Add ``segment_id`` and ``video_clip`` columns derived from ``video_name``.

    Each ``video_name`` is split once at its last underscore. The text before
    it becomes ``video_clip``; the text after it, with the substring "part"
    removed, becomes ``segment_id`` (kept as a string).
    Example: "clip-3_part12" -> video_clip "clip-3", segment_id "12".
    A name without any underscore is used as-is for ``video_clip``.

    Vectorised string methods are used for both columns, so a missing name
    (None/NaN) yields missing values instead of raising AttributeError.

    Args:
        df: DataFrame with a string column ``video_name``. Modified in place.

    Returns:
        The same DataFrame object, with the two columns added or overwritten.
    """
    # Split once from the right: [clip name, "part<N>"]
    name_parts = df['video_name'].str.rsplit('_', n=1)

    # segment_id: the trailing piece with the literal "part" stripped
    df['segment_id'] = name_parts.str[-1].str.replace("part", "", regex=False)

    # video_clip: everything before the last underscore
    df['video_clip'] = name_parts.str[0]

    return df

In [None]:
# Add the segment_id / video_clip columns to both tables.
# create_clip_name writes into the frame it is given and returns that same frame.
train = create_clip_name(train)
test = create_clip_name(test)

In [None]:
train.iloc[0]

Unnamed: 0,0
label,normal
video_name,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...
frame,frame000000.jpg
MAR,0.152621
EAR,0.188577
mouth_landmarks,"[(0.5373177528381348, 0.6072497367858887), (0...."
eye_landmarks,"[(0.563341498374939, 0.4690839350223541), (0.5..."
gaze_pitch,-0.80238
gaze_yaw,-0.178544
face_bbox,[ 86.03364 65.879135 143.4479 162.76117 ]


In [None]:
# Overwrite the processed CSVs so they include the segment_id / video_clip columns.
# NOTE(review): segment_id is a string in memory; presumably it is parsed back as an
# integer when these files are re-read with pd.read_csv — confirm downstream expectations.
train.to_csv("/content/drive/MyDrive/DMS/data/EDA/train_processed.csv", index=False)
test.to_csv("/content/drive/MyDrive/DMS/data/EDA/test_processed.csv", index=False)

In [None]:
# Rename a 'part' column to 'segment_id' in both tables.
# NOTE(review): create_clip_name already writes 'segment_id' directly, and DataFrame.rename
# ignores keys that are not present by default, so this is a no-op unless the frames were
# loaded from an older CSV that still had a 'part' column — presumably a leftover; confirm
# and remove. It also runs after the CSVs were saved, so it never affects the files.
train.rename(columns={'part': 'segment_id'}, inplace=True)
test.rename(columns={'part': 'segment_id'}, inplace=True)

In [None]:
train.columns

Index(['label', 'video_name', 'frame', 'MAR', 'EAR', 'mouth_landmarks',
       'eye_landmarks', 'gaze_pitch', 'gaze_yaw', 'face_bbox',
       'face_landmarks', 'detection_score', 'phone_detected', 'dataset',
       'label_dataset', 'segment_id', 'video_clip'],
      dtype='object')

In [None]:
test.shape

(23568, 17)

In [None]:
train.head()

Unnamed: 0,label,video_name,frame,MAR,EAR,mouth_landmarks,eye_landmarks,gaze_pitch,gaze_yaw,face_bbox,face_landmarks,detection_score,phone_detected,dataset,label_dataset,segment_id,video_clip
0,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000000.jpg,0.152621,0.188577,"[(0.5373177528381348, 0.6072497367858887), (0....","[(0.563341498374939, 0.4690839350223541), (0.5...",-0.80238,-0.178544,[ 86.03364 65.879135 143.4479 162.76117 ],[[115.35201 99.411316]\n [129.58809 96.632...,0.991986,0,DMD,normal_DMD,1,gB-10(File10)s1face2019-03-11-152454100.avi
1,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000001.jpg,0.098821,0.115461,"[(0.4927932024002075, 0.6534214019775391), (0....","[(0.5468105673789978, 0.5043249130249023), (0....",-0.258022,-0.444779,[ 85.94774 69.89338 141.70164 173.82745],[[108.4074 108.82792]\n [128.76982 108.30778]...,0.982868,0,DMD,normal_DMD,1,gB-10(File10)s1face2019-03-11-152454100.avi
2,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000002.jpg,0.080448,0.101308,"[(0.4749829173088074, 0.683114230632782), (0.4...","[(0.5377631783485413, 0.5218435525894165), (0....",0.17353,-0.746606,[ 83.76038 74.02278 142.2394 178.90962],[[104.08678 114.05342 ]\n [128.643 115.441...,0.980093,0,DMD,normal_DMD,1,gB-10(File10)s1face2019-03-11-152454100.avi
3,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000003.jpg,0.099019,0.10127,"[(0.4676503837108612, 0.6920359134674072), (0....","[(0.5339944362640381, 0.529312014579773), (0.5...",-0.022168,-0.725515,[ 84.322174 75.72479 141.48697 181.58836 ],[[102.33694 116.483734]\n [126.94715 117.517...,0.973711,0,DMD,normal_DMD,1,gB-10(File10)s1face2019-03-11-152454100.avi
4,normal,gB-10(File10)_s1_face_2019-03-11-15_24_54_10_0...,frame000004.jpg,0.118148,0.11053,"[(0.46873900294303894, 0.6865314245223999), (0...","[(0.5324735641479492, 0.5256364345550537), (0....",-0.187156,-0.672688,[ 83.90695 74.74411 141.61597 182.33232],[[103.035934 115.61689 ]\n [126.724915 116.420...,0.970456,0,DMD,normal_DMD,1,gB-10(File10)s1face2019-03-11-152454100.avi


In [None]:
train.iloc[101500]

Unnamed: 0,101500
label,yawn
video_name,dash7-FemaleNoGlasses-clip-0_part1
frame,frame000004.jpg
MAR,0.041139
EAR,0.129799
mouth_landmarks,"[(0.4847598969936371, 0.4435986876487732), (0...."
eye_landmarks,"[(0.5503451824188232, 0.3068740963935852), (0...."
gaze_pitch,0.028741
gaze_yaw,0.007136
face_bbox,[ 89.435844 40.679012 142.43706 118.89299 ]
