In [1]:
import cv2
import pandas as pd
import numpy as np
import os
from glob import glob
import matplotlib.pyplot as plt
from tqdm import tqdm
from models import LSTM
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

In [2]:
dataset = 'pointsCSV_lite'
fight_train_video_names = os.listdir(f'./{dataset}/train/fight/')
fight_val_video_names = os.listdir(f'./{dataset}/val/fight/')
normal_train_video_names = os.listdir(f'./{dataset}/train/normal/')
normal_val_video_names = os.listdir(f'./{dataset}/val/normal/')

fight_train_csv_paths = list(map(lambda x:f'./{dataset}/train/fight/'+x, fight_train_video_names))
fight_val_csv_paths = list(map(lambda x:f'./{dataset}/val/fight/'+x, fight_val_video_names))
normal_train_csv_paths = list(map(lambda x:f'./{dataset}/train/normal/'+x, normal_train_video_names))
normal_val_csv_paths = list(map(lambda x:f'./{dataset}/val/normal/'+x, normal_val_video_names))

In [3]:
# CSV 경로
# csv_path = "./pose_results_3fps.csv"  # ← 파일 경로 입력
csv_path = './pointsCSV_lite/train/fight/'

# CSV 로드
# df = pd.read_csv(csv_path)
df = pd.read_csv(csv_path+os.listdir(csv_path)[0])

# frame 열 제거
data_only = df.drop(columns=['frame'])
if 'video' in data_only.columns:
    data_only = data_only.drop(columns=['video'])

# numpy 배열로 변환
data_array = data_only.values
print(data_array.shape)

# reshape to (num_frames, 33, 3)
all_data = data_array.reshape(-1, 33, 3)  # 33 keypoints, 3 values (x, y, vis)

# numpy로 변환
np_data = np.array(all_data, dtype=np.float32)

# 사용할 keypoint 인덱스: [0, 11~32]
selected_indices = list(range(11, 33))

# x 값만 추출 → shape: (num_frames, 23)
x_only_selected = np_data[:, selected_indices, 0:2]

print(data_array.shape)
print(x_only_selected.shape)  # (num_frames, 23)

(181, 99)
(181, 99)
(181, 22, 2)


In [4]:
def getKeypoints(csv_path):
    df = pd.read_csv(csv_path)

    # frame 열 제거
    data_only = df.drop(columns=['frame'])
    if 'video' in data_only.columns:
        data_only = data_only.drop(columns=['video'])

    # numpy 배열로 변환
    data_array = data_only.values

    # reshape to (num_frames, 33, 3)
    all_data = data_array.reshape(-1, 33, 3)  # 33 keypoints, 3 values (x, y, vis)

    # numpy로 변환
    np_data = np.array(all_data, dtype=np.float32)

    # 사용할 keypoint 인덱스: [0, 11~32]
    selected_indices = list(range(11, 33))

    # x 값만 추출 → shape: (num_frames, 23)
    x_only_selected = np_data[:, selected_indices, 0:2]

    return x_only_selected

In [5]:
getKeypoints('./pose_results_3fps.csv').shape

(180, 22, 2)

In [6]:
def csvs_to_flattened_numpy(csv_dir):
    """
    각 CSV 파일을 (180, 23) → (4140,)으로 평탄화하고 전체를 (B, 4140) 형태로 반환

    Parameters:
        csv_dir (str): CSV 파일들이 있는 디렉토리 경로
        
    Returns:
        data_np (np.ndarray): shape = (B, 4140), x좌표만 포함된 평탄화된 벡터
        meta_list (list): 각 행에 해당하는 (video_name) 정보
    """
    keypoint_indices = list(range(11, 33))
    x_columns = [f'x_{i}' for i in keypoint_indices]
    y_columns = [f'y_{i}' for i in keypoint_indices]

    csv_files = sorted(glob(os.path.join(csv_dir, "*.csv")))
    data_list = []
    meta_list = []

    for csv_file in csv_files:
        df = pd.read_csv(csv_file)

        # 원하는 x값만 추출: shape (180, 23)
        x_data = df[x_columns+y_columns].values.astype(np.float32)[:180]

        # 평탄화: (180, 23) → (4140,)
        flat_vector = x_data.flatten()  # row-major order by default

        data_list.append(flat_vector)
        meta_list.append(os.path.basename(csv_file))  # 또는 video 이름

    data_np = np.stack(data_list)  # shape: (B, 4140)

    return data_np, meta_list

In [7]:
csvs_to_flattened_numpy('./pointsCSV_lite/train/fight/')[0].shape

(60, 7920)

In [8]:
train_fight = csvs_to_flattened_numpy('./pointsCSV/train/fight/')[0]
val_fight = csvs_to_flattened_numpy('./pointsCSV/val/fight/')[0]
train_normal = csvs_to_flattened_numpy('./pointsCSV/train/normal/')[0]
# val_normal = csvs_to_flattened_numpy('./pointsCSV/val/normal/')[0]

train_x = np.concatenate([train_fight[:630], train_normal[:700]])
train_y = [1 for i in range(630)]+[0 for i in range(700)]
# val_x = np.concatenate([val_fight, val_normal])
# val_y = [1 for i in val_fight]+[0 for i in val_normal]

In [9]:
train_x = np.concatenate([train_x, val_fight[:70]])
train_y = train_y+[1 for i in range(70)]

In [10]:
len(train_fight), 

(630,)

In [11]:
train_x.shape, len(train_y), 

((1400, 7920), 1400)

In [12]:
train_x, train_y = shuffle(train_x, train_y, random_state=100)
# val_x, val_y = shuffle(val_x, val_y, random_state=100)

In [13]:
train_x, test_x, train_y, test_y = train_test_split(
    train_x,
    train_y,
    test_size=0.3,
    shuffle=True,
    stratify=train_y
)

In [14]:
len(train_x), len(test_x)

(980, 420)

In [15]:
train_x[0]

array([0.        , 0.        , 0.        , ..., 0.56717116, 0.55217636,
       0.5731965 ], dtype=float32)

In [16]:
val_x, test_x, val_y, test_y = train_test_split(
    test_x,
    test_y,
    test_size=1/3,
    shuffle=True
)

In [17]:
len(val_x), len(test_x)

(280, 140)

In [18]:
svm = SVC(
    C=0.1, 
    probability=True
    )
rf = RandomForestClassifier(
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=10
    )

In [19]:
svm.fit(train_x, train_y)

In [20]:
svm_result = svm.predict(val_x)

In [21]:
def evaluation(y_true, y_pred):
    acc  = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred)
    rec  = recall_score(y_true, y_pred)
    f1   = f1_score(y_true, y_pred)

    print(f"Accuracy : {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall   : {rec:.4f}")
    print(f"F1-score : {f1:.4f}")

In [22]:
evaluation(val_y, svm_result)

Accuracy : 0.9464
Precision: 0.9262
Recall   : 0.9718
F1-score : 0.9485


In [23]:
rf.fit(train_x, train_y)

In [24]:
result_rf = rf.predict(val_x)

In [25]:
evaluation(val_y, result_rf)

Accuracy : 0.9714
Precision: 0.9589
Recall   : 0.9859
F1-score : 0.9722


In [26]:
svm_result = svm.predict(train_x)
result_rf = rf.predict(train_x)

evaluation(train_y, svm_result)
evaluation(train_y, result_rf)

Accuracy : 0.9459
Precision: 0.9146
Recall   : 0.9837
F1-score : 0.9479
Accuracy : 0.9888
Precision: 0.9819
Recall   : 0.9959
F1-score : 0.9889


In [27]:
svm_result = svm.predict(val_x)
result_rf = rf.predict(val_x)

evaluation(val_y, svm_result)
evaluation(val_y, result_rf)

Accuracy : 0.9464
Precision: 0.9262
Recall   : 0.9718
F1-score : 0.9485
Accuracy : 0.9714
Precision: 0.9589
Recall   : 0.9859
F1-score : 0.9722


In [28]:
svm_result = svm.predict(test_x)
result_rf = rf.predict(test_x)

evaluation(test_y, svm_result)
evaluation(test_y, result_rf)

Accuracy : 0.9357
Precision: 0.8933
Recall   : 0.9853
F1-score : 0.9371
Accuracy : 0.9714
Precision: 0.9571
Recall   : 0.9853
F1-score : 0.9710


In [47]:
sample_csv = getKeypoints('./pose_results_3fps.csv')
sample_np = sample_csv.reshape([1, -1])

In [48]:
# sample_result_svm = svm.predict(sample_np)
# sample_result_rf = rf.predict(sample_np)

In [49]:
sample_prob_svm = svm.predict_proba(sample_np)
sample_prob_rf = rf.predict_proba(sample_np)

In [50]:
# sample_result_svm

In [51]:
# sample_result_rf

In [52]:
sample_prob_svm

array([[0.46304114, 0.53695886]])

In [53]:
sample_prob_rf

array([[0.25406716, 0.74593284]])

In [54]:
sample_csv = getKeypoints('./pose_results_3fps2.csv')
sample_np = sample_csv[:180, :].reshape([1, -1])

In [55]:
# sample_result_svm = svm.predict(sample_np)
# sample_result_rf = rf.predict(sample_np)

# sample_result_svm

In [56]:
# sample_result_rf

In [57]:
sample_prob_svm = svm.predict_proba(sample_np)
sample_prob_rf = rf.predict_proba(sample_np)

In [58]:
sample_prob_svm

array([[0.99824841, 0.00175159]])

In [59]:
sample_prob_rf

array([[0.99017467, 0.00982533]])