In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used by
# the rest of the notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import shutil
import cv2 as cv
import numpy as np
import pandas as pd
import random
from tqdm.notebook import tqdm

# ----------------------------------------
# 1) Path configuration (Colab + Google Drive)
# ----------------------------------------
DRIVE_ROOT = '/content/drive/MyDrive/Capstone/GrayScale'
BASE_DIR = DRIVE_ROOT  # alias of DRIVE_ROOT, i.e. the Capstone/GrayScale folder

# Local scratch directory inside the Colab VM.
# /content/tmp_sample must be writable; it is created here if it is missing.
LOCAL_TMP_ROOT = '/content/tmp_sample'
os.makedirs(LOCAL_TMP_ROOT, exist_ok=True)

# Output feature CSV and the checklist of finished samples (both stored on Drive)
OUTPUT_CSV = os.path.join(DRIVE_ROOT, 'features_tcn.csv')
COMPLETED_TXT = os.path.join(DRIVE_ROOT, 'processed_samples.txt')



In [None]:
# ----------------------------------------
# 2) Helper definitions for the optical-flow computation
# ----------------------------------------
# Step (in pixels) between neighbouring grid points that get tracked.
GRID_SPACING = 20

# Keyword arguments forwarded to every cv.calcOpticalFlowPyrLK call.
lk_params = {
    "winSize": (15, 15),
    "maxLevel": 2,
    "criteria": (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03),
}

def generate_grid_points(w, h, spacing):
    """
    Build a regular grid of tracking points covering a w x h image.

    Points start at spacing // 2 on both axes and repeat every `spacing`
    pixels. They are ordered row by row (y is the outer axis, x the inner one).

    w: image width in pixels
    h: image height in pixels
    spacing: distance between neighbouring grid points; must be positive
    returns: float32 array, shape=(num_pts, 1, 2), each entry an (x, y) pair.
             The shape is (0, 1, 2) when the image is too small to hold a point,
             so callers can always rely on the trailing (1, 2) dimensions.
    raises: ValueError if spacing is not positive
    """
    if spacing <= 0:
        raise ValueError(f"spacing must be positive, got {spacing}")
    offset = spacing // 2
    xs = np.arange(offset, w, spacing)
    ys = np.arange(offset, h, spacing)
    # Default 'xy' indexing makes x vary fastest after ravel(), which matches
    # a "for y: for x:" traversal.
    grid_x, grid_y = np.meshgrid(xs, ys)
    pts = np.stack([grid_x.ravel(), grid_y.ravel()], axis=1)
    return pts.reshape(-1, 1, 2).astype(np.float32)

def compute_dxdy_sequence(gray_frames):
    """
    Track a fixed grid of points between every pair of consecutive frames.

    gray_frames: shape=(num_frames, H, W), dtype=uint8
    returns: dx_frames, dy_frames  each shape=(num_frames-1, num_pts), float32.
             Row i holds the per-point displacement from frame i to frame i+1.
             Points whose Lucas-Kanade status is not 1 keep the value 0, and a
             step for which no result is returned stays all-zero.
             With fewer than two frames both arrays have zero rows.
    """
    num_frames, H, W = gray_frames.shape
    # The grid is the same for every step, so it is built once instead of
    # being regenerated per frame.
    grid = generate_grid_points(W, H, GRID_SPACING)
    grid_xy = grid.reshape(-1, 2)
    num_pts = grid_xy.shape[0]
    num_steps = max(num_frames - 1, 0)
    dx_frames = np.zeros((num_steps, num_pts), dtype=np.float32)
    dy_frames = np.zeros((num_steps, num_pts), dtype=np.float32)
    for step in range(num_steps):
        # Frames are only read here, so slices of the input are passed
        # directly without copying.
        p1, st, _ = cv.calcOpticalFlowPyrLK(
            gray_frames[step], gray_frames[step + 1], grid, None, **lk_params
        )
        if p1 is None:
            continue
        tracked = st.reshape(-1) == 1
        motion = p1.reshape(-1, 2)[tracked] - grid_xy[tracked]
        dx_frames[step, tracked] = motion[:, 0]
        dy_frames[step, tracked] = motion[:, 1]
    return dx_frames, dy_frames

def find_peak_window(dx_frames, dy_frames, window_size=44):
    """
    Locate the window of consecutive optical-flow steps with the most motion.

    dx_frames, dy_frames: shape=(num_steps, num_pts)
    window_size: number of optical-flow steps per window (44 steps span 45 frames)
    returns: 0-based start step of the window whose summed speed is largest.
             The earliest window wins ties, and 0 is returned when no full
             window fits.
    """
    step_count, _ = dx_frames.shape
    magnitudes = np.sqrt(np.square(dx_frames) + np.square(dy_frames))
    motion_per_step = magnitudes.sum(axis=1)

    best_start = 0
    best_total = -1.0
    last_start = step_count - window_size
    start = 0
    while start <= last_start:
        total = float(motion_per_step[start:start + window_size].sum())
        # Strict comparison keeps the first of several equal maxima.
        if total > best_total:
            best_start, best_total = start, total
        start += 1
    return best_start

def select_random_windows(exclude_idx, num_steps, window_size=44, count=2, rng=None):
    """
    Pick random window start indices that stay clear of the main window.

    exclude_idx: start of the main window (0-based optical-flow step)
    num_steps: total number of optical-flow steps
    window_size: window length in optical-flow steps
    count: number of start indices to draw
    rng: optional object exposing sample(population, k), e.g. random.Random(seed);
         the global `random` module is used when omitted
    returns: list of `count` distinct start indices whose windows share no step
             with the window starting at exclude_idx, or [] when fewer than
             `count` such starts exist. The sampled windows are NOT guaranteed
             to be disjoint from one another.
    """
    last_start = num_steps - window_size
    # A start closer than window_size to exclude_idx would overlap the main
    # window. Candidates are built in ascending order so that a seeded RNG
    # always yields the same selection.
    candidates = [
        start
        for start in range(0, last_start + 1)
        if abs(start - exclude_idx) >= window_size
    ]
    if len(candidates) < count:
        return []
    sampler = random if rng is None else rng
    return sampler.sample(candidates, count)

MIN_SPEED = 5    # lower bound on flow speed, in pixels per frame
MAX_SPEED = 300 # upper bound on flow speed, in pixels per frame

def _summarize_flow_vectors(mv_all):
    """
    Reduce one step's displacement vectors to the 7 per-step statistics.

    mv_all: shape=(N_valid, 2), (dx, dy) of the successfully tracked points
    returns: list of 7 floats
             [cnt, mean_sp, std_sp, ang_mean, ang_std, sum_vert, sum_horiz];
             all zeros when no vector survives the speed filter
    """
    empty = [0.0] * 7
    if mv_all.size == 0:
        return empty

    # Keep only vectors whose speed lies strictly between the two bounds.
    speed = np.linalg.norm(mv_all, axis=1)
    keep = (speed > MIN_SPEED) & (speed < MAX_SPEED)
    mv = mv_all[keep]
    if mv.size == 0:
        return empty

    dx_f = mv[:, 0]
    dy_f = mv[:, 1]
    sp_f = speed[keep]
    ang_f = np.degrees(np.arctan2(dy_f, dx_f))

    # The angle mean is a plain arithmetic mean of degrees (no wrap-around
    # correction); only the spread below uses the wrap-around distance to it.
    ang_mean = float(np.mean(ang_f))
    diff = np.abs(ang_f - ang_mean)
    diff = np.where(diff > 180, 360 - diff, diff)
    ang_std = float(np.sqrt(np.mean(diff**2)))

    return [
        float(mv.shape[0]),
        float(np.mean(sp_f)), float(np.std(sp_f)),
        ang_mean, ang_std,
        float(np.sum(dy_f)), float(np.sum(dx_f)),
    ]


def compute_of_sequence(gray_frames_45):
    """
    Compute per-step optical-flow statistics for a short grayscale clip.

    gray_frames_45: NumPy array, shape = (num_frames, H, W), dtype=uint8
      - the callers in this notebook pass 45 frames, which yields 44 steps
    returns:
      seq_arr: NumPy array, shape = (num_frames - 1, 7), dtype=float32
        per-step statistics:
        [cnt, mean_sp, std_sp, ang_mean, ang_std, sum_vert, sum_horiz]
        A step is all zeros when optical flow returns no result or when no
        vector survives the speed filter.
    """
    num_frames = gray_frames_45.shape[0]
    H, W = gray_frames_45.shape[1], gray_frames_45.shape[2]
    # The tracking grid is identical for every step, so it is built once.
    grid = generate_grid_points(W, H, GRID_SPACING)
    grid_xy = grid.reshape(-1, 2)

    stats = []
    for i in range(1, num_frames):
        p1, st, _ = cv.calcOpticalFlowPyrLK(
            gray_frames_45[i - 1], gray_frames_45[i], grid, None, **lk_params
        )
        if p1 is None:
            # Optical flow produced no result for this step.
            stats.append([0.0] * 7)
            continue
        tracked = st.reshape(-1) == 1
        mv_all = p1.reshape(-1, 2)[tracked] - grid_xy[tracked]
        stats.append(_summarize_flow_vectors(mv_all))

    # reshape keeps the (steps, 7) layout even when there are zero steps.
    return np.array(stats, dtype=np.float32).reshape(-1, 7)


In [None]:
# ----------------------------------------
# 3) Load the checklist of already-processed samples
# ----------------------------------------
if os.path.exists(COMPLETED_TXT):
    with open(COMPLETED_TXT, 'r') as f:
        processed = {line.strip() for line in f}
else:
    processed = set()

# Create the CSV with only its header row when it does not exist yet
if not os.path.exists(OUTPUT_CSV):
    columns = ["파일명", "시작프레임", "T/F"]
    columns.extend(f"feat_{i}" for i in range(44 * 7))
    pd.DataFrame(columns=columns).to_csv(OUTPUT_CSV, index=False, encoding="utf-8-sig")



In [None]:
# ----------------------------------------
# 4) 처리 함수: 한 샘플 폴더만 로컬 tmp로 복사 → 처리 → 삭제
# ----------------------------------------
def _load_gray_frames(frame_dir, frame_names):
    """
    Read the named images as grayscale and stack them into shape=(N, H, W).

    raises: ValueError naming the offending file when an image cannot be read
            or its shape differs from the first frame
    """
    frames = []
    for name in frame_names:
        img = cv.imread(os.path.join(frame_dir, name), cv.IMREAD_GRAYSCALE)
        if img is None:
            raise ValueError(f"unreadable frame: {name}")
        if frames and img.shape != frames[0].shape:
            raise ValueError(
                f"frame {name} has shape {img.shape}, expected {frames[0].shape}"
            )
        frames.append(img)
    return np.stack(frames, axis=0)


def process_sample(sample_path, sample_name, label):
    """
    Copy one sample folder to local scratch space, extract its feature rows,
    append them to OUTPUT_CSV and remove the local copy again.

    sample_path: the sample folder on Drive ('.../Y/<sample>' or '.../N/<sample>')
    sample_name: sample folder name
    label: 1 (Y) or 0 (N)

    Y samples need at least 45 PNG frames and yield one row built from the first
    45 frames. N samples need at least 300 PNG frames and yield one row for the
    peak-motion window plus one per random window. Samples with too few frames
    are skipped silently (nothing is written).

    raises: ValueError when a frame is unreadable or frames differ in shape
    """
    local_path = os.path.join(LOCAL_TMP_ROOT, sample_name)
    if os.path.exists(local_path):
        shutil.rmtree(local_path)

    try:
        # 1) Copy the sample folder to the local scratch directory
        shutil.copytree(sample_path, local_path)

        # 2) PNG frames in the local copy, in sorted filename order
        frame_files = sorted(
            f for f in os.listdir(local_path) if f.lower().endswith(".png")
        )

        rows = []
        if label == 1:
            if len(frame_files) < 45:
                return
            gray_arr = _load_gray_frames(local_path, frame_files[:45])  # (45, H, W)
            flat_308 = compute_of_sequence(gray_arr).flatten().tolist()  # 44*7 = 308
            rows.append([sample_name, 1, 1] + flat_308)
        else:
            if len(frame_files) < 300:
                return
            gray_arr300 = _load_gray_frames(local_path, frame_files[:300])  # (300, H, W)
            dx_frames, dy_frames = compute_dxdy_sequence(gray_arr300)
            peak_idx = find_peak_window(dx_frames, dy_frames, window_size=44)
            random_idxs = select_random_windows(
                exclude_idx=peak_idx,
                num_steps=dx_frames.shape[0],
                window_size=44,
                count=2
            )
            # Peak window first, then the random windows
            for start_f in [peak_idx] + random_idxs:
                segment = gray_arr300[start_f : start_f + 45]  # (45, H, W)
                flat_308 = compute_of_sequence(segment).flatten().tolist()
                rows.append([sample_name, start_f + 1, 0] + flat_308)

        # All rows of a sample are appended in one write, so a failure while
        # computing features cannot leave a partially written sample behind.
        # The header is read once; it makes DataFrame construction fail if the
        # row width does not match the CSV.
        header = pd.read_csv(OUTPUT_CSV, nrows=0).columns
        pd.DataFrame(rows, columns=header).to_csv(
            OUTPUT_CSV, mode='a', index=False, header=False, encoding="utf-8-sig"
        )
    finally:
        # 3) Always remove the local copy, including on early return or error
        shutil.rmtree(local_path, ignore_errors=True)



In [None]:
# ----------------------------------------
# 5) Main: iterate over the Y and N folders with tqdm progress bars
# ----------------------------------------
def _process_sample_dir(samples_dir, sample_names, tag, label):
    """
    Run process_sample on every unprocessed sample folder of one class.

    samples_dir: folder that contains the sample folders
    sample_names: entries of samples_dir to visit, in order
    tag: class folder name ("Y" or "N"), used in progress and error messages
    label: label passed on to process_sample (1 for Y, 0 for N)

    A failing sample is reported and skipped; it is not marked as processed.
    """
    for sample_name in tqdm(sample_names, desc=f"Processing {tag} samples"):
        if sample_name in processed:
            continue
        sample_path = os.path.join(samples_dir, sample_name)
        if not os.path.isdir(sample_path):
            continue
        try:
            process_sample(sample_path, sample_name, label=label)
            # Remember the sample in memory as well, so that re-running this
            # cell in the same kernel does not append duplicate CSV rows.
            processed.add(sample_name)
            with open(COMPLETED_TXT, 'a') as f:
                f.write(sample_name + '\n')
        except Exception as e:
            print(f"[오류] {tag}/{sample_name} 처리 중 에러: {e}")
            tmp_dir = os.path.join(LOCAL_TMP_ROOT, sample_name)
            if os.path.exists(tmp_dir):
                shutil.rmtree(tmp_dir)


# Y folder (label = 1)
dir_Y = os.path.join(BASE_DIR, "Y")
if os.path.isdir(dir_Y):
    y_samples = sorted(os.listdir(dir_Y))
    _process_sample_dir(dir_Y, y_samples, "Y", 1)

# N folder (label = 0)
dir_N = os.path.join(BASE_DIR, "N")
if os.path.isdir(dir_N):
    n_samples = sorted(os.listdir(dir_N))
    _process_sample_dir(dir_N, n_samples, "N", 0)

print("모든 샘플 처리가 완료되었습니다.")


Processing Y samples:   0%|          | 0/15288 [00:00<?, ?it/s]

[오류] Y/00487_H_D_FY_C5 처리 중 에러: all input arrays must have the same shape
[오류] Y/01684_Y_E_FY_C3 처리 중 에러: all input arrays must have the same shape
[오류] Y/02279_H_A_FY_C1 처리 중 에러: all input arrays must have the same shape
[오류] Y/02283_H_A_FY_C4 처리 중 에러: all input arrays must have the same shape
[오류] Y/02699_H_A_BY_C6 처리 중 에러: all input arrays must have the same shape


Processing N samples:   0%|          | 0/5112 [00:00<?, ?it/s]

[오류] N/00117_H_A_N_C8 처리 중 에러: all input arrays must have the same shape
[오류] N/00751_O_E_N_C4 처리 중 에러: all input arrays must have the same shape
[오류] N/00811_O_E_N_C1 처리 중 에러: all input arrays must have the same shape
모든 샘플 처리가 완료되었습니다.
