In [None]:
!pip install insightface

Collecting insightface
  Downloading insightface-0.7.3.tar.gz (439 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m439.5/439.5 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting onnx (from insightface)
  Downloading onnx-1.20.1-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading onnx-1.20.1-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (17.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.5/17.5 MB[0m [31m131.5 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: insightface
  Building wheel for insightface (pyproject.toml) ... [?25l[?25hdone
  Created wheel for insightface: filename=insightface-0.7.3-cp312-cp312-linux_x86_64.whl size=1071491 sha256=f115af346638df3945f0dc51253cb592bf2f0f328b

In [None]:
!pip install onnxruntime

Collecting onnxruntime
  Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.23.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (17.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m125.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected pac

In [None]:
import os
import cv2
import numpy as np
import torch
import pandas as pd
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from insightface.app import FaceAnalysis
import zipfile
import gc
from google.colab import drive
import random
from pathlib import Path

In [None]:
# Mount Google Drive so the dataset and checkpoint stored under MyDrive are reachable.
drive.mount('/content/drive')

# Sample submission CSV; its 'filename' column lists the test samples to score.
SAMPLE_CSV_PATH = "/content/drive/MyDrive/HECTO/Dataset/sample_submission.csv"
# Zip archive with the test data that gets extracted into /content/TEST_DIR.
ZIP_SAVE_PATH = "/content/drive/MyDrive/HECTO/Dataset/test_pp.zip"
# Checkpoint read with torch.load; its "state_dict" entry holds the model weights.
CKPT_PATH = "/content/drive/MyDrive/HECTO/checkpoints/01_backbone_selection/convnext_tiny_best.pth.tar"
# Destination of the submission CSV written at the end of the notebook.
CSV_SAVE_PATH = "/content/drive/MyDrive/HECTO/Dataset/submission.csv"

Mounted at /content/drive


In [None]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Create the extraction directory up front (no error if it already exists).
os.makedirs("/content/TEST_DIR", exist_ok=True)

# Extract with the standard library instead of shelling out to `unzip`:
# - reuses ZIP_SAVE_PATH rather than repeating the archive path as a literal;
# - existing files are overwritten silently, so re-running the cell never stalls
#   on unzip's interactive "replace?" prompt.
with zipfile.ZipFile(ZIP_SAVE_PATH) as test_archive:
    test_archive.extractall("/content/TEST_DIR")


In [None]:
# Single seed shared by every random number generator the notebook touches.
SEED = 42

# Seed Python's `random`, NumPy's legacy global RNG, and PyTorch (CPU + all CUDA devices).
for _seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(SEED)

In [None]:
# Lower-case file extensions (without the dot) that are handled as still images.
IMG_EXTS = [
    'jpg',
    'jpeg',
    'png',
    'jfif',
]

# Lower-case file extensions (without the dot) that are handled as videos.
VID_EXTS = [
    'mp4',
    'mov',
    'avi',
]

In [None]:
# Per-channel normalisation constants (the standard ImageNet mean / std).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Pipelines are built lazily once and then reused. Rebuilding three A.Compose
# objects on every call (i.e. for every single video frame) is wasted work and
# re-emits albumentations' construction-time warnings each time.
_PIPELINES = {}


def _normalize_and_tensorize():
    """Return fresh Normalize + ToTensorV2 steps (not shared between pipelines)."""
    return [
        A.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
        ToTensorV2(),
    ]


def _get_pipelines():
    """Build (on first use) and return the albumentations pipelines keyed by role."""
    if not _PIPELINES:
        # Deterministic view used as the first TTA sample: centre crop only.
        _PIPELINES["base"] = A.Compose(
            [A.CenterCrop(height=224, width=224, p=1.0)] + _normalize_and_tensorize()
        )
        # Randomised view used for the remaining TTA samples.
        _PIPELINES["tta"] = A.Compose(
            [
                A.HorizontalFlip(p=0.5),
                A.RandomBrightnessContrast(p=0.5),
                A.ShiftScaleRotate(
                    shift_limit=0.02,
                    scale_limit=0.02,
                    rotate_limit=0,
                    border_mode=cv2.BORDER_CONSTANT,
                    p=0.3,
                ),
                A.CenterCrop(height=224, width=224, p=1.0),
            ]
            + _normalize_and_tensorize()
        )
        # No spatial change at all: the image is fed at whatever size it already has.
        _PIPELINES["plain"] = A.Compose(_normalize_and_tensorize())
    return _PIPELINES


def predict_single_image(model, image, tta=False, num_tta=10):
    """
    Score one image with the model and return a single probability.

    Args:
        model: Network called on a batch of image tensors; its output is passed
            through a sigmoid. Assumed to already be in eval mode (the caller
            is responsible for that).
        image: Image array passed to the albumentations pipelines as `image=`.
            The caller in this notebook supplies RGB arrays. With `tta=True`
            it is centre-cropped to 224x224; with `tta=False` it is not resized
            or cropped, so it is presumably expected to be 224x224 already
            (TODO confirm against the preprocessing step).
        tta: If True, average the prediction over test-time-augmented views
            (one deterministic centre crop plus randomly augmented ones).
            If False, run a single forward pass on the normalised image.
        num_tta: Total number of views when `tta=True` (default 10, i.e. one
            base view and nine random ones). Values below 1 use only the base view.

    Returns:
        float: mean of the sigmoid outputs over all views.
    """
    pipelines = _get_pipelines()

    # Send the input to wherever the model's weights actually live instead of
    # trusting the notebook-level `device`; fall back to it for parameter-less models.
    try:
        target_device = next(model.parameters()).device
    except (StopIteration, AttributeError):
        target_device = device

    if tta:
        extra_views = max(int(num_tta) - 1, 0)
        views = [pipelines["base"](image=image)['image']]
        views.extend(
            pipelines["tta"](image=image)['image'] for _ in range(extra_views)
        )
        batch = torch.stack(views).to(target_device)
    else:
        batch = pipelines["plain"](image=image)['image'].unsqueeze(0).to(target_device)

    with torch.no_grad():
        probs = torch.sigmoid(model(batch))

    # A single-view batch has exactly one probability, so the mean equals that value.
    return probs.mean().item()


In [None]:
# Build the architecture without pretrained weights; the weights come from the
# checkpoint below. num_classes=1 gives the model a single output.
model = timm.create_model("convnext_tiny", pretrained=False, num_classes=1)

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load(CKPT_PATH, map_location=device)
model.load_state_dict(checkpoint["state_dict"])

# Both .to() and .eval() return the module itself. Ending the cell with an
# assignment (instead of a bare `model.eval()`) keeps the full module repr
# from being dumped into the notebook output.
model = model.to(device).eval()

ConvNeXt(
  (stem): Sequential(
    (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
    (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
  )
  (stages): Sequential(
    (0): ConvNeXtStage(
      (downsample): Identity()
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (norm): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=96, out_features=384, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=384, out_features=96, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): Identity()
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)


In [None]:
# Directory the test archive was extracted into; the inference loop looks here for
# top-level image files and for one folder of frames per video.
TEST_DIR = "/content/TEST_DIR"

In [None]:
# Neutral score used whenever a sample is missing, unreadable, or of an unknown type.
FALLBACK_PROB = 0.5


def _frame_sort_key(frame_name):
    """Order frames by the number embedded in their file name (frame_2 before frame_10)."""
    digits = ''.join(filter(str.isdigit, frame_name))
    # A name without any digit would make int('') raise ValueError; sort it last instead.
    return (int(digits) if digits else float('inf'), frame_name)


sample_df = pd.read_csv(SAMPLE_CSV_PATH)

# Index the top-level files once by their exact stem. This is used when a listed
# image is not present under its listed extension. Matching on the exact stem
# (rather than a prefix) prevents e.g. "TEST_1" from resolving to "TEST_10.jpg",
# and restricting to regular files prevents resolving to a video's frame folder.
files_by_stem = {}
for entry in sorted(os.listdir(TEST_DIR)):
    if os.path.isfile(os.path.join(TEST_DIR, entry)):
        files_by_stem.setdefault(Path(entry).stem, entry)

final_probs = []

# ---------------- Inference ----------------
for filename in tqdm(sample_df['filename'], desc="Inference"):
    file_stem = Path(filename).stem  # e.g. TEST_000
    file_ext = filename.lower().split('.')[-1]
    prob = FALLBACK_PROB

    # ---------- [CASE 1] Video: a folder named after the video holds its frames ----------
    if file_ext in VID_EXTS:
        video_folder = os.path.join(TEST_DIR, file_stem)

        if os.path.isdir(video_folder):
            frame_names = sorted(
                (f for f in os.listdir(video_folder) if f.lower().endswith('.jpg')),
                key=_frame_sort_key,
            )

            frame_scores = []
            for f_name in frame_names:
                img_bgr = cv2.imread(os.path.join(video_folder, f_name))
                if img_bgr is None:
                    continue  # unreadable frame: skip it rather than abort the video
                image = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

                # Frames are scored without TTA and without any crop/resize.
                frame_scores.append(predict_single_image(model, image, tta=False))

            # The video score is the mean over its readable frames.
            if frame_scores:
                prob = float(np.mean(frame_scores))

    # ---------- [CASE 2] Image: a single file directly under TEST_DIR ----------
    elif file_ext in IMG_EXTS:
        img_path = os.path.join(TEST_DIR, filename)

        # The file may exist under a different extension; look it up by exact stem.
        if not os.path.exists(img_path):
            alternative = files_by_stem.get(file_stem)
            img_path = os.path.join(TEST_DIR, alternative) if alternative else None

        img_bgr = cv2.imread(img_path) if img_path else None
        if img_bgr is not None:
            image = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            # Images go through the TTA path, which centre-crops to 224x224.
            prob = predict_single_image(model, image, tta=True)

    # ---------- [CASE 3] Anything else keeps the fallback score ----------
    final_probs.append(prob)

# Every row of the sample submission must have received exactly one score.
assert len(final_probs) == len(sample_df)

# ---------------- Save CSV ----------------
sample_df['prob'] = final_probs
sample_df.to_csv(CSV_SAVE_PATH, index=False)
print(f"✅ submission.csv 생성 완료 → {CSV_SAVE_PATH}")

  original_init(self, **validated_kwargs)
Inference: 100%|██████████| 500/500 [00:49<00:00, 10.13it/s]

✅ submission.csv 생성 완료 → /content/drive/MyDrive/HECTO/Dataset/submission.csv



