<a href="https://colab.research.google.com/github/hida1211/paper_signate/blob/main/APTOS_base.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =========================================================
# 0. 環境セットアップ
# =========================================================
!pip install -q fsspec huggingface_hub pandas pillow matplotlib tqdm pytorchvideo
!pip install -q --upgrade torchcodec

from google.colab import drive
drive.mount("/content/drive")

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/766.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m766.4/766.4 kB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[?25hDrive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# =========================================================
# 1. import & 定数
# =========================================================
import os, subprocess, random, textwrap, torch, torch.nn as nn
from pathlib import Path
import pandas as pd
import torchvision.transforms as T
import torchvision.transforms.v2.functional as F
from torchvision import models
from torch.utils.data import DataLoader
from tqdm import tqdm
from datetime import datetime
from pytorchvideo.data.encoded_video import EncodedVideo
from PIL import Image                                ### ★ 追加

# ---- Input locations on Google Drive
DRIVE_ROOT   = Path("/content/drive/MyDrive/kaggle/APTOS")
SHARD_DIR    = DRIVE_ROOT / "APTOS_train-val"
TRAIN_CSV    = DRIVE_ROOT / "APTOS_train-val_annotation.csv"
VAL2_CSV     = DRIVE_ROOT / "APTOS_val2.csv"

# ---- Working directories on the local VM disk
VIDEO_DIR        = Path("/content/aptos_videos")
VAL2_VIDEO_DIR   = Path("/content/val2_videos")      # destination directory for the val2 zip
FRAMES_DIR       = VAL2_VIDEO_DIR / "aptos_val2" / "frames"

# Create the working directories up front so later cells can write into them.
for p in (VIDEO_DIR, VAL2_VIDEO_DIR):
    p.mkdir(parents=True, exist_ok=True)

# ---- Hyper-parameters
NUM_CLASSES  = 35     # output size of the classifier head
IMAGE_SIZE   = 224    # frames are resized to IMAGE_SIZE x IMAGE_SIZE
BATCH_SIZE   = 32
EPOCHS       = 3
FPS_ORIG     = 30     # NOTE(review): not referenced in the visible cells
SEED         = 0
DEVICE       = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.backends.cudnn.benchmark = True

# Seed every RNG the notebook relies on: Python's `random` and torch
# (torch drives DataLoader shuffling and the initialisation of new layers).
random.seed(SEED)
torch.manual_seed(SEED)

print("Device →", DEVICE)

Device → cuda


In [None]:
# =========================================================
# 2. Extract the split tar archive of training videos
# =========================================================
# The archive is stored under SHARD_DIR as aptos_ophnet.tar.gz.* parts. The parts
# are concatenated into one temporary tar.gz on the local disk and then extracted
# into VIDEO_DIR (this two-step approach is often faster than streaming).
#
# subprocess.run(..., check=True) is used instead of `!cmd` so a failing step
# raises instead of being silently ignored. The marker file is written only after
# a successful extraction, so re-running the notebook skips finished work.
TMP_TAR        = Path("/tmp/aptos_ophnet.tar.gz")
EXTRACT_MARKER = VIDEO_DIR / ".extract_done"

if not EXTRACT_MARKER.exists():
    # A shell is required here for the glob expansion and the output redirection.
    subprocess.run(
        f'cat "{SHARD_DIR}"/aptos_ophnet.tar.gz.* > "{TMP_TAR}"',
        shell=True, check=True,
    )
    # --strip-components=1 drops the leading directory stored inside the archive.
    subprocess.run(
        ["tar", "-xzf", str(TMP_TAR), "-C", str(VIDEO_DIR), "--strip-components=1"],
        check=True,
    )
    TMP_TAR.unlink()          # the temporary copy is no longer needed
    EXTRACT_MARKER.touch()
    print("✅ training videos extracted")


# =========================================================
# 2.1 Extract val2.zip
# =========================================================
VAL2_ZIP = DRIVE_ROOT / "APTOS_val2.zip"
# Skip when the zip is missing or the frames directory already exists.
if VAL2_ZIP.exists() and not FRAMES_DIR.exists():
    subprocess.run(["unzip", "-q", str(VAL2_ZIP), "-d", str(VAL2_VIDEO_DIR)], check=True)
    print("✅ val2 frames extracted →", FRAMES_DIR)



set -e
cd "/content/drive/MyDrive/kaggle/APTOS/APTOS_train-val"
# --strip-components=1 で先頭の aptos_videos/ を捨てる
cat aptos_ophnet.tar.gz.* | tar --strip-components=1 -xzf - -C "/content/aptos_videos/aptos_videos"

✅ streaming extract done


In [None]:
# =========================================================
# 3. Load the annotation CSV and match it against the videos on disk
# =========================================================
# Video ids (file stems) of every mp4 that is actually present in VIDEO_DIR.
disk_vids = set(path.stem for path in VIDEO_DIR.glob("*.mp4"))

# Keep only annotation rows whose video exists locally.
ann_df = (
    pd.read_csv(TRAIN_CSV)
    .loc[lambda frame: frame["video_id"].isin(disk_vids)]
    .reset_index(drop=True)
)

# Partition by the `split` column.
train_df = ann_df.loc[ann_df["split"].eq("train")]
val_df   = ann_df.loc[ann_df["split"].eq("val")]
print(f"train clips: {len(train_df)} / val clips: {len(val_df)}")


train clips: 8768 / val clips: 2180


In [None]:
# =========================================================
# 4. Dataset definitions
# =========================================================
# Resize transform shared by both datasets below: scales a frame to
# IMAGE_SIZE x IMAGE_SIZE with antialiasing enabled.
T_RESIZE = T.Resize((IMAGE_SIZE, IMAGE_SIZE), antialias=True)

# ---- TorchCodec 版 (mp4 用、変更なし)
from torchcodec.decoders import VideoDecoder
class OphNetDS_TC(torch.utils.data.Dataset):
    """Single-frame dataset: decodes one frame per annotated clip with TorchCodec.

    Each row of ``df`` must provide ``video_id``, ``start`` and ``end``. The
    sampled timestamp is passed to ``VideoDecoder.get_frame_played_at`` as a
    time in seconds.

    Args:
        df: annotation table, one row per clip (its index is reset internally).
        center: if True sample the clip midpoint ``(start + end) / 2``,
            otherwise sample at ``start``.
        gpu_decode: request CUDA decoding when CUDA is available. Inside
            DataLoader worker processes decoding always falls back to the CPU.
        root: directory containing ``<video_id>.mp4`` files.

    Returns (per item):
        ``(frame, label)`` where ``frame`` is a normalised float CPU tensor
        resized to ``IMAGE_SIZE`` and ``label`` is ``int(row.phase_id)`` when
        that column exists, else the item index.
    """

    def __init__(self, df, center=True, gpu_decode=True, root=VIDEO_DIR):
        self.df, self.center, self.root = df.reset_index(drop=True), center, root
        self.device = "cuda" if (gpu_decode and torch.cuda.is_available()) else "cpu"
        # Decode failures are reported once per dataset copy to avoid flooding the log.
        self._decode_warned = False

    def __len__(self): return len(self.df)

    def _decode_device(self):
        """Return the device to decode on for the current process."""
        # CUDA generally cannot be used from DataLoader worker processes that were
        # forked after the parent initialised CUDA; attempting it raises, and the
        # fallback below would then turn every sample into a black frame.
        if self.device == "cuda" and torch.utils.data.get_worker_info() is not None:
            return "cpu"
        return self.device

    def __getitem__(self, idx):
        import warnings

        row   = self.df.iloc[idx]
        fp    = self.root / f"{row.video_id}.mp4"
        sec   = (row.start + row.end) / 2 if self.center else row.start
        try:
            decoder = VideoDecoder(fp, device=self._decode_device(), num_ffmpeg_threads=1,
                                   seek_mode="approximate")
            frame = decoder.get_frame_played_at(sec).data
        except Exception as err:
            # Best effort: keep the batch alive with a black frame, but make the
            # failure visible instead of silently training on zeros.
            if not getattr(self, "_decode_warned", False):
                warnings.warn(
                    f"frame decode failed for {fp} at t={sec} ({err!r}); "
                    "substituting a black frame (further failures are not reported)",
                    RuntimeWarning,
                )
                self._decode_warned = True
            frame = torch.zeros((3, IMAGE_SIZE, IMAGE_SIZE), dtype=torch.uint8)
        frame = T_RESIZE(frame).float().div(255.)
        frame = F.normalize(frame,
                             mean=[0.485, 0.456, 0.406],
                             std =[0.229, 0.224, 0.225])
        label = int(row.phase_id) if "phase_id" in row else idx
        # Always hand CPU tensors to the DataLoader so collation and
        # pin_memory work regardless of the decode device.
        return frame.cpu(), label

# ---- val2 only: dataset that reads frames stored as image files
class OphNetFrameDS(torch.utils.data.Dataset):
    """Dataset over image frames listed in the ``Frame_id`` column of ``df``.

    Each item is ``(image, idx)``: the normalised float image tensor and the
    row position, so callers can write predictions back in the original order.
    """

    def __init__(self, df, frames_dir=FRAMES_DIR):
        self.frames_dir = Path(frames_dir)
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        frame_name = self.df.iloc[idx].Frame_id
        pil_img = Image.open(self.frames_dir / frame_name).convert("RGB")
        # uint8 tensor in [0, 255] -> resized -> float in [0, 1]
        pixels = F.pil_to_tensor(pil_img)
        pixels = T_RESIZE(pixels)
        pixels = pixels.float().div(255.)
        normalized = F.normalize(pixels,
                                 mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        # The row position is returned unchanged in place of a label.
        return normalized, idx

In [None]:
# =========================================================
# 5. model / train util
# =========================================================
def get_model():
    """Build a ResNet-50 with IMAGENET1K_V1 weights and a NUM_CLASSES-way head.

    Returns:
        The model with its final fully connected layer replaced by a fresh
        ``nn.Linear`` producing ``NUM_CLASSES`` logits.
    """
    backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    feature_dim = backbone.fc.in_features
    backbone.fc = nn.Linear(feature_dim, NUM_CLASSES)
    return backbone

@torch.no_grad()
def eval_epoch(m, loader):
    """Return top-1 accuracy of ``m`` over every batch yielded by ``loader``.

    The model is switched to eval mode; gradients are disabled by the decorator.
    """
    m.eval()
    n_correct, n_seen = 0, 0
    for inputs, targets in tqdm(loader, desc="🧪 val", leave=False):
        logits = m(inputs.to(DEVICE))
        hits = logits.argmax(1) == targets.to(DEVICE)
        n_correct += hits.sum().item()
        n_seen += targets.size(0)
    return n_correct / n_seen

def train_epoch(m, loader, crit, opt):
    """Run one training pass over ``loader`` and return the mean loss.

    Args:
        m: model to train (switched to train mode).
        loader: DataLoader yielding ``(inputs, targets)`` batches.
        crit: loss function called as ``crit(logits, targets)``.
        opt: optimizer stepping ``m``'s parameters.

    Returns:
        Sum of per-batch loss times batch size, divided by ``len(loader.dataset)``.
    """
    m.train()
    loss_sum = 0
    for inputs, targets in tqdm(loader, desc="🔧 train", leave=False):
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        batch_loss = crit(m(inputs), targets)
        batch_loss.backward()
        opt.step()
        # Weight the batch mean by batch size so the final average is per sample.
        loss_sum += batch_loss.item() * targets.size(0)
    return loss_sum / len(loader.dataset)

In [None]:
# =========================================================
# 6. DataLoaders & training
# =========================================================
# Decode on the CPU: both loaders use worker processes, and CUDA decoding is
# not usable from DataLoader workers. A failed decode is replaced by a black
# frame inside the dataset, so a GPU-decode failure would silently feed zeros.
# NOTE(review): the previously recorded run (loss ≈ ln(35), val_acc ≈ 1/35) is
# consistent with that failure mode — confirm after re-running.
train_loader = DataLoader(OphNetDS_TC(train_df, gpu_decode=False), batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=8, pin_memory=True)
val_loader   = DataLoader(OphNetDS_TC(val_df, gpu_decode=False),   batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=8, pin_memory=True)

model = get_model().to(DEVICE)

# Per-class clip counts aligned to class ids 0..NUM_CLASSES-1. reindex guarantees
# exactly NUM_CLASSES entries even if some class never appears in the annotations
# (otherwise the weight vector would have the wrong length or be misaligned).
freq = ann_df.phase_id.value_counts().reindex(range(NUM_CLASSES), fill_value=0).values
class_counts = torch.tensor(freq, dtype=torch.float)

# Inverse-frequency weights scaled to sum to NUM_CLASSES; classes with no
# samples get weight 0 instead of an infinite weight.
inv_freq = torch.where(class_counts > 0,
                       1 / class_counts.clamp(min=1),
                       torch.zeros_like(class_counts))
class_weights = inv_freq * NUM_CLASSES / inv_freq.sum()

criterion = nn.CrossEntropyLoss(weight=class_weights.to(DEVICE))
opt = torch.optim.AdamW(model.parameters(), lr=3e-4)

for ep in range(1, EPOCHS + 1):
    print(f"\n[{datetime.now().strftime('%H:%M:%S')}] ⏩ Epoch {ep}/{EPOCHS}")
    tl = train_epoch(model, train_loader, criterion, opt)
    va = eval_epoch(model, val_loader)
    print(f"✅ Epoch {ep}: train_loss {tl:.4f} | val_acc {va:.4f}")

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 204MB/s]



[15:22:51] ⏩ Epoch 1/3




✅ Epoch 1: train_loss 3.5484 | val_acc 0.0344

[15:23:37] ⏩ Epoch 2/3




✅ Epoch 2: train_loss 3.5168 | val_acc 0.0275

[15:24:21] ⏩ Epoch 3/3


                                                       

✅ Epoch 3: train_loss 3.5233 | val_acc 0.0541




In [None]:
# =========================================================
# 7. val2 inference & submission CSV
# =========================================================
# Rows are kept in the CSV's original order.
val2_df = pd.read_csv(VAL2_CSV)
jpg_files = list(FRAMES_DIR.glob("*.jpg"))
assert len(val2_df) == len(jpg_files)

loader_kwargs = dict(
    batch_size=BATCH_SIZE, shuffle=False,
    num_workers=8, pin_memory=True, persistent_workers=True,
)
val2_loader = DataLoader(OphNetFrameDS(val2_df), **loader_kwargs)

# The dataset yields each row's position, so predictions are written back by index.
preds = torch.zeros(len(val2_df), dtype=torch.long)
model.eval()
with torch.no_grad():
    for batch, row_idx in tqdm(val2_loader, desc="infer"):
        logits = model(batch.to(DEVICE))
        preds[row_idx] = logits.argmax(1).cpu()

# Append the predictions as a new column on a copy of the input table and save it.
out_df = val2_df.assign(Predict_phase_id=preds.numpy().astype(int))
save_path = DRIVE_ROOT / "APTOS_val2_pred.csv"
out_df.to_csv(save_path, index=False)
print("✅ saved →", save_path)

infer: 100%|██████████| 1403/1403 [01:05<00:00, 21.57it/s]

✅ saved → /content/drive/MyDrive/kaggle/APTOS/APTOS_val2_pred.csv





In [None]:
out_df

Unnamed: 0,Video_name,Frame_id,Predict_phase_id
0,case_2000,case_2000_0.jpg,18
1,case_2000,case_2000_1.jpg,18
2,case_2000,case_2000_2.jpg,18
3,case_2000,case_2000_3.jpg,18
4,case_2000,case_2000_4.jpg,18
...,...,...,...
44890,case_2101,case_2101_44890.jpg,1
44891,case_2101,case_2101_44891.jpg,1
44892,case_2101,case_2101_44892.jpg,1
44893,case_2101,case_2101_44893.jpg,1


In [None]:
out_df['Predict_phase_id'].unique()

array([18,  1,  4])