1. Colab 런타임 준비
GPU 활성화

메뉴 > 런타임 > 런타임 유형 변경에서 하드웨어 가속기: GPU 선택

구글 드라이브 마운트 (선택 사항)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


2. HRNet-Image-Classification 레포지토리 클론 및 의존성 설치

In [1]:
# 레포지토리 클론
!git clone https://github.com/HRNet/HRNet-Image-Classification.git
%cd HRNet-Image-Classification

# 필요 패키지 설치 (torch, torchvision 등)
!pip install -r requirements.txt

# Hugging Face Datasets 설치
!pip install datasets

Cloning into 'HRNet-Image-Classification'...
remote: Enumerating objects: 169, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (24/24), done.[K
remote: Total 169 (delta 52), reused 41 (delta 41), pack-reused 104 (from 1)[K
Receiving objects: 100% (169/169), 186.75 KiB | 26.68 MiB/s, done.
Resolving deltas: 100% (94/94), done.
/content/HRNet-Image-Classification
Collecting EasyDict==1.7 (from -r requirements.txt (line 1))
  Downloading easydict-1.7.tar.gz (6.2 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
[31mERROR: Ignored the following yanked versions: 3.4.11.39, 3.4.17.61, 4.4.0.42, 4.4.0.44, 4.5.4.58, 4.5.5.62, 4.7.0.68[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement opencv-python==3.4.1.15 (from versions: 3.4.0.14, 3.4.10.37, 3.4.11.41, 3.4.11.43, 3.4.11.45, 3.4.13.47, 3.4.15.55, 3.4.16.57, 3.4.16.59, 3.4.17.63, 3.4.18.65, 4.3.0.38, 4.4.0.40, 4.4.0.46, 4.5.1.48, 4.5.3.56, 4.5.4.60, 4.5.5.64, 

3. 데이터셋 로딩 및 분류용 전처리
Hugging Face segmentation 데이터셋을 로드한 뒤, 마스크 유무로 0/1 레이블을 만든 뒤 로컬에 ImageFolder 구조로 저장합니다.

In [2]:
from datasets import load_dataset
from PIL import Image
import os

# 1) 데이터 로드 및 train/val split (원본 train 1502장을 80/20으로)
ds = load_dataset(
    "dwb2023/brain-tumor-image-dataset-semantic-segmentation"
)
train_ds, val_ds, test_ds = (
    ds["train"].train_test_split(test_size=0.2, seed=42)["train"],
    ds["train"].train_test_split(test_size=0.2, seed=42)["test"],
    ds["test"],  # test 스플릿 215장
)

# 2) 저장할 폴더 구조 생성 (train, val, test)
base = "/content/HRNet-Image-Classification/brain_tumor_classification/imagenet/images"
for phase in ("train", "val", "test"):
    for cls in ("no_tumor", "tumor"):
        os.makedirs(f"{base}/{phase}/{cls}", exist_ok=True)

# 3) category_id 로 레이블링 후 저장 함수
def save_split(split_ds, phase):
    for i, item in enumerate(split_ds):
        img = item["image"]
        cid = int(item["category_id"])
        # 1 → no_tumor, 2 → tumor
        label = "tumor"    if cid == 2 else \
                "no_tumor" if cid == 1 else None
        if label is None:
            continue
        img.save(f"{base}/{phase}/{label}/{i}.png")

# 4) 각 스플릿 저장
save_split(train_ds, "train")
save_split(val_ds,   "val")
save_split(test_ds,  "test")

# 5) 결과 확인
print("Train/Tumor   :", len(os.listdir(f"{base}/train/tumor")))
print("Train/NoTumor :", len(os.listdir(f"{base}/train/no_tumor")))
print("Val/Tumor     :", len(os.listdir(f"{base}/val/tumor")))
print("Val/NoTumor   :", len(os.listdir(f"{base}/val/no_tumor")))
print("Test/Tumor    :", len(os.listdir(f"{base}/test/tumor")))
print("Test/NoTumor  :", len(os.listdir(f"{base}/test/no_tumor")))


README.md:   0%|          | 0.00/6.22k [00:00<?, ?B/s]

(…)-00000-of-00001-1d7ef230dfbdad78.parquet:   0%|          | 0.00/113M [00:00<?, ?B/s]

(…)-00000-of-00001-0031debf8df4462b.parquet:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

(…)-00000-of-00001-bce5cef16a5cce11.parquet:   0%|          | 0.00/32.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1502 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/215 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/429 [00:00<?, ? examples/s]

Train/Tumor   : 578
Train/NoTumor : 623
Val/Tumor     : 153
Val/NoTumor   : 148
Test/Tumor    : 97
Test/NoTumor  : 118


In [6]:
# Colab 셀에서
!find . -maxdepth 2 -type f -name "*.yaml"

./experiments/cls_hrnet_w30_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w32_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_brain_tumor.yaml
./experiments/cls_hrnet_w64_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w40_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w44_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w18_small_v2_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w48_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w18_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
./experiments/cls_hrnet_w18_small_v1_sgd_lr5e-2_wd1e-4_bs32_x100.yaml


In [4]:
%cd /content/HRNet-Image-Classification
!cp experiments/cls_hrnet_w64_sgd_lr5e-2_wd1e-4_bs32_x100.yaml experiments/cls_hrnet_brain_tumor.yaml


/content/HRNet-Image-Classification


In [None]:
%%bash
sed -i 's|^  ROOT:.*|  ROOT: "/content/brain_tumor_classification"|' \
    experiments/cls_hrnet_brain_tumor.yaml
sed -i 's|^  NUM_CLASSES:.*|  NUM_CLASSES: 2|' \
    experiments/cls_hrnet_brain_tumor.yaml

In [11]:
!pip install tensorboardX
!pip install yacs
!pip install _init_paths

[31mERROR: Invalid requirement: '_init_paths': Expected package name at the start of dependency specifier
    _init_paths
    ^[0m[31m
[0m

In [None]:
%pwd

'/content/HRNet-Image-Classification'

In [None]:
!ls experiments

cls_hrnet_brain_tumor.yaml
cls_hrnet_w18_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w18_small_v1_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w18_small_v2_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w30_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w32_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w40_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w44_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w48_sgd_lr5e-2_wd1e-4_bs32_x100.yaml
cls_hrnet_w64_sgd_lr5e-2_wd1e-4_bs32_x100.yaml


In [None]:
!find /content/brain_tumor_classification/images -type d -name ".ipynb_checkpoints" -exec rm -rf {} +

find: ‘/content/brain_tumor_classification/images’: No such file or directory


In [None]:
%%bash
# brain_tumor_classification 아래 모든 .ipynb_checkpoints 폴더를 찾아 삭제
find /content/HRNet-Image-Classification/brain_tumor_classification \
     -type d -name ".ipynb_checkpoints" \
     -exec rm -rf {} +

In [11]:
!python tools/train.py \
  --cfg experiments/cls_hrnet_brain_tumor.yaml \
  --dataDir "/content/HRNet-Image-Classification/brain_tumor_classification"


=> creating /content/drive/MyDrive/Tumor_output/imagenet/cls_hrnet_brain_tumor
=> creating log/imagenet/cls_hrnet/cls_hrnet_brain_tumor_2025-05-01-20-28
Namespace(cfg='experiments/cls_hrnet_brain_tumor.yaml', modelDir='', logDir='', dataDir='/content/HRNet-Image-Classification/brain_tumor_classification', testModel='')
CfgNode({'OUTPUT_DIR': '/content/drive/MyDrive/Tumor_output/', 'LOG_DIR': 'log/', 'DATA_DIR': '/content/HRNet-Image-Classification/brain_tumor_classification', 'GPUS': (0,), 'WORKERS': 4, 'PRINT_FREQ': 1000, 'AUTO_RESUME': False, 'PIN_MEMORY': True, 'RANK': 0, 'CUDNN': CfgNode({'BENCHMARK': True, 'DETERMINISTIC': False, 'ENABLED': True}), 'MODEL': CfgNode({'NAME': 'cls_hrnet', 'INIT_WEIGHTS': True, 'PRETRAINED': '', 'NUM_JOINTS': 17, 'NUM_CLASSES': 1000, 'TAG_PER_JOINT': True, 'TARGET_TYPE': 'gaussian', 'IMAGE_SIZE': [224, 224], 'HEATMAP_SIZE': [64, 64], 'SIGMA': 2, 'EXTRA': CfgNode({'STAGE1': CfgNode({'NUM_MODULES': 1, 'NUM_RANCHES': 1, 'BLOCK': 'BOTTLENECK', 'NUM_BLOCK

In [17]:
%cd /content/HRNet-Image-Classification

/content/HRNet-Image-Classification


In [28]:
import sys, os

# 레포 루트와 lib 디렉터리를 모듈 탐색 경로에 추가
root = "/content/HRNet-Image-Classification"
lib  = os.path.join(root, "lib")
sys.path.insert(0, root)
sys.path.insert(0, lib)

# 올바르게 추가됐는지 확인
print([p for p in sys.path if "HRNet-Image-Classification" in p])

['/content/HRNet-Image-Classification/lib', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification/lib', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification/lib', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification', '/content/HRNet-Image-Classification']


In [29]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score, f1_score, recall_score, jaccard_score

# HRNet 의 config/모델 모듈 import
from config import config, update_config
import models
import argparse, os

# 1) Config 업데이트 (train.py 와 동일한 방식)
parser = argparse.ArgumentParser()
parser.add_argument('--cfg',     type=str,
                    default="experiments/cls_hrnet_brain_tumor.yaml")
parser.add_argument('--modelDir',type=str, default="")
parser.add_argument('--logDir',  type=str, default="")
parser.add_argument('--dataDir', type=str,
                    default="/content/HRNet-Image-Classification/brain_tumor_classification")
parser.add_argument('--testModel', type=str, default="")
args = parser.parse_args([])
update_config(config, args)

# 2) 모델 생성 & 체크포인트 로드
model = eval(f"models.{config.MODEL.NAME}.get_cls_net")(config)
ckpt = torch.load(
    "/content/drive/MyDrive/Tumor_output/imagenet/cls_hrnet_brain_tumor/model_best.pth.tar",
    map_location="cuda"
)
state_dict = ckpt.get("state_dict", ckpt)
model.load_state_dict(state_dict)
model = torch.nn.DataParallel(model, device_ids=list(config.GPUS)).cuda().eval()

# 3) Test DataLoader 준비
transform = transforms.Compose([
    transforms.Resize((config.MODEL.IMAGE_SIZE[1], config.MODEL.IMAGE_SIZE[0])),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
test_dir    = os.path.join(config.DATASET.ROOT, config.DATASET.TEST_SET)
test_ds     = datasets.ImageFolder(test_dir, transform=transform)
test_loader = DataLoader(
    test_ds,
    batch_size=config.TEST.BATCH_SIZE_PER_GPU,
    shuffle=False,
    num_workers=config.WORKERS,
    pin_memory=True
)

# 4) 예측 & 메트릭 계산
all_probs, all_preds, all_labels = [], [], []
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.cuda()
        logits = model(imgs)
        probs = torch.softmax(logits, dim=1)[:, 1]  # tumor 확률
        preds = (probs > 0.5).long()
        all_probs .extend(probs.cpu().numpy())
        all_preds .extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

rocauc = roc_auc_score(all_labels, all_probs)
f1     = f1_score    (all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
iou    = jaccard_score(all_labels, all_preds)

print(f"ROC-AUC : {rocauc:.4f}")
print(f"F1      : {f1:.4f}")
print(f"Recall  : {recall:.4f}")
print(f"IoU     : {iou:.4f}")


ROC-AUC : 0.9933
F1      : 0.9705
Recall  : 0.9673
IoU     : 0.9427


In [26]:
test_dir = os.path.join(config.DATASET.ROOT, "test")
print(test_ds.class_to_idx)      # {'no_tumor': 0, 'tumor': 1}
print(test_ds.classes)           # ['no_tumor','tumor']

{'no_tumor': 0, 'tumor': 1}
['no_tumor', 'tumor']


In [27]:
from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:\n", cm)

print(classification_report(
    all_labels, all_preds,
    target_names=test_ds.classes,
    digits=4
))

Confusion Matrix:
 [[115   3]
 [  0  97]]
              precision    recall  f1-score   support

    no_tumor     1.0000    0.9746    0.9871       118
       tumor     0.9700    1.0000    0.9848        97

    accuracy                         0.9860       215
   macro avg     0.9850    0.9873    0.9859       215
weighted avg     0.9865    0.9860    0.9861       215

