<a href="https://colab.research.google.com/github/forexms78/AI-05-/blob/main/%EB%AF%B8%EC%85%987_7%ED%8C%80_%EB%B0%95%EB%B3%91%ED%98%B8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from urllib.request import urlretrieve; urlretrieve("https://raw.githubusercontent.com/c0z0c/jupyter_hangul/refs/heads/beta/helper_c0z0c_dev.py", "helper_c0z0c_dev.py")
import helper_c0z0c_dev as helper

🌐 https://c0z0c.github.io/jupyter_hangul
ℹ️ NumPy 2.0.2 (v2.x+): 호환성 모드 적용됨
install fonts-nanum...
Mounted at /content/drive
✅ 설정 완료: 한글 폰트, plt 전역 등록, pandas 확장, 캐시 기능
pd commit 저장 경로 = /content/drive/MyDrive


In [2]:
# @title 데이터셋 다운로드

import kagglehub
dataset_identifier = "zippyz/cats-and-dogs-breeds-classification-oxford-dataset"

path = kagglehub.dataset_download(dataset_identifier)

print(path)

Using Colab cache for faster access to the 'cats-and-dogs-breeds-classification-oxford-dataset' dataset.
/kaggle/input/cats-and-dogs-breeds-classification-oxford-dataset


In [3]:
# @title device 설정

import torch
import os
import pandas as pd

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
# @title 데이터셋 경로 설정

trainval_file_path = os.path.join(path, "annotations", "annotations", "trainval.txt")
test_file_path = os.path.join(path, "annotations", "annotations", "test.txt")

image_dir = os.path.join(path, "images", "images")
xml_dir = os.path.join(path, "annotations", "annotations", "xmls")

In [5]:
# @title  trainval.txt 및 test.txt 파일 로드

df_trainval = pd.read_csv(trainval_file_path, sep="\\s+", header=None)
df_trainval.columns = ["Image", "ClassID", "Species", "BreedID"]

df_test = pd.read_csv(test_file_path, sep="\\s+", header=None)
df_test.columns = ["Image", "ClassID", "Species", "BreedID"]

print(f"Train/Validation 데이터 수: {len(df_trainval)}개")
print(f"Test 데이터 수: {len(df_test)}개")
print(df_trainval.head())

Train/Validation 데이터 수: 3680개
Test 데이터 수: 3669개
            Image  ClassID  Species  BreedID
0  Abyssinian_100        1        1        1
1  Abyssinian_101        1        1        1
2  Abyssinian_102        1        1        1
3  Abyssinian_103        1        1        1
4  Abyssinian_104        1        1        1


In [7]:
# @title 데이터 불균형 확인
print(df_trainval['Species'].value_counts())

Species
2    2492
1    1188
Name: count, dtype: int64


In [30]:
# @title XML 파일 파싱 함수

import xml.etree.ElementTree as ET

classes = ["background", "cat", "dog"]
num_classes = len(classes)

def parse_xml(xml_path: str, species_id: int):

    tree = ET.parse(xml_path)
    root = tree.getroot()

    boxes = []
    labels = [] #

    for obj in root.findall('object'):


        if obj.find('name').text == 'face':
            bndbox = obj.find('bndbox')

            xmin = int(bndbox.find('xmin').text)
            ymin = int(bndbox.find('ymin').text)
            xmax = int(bndbox.find('xmax').text)
            ymax = int(bndbox.find('ymax').text)

            boxes.append([xmin, ymin, xmax, ymax])


            labels.append(species_id)

    return boxes, labels

In [32]:
# @title XML 파싱 함수 테스트 실행

import os
import pandas as pd

first_row = df_trainval.iloc[0]
image_name = first_row['Image']
species_id = first_row['Species']

xml_file_name = image_name + '.xml'
test_xml_path = os.path.join(xml_dir, xml_file_name)


test_boxes, test_labels = parse_xml(test_xml_path, species_id)

print(f"파싱 결과")
print(f"{image_name}.jpg")
print(f"종(Species ID){species_id} ({CLASSES[species_id]})")
print(f"추출된 바인딩 박스{test_boxes}")
print(f"추출된 레이블{test_labels}")

파싱 결과
Abyssinian_100.jpg
종(Species ID)1 (cat)
추출된 바인딩 박스[]
추출된 레이블[]


In [33]:
# @title 파이토치 데이터셋 클래스 정의

from torch.utils.data import Dataset
import cv2
import numpy as np

class PetFaceDetectionDataset(Dataset):

    def __init__(self, df, image_dir, xml_dir, species_map, transform=None):
        self.df = df
        self.image_dir = image_dir
        self.xml_dir = xml_dir
        self.species_map = species_map
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_name = row['Image']
        species_id = row['Species']

        image_path = os.path.join(self.image_dir, image_name + '.jpg')
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        xml_file_name = image_name + '.xml'
        xml_path = os.path.join(self.xml_dir, xml_file_name)

        boxes_list, labels_list = parse_xml(xml_path, self.species_map.get(species_id))

        if not boxes_list:
             boxes = torch.zeros((0, 4), dtype=torch.float32)
             labels = torch.zeros((0,), dtype=torch.int64)
             image = np.zeros((224, 224, 3), dtype=np.uint8)
        else:
            boxes = torch.as_tensor(boxes_list, dtype=torch.float32)
            labels = torch.as_tensor(labels_list, dtype=torch.int64)

        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)

        image = image.transpose((2, 0, 1)).astype(np.float32) / 255.0
        image = torch.as_tensor(image)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        return image, target, image_name + '.jpg'

In [34]:
# @title 데이터셋 매핑 정보 정의

SPECIES_TO_LABEL = {
    1: 1,
    2: 2
}

pet_dataset = PetFaceDetectionDataset(
    df=df_trainval,
    image_dir=image_dir,
    xml_dir=xml_dir,
    species_map=SPECIES_TO_LABEL
)

print(f"데이터셋 수: {len(pet_dataset)}")

데이터셋 수: 3680


In [35]:
# @title 바운딩 박스 좌표 변환을 위한 함수 생성

def transform_boxes(boxes, original_size, new_size):
    orig_width, orig_height = original_size
    new_width, new_height = new_size

    scale_w = new_width / orig_width
    scale_h = new_height / orig_height

    transformed_boxes = boxes.clone()

    transformed_boxes[:, 0] = boxes[:, 0] * scale_w
    transformed_boxes[:, 2] = boxes[:, 2] * scale_w

    transformed_boxes[:, 1] = boxes[:, 1] * scale_h
    transformed_boxes[:, 3] = boxes[:, 3] * scale_h

    return transformed_boxes

In [36]:
# @title 전처리 및 증강 클래스 정의

import torch
import cv2
import numpy as np

class ComposeTransforms:

    def __init__(self, target_size=(300, 300)):
        self.target_size = target_size

    def __call__(self, image, boxes, labels):
        original_height, original_width = image.shape[:2]

        target_width, target_height = self.target_size

        resized_image = cv2.resize(image, self.target_size, interpolation=cv2.INTER_LINEAR)

        if boxes.numel() > 0:
            orig_size = (original_width, original_height)
            new_size = self.target_size
            boxes = transform_boxes(boxes, orig_size, new_size)

        return resized_image, boxes, labels

In [37]:
# @title 바운딩 박스 개수와 크기를 배치 내의 데이터 하나로 묶어주는 함수 생성

from torch.utils.data import DataLoader

def custom_collate_fn(batch):
    images = []
    targets = []
    filenames = []

    for img, target, filename in batch:
        images.append(img)
        targets.append(target)
        filenames.append(filename)
    images_tensor = torch.stack(images, 0)

    return images_tensor, targets, filenames

In [38]:
# @title 학습용 Dataset 및 DataLoader 생성

from torch.utils.data import random_split

train_transform = ComposeTransforms(target_size=(300, 300))
val_transform = ComposeTransforms(target_size=(300, 300))

# 80 대 20 비율로 나눔
total_size = len(pet_dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

torch.manual_seed(42)
train_dataset, val_dataset = random_split(
    pet_dataset, [train_size, val_size]
)

# 학습용 데이터셋
train_df = df_trainval.iloc[train_dataset.indices]
train_dataset_with_transform = PetFaceDetectionDataset(
    df=train_df,
    image_dir=image_dir,
    xml_dir=xml_dir,
    species_map=SPECIES_TO_LABEL,
    transform=train_transform
)

# 검증용 데이터셋
val_df = df_trainval.iloc[val_dataset.indices]
val_dataset_with_transform = PetFaceDetectionDataset(
    df=val_df,
    image_dir=image_dir,
    xml_dir=xml_dir,
    species_map=SPECIES_TO_LABEL,
    transform=val_transform
)

In [39]:
# @title 데이터로더 생성
BATCH_SIZE = 8 # 배치 크기 (한 번에 모델에 넣어 학습시킬 데이터의 양)

train_loader = DataLoader(
    train_dataset_with_transform,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=custom_collate_fn,
    num_workers=2
)

val_loader = DataLoader(
    val_dataset_with_transform,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=custom_collate_fn,
    num_workers=2
)

print(f"배치 크기: {BATCH_SIZE}")
print(f"학습 샘플 수: {len(train_dataset_with_transform)}개")
print(f"검증 샘플 수: {len(val_dataset_with_transform)}개")

배치 크기: 8
학습 샘플 수: 2944개
검증 샘플 수: 736개


In [40]:
# @title SSD 모델 준비

import torchvision
from torchvision.models.detection.ssd import SSD300_VGG16_Weights

model = torchvision.models.detection.ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT).to(device)

num_classes = len(classes)
model.head.classification_head.num_classes = num_classes

In [42]:
# @title 옵티마이저설정

import torch.optim as optim
import torch.nn as nn

LEARNING_RATE = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4

optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY
)

scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

In [43]:
# @title 모델 학습 루프

from tqdm import tqdm
import time
import torch

NUM_EPOCHS = 10

def train_model(model, train_loader, optimizer, scheduler, device, num_epochs):

    model.train()

    for epoch in range(num_epochs):
        start_time = time.time()
        total_loss = 0.0

        for images, targets, _ in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            optimizer.zero_grad()
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            losses.backward()
            optimizer.step()
            total_loss += losses.item()

        scheduler.step()

        avg_loss = total_loss / len(train_loader)
        end_time = time.time()
        current_lr = optimizer.param_groups[0]['lr']

        print(f"Epoch {epoch+1} |  Learning Rate: {current_lr:.6f}")

In [48]:
# @title torchmetrics 다운로드

!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.15.2 torchmetrics-1.8.2


In [49]:
# @title 모델 평가 성능지표 정의

import torch
import torchmetrics

def validate_model(model, val_loader, device, num_classes):

    model.eval()
    metric = torchmetrics.detection.MeanAveragePrecision(
        iou_thresholds=[0.5, 0.75],
        max_detection_thresholds=[1, 10, 100],
        class_metrics=True,
        num_classes=num_classes,
        box_format='xyxy'
    ).to(device)

    with torch.no_grad():
        for images, targets, _ in tqdm(val_loader, desc="Validation"):

            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            predictions = model(images)

            metric.update(predictions, targets)

    results = metric.compute()
    model.train()

    return results