**개발환경 구성**

In [2]:
import os
import sys
import torch
import torchvision
import numpy as np
import pandas as pd
import cv2
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch import ToTensorV2

# Make the torchvision reference detection helpers importable from ./detection.
# Missing files are fetched from the `main` branch of pytorch/vision.
# NOTE(review): this executes code downloaded at runtime from an unpinned branch;
# consider pinning a tag that matches the installed torchvision version.
detection_dir = os.path.join(os.getcwd(), "detection")
os.makedirs(detection_dir, exist_ok=True)
urls = {
    "engine.py": "https://raw.githubusercontent.com/pytorch/vision/main/references/detection/engine.py",
    "utils.py": "https://raw.githubusercontent.com/pytorch/vision/main/references/detection/utils.py",
    "coco_utils.py": "https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_utils.py",
    "coco_eval.py": "https://raw.githubusercontent.com/pytorch/vision/main/references/detection/coco_eval.py",
    "transforms.py": "https://raw.githubusercontent.com/pytorch/vision/main/references/detection/transforms.py",
}
import urllib.request
for name, url in urls.items():
    dest = os.path.join(detection_dir, name)
    if os.path.exists(dest):
        continue
    # Download to a temporary name and rename only on success, so an interrupted
    # transfer can never leave a truncated file that later runs would treat as valid.
    partial = dest + ".part"
    try:
        print(f"Downloading {name}...")
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, dest)
    except Exception as e:
        # Best effort: report the failure and keep going with the remaining files.
        print(f"Failed to download {name}: {e}")
        try:
            if os.path.exists(partial):
                os.remove(partial)
        except OSError:
            pass

# Put the helper directory first on sys.path so `engine` resolves to the local copy.
if detection_dir not in sys.path:
    sys.path.insert(0, detection_dir)

try:
    from engine import train_one_epoch, evaluate
    print("Imported engine successfully.")
except Exception as e:
    # Best effort: later cells that use train_one_epoch/evaluate will fail if this did.
    print("Import engine failed:", e)


Imported engine successfully.


- Kaggle 데이터 로드

In [3]:
%pip install kagglehub --quiet

import importlib

try:
    # Resolve the package at runtime so a missing install is handled by the except branch.
    kagglehub = importlib.import_module('kagglehub')
    # Download the latest version of the dataset and report where it was stored.
    path = kagglehub.dataset_download('tarunbisht11/yolo-animal-detection-small')
    print('Path to dataset files:', path)
except Exception as e:
    print('kagglehub 설치/사용 실패:', e)
    # Fallback instructions, emitted as a single newline-joined message.
    print('\n'.join([
        '\n대체 방법 안내:',
        '1) Kaggle CLI 사용:',
        '   - pip install kaggle',
        '   - Kaggle API 토큰을 ~/.kaggle/kaggle.json에 저장',
        '   - 명령: kaggle datasets download -d tarunbisht11/yolo-animal-detection-small',
        '2) 브라우저에서 직접 다운로드 후 프로젝트에 복사',
        '\n원하시면 제가 Kaggle CLI 설치/예시 명령을 대신 실행해 드릴 수 있습니다.',
    ]))


Note: you may need to restart the kernel to use updated packages.


  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\SVT\.cache\kagglehub\datasets\tarunbisht11\yolo-animal-detection-small\versions\2


- 데이터 준비

In [4]:
# Root folder that holds train.csv / test.csv and the image folders.
# Set the ANIMAL_DATA_DIR environment variable to relocate the data; the default
# is the original author's location, so the resulting paths are unchanged there.
DATA_DIR = os.environ.get("ANIMAL_DATA_DIR", "C:\\Users\\SVT\\Desktop\\PyTorch\\data\\kaggle")
IMAGE_ROOT = os.path.join(DATA_DIR, "yolo-animal-detection-small")

train_csv_path = os.path.join(DATA_DIR, "train.csv")
test_csv_path = os.path.join(DATA_DIR, "test.csv")
train_images = os.path.join(IMAGE_ROOT, "train")
test_images = os.path.join(IMAGE_ROOT, "test")

- 데이터 파악

In [5]:
# Load the training annotation CSV and preview its first rows.
train_csv = pd.read_csv(train_csv_path)
train_csv.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,cats_001.jpg,474,266,cat,132,1,347,264
1,cats_002.jpg,474,474,cat,176,44,467,433
2,cats_003.jpg,474,314,cat,53,1,397,314
3,cats_004.jpg,474,355,cat,1,1,393,335
4,cats_005.jpg,474,316,cat,80,1,407,316


In [5]:
# Number of rows and columns in the training annotations
train_csv.shape

(1309, 8)

In [6]:
# Data type of each column
train_csv.dtypes

filename    object
width        int64
height       int64
class       object
xmin         int64
ymin         int64
xmax         int64
ymax         int64
dtype: object

In [7]:
# Summary of the frame: columns, non-null counts, dtypes and memory usage
train_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  1309 non-null   object
 1   width     1309 non-null   int64 
 2   height    1309 non-null   int64 
 3   class     1309 non-null   object
 4   xmin      1309 non-null   int64 
 5   ymin      1309 non-null   int64 
 6   xmax      1309 non-null   int64 
 7   ymax      1309 non-null   int64 
dtypes: int64(6), object(2)
memory usage: 81.9+ KB


In [8]:
# Distinct class names present in the training annotations (unique values)
print(train_csv['class'].unique())

['cat' 'monkey' 'dog']


In [6]:
# Load the test annotation CSV and preview its first rows.
test_csv = pd.read_csv(test_csv_path)
test_csv.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,cats_000.jpg,474,632,cat,103,72,436,603
1,cats_007.jpg,474,266,cat,301,37,460,265
2,cats_007.jpg,474,266,cat,90,1,392,266
3,cats_050.jpg,474,237,cat,157,40,314,205
4,cats_072.jpg,474,266,cat,187,1,474,266


In [10]:
# Number of rows and columns in the test annotations
test_csv.shape

(113, 8)

In [7]:
# Class names in order of first appearance; LabelMap assigns integer ids in this order.
# Must run before the encoding cells, which overwrite the 'class' column with integers.
categories = train_csv['class'].unique()    # extract the unique values
print(categories)

['cat' 'monkey' 'dog']


- 클래스 인코딩: 문자열 클래스명을 정수 ID로 변환
    - 0은 배경(background) 클래스로 예약되므로 실제 클래스 ID는 1부터 시작

In [8]:
class LabelMap:
    """Bidirectional mapping between class names and 1-based integer ids.

    Id 0 is left unused so it can represent the background class.

    Attributes:
        map_dict: class name -> integer id (1, 2, ...), in the order given.
        reverse_map_dict: integer id -> class name (exact inverse of map_dict).
    """

    def __init__(self, categories):
        """Assign ids 1..N to ``categories`` in iteration order."""
        self.map_dict = {}
        self.reverse_map_dict = {}
        for class_id, cat in enumerate(categories, start=1):
            self.map_dict[cat] = class_id
            # Keyed by the same 1-based id so inverse() round-trips fit().
            self.reverse_map_dict[class_id] = cat

    def fit(self, df, column):
        """Encode ``df[column]`` from class names to integer ids.

        The column is overwritten in place and ``df`` is returned. Values not in
        the mapping become NaN, so applying this twice to the same frame turns
        the already-encoded column into NaN.
        """
        df[column] = df[column].map(self.map_dict)
        return df

    def inverse(self, df, column):
        """Decode ``df[column]`` from integer ids back to class names.

        The column is overwritten in place and ``df`` is returned. Ids not in
        the mapping become NaN.
        """
        df[column] = df[column].map(self.reverse_map_dict)
        return df

In [9]:
# Build the label encoder; ids are assigned in the order of `categories`, starting at 1.
label_map = LabelMap(categories)
print(categories)

['cat' 'monkey' 'dog']


In [10]:
# Encode the training class names as integer ids.
# fit() overwrites the 'class' column in place, so re-running this cell without
# reloading train_csv maps the integers through the name-keyed dict and yields NaN.
train_csv = label_map.fit(train_csv, 'class')
train_csv.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,cats_001.jpg,474,266,1,132,1,347,264
1,cats_002.jpg,474,474,1,176,44,467,433
2,cats_003.jpg,474,314,1,53,1,397,314
3,cats_004.jpg,474,355,1,1,1,393,335
4,cats_005.jpg,474,316,1,80,1,407,316


In [11]:
# Encode the test class names with the same mapping used for the training data.
# fit() overwrites the 'class' column in place, so re-running this cell without
# reloading test_csv maps the integers through the name-keyed dict and yields NaN.
test_csv = label_map.fit(test_csv, 'class')
test_csv.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,cats_000.jpg,474,632,1,103,72,436,603
1,cats_007.jpg,474,266,1,301,37,460,265
2,cats_007.jpg,474,266,1,90,1,392,266
3,cats_050.jpg,474,237,1,157,40,314,205
4,cats_072.jpg,474,266,1,187,1,474,266


**creating torch dataset**

In [12]:
class AnimalDataset(torch.utils.data.Dataset):
    """Object-detection dataset returning ``(image, target)`` pairs.

    One sample corresponds to one distinct filename in the annotation frame;
    all rows sharing that filename contribute one bounding box each.

    Args:
        df: Annotation frame with columns ``filename``, ``width``, ``height``,
            ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax``.
        image_path: Directory that contains the image files.
        categories: Class names; stored on the instance, not used internally.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the same three keys.
        **kwargs: Forwarded to the base class constructor.
    """

    def __init__(self, df, image_path, categories, transforms=None, **kwargs):
        super().__init__(**kwargs)
        self.df = df.reset_index(drop=True)  # positional index, independent of the caller's
        self.image_path = image_path
        self.categories = categories
        self.images = self.df["filename"].unique()
        self.transforms = transforms

        # Precompute sample index -> annotation arrays so __getitem__ never touches pandas.
        # Grouping once avoids re-scanning the whole frame for every image.
        self.index_map = {}
        grouped = self.df.groupby("filename", sort=False)
        for i, img in enumerate(self.images):
            rows = grouped.get_group(img)
            self.index_map[i] = {
                'labels': rows['class'].to_numpy(),
                'xmins': rows['xmin'].to_numpy(),
                'ymins': rows['ymin'].to_numpy(),
                'xmaxs': rows['xmax'].to_numpy(),
                'ymaxs': rows['ymax'].to_numpy(),
                'width': int(rows['width'].values[0]),
                'height': int(rows['height'].values[0]),
                'filename': img
            }

    def __len__(self):
        """Number of distinct images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(img_tensor, target)`` where ``img_tensor`` is a float32 tensor
            scaled to [0, 1] and ``target`` is a dict with ``boxes`` (N, 4)
            float32 in (xmin, ymin, xmax, ymax) order, ``labels`` (N,) int64,
            ``image_id`` (1,) int64, ``area`` (N,) float32 and ``iscrowd``
            (N,) int64 zeros.

        Raises:
            FileNotFoundError: The image file does not exist.
            OSError: The file exists but OpenCV could not decode it.
        """
        info = self.index_map[idx]
        image_file = os.path.join(self.image_path, info['filename'])

        # cv2.imread signals failure by returning None; fail early with a clear message
        # instead of letting cvtColor raise an unrelated-looking error.
        if not os.path.isfile(image_file):
            raise FileNotFoundError(f"Image file not found: {image_file}")
        img = cv2.imread(image_file)
        if img is None:
            raise OSError(f"Could not read image file: {image_file}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # Keep annotations as numpy until after the transforms have run.
        labels = info['labels']
        boxes = np.stack(
            [info['xmins'], info['ymins'], info['xmaxs'], info['ymaxs']], axis=1
        ).astype(np.float32)

        if self.transforms is not None:
            transformed = self.transforms(
                image=img, bboxes=boxes.tolist(), labels=labels.tolist()
            )
            img = transformed["image"]
            # reshape keeps the (N, 4) layout even when no boxes are returned.
            boxes = np.asarray(transformed["bboxes"], dtype=np.float32).reshape(-1, 4)
            labels = np.asarray(transformed["labels"], dtype=np.int64)

        img_tensor = torch.as_tensor(img, dtype=torch.float32)
        # Heuristic: a 3-D image whose first axis is not 3 is taken to be HWC and
        # converted to CHW; an image that is already CHW is left untouched.
        if img_tensor.ndim == 3 and img_tensor.shape[0] != 3:
            img_tensor = img_tensor.permute(2, 0, 1)

        labels = torch.as_tensor(labels, dtype=torch.int64)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        areas = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        image_id = torch.tensor([idx], dtype=torch.int64)
        iscrowd = torch.zeros((len(labels),), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": areas,
            "iscrowd": iscrowd
        }

        return img_tensor, target

    def get_height_and_width(self, image):
        """Look up the recorded size of the image whose filename is ``image``.

        Returns:
            ``(width, height)`` taken from the first annotation row of that file.
        """
        # NOTE(review): despite the method name, the tuple is (width, height), and the
        # argument is a filename rather than a sample index. Kept as-is because callers
        # are not visible here; confirm the expected order before relying on it.
        image_data = self.df.loc[self.df['filename'] == image]
        return image_data["width"].values[0], image_data["height"].values[0]


- 훈련 및 검증을 위한 증강 및 변환 정의

In [13]:
# Training pipeline: random horizontal flip (50%) and random brightness/contrast
# change (20%), followed by conversion of the image to a torch tensor.
# bbox_params tells albumentations that boxes are pascal_voc
# (xmin, ymin, xmax, ymax) and that class ids arrive via the `labels` keyword,
# so boxes and labels are transformed together with the image.
transform_train = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    ToTensorV2(p=1)
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

In [14]:
# Evaluation pipeline: no augmentation, only conversion of the image to a torch tensor.
# Uses the same pascal_voc box format and `labels` field as the training pipeline.
transform_test = A.Compose([
    ToTensorV2(p=1)
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

  self._set_keys()


- 데이터 로더가 샘플들을 하나의 배치로 묶을 때 호출하는 함수 (collate_fn)

In [15]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples.

    For ``(image, target)`` samples this yields ``(images, targets)`` without
    stacking, so images of different sizes can share a batch.
    """
    per_field = zip(*batch)
    return tuple(per_field)

- 데이터셋 생성

In [20]:
# Build the train/test datasets from the encoded annotation frames.
# NOTE(review): the DataLoader cell below constructs the same two datasets again,
# so one of the two constructions is redundant.
train_dataset = AnimalDataset(train_csv, train_images, categories, transform_train)
test_dataset = AnimalDataset(test_csv, test_images, categories, transform_test)

- 데이터 세트에서 데이터 로더 생성

In [16]:
# Recreate datasets with optimized class
train_dataset = AnimalDataset(train_csv, train_images, categories, transform_train)
test_dataset = AnimalDataset(test_csv, test_images, categories, transform_test)

# Optimized DataLoader settings
# On Windows/Jupyter, num_workers=0 is often fastest (avoids spawn overhead)
# For GPU: use pin_memory=True and num_workers=2-4 with persistent_workers=True
# Training loader: shuffled batches of 4; collate_fn keeps each batch as tuples
# of per-sample images/targets instead of stacking them.
data_loader_train = torch.utils.data.DataLoader(
    train_dataset, 
    batch_size=4, 
    shuffle=True, 
    num_workers=0,  # Start with 0 on Windows/Notebook
    collate_fn=collate_fn,
    pin_memory=False  # Set to True if using CUDA
)

# Test loader: one image per batch, in dataset order.
data_loader_test = torch.utils.data.DataLoader(
    test_dataset, 
    batch_size=1, 
    shuffle=False, 
    num_workers=0,
    collate_fn=collate_fn,
    pin_memory=False
)

print("DataLoaders created successfully.")


DataLoaders created successfully.


- 데이터로더에서 이미지 플로팅 및 검증

In [19]:
def plot_images(images, targets):
    """Show each image of a batch with its bounding boxes drawn in red.

    Args:
        images: Iterable of images, each a CHW ``torch.Tensor`` or an HWC array.
            Values above 1.0 are taken to be on a 0-255 scale.
        targets: Iterable of dicts whose ``"boxes"`` entry holds (N, 4) boxes
            as (xmin, ymin, xmax, ymax), either a tensor or a numpy array.
    """
    fig_count = 0
    for image, target in zip(images, targets):
        # Move to CPU and convert CHW tensors to the HWC layout used for display.
        if isinstance(image, torch.Tensor):
            sample = image.detach().permute(1, 2, 0).cpu().numpy()
        else:
            sample = image

        # Bring the values into [0, 1].
        if sample.max() > 1.0:
            sample = sample / 255.0
        # permute() yields a non-contiguous view and np.clip keeps that layout;
        # OpenCV can only draw into a C-contiguous buffer, so make an explicit copy.
        sample = np.ascontiguousarray(np.clip(sample, 0, 1), dtype=np.float32)

        boxes = target["boxes"]
        if isinstance(boxes, torch.Tensor):
            boxes = boxes.detach().cpu().numpy()
        boxes = np.asarray(boxes).astype(np.int32).reshape(-1, 4)

        # tolist() gives plain Python ints, which OpenCV accepts as point coordinates.
        for xmin, ymin, xmax, ymax in boxes.tolist():
            cv2.rectangle(sample,
                          (xmin, ymin),
                          (xmax, ymax),
                          (1.0, 0.0, 0.0), 2)  # Red box

        # Render once, after the boxes have been drawn into the image.
        fig, ax = plt.subplots(1, 1, figsize=(8, 6))
        ax.imshow(sample)
        ax.set_title(f"Image {fig_count} - {len(boxes)} objects")
        ax.axis('off')
        plt.tight_layout()
        plt.show()
        fig_count += 1

    print(f"✓ Plotted {fig_count} image(s)")


In [17]:
# Pull a single batch from the training loader; collate_fn returns it as (images, targets).
images, targets = next(iter(data_loader_train))

In [20]:
# Visual check: draw the batch fetched above together with its bounding boxes.
plot_images(images, targets)

: 

In [None]:
# Rough timing check of the data pipeline: loads one sample directly from the
# dataset and one batch through the DataLoader, printing wall-clock time in ms.
# Any exception is caught and summarised rather than raised, and the
# restart instructions at the end are printed unconditionally.
import time

print("=" * 60)
print("IMPORTANT: Restart kernel and run all cells above for optimal performance!")
print("=" * 60)
print("\n현재 성능 측정 시도...")

try:
    # Single sample test (no batching)
    print("\n단일 샘플 로드 시간 측정:")
    t0 = time.perf_counter()
    img, target = train_dataset[0]
    t1 = time.perf_counter()
    print(f"✓ 샘플 로드 시간: {(t1-t0)*1000:.2f}ms")
    print(f"  이미지 형태: {img.shape}")
    print(f"  라벨 개수: {len(target['labels'])}")
    
    # Batch test
    # The timed span includes creating the loader iterator as well as fetching the batch.
    print("\n배치 로드 시간 측정:")
    t0 = time.perf_counter()
    images, targets = next(iter(data_loader_train))
    t1 = time.perf_counter()
    print(f"✓ 배치 로드 시간: {(t1-t0)*1000:.2f}ms")
    print(f"  배치 크기: {len(images)}")
    print(f"  평균 샘플당: {(t1-t0)*1000/len(images):.2f}ms")
    
except Exception as e:
    # Only the exception type and the first 100 characters of its message are shown.
    print(f"⚠ 오류 발생 (Kernel 재시작 필요): {type(e).__name__}")
    print(f"  메시지: {str(e)[:100]}")
    
print("\n해결 방법:")
print("1. Kernel 메뉴 → Restart Kernel 선택")
print("2. 셀 1부터 순서대로 모두 실행 (Run All or Run All Above)")
print("3. 그러면 최적화된 성능으로 실행됩니다!")
print("=" * 60)


IMPORTANT: Restart kernel and run all cells above for optimal performance!

현재 성능 측정 시도...

단일 샘플 로드 시간 측정:
✓ 샘플 로드 시간: 12.80ms
  이미지 형태: torch.Size([3, 266, 474])
  라벨 개수: 1

배치 로드 시간 측정:
✓ 배치 로드 시간: 61.25ms
  배치 크기: 4
  평균 샘플당: 15.31ms

해결 방법:
1. Kernel 메뉴 → Restart Kernel 선택
2. 셀 1부터 순서대로 모두 실행 (Run All or Run All Above)
3. 그러면 최적화된 성능으로 실행됩니다!
