In [3]:
!pip3 install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl (39.5 MB)
   ---------------------------------------- 0.0/39.5 MB ? eta -:--:--
   -- ------------------------------------- 2.4/39.5 MB 12.2 MB/s eta 0:00:04
   ---- ----------------------------------- 4.7/39.5 MB 11.9 MB/s eta 0:00:03
   ------- -------------------------------- 7.1/39.5 MB 11.8 MB/s eta 0:00:03
   --------- ------------------------------ 9.4/39.5 MB 12.0 MB/s eta 0:00:03
   ----------- ---------------------------- 11.8/39.5 MB 11.9 MB/s eta 0:00:03
   -------------- ------------------------- 14.2/39.5 MB 11.9 MB/s eta 0:00:03
   ---------------- ----------------------- 16.5/39.5 MB 11.8 MB/s eta 0:00:02
   ------------------- -------------------- 18.9/39.5 MB 11.8 MB/s eta 0:00:02
   --------------------- ------------------ 21.2/39.5 MB 11.8 MB/s eta 0:00:02
   ------------------------ --------------- 23.9/

In [54]:
import os
import random
import cv2
from PIL import Image
from collections import Counter

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# 1. ImageTransform class
class ImageTransform:
    """Phase-aware image preprocessing.

    Builds one torchvision ``Compose`` pipeline per phase and stores them in
    ``self.data_transform`` under the keys ``'train'`` and ``'val'``.

    Args:
        resize: Size passed to ``RandomResizedCrop`` (train) and
            ``CenterCrop`` (val).
        mean: Mean passed to ``Normalize``.
        std: Standard deviation passed to ``Normalize``.
    """

    def __init__(self, resize, mean, std):
        # Training pipeline: random crop covering 50-100% of the image plus a
        # random horizontal flip, then tensor conversion and normalization.
        train_steps = [
            transforms.RandomResizedCrop(resize, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        # Validation pipeline: resize to 256, center crop, then tensor
        # conversion and normalization (no random steps).
        val_steps = [
            transforms.Resize(256),
            transforms.CenterCrop(resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]
        self.data_transform = {
            'train': transforms.Compose(train_steps),
            'val': transforms.Compose(val_steps),
        }

    def __call__(self, img, phase='train'):
        """Apply the pipeline registered for ``phase`` to ``img``.

        Raises:
            KeyError: If ``phase`` is not a key of ``self.data_transform``.
        """
        pipeline = self.data_transform[phase]
        return pipeline(img)

# 2. DogVsCatDataset class
class DogVsCatDataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs from a list of image paths.

    The label is derived from the name of the directory that directly
    contains the image: ``'dog'`` (case-insensitive) maps to 1, any other
    directory name maps to 0.

    Args:
        file_list: Sequence of image file paths.
        transform: Optional callable invoked as ``transform(img, phase)``.
            When ``None`` the RGB PIL image is returned unchanged.
        phase: Phase name forwarded to ``transform`` (e.g. ``'train'``).
    """

    def __init__(self, file_list, transform=None, phase='train'):
        self.file_list = file_list
        self.transform = transform
        self.phase = phase

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.file_list)

    def __getitem__(self, index):
        """Load the image at ``index`` and return ``(image, label)``."""
        img_path = self.file_list[index]

        # Open inside a context manager so the file handle is released right
        # away; convert() returns a separate image that outlives the handle.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        # transform defaults to None, so only apply it when one was supplied.
        if self.transform is not None:
            img = self.transform(img, self.phase)

        # The label comes from the parent folder name, not from the file name.
        folder_name = os.path.basename(os.path.dirname(img_path)).lower()
        label = 1 if folder_name == 'dog' else 0

        return img, label

# 3. Load the image file paths and split them
cat_dir = './dogs-vs-cats/Cat'
dog_dir = './dogs-vs-cats/Dog'

# Sort each listing so the order does not depend on what os.listdir returns.
cat_images_filepaths = [os.path.join(cat_dir, name) for name in os.listdir(cat_dir)]
cat_images_filepaths.sort()
dog_images_filepaths = [os.path.join(dog_dir, name) for name in os.listdir(dog_dir)]
dog_images_filepaths.sort()

images_filepaths = cat_images_filepaths + dog_images_filepaths
# Keep only the paths for which cv2.imread returns an image (not None).
correct_images_filepaths = list(
    filter(lambda path: cv2.imread(path) is not None, images_filepaths)
)

# Seed before shuffling so the split is the same on every run.
random.seed(42)
random.shuffle(correct_images_filepaths)

# First 400 paths -> train, last 10 -> test, everything in between -> val.
train_images_filepaths = correct_images_filepaths[:400]
test_images_filepaths = correct_images_filepaths[-10:]
val_images_filepaths = correct_images_filepaths[400:-10]

# 4. Preprocessing and DataLoader definitions
size = 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform = ImageTransform(size, mean, std)

# Only the training split uses the 'train' pipeline; val and test both use 'val'.
train_dataset = DogVsCatDataset(file_list=train_images_filepaths, transform=transform, phase='train')
val_dataset = DogVsCatDataset(file_list=val_images_filepaths, transform=transform, phase='val')
test_dataset = DogVsCatDataset(file_list=test_images_filepaths, transform=transform, phase='val')

# Only the training loader shuffles; the test loader yields one image per batch.
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=8, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)

print(f"Train: {len(train_images_filepaths)}, Val: {len(val_images_filepaths)}, Test: {len(test_images_filepaths)}\n")

# 5. Debug output: print the first batch of every loader
# Each entry is (printed name, loader, whether a blank line follows the labels).
for _split_name, _loader, _blank_line_after in (
    ("train", train_loader, True),
    ("val", val_loader, True),
    ("test", test_loader, False),
):
    # for/break (rather than next(iter(...))) keeps an empty loader silent.
    for images, labels in _loader:
        print(f"{_split_name}:")
        # Batch dimension follows the loader's batch_size:
        # 8 for train/val, 1 for test.
        print(images.shape)
        if _blank_line_after:
            print(labels, "\n")
        else:
            print(labels)
        break

Train: 400, Val: 92, Test: 10

train:
torch.Size([8, 3, 224, 224])
tensor([0, 0, 0, 0, 0, 1, 1, 1]) 

val:
torch.Size([8, 3, 224, 224])
tensor([1, 1, 0, 1, 1, 1, 0, 1]) 

test:
torch.Size([1, 3, 224, 224])
tensor([0])


In [56]:
from collections import Counter
import os

def count_labels_from_paths(paths):
    """Count how many paths belong to each class.

    The class of a path is taken from the name of its parent directory:
    ``'dog'`` (case-insensitive) counts as ``'dog'``, every other directory
    name counts as ``'cat'``.

    Args:
        paths: Iterable of file paths.

    Returns:
        collections.Counter mapping ``'dog'`` / ``'cat'`` to their counts.
    """
    return Counter(
        'dog' if os.path.basename(os.path.dirname(p)).lower() == 'dog' else 'cat'
        for p in paths
    )

# Print the label distribution of each split
for _title, _paths in (
    ("Train Label Count:", train_images_filepaths),
    ("Val Label Count:", val_images_filepaths),
    ("Test Label Count:", test_images_filepaths),
):
    print(_title, count_labels_from_paths(_paths))


Train Label Count: Counter({'dog': 201, 'cat': 199})
Val Label Count: Counter({'dog': 47, 'cat': 45})
Test Label Count: Counter({'cat': 7, 'dog': 3})


In [57]:
import torch.nn as nn

class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions with a skip connection.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input to produce the skip
            branch. When ``None`` the input itself is used, so it must
            already have the same shape as the main branch's output.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        identity = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Compare against None explicitly: the truthiness of a module falls
        # back to __len__ when it defines one (container modules do), so a
        # bare `if self.downsample:` could skip a supplied shortcut.
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return self.relu(out)

class BottleneckBlock(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a skip.

    Args:
        in_channels: Number of channels of the input tensor.
        mid_channels: Number of channels used by the first 1x1 and the 3x3
            convolution.
        out_channels: Number of channels produced by the final 1x1
            convolution.
        stride: Stride of the 3x3 convolution (both 1x1 convolutions use 1).
        downsample: Optional module applied to the input to produce the skip
            branch. When ``None`` the input itself is used, so it must
            already have the same shape as the main branch's output.
    """

    def __init__(self, in_channels, mid_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_channels)
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_channels)
        self.conv3 = nn.Conv2d(mid_channels, out_channels, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_branch(x) + skip(x))``."""
        identity = x
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        # Compare against None explicitly: the truthiness of a module falls
        # back to __len__ when it defines one (container modules do), so a
        # bare `if self.downsample:` could skip a supplied shortcut.
        if self.downsample is not None:
            identity = self.downsample(x)
        out += identity
        return self.relu(out)
