## 데이터셋
- Tiny ImageNet 사용
    - ImageNet을 축소한 버전(200개 클래스, 클래스당 500장 학습 이미지, 64×64 해상도)

In [1]:
# Download the Tiny ImageNet archive into the current working directory
# (-q suppresses wget's progress output).
!wget -q http://cs231n.stanford.edu/tiny-imagenet-200.zip

In [None]:
# Extract the downloaded archive in place (-q suppresses the per-file listing).
!unzip -q tiny-imagenet-200.zip

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

In [None]:
class TinyImageNet(Dataset):
    """Tiny ImageNet dataset read from an extracted ``tiny-imagenet-200`` tree.

    Files read under ``root``:

    * ``wnids.txt`` -- one WordNet id per line; the line position is used as
      the integer class label.
    * ``train/<wnid>/images/*`` -- training images grouped by class.
    * ``val/images/*`` and ``val/val_annotations.txt`` -- validation images
      and a tab-separated file whose first two columns are the image file
      name and its wnid.

    Args:
        root: Directory containing the files listed above.
        split: ``'train'`` selects the training images; any other value
            selects the validation images.
        transform: Optional callable applied to each PIL image in
            ``__getitem__``.

    Attributes:
        images: List of image file paths.
        labels: List of integer class indices, parallel to ``images``.
    """

    def __init__(self, root, split='train', transform=None):
        self.transform = transform
        self.images = []
        self.labels = []

        # Read the class list with a context manager so the handle is closed,
        # and ignore blank lines so they cannot become bogus classes.
        with open(os.path.join(root, 'wnids.txt'), encoding='utf-8') as f:
            wnids = [line.strip() for line in f if line.strip()]
        wnid_to_idx = {wnid: idx for idx, wnid in enumerate(wnids)}

        if split == 'train':
            train_dir = os.path.join(root, 'train')
            for wnid in wnids:
                img_dir = os.path.join(train_dir, wnid, 'images')
                label = wnid_to_idx[wnid]
                # os.listdir order is filesystem-dependent; sort so that the
                # sample order is reproducible across machines and runs.
                for name in sorted(os.listdir(img_dir)):
                    self.images.append(os.path.join(img_dir, name))
                    self.labels.append(label)
        else:  # validation
            val_dir = os.path.join(root, 'val')
            img_dir = os.path.join(val_dir, 'images')
            # val_annotations.txt: filename\twnid\t...
            filename_to_wnid = {}
            ann_path = os.path.join(val_dir, 'val_annotations.txt')
            with open(ann_path, encoding='utf-8') as f:
                for line in f:
                    fields = line.rstrip('\r\n').split('\t')
                    if len(fields) < 2:
                        # Blank or malformed line: nothing to map.
                        continue
                    filename_to_wnid[fields[0]] = fields[1]
            for name in sorted(os.listdir(img_dir)):
                self.images.append(os.path.join(img_dir, name))
                self.labels.append(wnid_to_idx[filename_to_wnid[name]])

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, if a transform was given, passed
        through it before being returned.
        """
        # convert() yields an independent in-memory image, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(self.images[idx]) as raw:
            img = raw.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, self.labels[idx]


In [None]:
# Standard ImageNet normalization statistics (per-channel mean and std).
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop rescaled to 224x224 and a random horizontal
# flip for augmentation, followed by tensor conversion and normalization.
train_steps = [
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
train_transform = transforms.Compose(train_steps)

# Validation pipeline: deterministic resize to 256 and a central 224x224 crop,
# followed by the same tensor conversion and normalization.
val_steps = [
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
]
val_transform = transforms.Compose(val_steps)

# Location of the extracted dataset.
root = '/content/tiny-imagenet-200'
train_ds = TinyImageNet(root, split='train', transform=train_transform)
val_ds = TinyImageNet(root, split='val', transform=val_transform)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=128,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    dataset=val_ds,
    batch_size=128,
    shuffle=False,
    num_workers=4,
)



In [None]:
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutions followed by three linear layers.

    The classifier's first linear layer expects 256*6*6 input features, which
    is what the convolutional stack produces for 224x224 inputs.

    Args:
        num_classes: Size of the final output layer.
    """

    def __init__(self, num_classes=200):
        super().__init__()

        conv_stack = [
            # Stage 1: large-kernel strided convolution, LRN, overlapping pool.
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11,
                      stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Stage 2: two-group convolution, LRN, overlapping pool.
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5,
                      padding=2, groups=2),
            nn.ReLU(inplace=True),
            nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Stage 3: three 3x3 convolutions (the last two grouped), one pool.
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3,
                      padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3,
                      padding=1, groups=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3,
                      padding=1, groups=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        feature_maps = self.features(x)
        # Collapse everything except the batch dimension for the linear head.
        batch_size = feature_maps.size(0)
        flattened = feature_maps.view(batch_size, -1)
        return self.classifier(flattened)

def build_alexnet(num_classes=200):
    """Construct and return a new ``AlexNet`` with ``num_classes`` outputs."""
    network = AlexNet(num_classes=num_classes)
    return network


In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
# Build the 200-class network, then move its parameters to the chosen device.
model = AlexNet(num_classes=200)
model = model.to(device)

In [None]:
# Cross-entropy loss computed on the model's raw class scores.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)

In [None]:
# Multiply the learning rate by 0.1 once the monitored metric has failed to
# improve for more than `patience` consecutive scheduler steps. mode='min'
# means lower is better, so the metric passed to `scheduler.step(metric)`
# should be a loss. The scheduler only acts when `step(metric)` is called.
#
# `verbose` is intentionally not passed: it is deprecated in recent PyTorch
# releases and rejected by the newest ones. Read the current learning rate
# from `optimizer.param_groups[0]['lr']` instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=5
)



In [None]:
num_epochs = 20

for epoch in range(num_epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- Validation pass: accumulate loss and top-1 accuracy ----
    model.eval()
    total, correct = 0, 0
    val_loss_sum = 0.0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = labels.size(0)
            # criterion returns the batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            val_loss_sum += criterion(outputs, labels).item() * batch_size
            _, preds = outputs.max(1)
            total += batch_size
            correct += (preds == labels).sum().item()

    val_loss = val_loss_sum / total
    val_acc = correct / total

    # The scheduler was created with mode='min', so drive it with the
    # validation loss. Without this call the learning rate never decays.
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']

    print(
        f"Epoch {epoch+1}/{num_epochs}, Val Accuracy: {val_acc:.4f}, "
        f"Val Loss: {val_loss:.4f}, LR: {current_lr:.2e}"
    )