In [None]:
# 0. 작업 준비
import numpy as numpy
import matplotlib.pyplot as plt

import torch
from torch.utils import data
from torchvision import datasets, transforms, utils
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm

USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda' if USE_CUDA else 'cpu')
print(f'Using device: {DEVICE}')

Using device: cuda


In [16]:
ck_tr = transforms.Compose([transforms.ToTensor()])

tr_ds = datasets.Food101(root='./data/',
                 split='train',
                 download=True,
                 transform=ck_tr)

tr_ds

100%|██████████| 5.00G/5.00G [04:59<00:00, 16.7MB/s]


Dataset Food101
    Number of datapoints: 75750
    Root location: ./data/
    split=train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [7]:
# 상수 설정
if USE_CUDA:
    BATCH_SIZE = 128
else:
    BATCH_SIZE = 64
print(f'BATCH_SIZE: {BATCH_SIZE}')

BATCH_SIZE: 128


In [17]:
# 데이터 수정 (노이즈 삽입)
# 1. 데이터 준비
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 데이터 증강
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

tr_ds_loader = torch.utils.data.DataLoader(
    datasets.Food101(
        root='./data/',
        split='train',
        download=False,
        transform=train_transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True
)

tt_ds_loader = torch.utils.data.DataLoader(
    datasets.Food101(
        root='./data/',
        split='test',
        download=False,
        transform=test_transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False
)

In [None]:
import torch
import torch.nn as nn

class Inception_Module(nn.Module):
    def __init__(self, in_channels, filters_num_array):
        super().__init__()

        br0_filters = filters_num_array[0]
        br1_filters = filters_num_array[1]
        br2_filters = filters_num_array[2]
        br3_filters = filters_num_array[3]

        self.br0_conv = self._conv_bn_relu(in_channels=in_channels, out_channels=br0_filters, kernel_size=1)

        self.br1_conv1 = self._conv_bn_relu(in_channels=in_channels, out_channels=br1_filters[0], kernel_size=1)
        self.br1_conv2 = self._conv_bn_relu(in_channels=br1_filters[0], out_channels=br1_filters[1], kernel_size=3, padding=1)

        self.br2_conv1 = self._conv_bn_relu(in_channels=in_channels, out_channels=br2_filters[0], kernel_size=1)
        self.br2_conv2 = self._conv_bn_relu(in_channels=br2_filters[0], out_channels=br2_filters[1], kernel_size=3, padding=1)

        self.br3_pool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.br3_conv = self._conv_bn_relu(in_channels=in_channels, out_channels=br3_filters, kernel_size=1)

    # 헬퍼함수
    def _conv_bn_relu(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        """레이어를 반환 (호출 X)"""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels, eps=0.001),
            nn.ReLU(inplace=True)
        )
    # 호출은 self.conv(x) 처럼 forward 호출을 말함.

    # nn.Conv2d(...): 인스턴스 생성
    # self.conv = nn.Conv2d(...): nn.Module의 서브모듈(레이어)로 등록
    # self.conv(x): forward 호출
    # return nn.Sequential(...): 모듈 반환
    # nn.Sequential(...)(x): 생성 + 즉시 호출

    def forward(self, x):
        br0 = self.br0_conv(x)

        br1 = self.br1_conv1(x)
        br1 = self.br1_conv2(br1)

        br2 = self.br2_conv1(x)
        br2 = self.br2_conv2(br2)

        br3 = self.br3_pool(x)
        br3 = self.br3_conv(br3)

        # Concatenate
        out = torch.cat([br0, br1, br2, br3], dim=1)
        return out

# 보조 분류기
class AuxiliaryClassifier(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.conv = nn.Conv2d(in_channels, 128, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm2d(128, eps=0.001)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)
        self.relu2 = nn.ReLU(inplace=True)
        self.dropout = nn.Dropout(0.7)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.avgpool(x)
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu2(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

class GoogLeNet(nn.Module):
    def __init__(self, in_channels=3, num_classes=1000, aux_logits=True):
        super().__init__()

        self.aux_logits = aux_logits  # 보조 분류기 사용 여부 (원본 논문처럼 True가 기본)

        # Stem
        self.conv1 = self._conv_bn_relu(in_channels=in_channels, out_channels=64, kernel_size=7, stride=2, padding=3)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.conv2 = self._conv_bn_relu(in_channels=64, out_channels=64, kernel_size=1)
        self.conv3 = self._conv_bn_relu(in_channels=64, out_channels=192, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        # Inception 모듈
        self.inception3a = Inception_Module(in_channels=192, filters_num_array=(64, (96, 128), (16, 32), 32))
        self.inception3b = Inception_Module(in_channels=256, filters_num_array=(128, (128, 192), (32, 96), 64))
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.inception4a = Inception_Module(in_channels=480, filters_num_array=(192, (96, 208), (16, 48), 64))
        self.inception4b = Inception_Module(in_channels=512, filters_num_array=(160, (112, 224), (24, 64), 64))
        self.inception4c = Inception_Module(in_channels=512, filters_num_array=(128, (128, 256), (24, 64), 64))
        self.inception4d = Inception_Module(in_channels=512, filters_num_array=(112, (144, 288), (32, 64), 64))
        self.inception4e = Inception_Module(in_channels=528, filters_num_array=(256, (160, 320), (32, 128), 128))
        self.pool4 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

        self.inception5a = Inception_Module(in_channels=832, filters_num_array=(256, (160, 320), (32, 128), 128))
        self.inception5b = Inception_Module(in_channels=832, filters_num_array=(384, (192, 384), (48, 128), 128))

        # 보조 분류기들 (auxiliary classifiers)
        if self.aux_logits:
            self.aux1 = AuxiliaryClassifier(512, num_classes)  # inception4a 출력 후
            self.aux2 = AuxiliaryClassifier(528, num_classes)  # inception4d 출력 후

        # 메인 분류기
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)

    def _conv_bn_relu(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        """레이어를 반환 (호출 X)"""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False),
            nn.BatchNorm2d(out_channels, eps=0.001),
            nn.ReLU(inplace=True)
        )

    def forward(self, x): # 3, 224, 224
        x = self.conv1(x) # 64, 112, 112
        x = self.pool1(x) # 64, 56, 56

        x = self.conv2(x) # 64, 56, 56
        x = self.conv3(x) # 192, 56, 56
        x = self.pool2(x) # 192, 28, 28

        x = self.inception3a(x) # 256, 28, 28
        x = self.inception3b(x) # 480, 28, 28
        x = self.pool3(x) # 480, 14, 14

        x = self.inception4a(x) # 512, 14, 14
        if self.training and self.aux_logits:
            aux1 = self.aux1(x)

        # model.train()  # model.training = True
        # model.eval()   # model.training = False

        x = self.inception4b(x) # 512, 14, 14
        x = self.inception4c(x) # 512, 14, 14
        x = self.inception4d(x) # 528, 14, 14
        if self.training and self.aux_logits:
            aux2 = self.aux2(x)

        x = self.inception4e(x) # 832, 14, 14
        x = self.pool4(x) # 832, 7, 7

        x = self.inception5a(x) # 832, 7, 7
        x = self.inception5b(x) # 1024, 7, 7

        x = self.avgpool(x) # 1024, 1, 1
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)

        if self.training and self.aux_logits:
            return x, aux1, aux2
        return x

In [21]:
model = GoogLeNet(num_classes=1000, aux_logits=False).to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0005)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=3, gamma=0.1)

In [22]:
def train(model, tr_ds_loader, optimizer):
    model.train()
    running_loss = 0.
    total = 0
    correct = 0
    for (x, y) in tr_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        total += len(data)
        running_loss += loss.item() * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()

        train_loss = running_loss / total
        train_accuracy = correct / total

        tr_ds_loader.set_postfix({
            'train_loss': f'{train_loss:.4f}',
            'train_accuracy': f'{train_accuracy*100:.2f}%'
        })

In [23]:
@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.
    correct = 0
    for x, y in tt_ds_loader:
        data, target = x.to(DEVICE), y.to(DEVICE)
        output = model(data)
        loss = F.cross_entropy(output, target).item()
        test_loss += loss * len(data)
        pred = (output.argmax(dim=1) == target)
        correct += pred.sum().item()
    test_loss /= len(tt_ds_loader.dataset)
    test_accuracy = correct / len(tt_ds_loader.dataset)
    return test_loss, test_accuracy

In [24]:
from tqdm import tqdm
EPOCHS = 160
ES_PATIENCE = 5
best_loss = float('inf')
es_patience = 0
for epoch in range(1, EPOCHS+1):

    train_bar = tqdm(
        tr_ds_loader,
        desc=f'[Epochs: {epoch}/{EPOCHS}]',
        leave=True
    )
    train(model, train_bar, optimizer)
    scheduler.step()
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)
    tqdm.write(
        f'test_loss: {test_loss:.4f}, '
        f'test_accuracy: {test_accuracy * 100:.2f}%, '
        f'lr: {scheduler.get_last_lr()[0]:.4f}, '
        f'ES_Patience: {es_patience}/{ES_PATIENCE}'
    )

    if best_loss > test_loss:
        best_loss = test_loss
        es_patience = 0
    else:
        es_patience += 1
    if es_patience >= ES_PATIENCE:
        print('Early Stopping이 동작하였습니다.')
        break

[Epochs: 1/160]:  59%|█████▊    | 347/592 [07:05<05:00,  1.23s/it, train_loss=4.4783, train_accuracy=4.58%]


KeyboardInterrupt: 

In [25]:
from torch.cuda.amp import autocast, GradScaler

@torch.no_grad()
def evaluate(model, tt_ds_loader):
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    for data, target in tt_ds_loader:
        data, target = data.to(DEVICE, non_blocking=True), target.to(DEVICE, non_blocking=True)

        output = model(data)
        loss = F.cross_entropy(output, target, reduction='sum').item()  # sum으로 바꿔서 정확히 평균 내기

        test_loss += loss
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()
        total += data.size(0)

    test_loss /= total
    test_accuracy = correct / total
    return test_loss, test_accuracy


def train(model, train_loader, optimizer, scaler):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # tqdm 바 생성 (기존 train_bar처럼 desc 포함)
    train_bar = tqdm(
        train_loader,
        desc=f'[Epochs: {epoch}/{EPOCHS}] Training',
        leave=True
    )

    for data, target in train_bar:
        data, target = data.to(DEVICE, non_blocking=True), target.to(DEVICE, non_blocking=True)

        optimizer.zero_grad()

        # === AMP 적용 시작 ===
        with autocast(dtype=torch.float16):  # T4 최적: float16
            output = model(data)
            loss = F.cross_entropy(output, target)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        # === AMP 끝 ===

        batch_size = data.size(0)
        running_loss += loss.item() * batch_size
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()
        total += batch_size

        # 실시간 postfix 업데이트 (매 배치)
        train_bar.set_postfix({
            'loss': f'{running_loss / total:.4f}',
            'acc': f'{100 * correct / total:.2f}%'
        })

    return running_loss / total, 100 * correct / total


# ====================== 학습 루프 ======================
EPOCHS = 160
ES_PATIENCE = 5
best_loss = float('inf')
es_patience = 0

# GradScaler 한 번만 생성 (재사용)
scaler = GradScaler()

for epoch in range(1, EPOCHS + 1):
    # train
    train_loss, train_acc = train(model, tr_ds_loader, optimizer, scaler)

    # scheduler step (보통 epoch 끝에)
    scheduler.step()

    # evaluate
    test_loss, test_accuracy = evaluate(model, tt_ds_loader)

    # tqdm.write 대신 print (Colab에서 깔끔함)
    print(
        f'Epoch {epoch:3d} | '
        f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% | '
        f'Test Loss: {test_loss:.4f} | Test Acc: {test_accuracy*100:.2f}% | '
        f'LR: {scheduler.get_last_lr()[0]:.6f} | '
        f'ES: {es_patience}/{ES_PATIENCE}'
    )

    # Early Stopping (test_loss 기준)
    if test_loss < best_loss:
        best_loss = test_loss
        es_patience = 0
        # 최적 모델 저장 원하면 여기 추가
        # torch.save(model.state_dict(), 'best_food101_googlenet.pth')
    else:
        es_patience += 1

    if es_patience >= ES_PATIENCE:
        print('=== Early Stopping 발동! ===')
        break

print('학습 완료!')

  scaler = GradScaler()
  with autocast(dtype=torch.float16):  # T4 최적: float16
[Epochs: 1/160] Training: 100%|██████████| 592/592 [09:17<00:00,  1.06it/s, loss=3.8552, acc=11.24%]


Epoch   1 | Train Loss: 3.8552 | Train Acc: 11.24% | Test Loss: 3.6145 | Test Acc: 14.76% | LR: 0.100000 | ES: 0/5


[Epochs: 2/160] Training: 100%|██████████| 592/592 [09:10<00:00,  1.08it/s, loss=3.4474, acc=17.91%]


Epoch   2 | Train Loss: 3.4474 | Train Acc: 17.91% | Test Loss: 3.2549 | Test Acc: 21.66% | LR: 0.100000 | ES: 0/5


[Epochs: 3/160] Training: 100%|██████████| 592/592 [09:16<00:00,  1.06it/s, loss=3.1192, acc=24.21%]


Epoch   3 | Train Loss: 3.1192 | Train Acc: 24.21% | Test Loss: 3.1728 | Test Acc: 22.79% | LR: 0.010000 | ES: 0/5


[Epochs: 4/160] Training:  20%|█▉        | 118/592 [01:46<07:05,  1.11it/s, loss=2.6593, acc=33.24%]


KeyboardInterrupt: 