In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as v2
import time
from datetime import datetime, timedelta
import copy
import pytz  # 시간대 처리
from tqdm import tqdm
from torch.utils.data import DataLoader
from torchvision import models

# Timezone used when this script reports wall-clock times (Asia/Seoul, i.e. KST).
__kst = pytz.timezone('Asia/Seoul')
# Compute device: the GPU when CUDA is available, otherwise the CPU.
__device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Human-readable names of the 10 target classes.
# NOTE(review): presumably listed in CIFAR-10 label-index order -- confirm.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Data preprocessing and augmentation
def get_data_transforms():
    """Build the preprocessing pipelines for training and for evaluation.

    Both pipelines resize to 224x224, convert to a tensor and normalize with
    the mean/std values given below (labelled as ImageNet statistics in the
    original comments). The training pipeline additionally applies a random
    horizontal flip with probability 0.5.

    Note: ``v2`` is the alias this file gives to ``torchvision.transforms``.

    Returns:
        tuple: ``(train_transform, val_transform)``.
    """
    target_size = (224, 224)  # resized to match the AlexNet input size

    def _tensor_and_normalize():
        # Fresh instances per pipeline, exactly as the pipelines were
        # originally written out in full.
        return [
            v2.ToTensor(),
            v2.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
        ]

    # Training: resize -> random flip (augmentation) -> tensor -> normalize
    train_steps = [v2.Resize(target_size), v2.RandomHorizontalFlip(p=0.5)]
    train_steps.extend(_tensor_and_normalize())

    # Validation/test: same pipeline without the augmentation step
    eval_steps = [v2.Resize(target_size)]
    eval_steps.extend(_tensor_and_normalize())

    return v2.Compose(train_steps), v2.Compose(eval_steps)

# Dataset loading
def load_data(debug=True, debug_samples=100, batch_size=32, num_workers=2):
    """Download CIFAR-10 and build the train/validation/test data loaders.

    The official training split is divided 80/20 into training and validation
    subsets. The validation subset reads from its own copy of the underlying
    dataset so that it can use the evaluation transform (no augmentation)
    without affecting the training subset.

    Args:
        debug: When True (the default, matching the previous hard-coded
            behavior), every split is truncated to its first
            ``debug_samples`` items so a full run finishes quickly.
        debug_samples: Number of items kept per split in debug mode.
        batch_size: Batch size used by all three loaders.
        num_workers: Worker process count used by all three loaders.

    Returns:
        tuple: ``(train_loader, val_loader, test_loader)``.
    """
    train_transform, val_transform = get_data_transforms()  # preprocessing

    # Download (if needed) and load CIFAR-10
    train_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=True, download=True, transform=train_transform)

    test_dataset = torchvision.datasets.CIFAR10(
        root='./data', train=False, download=True, transform=val_transform)

    # Hold out 20% of the training data for validation
    train_size = int(0.8 * len(train_dataset))
    val_size = len(train_dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(
        train_dataset, [train_size, val_size])

    # Both subsets share one underlying dataset object after the split; give
    # the validation subset its own copy so changing its transform does not
    # also disable augmentation for the training subset.
    val_dataset.dataset = copy.deepcopy(train_dataset.dataset)
    val_dataset.dataset.transform = val_transform

    if debug:
        from torch.utils.data import Subset

        def _head(dataset):
            # Clamp so a split smaller than debug_samples does not produce
            # out-of-range indices.
            return Subset(dataset, range(min(debug_samples, len(dataset))))

        train_dataset = _head(train_dataset)
        val_dataset = _head(val_dataset)
        test_dataset = _head(test_dataset)

    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(val_dataset, batch_size=batch_size,
                            shuffle=False, num_workers=num_workers)
    test_loader = DataLoader(test_dataset, batch_size=batch_size,
                             shuffle=False, num_workers=num_workers)

    print(f"훈련 데이터: {len(train_dataset)}개")
    print(f"검증 데이터: {len(val_dataset)}개")
    print(f"테스트 데이터: {len(test_dataset)}개")

    return train_loader, val_loader, test_loader


# Training function
def train_fn(model, train_loader, criterion, optimizer, device, epoch=None, epochs=None):
    """Run one training pass over ``train_loader`` and report loss/accuracy.

    Args:
        model: Network to train; switched to training mode here.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.
        epoch: Zero-based epoch index, used only for the progress-bar label.
        epochs: Total epoch count, used only for the progress-bar label.

    Returns:
        dict: ``{'loss': sample-weighted mean loss, 'acc': accuracy in [0, 1]}``;
        both are 0.0 when the loader yields no samples.
    """
    model.train()

    if epoch is None or epochs is None:
        label = "Train"
    else:
        label = f"Train [{epoch+1}/{epochs}]"

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    progress = tqdm(train_loader, leave=True, desc=label, position=0,
                    disable=False, mininterval=1)
    for images, labels in progress:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        n_batch = labels.size(0)
        # Assumes the criterion uses the default reduction='mean', so the
        # batch loss is re-weighted by the batch size before accumulating.
        loss_sum += loss.item() * n_batch
        predicted = torch.max(outputs, 1)[1]
        n_seen += n_batch
        n_correct += (predicted == labels).sum().item()

    if n_seen > 0:
        return {'loss': loss_sum / n_seen, 'acc': n_correct / n_seen}
    return {'loss': 0.0, 'acc': 0.0}

# Validation function
def evaluate_fn(model, val_loader, criterion, device, epoch=None, epochs=None):
    """Evaluate ``model`` on ``val_loader`` without computing gradients.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.
        epoch: Optional zero-based epoch index for the progress-bar label.
        epochs: Optional total epoch count for the progress-bar label.
            As in ``train_fn``, a plain label is used when either is omitted.

    Returns:
        dict: ``{'loss': sample-weighted mean loss, 'acc': accuracy in [0, 1]}``;
        both are 0.0 when the loader yields no samples.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    if epoch is not None and epochs is not None:
        desc = f"Val   [{epoch+1}/{epochs}]"
    else:
        desc = "Val"

    with torch.no_grad():
        pbar = tqdm(val_loader, leave=True, desc=desc, position=0,
                    disable=False, mininterval=1)
        for images, labels in pbar:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Assumes the criterion averages over the batch (default
            # reduction='mean'); re-weight by batch size before accumulating.
            running_loss += loss.item() * batch_size
            _, predicted = torch.max(outputs, 1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    return {
        'loss': running_loss / total if total > 0 else 0.0,
        'acc': correct / total if total > 0 else 0.0,
    }

# Modeling function (training + validation)
def modeling_fn(model, epochs, train_loader, val_loader, criterion, optimizer, scheduler, device):
    """Train for ``epochs`` epochs and restore the best-validation weights.

    Each epoch runs ``train_fn`` then ``evaluate_fn``, steps the scheduler (if
    one is given), records the results, and snapshots the weights whenever the
    validation accuracy improves. An estimated completion time (KST) is shown
    on the epoch progress bar.

    Args:
        model: Network to train.
        epochs: Number of epochs to run.
        train_loader: Training data loader.
        val_loader: Validation data loader.
        criterion: Loss function.
        optimizer: Optimizer used by ``train_fn``.
        scheduler: Learning-rate scheduler, or a falsy value to disable it.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(model, history, completion_time, total_elapsed)`` where
        ``history`` is a list of per-epoch dicts with keys ``epoch``,
        ``epochs``, ``train`` and ``val``; ``completion_time`` is a
        timezone-aware datetime; ``total_elapsed`` is in seconds.
    """
    import sys

    history = []
    # Start below any reachable accuracy so the first completed epoch always
    # becomes the baseline; with 0.0 and a strict '>' a run whose validation
    # accuracy stayed at 0.0 would reload the untrained initial weights.
    best_val_acc = float('-inf')
    best_model_wts = copy.deepcopy(model.state_dict())

    start_time = time.time()
    estimated_completion_str = None

    pbar = tqdm(range(epochs), leave=True, desc=f"Epoch [1/{epochs}]",
                position=0, disable=False, mininterval=1)
    for epoch in pbar:
        sys.stdout.flush()  # flush the output buffer
        pbar.set_description(f"Epoch [{epoch+1}/{epochs}]")

        train_res = train_fn(model, train_loader, criterion, optimizer, device, epoch=epoch, epochs=epochs)
        val_res = evaluate_fn(model, val_loader, criterion, device, epoch=epoch, epochs=epochs)
        # Re-apply the ETA after the inner bars have been drawn.
        if estimated_completion_str is not None:
            pbar.set_postfix_str("종료 " + estimated_completion_str)

        # Scheduler update
        if scheduler:
            scheduler.step()

        history.append({
            "epoch": epoch,
            "epochs": epochs,
            "train": train_res,
            "val": val_res,
        })

        # Keep the weights of the best-performing epoch
        if val_res['acc'] > best_val_acc:
            best_val_acc = val_res['acc']
            best_model_wts = copy.deepcopy(model.state_dict())

        # Refresh the estimated completion time from the running average
        # epoch duration, so a slow first epoch does not skew the estimate
        # for the rest of the run.
        completed = epoch + 1
        remaining_epochs = epochs - completed
        if remaining_epochs > 0:
            avg_epoch_time = (time.time() - start_time) / completed
            estimated_completion = datetime.now(__kst) + timedelta(seconds=avg_epoch_time * remaining_epochs)
            estimated_completion_str = estimated_completion.strftime('%Y-%m-%d %H:%M:%S KST')
            pbar.set_postfix_str("종료 " + estimated_completion_str)

    total_elapsed = time.time() - start_time
    completion_time = datetime.now(__kst)

    # Load the best-performing weights back into the model
    model.load_state_dict(best_model_wts)
    return model, history, completion_time, total_elapsed

# Test function
def test_model(model, test_loader, device):
    """Evaluate ``model`` on ``test_loader`` and collect per-class statistics.

    Args:
        model: Network to evaluate; switched to evaluation mode here.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(accuracy, class_total, classes, class_correct)`` where
        ``accuracy`` is a percentage (0.0 when the loader yields no samples),
        ``class_total`` / ``class_correct`` are per-class float counts indexed
        by label, and ``classes`` is the module-level tuple of class names.
    """
    model.eval()
    correct = 0
    total = 0
    num_classes = len(classes)
    class_correct = [0.0] * num_classes
    class_total = [0.0] * num_classes

    with torch.no_grad():
        pbar = tqdm(test_loader, leave=True, desc="Testing", position=0)
        for images, labels in pbar:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)

            matches = predicted == labels
            total += labels.size(0)
            correct += matches.sum().item()

            # Per-class accuracy bookkeeping. The mask is deliberately not
            # squeezed: squeezing collapses a single-sample batch to a 0-dim
            # tensor, which cannot be indexed. Converting to lists once also
            # avoids a separate .item() call per sample.
            for label, hit in zip(labels.tolist(), matches.tolist()):
                class_correct[label] += hit
                class_total[label] += 1

    accuracy = 100 * correct / total if total > 0 else 0.0
    return accuracy, class_total, classes, class_correct

def running():
    """Fine-tune a pretrained AlexNet on CIFAR-10 and print a test report.

    Loads the data, replaces the final classifier layer with a 10-way output,
    trains for 5 epochs with SGD and a StepLR schedule, evaluates on the test
    loader and prints overall / per-class accuracy plus the best validation
    epoch.

    Returns:
        The trained model (holding the best-validation weights).
    """
    train_loader, val_loader, test_loader = load_data()
    # Load the pretrained model first, with its original 1000-class head
    model = models.alexnet(weights=models.AlexNet_Weights.DEFAULT, progress=True)

    # Replace the last classifier layer to match CIFAR-10 (1000 -> 10)
    model.classifier[6] = nn.Linear(model.classifier[6].in_features, 10)
    model = model.to(__device)

    # Loss function, optimizer and learning-rate schedule
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    # Train the model
    model, history, completion_time, total_elapsed = modeling_fn(model, 5, train_loader, val_loader, criterion, optimizer, scheduler, __device)
    best = max(history, key=lambda x: x['val']['acc'])

    test_acc, test_class_total, test_classes, test_class_correct = test_model(model, test_loader, __device)

    print(f"total : {total_elapsed:.2f} seconds")
    print(f"test acc: {test_acc:.2f}")
    for i, (n_correct, n_total) in enumerate(zip(test_class_correct, test_class_total)):
        # A class may be absent from a small (debug) test subset; report 0%
        # instead of dividing by zero.
        pct = 100 * n_correct / n_total if n_total else 0.0
        print(f"class {i}: {n_correct}/{n_total} ({pct:.2f}%)")
    # Validation accuracy is stored as a fraction in [0, 1]; scale it so the
    # value matches the '%' sign in the message.
    print(f"best acc: {100 * best['val']['acc']:.2f}% at epoch {best['epoch']+1}")

    return model

# Train the model
model = running()

# Print a layer-by-layer summary of the trained model.
# The model returned by running() is used directly; no `res` variable is
# defined in this script.
from torchinfo import summary
summary(model, input_size=(1, 3, 224, 224))

Files already downloaded and verified
Files already downloaded and verified
훈련 데이터: 100개
검증 데이터: 100개
테스트 데이터: 100개


Train [1/5]: 100%|██████████| 4/4 [00:34<00:00,  8.67s/it]
Val   [1/5]: 100%|██████████| 4/4 [00:44<00:00, 11.04s/it]
Train [2/5]: 100%|██████████| 4/4 [00:39<00:00,  9.84s/it] 종료 2025-08-18 21:20:37 KST]
Val   [2/5]: 100%|██████████| 4/4 [00:19<00:00,  4.86s/it]
Train [3/5]: 100%|██████████| 4/4 [00:23<00:00,  5.78s/it] 종료 2025-08-18 21:20:37 KST]
Val   [3/5]: 100%|██████████| 4/4 [00:20<00:00,  5.21s/it]
Train [4/5]: 100%|██████████| 4/4 [00:21<00:00,  5.28s/it] 종료 2025-08-18 21:20:37 KST]
Val   [4/5]: 100%|██████████| 4/4 [00:19<00:00,  4.91s/it]
Train [5/5]: 100%|██████████| 4/4 [00:24<00:00,  6.15s/it] 종료 2025-08-18 21:20:37 KST]
Val   [5/5]: 100%|██████████| 4/4 [00:21<00:00,  5.40s/it]
Epoch [5/5]: 100%|██████████| 5/5 [04:28<00:00, 53.76s/it, 종료 2025-08-18 21:20:37 KST]
Testing: 100%|██████████| 4/4 [00:16<00:00,  4.05s/it]

total : 268.79 seconds
test acc: 58.00
class 0: 4.0/10.0 (40.00%)
class 1: 4.0/6.0 (66.67%)
class 2: 0.0/8.0 (0.00%)
class 3: 7.0/10.0 (70.00%)
class 4: 1.0/7.0 (14.29%)
class 5: 3.0/8.0 (37.50%)
class 6: 14.0/16.0 (87.50%)
class 7: 6.0/11.0 (54.55%)
class 8: 11.0/13.0 (84.62%)
class 9: 8.0/11.0 (72.73%)
best acc: 0.47% at epoch 5





Layer (type:depth-idx)                   Output Shape              Param #
AlexNet                                  [1, 10]                   --
├─Sequential: 1-1                        [1, 256, 6, 6]            --
│    └─Conv2d: 2-1                       [1, 64, 55, 55]           23,296
│    └─ReLU: 2-2                         [1, 64, 55, 55]           --
│    └─MaxPool2d: 2-3                    [1, 64, 27, 27]           --
│    └─Conv2d: 2-4                       [1, 192, 27, 27]          307,392
│    └─ReLU: 2-5                         [1, 192, 27, 27]          --
│    └─MaxPool2d: 2-6                    [1, 192, 13, 13]          --
│    └─Conv2d: 2-7                       [1, 384, 13, 13]          663,936
│    └─ReLU: 2-8                         [1, 384, 13, 13]          --
│    └─Conv2d: 2-9                       [1, 256, 13, 13]          884,992
│    └─ReLU: 2-10                        [1, 256, 13, 13]          --
│    └─Conv2d: 2-11                      [1, 256, 13, 13]         