<a href="https://colab.research.google.com/github/dokyoungleee/skumldk/blob/main/convmixer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import timm
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torchvision.transforms.autoaugment import RandAugment
from timm.data.mixup import Mixup
from torch.optim.lr_scheduler import CosineAnnealingLR
import time

# Define the ConvMixer model
def create_convmixer_cifar10(num_classes=10):
    """Build the ConvMixer classifier used for the CIFAR-10 experiment.

    Args:
        num_classes: Number of output classes of the classification head.

    Returns:
        A ``timm.models.convmixer.ConvMixer`` built with dim=128, depth=4,
        kernel_size=8 and patch_size=1.
    """
    model = timm.models.convmixer.ConvMixer(
        dim=128,        # Hidden dimension
        depth=4,        # Number of layers
        kernel_size=8,  # Kernel size of the depthwise convolution
        patch_size=1,   # Patch size
        num_classes=num_classes,
        # NOTE(review): timm's ConvMixer exposes head dropout as `drop_rate`;
        # the previous `dropout=0.1` keyword appears to be swallowed by
        # **kwargs and ignored, so no dropout was applied. Confirm against
        # the installed timm version.
        drop_rate=0.1,
    )
    return model

# Prepare the CIFAR-10 datasets
def get_dataloaders(batch_size):
    """Create the CIFAR-10 training and test data loaders.

    The training pipeline applies a padded random crop, a random horizontal
    flip and RandAugment before tensor conversion; the test pipeline only
    converts to a tensor. Both normalize every channel with mean 0.5 and
    std 0.5. Data is stored under ``./data`` and downloaded if missing.

    Args:
        batch_size: Batch size used by both loaders.

    Returns:
        Tuple ``(train_loader, test_loader)``; only the training loader
        shuffles.
    """
    # The same per-channel normalization is used for both splits.
    normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    train_pipeline = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        RandAugment(num_ops=3, magnitude=9),
        transforms.ToTensor(),
        normalize,
    ])
    test_pipeline = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_set = datasets.CIFAR10(
        root="./data", train=True, transform=train_pipeline, download=True
    )
    test_set = datasets.CIFAR10(
        root="./data", train=False, transform=test_pipeline, download=True
    )

    # Settings shared by both loaders.
    common = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
    return (
        DataLoader(train_set, shuffle=True, **common),
        DataLoader(test_set, shuffle=False, **common),
    )

# Training loop (one epoch)
def train(model, dataloader, optimizer, criterion, device, mixup_fn):
    """Run one training pass over ``dataloader`` and return the mean loss.

    Args:
        model: Module to optimize; switched to training mode.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable ``criterion(outputs, targets)``. It is
            assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.
        mixup_fn: Callable ``(inputs, targets) -> (inputs, targets)`` applied
            to every batch after the device transfer, or ``None`` to train
            on the unmodified batches.

    Returns:
        Loss averaged over all samples seen in the epoch.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    num_samples = 0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        if mixup_fn is not None:
            inputs, targets = mixup_fn(inputs, targets)  # Apply Mixup/CutMix
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a smaller final batch does
        # not count as much as a full one.
        batch_len = inputs.size(0)
        loss_sum += loss.item() * batch_len
        num_samples += batch_len
    return loss_sum / num_samples

# Evaluation loop
def evaluate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, targets)`` batches where
            ``targets`` holds class indices.
        criterion: Loss callable ``criterion(outputs, targets)``. It is
            assumed to return the mean loss over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the samples that
        were actually evaluated.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()
    correct = 0
    loss_sum = 0.0
    num_samples = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # Weight each batch mean by its size so a smaller final batch
            # does not skew the reported loss.
            batch_len = inputs.size(0)
            loss_sum += loss.item() * batch_len
            num_samples += batch_len
            # Predicted class is the index of the largest logit.
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
    # Divide by the samples seen rather than len(dataloader.dataset), so
    # the metrics stay correct for loaders that skip or subsample data.
    return loss_sum / num_samples, correct / num_samples

# Hyperparameters
batch_size = 128
learning_rate = 0.01
num_epochs = 150
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the ConvMixer model and move it to the selected device
model = create_convmixer_cifar10(num_classes=10)
model = model.to(device)

# Mixup / CutMix batch augmentation
mixup_fn = Mixup(
    mixup_alpha=0.2,      # Mixup alpha
    cutmix_alpha=0.3,     # CutMix alpha
    prob=0.7,             # Probability of applying Mixup/CutMix
    label_smoothing=0.1,  # Label smoothing
    num_classes=10,       # CIFAR-10 classes
)

# Data loaders
train_loader, test_loader = get_dataloaders(batch_size)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate)

# Cosine learning-rate schedule spanning the whole run
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

# Train and evaluate once per epoch, reporting losses, accuracy and wall time
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss = train(model, train_loader, optimizer, criterion, device, mixup_fn)
    test_loss, test_acc = evaluate(model, test_loader, criterion, device)
    scheduler.step()  # Advance the learning-rate schedule by one epoch
    end_time = time.time()

    epoch_duration = end_time - start_time
    print(f"Epoch {epoch + 1}/{num_epochs} | Train Loss: {train_loss:.4f} | Test Loss: {test_loss:.4f} | Test Accuracy: {test_acc:.4f} | Time: {epoch_duration:.2f}s")

Files already downloaded and verified
Files already downloaded and verified
Epoch 1/150 | Train Loss: 1.8603 | Test Loss: 1.1100 | Test Accuracy: 0.6073 | Time: 70.49s
Epoch 2/150 | Train Loss: 1.5841 | Test Loss: 0.9986 | Test Accuracy: 0.6501 | Time: 70.38s
Epoch 3/150 | Train Loss: 1.4969 | Test Loss: 0.8076 | Test Accuracy: 0.7318 | Time: 70.90s
Epoch 4/150 | Train Loss: 1.4492 | Test Loss: 0.7577 | Test Accuracy: 0.7502 | Time: 71.44s
Epoch 5/150 | Train Loss: 1.4246 | Test Loss: 0.7120 | Test Accuracy: 0.7729 | Time: 71.33s
Epoch 6/150 | Train Loss: 1.3682 | Test Loss: 0.6418 | Test Accuracy: 0.7981 | Time: 70.37s
Epoch 7/150 | Train Loss: 1.3688 | Test Loss: 0.6581 | Test Accuracy: 0.7973 | Time: 71.88s
Epoch 8/150 | Train Loss: 1.3354 | Test Loss: 0.5746 | Test Accuracy: 0.8123 | Time: 70.92s
Epoch 9/150 | Train Loss: 1.3444 | Test Loss: 0.5932 | Test Accuracy: 0.8158 | Time: 70.06s
Epoch 10/150 | Train Loss: 1.3022 | Test Loss: 0.6168 | Test Accuracy: 0.8120 | Time: 70.19s
Epo