# Image Resize

제공된 ViT 모델의 입력 크기가 224x224라서, CIFAR-10 이미지(32x32)를 32 -> 224로 resize해서 학습했는데 accuracy가 20~30%에 그쳤다.


이미지를 키우기보다는 모델의 입력 크기를 32x32로 맞추는 것이 맞는 듯하다.

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision import datasets
from timm.models.vision_transformer import VisionTransformer
from timm import create_model

import tqdm

In [7]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ViT-Base/16 for 224x224 inputs, built without pretrained weights.
# The classification head is sized for the 10 CIFAR-10 classes: without
# `num_classes`, timm builds this architecture with its default 1000-way
# head, leaving 990 logits that never correspond to any label.
model = create_model('vit_base_patch16_224', pretrained=False, num_classes=10)

In [8]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise each of the three channels with mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [10]:
# Seed for the train/validation partition, so that every run evaluates on
# the same validation samples and results stay comparable between runs.
SPLIT_SEED = 42
# Mini-batch size shared by the train, validation and test loaders.
BATCH_SIZE = 64

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Hold out 20% of the training set for validation.
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size

# Without an explicit generator, random_split draws from the global RNG and
# produces a different partition each time this cell is executed.
train_dataset, val_dataset = random_split(
    train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [11]:
# Loss function used for both training and evaluation.
criterion = nn.CrossEntropyLoss()

# Move the network to the selected device, then hand its parameters to Adam.
model = model.to(device)
# NOTE(review): lr=1e-3 with no warmup may be too high for training a ViT
# from scratch at this batch size and could contribute to the 20~30%
# accuracy mentioned in the notes at the top of this file - TODO confirm by
# trying a smaller learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [12]:
def train(model, train_loader, criterion, optimizer, epoch, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(inputs, labels)`` batches that also
            supports ``len()``.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            once per batch.
        epoch: Accepted for call-site compatibility; not used in the body.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The sum of the per-batch loss values divided by
        ``len(train_loader)``.
    """
    model.train()

    loss_total = 0.0
    for batch_inputs, batch_labels in train_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()

    return loss_total / len(train_loader)

In [13]:
def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0.0
    correct = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            test_loss += criterion(outputs, labels).item()

            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
    
    accuracy = 100 * correct / len(test_loader.dataset)
    return test_loss / len(test_loader), accuracy

In [None]:
# Number of full passes over the training split.
num_epochs = 30

for epoch in tqdm.tqdm(range(num_epochs)):
    # Wall-clock timestamp taken before any work for this epoch.
    start_time = time.time()

    # One optimisation pass, then an evaluation on the validation loader.
    train_loss = train(
        model=model,
        train_loader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        epoch=epoch,
        device=device,
    )
    val_loss, val_accuracy = test(
        model=model,
        test_loader=valid_loader,
        criterion=criterion,
        device=device,
    )

    print(f"Epoch {epoch + 1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val Accuracy: {val_accuracy:.2f}%")

    # The clock is stopped after the metrics line has been printed, so the
    # reported duration covers training, validation and that print.
    end_time = time.time()
    print(f"Epoch {epoch + 1} took {end_time - start_time:.2f} seconds")

In [None]:
# Final evaluation on the CIFAR-10 test loader after training has finished.
test_loss, test_accuracy = test(
    model=model,
    test_loader=test_loader,
    criterion=criterion,
    device=device,
)
print(f"Test Loss: {test_loss:.4f} - Test Accuracy: {test_accuracy:.2f}%")