In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, ConcatDataset
import numpy as np
from tqdm import tqdm

import itertools
from torchinfo import summary

In [3]:
ViT = timm.create_model('vit_base_patch16_224', pretrained=False)

In [4]:
model = ViT

In [5]:
print(model)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

batch_size = 32
learning_rate = 0.001
num_epochs = 50

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [6]:
model.head = nn.Linear(model.head.in_features, 37)

In [7]:
print(summary(model, input_size=(32, 3, 224, 224)))

Layer (type:depth-idx)                   Output Shape              Param #
VisionTransformer                        [32, 37]                  152,064
├─PatchEmbed: 1-1                        [32, 196, 768]            --
│    └─Conv2d: 2-1                       [32, 768, 14, 14]         590,592
│    └─Identity: 2-2                     [32, 196, 768]            --
├─Dropout: 1-2                           [32, 197, 768]            --
├─Identity: 1-3                          [32, 197, 768]            --
├─Identity: 1-4                          [32, 197, 768]            --
├─Sequential: 1-5                        [32, 197, 768]            --
│    └─Block: 2-3                        [32, 197, 768]            --
│    │    └─LayerNorm: 3-1               [32, 197, 768]            1,536
│    │    └─Attention: 3-2               [32, 197, 768]            2,362,368
│    │    └─Identity: 3-3                [32, 197, 768]            --
│    │    └─Identity: 3-4                [32, 197, 768]          

In [10]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
trainval_data = datasets.OxfordIIITPet(root="data", split="trainval", target_types="category", download=True, transform=transform)
test_data = datasets.OxfordIIITPet(root="data", split="test", target_types="category", download=True, transform=transform)
combined_data = ConcatDataset([trainval_data, test_data])

train_size = int(0.7 * len(combined_data))
val_size = int(0.15 * len(combined_data))
test_size = len(combined_data) - train_size - val_size
train_data, val_data, test_data = random_split(combined_data, [train_size, val_size, test_size])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

print(f"Train set size: {len(train_data)}")
print(f"Validation set size: {len(val_data)}")
print(f"Test set size: {len(test_data)}")

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

Downloading https://thor.robots.ox.ac.uk/pets/images.tar.gz to data/oxford-iiit-pet/images.tar.gz


100%|██████████| 792M/792M [00:35<00:00, 22.1MB/s]


Extracting data/oxford-iiit-pet/images.tar.gz to data/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/pets/annotations.tar.gz to data/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19.2M/19.2M [00:02<00:00, 9.17MB/s]


Extracting data/oxford-iiit-pet/annotations.tar.gz to data/oxford-iiit-pet
Train set size: 5144
Validation set size: 1102
Test set size: 1103


In [17]:
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in tqdm(train_loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Train Loss: {epoch_loss:.4f}, Train Accuracy: {accuracy:.2f}%")

In [18]:
def evaluate(model, data_loader, criterion, device, phase="Validation"):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc=f"{phase}"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(data_loader)
    accuracy = 100 * correct / total
    print(f"{phase} Loss: {epoch_loss:.4f}, {phase} Accuracy: {accuracy:.2f}%")

In [19]:
def measure_inference_time(model, data_loader, device):
    model.eval()
    times = []

    with torch.no_grad():
        for inputs, _ in data_loader:
            inputs = inputs.to(device)
            start_time = torch.cuda.Event(enable_timing=True)
            end_time = torch.cuda.Event(enable_timing=True)

            start_time.record()
            _ = model(inputs)  # inference 수행
            end_time.record()

            # 시간 측정
            torch.cuda.synchronize()  # CUDA에서 모든 커널이 완료될 때까지 대기
            elapsed_time = start_time.elapsed_time(end_time)  # 밀리초 단위로 반환
            times.append(elapsed_time)

    # 통계량 계산
    times_np = np.array(times)
    total_inferences = len(times_np)
    avg_time = np.mean(times_np)
    std_dev = np.std(times_np)
    max_time = np.max(times_np)
    min_time = np.min(times_np)

    # 결과 출력
    print(f"Inference Time Measurement Results:")
    print(f"Total Inferences: {total_inferences}")
    print(f"Average Time: {avg_time:.2f} ms")
    print(f"Standard Deviation: {std_dev:.2f} ms")
    print(f"Maximum Time: {max_time:.2f} ms")
    print(f"Minimum Time: {min_time:.2f} ms")

    return times

In [20]:
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    train(model, train_loader, criterion, optimizer, device)
    evaluate(model, val_loader, criterion, device, phase="Validation")


Epoch 1/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.69it/s]


Train Loss: 3.8612, Train Accuracy: 2.26%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.36it/s]


Validation Loss: 3.7756, Validation Accuracy: 2.00%

Epoch 2/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.70it/s]


Train Loss: 3.7389, Train Accuracy: 2.47%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.43it/s]


Validation Loss: 3.7372, Validation Accuracy: 3.27%

Epoch 3/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.6915, Train Accuracy: 3.11%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.6402, Validation Accuracy: 2.81%

Epoch 4/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.6419, Train Accuracy: 3.52%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.34it/s]


Validation Loss: 3.6302, Validation Accuracy: 3.27%

Epoch 5/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.6342, Train Accuracy: 3.34%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.32it/s]


Validation Loss: 3.5946, Validation Accuracy: 2.99%

Epoch 6/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.6250, Train Accuracy: 3.34%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.41it/s]


Validation Loss: 3.6191, Validation Accuracy: 3.81%

Epoch 7/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.6105, Train Accuracy: 3.58%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.6078, Validation Accuracy: 3.90%

Epoch 8/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5914, Train Accuracy: 3.99%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5908, Validation Accuracy: 4.08%

Epoch 9/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5682, Train Accuracy: 4.32%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5448, Validation Accuracy: 5.17%

Epoch 10/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5522, Train Accuracy: 4.37%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.38it/s]


Validation Loss: 3.5604, Validation Accuracy: 3.09%

Epoch 11/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5426, Train Accuracy: 4.20%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.32it/s]


Validation Loss: 3.5335, Validation Accuracy: 4.63%

Epoch 12/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5410, Train Accuracy: 4.53%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5450, Validation Accuracy: 4.08%

Epoch 13/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5413, Train Accuracy: 4.74%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.40it/s]


Validation Loss: 3.5435, Validation Accuracy: 4.63%

Epoch 14/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5325, Train Accuracy: 4.69%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.35it/s]


Validation Loss: 3.5142, Validation Accuracy: 5.54%

Epoch 15/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5272, Train Accuracy: 5.00%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.40it/s]


Validation Loss: 3.5421, Validation Accuracy: 4.08%

Epoch 16/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5349, Train Accuracy: 5.00%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.36it/s]


Validation Loss: 3.5406, Validation Accuracy: 5.81%

Epoch 17/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5328, Train Accuracy: 4.82%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.40it/s]


Validation Loss: 3.5345, Validation Accuracy: 5.54%

Epoch 18/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5797, Train Accuracy: 4.16%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.35it/s]


Validation Loss: 3.5803, Validation Accuracy: 3.90%

Epoch 19/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5731, Train Accuracy: 3.69%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5718, Validation Accuracy: 4.99%

Epoch 20/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5703, Train Accuracy: 4.22%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.40it/s]


Validation Loss: 3.5735, Validation Accuracy: 3.81%

Epoch 21/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5633, Train Accuracy: 3.73%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.33it/s]


Validation Loss: 3.5766, Validation Accuracy: 4.26%

Epoch 22/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5635, Train Accuracy: 4.16%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.36it/s]


Validation Loss: 3.5869, Validation Accuracy: 4.26%

Epoch 23/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5872, Train Accuracy: 3.77%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5735, Validation Accuracy: 4.36%

Epoch 24/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5743, Train Accuracy: 4.30%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.38it/s]


Validation Loss: 3.5754, Validation Accuracy: 4.72%

Epoch 25/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5593, Train Accuracy: 4.41%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.37it/s]


Validation Loss: 3.5381, Validation Accuracy: 5.26%

Epoch 26/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5463, Train Accuracy: 4.45%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.41it/s]


Validation Loss: 3.5377, Validation Accuracy: 4.08%

Epoch 27/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5425, Train Accuracy: 4.51%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5340, Validation Accuracy: 4.45%

Epoch 28/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5351, Train Accuracy: 5.03%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.41it/s]


Validation Loss: 3.5280, Validation Accuracy: 4.90%

Epoch 29/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5179, Train Accuracy: 5.31%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.41it/s]


Validation Loss: 3.5381, Validation Accuracy: 4.99%

Epoch 30/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5338, Train Accuracy: 4.84%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5333, Validation Accuracy: 5.08%

Epoch 31/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5495, Train Accuracy: 4.92%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.34it/s]


Validation Loss: 3.5482, Validation Accuracy: 4.45%

Epoch 32/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5321, Train Accuracy: 5.17%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.33it/s]


Validation Loss: 3.5563, Validation Accuracy: 3.81%

Epoch 33/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5438, Train Accuracy: 4.30%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.35it/s]


Validation Loss: 3.5365, Validation Accuracy: 5.08%

Epoch 34/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5490, Train Accuracy: 4.74%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.42it/s]


Validation Loss: 3.5345, Validation Accuracy: 5.26%

Epoch 35/50


Training: 100%|██████████| 161/161 [00:58<00:00,  2.73it/s]


Train Loss: 3.5386, Train Accuracy: 4.80%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.42it/s]


Validation Loss: 3.5375, Validation Accuracy: 4.63%

Epoch 36/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5408, Train Accuracy: 4.76%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5516, Validation Accuracy: 3.72%

Epoch 37/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5489, Train Accuracy: 4.72%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.36it/s]


Validation Loss: 3.5725, Validation Accuracy: 4.72%

Epoch 38/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5529, Train Accuracy: 4.47%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.33it/s]


Validation Loss: 3.5435, Validation Accuracy: 4.72%

Epoch 39/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.71it/s]


Train Loss: 3.5443, Train Accuracy: 4.39%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.38it/s]


Validation Loss: 3.5679, Validation Accuracy: 4.45%

Epoch 40/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5457, Train Accuracy: 4.88%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.37it/s]


Validation Loss: 3.5535, Validation Accuracy: 4.17%

Epoch 41/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5660, Train Accuracy: 3.83%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5512, Validation Accuracy: 4.08%

Epoch 42/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5552, Train Accuracy: 4.37%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.37it/s]


Validation Loss: 3.5686, Validation Accuracy: 3.81%

Epoch 43/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5491, Train Accuracy: 4.35%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.40it/s]


Validation Loss: 3.5687, Validation Accuracy: 4.90%

Epoch 44/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5546, Train Accuracy: 4.14%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.40it/s]


Validation Loss: 3.5575, Validation Accuracy: 3.90%

Epoch 45/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5468, Train Accuracy: 4.39%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.38it/s]


Validation Loss: 3.5507, Validation Accuracy: 3.90%

Epoch 46/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5639, Train Accuracy: 4.43%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.35it/s]


Validation Loss: 3.5514, Validation Accuracy: 4.63%

Epoch 47/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5598, Train Accuracy: 4.16%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.35it/s]


Validation Loss: 3.5569, Validation Accuracy: 4.72%

Epoch 48/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.73it/s]


Train Loss: 3.5569, Train Accuracy: 4.94%


Validation: 100%|██████████| 35/35 [00:08<00:00,  4.35it/s]


Validation Loss: 3.5554, Validation Accuracy: 4.54%

Epoch 49/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5487, Train Accuracy: 4.57%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]


Validation Loss: 3.5589, Validation Accuracy: 3.90%

Epoch 50/50


Training: 100%|██████████| 161/161 [00:59<00:00,  2.72it/s]


Train Loss: 3.5519, Train Accuracy: 4.72%


Validation: 100%|██████████| 35/35 [00:07<00:00,  4.39it/s]

Validation Loss: 3.5422, Validation Accuracy: 4.72%





In [21]:
print("\nFinal Test Evaluation")
evaluate(model, test_loader, criterion, device, phase="Test")


Final Test Evaluation


Test: 100%|██████████| 35/35 [00:08<00:00,  4.31it/s]

Test Loss: 3.5597, Test Accuracy: 4.62%





In [22]:
times = measure_inference_time(model, test_loader, device)

Inference Time Measurement Results:
Total Inferences: 35
Average Time: 79.19 ms
Standard Deviation: 7.08 ms
Maximum Time: 86.26 ms
Minimum Time: 40.93 ms
