In [1]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [2]:
import timm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, ConcatDataset
import numpy as np
from tqdm import tqdm

import itertools
from torchinfo import summary

In [3]:
class ConvNorm(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=2, padding=1):
        super(ConvNorm, self).__init__()
        self.linear = nn.Conv2d(
            in_channels, out_channels, kernel_size=kernel_size,
            stride=stride, padding=padding, bias=False
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.linear(x)
        x = self.bn(x)
        return x

In [4]:
class Stem16(nn.Module):
    def __init__(self):
        super(Stem16, self).__init__()
        self.conv1 = ConvNorm(3, 32)
        self.act1 = nn.Hardswish()
        self.conv2 = ConvNorm(32, 64)
        self.act2 = nn.Hardswish()
        self.conv3 = ConvNorm(64, 128)
        self.act3 = nn.Hardswish()
        self.conv4 = ConvNorm(128, 256)

    def forward(self, x):
        x = self.act1(self.conv1(x))
        x = self.act2(self.conv2(x))
        x = self.act3(self.conv3(x))
        x = self.conv4(x)
        return x

In [5]:
class LinearNorm(nn.Module):
    def __init__(self, in_features, out_features):
        super(LinearNorm, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias=False)
        self.bn = nn.BatchNorm1d(out_features)

    def forward(self, x):

        if x.dim() == 3:
            B, N, C = x.shape
            x = x.reshape(B * N, C)
            x = self.bn(self.linear(x))
            x = x.reshape(B, N, -1)
        else:
            x = self.bn(self.linear(x))
        return x

In [6]:
class Attention(nn.Module):
    def __init__(self, dim, num_heads, attn_ratio=2):
        super(Attention, self).__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5
        inner_dim = head_dim * num_heads * 3
        self.qkv = LinearNorm(dim, inner_dim)

        self.proj = nn.Sequential(
            nn.Hardswish(),
            LinearNorm(dim, dim)
        )

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x)
        qkv = qkv.view(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        return self.proj(x)

In [7]:
class LevitMlp(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(LevitMlp, self).__init__()
        self.ln1 = LinearNorm(in_features, hidden_features)
        self.act = nn.Hardswish()
        self.drop = nn.Dropout(p=0.0, inplace=False)
        self.ln2 = LinearNorm(hidden_features, out_features)

    def forward(self, x):
        x = self.ln1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.ln2(x)
        return x

In [88]:
class LevitBlock(nn.Module):
    def __init__(self, dim, num_heads, mlp_ratio=2):
        super(LevitBlock, self).__init__()
        self.attn = Attention(dim, num_heads)
        self.drop_path1 = nn.Identity()
        self.mlp = LevitMlp(dim, dim * mlp_ratio, dim)
        self.drop_path2 = nn.Identity()

    def forward(self, x):
        #print("Input to LevitBlock:", x.shape)
        x = x + self.drop_path1(self.attn(x))  # Attention 연산
        #print("After Attention:", x.shape)
        x = x + self.drop_path2(self.mlp(x))  # MLP 연산
        #print("After MLP:", x.shape)
        return x

In [9]:
class AttentionDownsample(nn.Module):
    def __init__(self, dim, out_dim, num_heads, attn_ratio=2):
        super(AttentionDownsample, self).__init__()
        self.num_heads = num_heads
        self.scale = (dim // num_heads) ** -0.5
        inner_dim = dim * attn_ratio * num_heads
        self.kv = LinearNorm(dim, inner_dim)

        self.q = nn.Sequential(
            nn.Conv2d(dim, dim, kernel_size=2, stride=2),
            nn.Flatten(start_dim=1)
        )

        self.proj = nn.Sequential(
            nn.Hardswish(),
            LinearNorm(dim, out_dim)
        )

    def forward(self, x):
        B, N, C = x.shape
        H = W = int(N ** 0.5)
        x = x.reshape(B, C, H, W)

        kv = self.kv(x.flatten(2).transpose(1, 2))
        q = self.q(x)

        q = q.reshape(B, -1, C)
        x = self.proj(q)
        return x

In [89]:
class LevitDownsample(nn.Module):
    def __init__(self, dim, out_dim, num_heads, attn_ratio=2):
        super(LevitDownsample, self).__init__()
        self.attn_downsample = AttentionDownsample(dim, out_dim, num_heads, attn_ratio)
        self.mlp = LevitMlp(out_dim, out_dim * attn_ratio, out_dim)
        self.drop_path = nn.Identity()

    def forward(self, x):
        #print("Before Downsample:", x.shape)

        # Convert 4D to 3D
        if x.dim() == 4:  # If input is 4D, convert to 3D
            B, C, H, W = x.shape
            x = x.view(B, C, -1).transpose(1, 2)  # B, N, C
            #print("After 4D to 3D conversion for Downsample:", x.shape)

        x = self.attn_downsample(x)
        #print("After attn_downsample:", x.shape)
        x = self.drop_path(self.mlp(x))
        #print("After MLP in Downsample:", x.shape)

        return x


In [90]:
class LevitStage(nn.Module):
    def __init__(self, dim, out_dim, num_heads, num_blocks, downsample=True):
        super(LevitStage, self).__init__()
        self.downsample = LevitDownsample(dim, out_dim, num_heads) if downsample else nn.Identity()
        self.blocks = nn.Sequential(*[LevitBlock(out_dim, num_heads) for _ in range(num_blocks)])

    def forward(self, x):
        x = self.downsample(x)
        x = self.blocks(x)
        return x

In [91]:
class ConvLevitStage(nn.Module):
    def __init__(self, in_channels, out_channels, num_blocks, kernel_size, stride, padding):
        super(ConvLevitStage, self).__init__()
        self.layers = nn.Sequential(
            *[nn.Conv2d(in_channels if i == 0 else out_channels, out_channels, kernel_size, stride, padding)
              for i in range(num_blocks)],
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        #print("Input to ConvLevitStage:", x.shape)  # Debugging 출력
        x = self.layers(x)
        #print("Output of ConvLevitStage:", x.shape)  # Debugging 출력
        return x

In [92]:
class NormLinear(nn.Module):
    def __init__(self, in_features, out_features, dropout_prob=0.0):
        super(NormLinear, self).__init__()
        self.bn = nn.BatchNorm1d(in_features)
        self.drop = nn.Dropout(p=dropout_prob, inplace=False)
        self.linear = nn.Linear(in_features, out_features, bias=True)

    def forward(self, x):
        x = self.bn(x)
        x = self.drop(x)
        x = self.linear(x)
        return x


In [93]:
class LevitDistilled(nn.Module):
    def __init__(self, num_classes=37):
        super(LevitDistilled, self).__init__()
        self.stem = Stem16()

        self.stage1 = ConvLevitStage(
            in_channels=256,
            out_channels=256,
            num_blocks=3,
            kernel_size=1,
            stride=1,
            padding=0
        )

        self.stage2 = LevitStage(dim=256, out_dim=384, num_heads=6, num_blocks=3, downsample=True)
        self.stage3 = LevitStage(dim=384, out_dim=512, num_heads=8, num_blocks=2, downsample=True)

        self.head = NormLinear(in_features=512, out_features=num_classes, dropout_prob=0.0)
        self.head_dist = NormLinear(in_features=512, out_features=num_classes, dropout_prob=0.0)

    def forward(self, x):
        # Stem
        x = self.stem(x)
        #print("After Stem:", x.shape)

        # Stage 1
        x = self.stage1(x)
        #print("After Stage 1:", x.shape)

        # Convert to 3D for stage2
        B, C, H, W = x.shape
        x = x.view(B, C, -1).transpose(1, 2)  # B, N, C
        #print("After 3D conversion for Stage 2:", x.shape)

        # Stage 2
        x = self.stage2(x)
        #print("After Stage 2:", x.shape)

        # Convert back to 4D for stage3
        H = W = int(x.shape[1] ** 0.5)
        x = x.transpose(1, 2).view(B, 384, H, W)
        x = self.stage3(x)

        # Final pooling
        # Handle 3D tensor properly
        if x.dim() == 3:
            x = x.mean(dim=1)  # Mean over the sequence dimension
        elif x.dim() == 4:
            x = torch.mean(x, dim=(2, 3))  # Global Average Pooling for 4D tensor

        # Classification heads
        out = self.head(x)
        out_dist = self.head_dist(x)
        return out

In [94]:
ResNet50 = timm.create_model('resnet50', pretrained=True)

In [95]:
print(summary(ResNet50, input_size=(32, 3, 224, 224)))

Layer (type:depth-idx)                   Output Shape              Param #
ResNet                                   [32, 1000]                --
├─Conv2d: 1-1                            [32, 64, 112, 112]        9,408
├─BatchNorm2d: 1-2                       [32, 64, 112, 112]        128
├─ReLU: 1-3                              [32, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [32, 64, 56, 56]          --
├─Sequential: 1-5                        [32, 256, 56, 56]         --
│    └─Bottleneck: 2-1                   [32, 256, 56, 56]         --
│    │    └─Conv2d: 3-1                  [32, 64, 56, 56]          4,096
│    │    └─BatchNorm2d: 3-2             [32, 64, 56, 56]          128
│    │    └─ReLU: 3-3                    [32, 64, 56, 56]          --
│    │    └─Conv2d: 3-4                  [32, 64, 56, 56]          36,864
│    │    └─BatchNorm2d: 3-5             [32, 64, 56, 56]          128
│    │    └─Identity: 3-6                [32, 64, 56, 56]          --
│ 

In [96]:
for param in ResNet50.parameters():
    param.requires_grad = False

In [97]:
ResNet50.fc = nn.Identity()

In [112]:
class LauncherModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.resnet = ResNet50
        self.fc = nn.Linear(2048, 3 * 56 * 56)

        self.upsample = nn.ConvTranspose2d(3, 3, kernel_size=4, stride=4, padding=0)

        self.levit = LevitDistilled()

    def forward(self, x):
        x = self.resnet(x)  # (32, 2048)
        x = self.fc(x)  # (32, 9408)
        x = x.view(x.size(0), 3, 56, 56)  # (32, 3, 56, 56)
        x = self.upsample(x)  # (32, 3, 224, 224)

        #print("Shape of x after upsampling:", x.shape)

        x = self.levit(x)
        return x


In [113]:
# model = LevitDistilled()
model = LauncherModel()
print(model)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

batch_size = 32
learning_rate = 0.001
num_epochs = 50

LauncherModel(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (act1): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (act1): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (drop_block): Identity()
        (act2): ReLU(inplace=True)
        (aa): Identity()
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05,

In [101]:
print(summary(model, input_size=(32, 3, 224, 224), verbose=2))

Shape of x after upsampling: torch.Size([32, 3, 224, 224])
Layer (type:depth-idx)                                       Output Shape              Param #
LauncherModel                                                [32, 37]                  --
├─ResNet: 1-1                                                [32, 2048]                --
│    └─conv1.weight                                                                    ├─9,408
│    └─bn1.weight                                                                      ├─64
│    └─bn1.bias                                                                        ├─64
│    └─layer1.0.conv1.weight                                                           ├─4,096
│    └─layer1.0.bn1.weight                                                             ├─64
│    └─layer1.0.bn1.bias                                                               ├─64
│    └─layer1.0.conv2.weight                                                           ├─36,864
│    └─layer

In [102]:
model = LauncherModel().to(device)

# 더미 입력 생성 (예: (32, 3, 224, 224))
dummy_input = torch.randn(32, 3, 224, 224).to(device)

# 각 단계의 출력 크기 확인
print("Input shape:", dummy_input.shape)
x = dummy_input

x = model.resnet(x)
print("After ResNet:", x.shape)

x = model.fc(x)
x = x.view(x.size(0), 3, 56, 56)
print("After fc reshape:", x.shape)

x = model.upsample(x)
print("After upsampling:", x.shape)

x = model.levit(x)
print("After LevitDistilled:", x.shape)


Input shape: torch.Size([32, 3, 224, 224])
After ResNet: torch.Size([32, 2048])
After fc reshape: torch.Size([32, 3, 56, 56])
After upsampling: torch.Size([32, 3, 224, 224])
After LevitDistilled: torch.Size([32, 37])


In [104]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [105]:
trainval_data = datasets.OxfordIIITPet(root="data", split="trainval", target_types="category", download=True, transform=transform)
test_data = datasets.OxfordIIITPet(root="data", split="test", target_types="category", download=True, transform=transform)
combined_data = ConcatDataset([trainval_data, test_data])

train_size = int(0.7 * len(combined_data))
val_size = int(0.15 * len(combined_data))
test_size = len(combined_data) - train_size - val_size
train_data, val_data, test_data = random_split(combined_data, [train_size, val_size, test_size])

Downloading https://thor.robots.ox.ac.uk/pets/images.tar.gz to data/oxford-iiit-pet/images.tar.gz


100%|██████████| 792M/792M [00:37<00:00, 21.1MB/s]


Extracting data/oxford-iiit-pet/images.tar.gz to data/oxford-iiit-pet
Downloading https://thor.robots.ox.ac.uk/pets/annotations.tar.gz to data/oxford-iiit-pet/annotations.tar.gz


100%|██████████| 19.2M/19.2M [00:02<00:00, 9.12MB/s]


Extracting data/oxford-iiit-pet/annotations.tar.gz to data/oxford-iiit-pet


In [106]:
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

print(f"Train set size: {len(train_data)}")
print(f"Validation set size: {len(val_data)}")
print(f"Test set size: {len(test_data)}")

Train set size: 5144
Validation set size: 1102
Test set size: 1103


In [107]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [108]:
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in tqdm(train_loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    print(f"Train Loss: {epoch_loss:.4f}, Train Accuracy: {accuracy:.2f}%")

In [109]:
def evaluate(model, data_loader, criterion, device, phase="Validation"):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc=f"{phase}"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(data_loader)
    accuracy = 100 * correct / total
    print(f"{phase} Loss: {epoch_loss:.4f}, {phase} Accuracy: {accuracy:.2f}%")

In [110]:
def measure_inference_time(model, data_loader, device):
    model.eval()
    times = []

    with torch.no_grad():
        for inputs, _ in data_loader:
            inputs = inputs.to(device)
            start_time = torch.cuda.Event(enable_timing=True)
            end_time = torch.cuda.Event(enable_timing=True)

            start_time.record()
            _ = model(inputs)  # inference 수행
            end_time.record()

            # 시간 측정
            torch.cuda.synchronize()  # CUDA에서 모든 커널이 완료될 때까지 대기
            elapsed_time = start_time.elapsed_time(end_time)  # 밀리초 단위로 반환
            times.append(elapsed_time)

    # 통계량 계산
    times_np = np.array(times)
    total_inferences = len(times_np)
    avg_time = np.mean(times_np)
    std_dev = np.std(times_np)
    max_time = np.max(times_np)
    min_time = np.min(times_np)

    # 결과 출력
    print(f"Inference Time Measurement Results:")
    print(f"Total Inferences: {total_inferences}")
    print(f"Average Time: {avg_time:.2f} ms")
    print(f"Standard Deviation: {std_dev:.2f} ms")
    print(f"Maximum Time: {max_time:.2f} ms")
    print(f"Minimum Time: {min_time:.2f} ms")

    return times

In [114]:
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    train(model, train_loader, criterion, optimizer, device)
    evaluate(model, val_loader, criterion, device, phase="Validation")


Epoch 1/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.25it/s]


Train Loss: 3.7489, Train Accuracy: 2.59%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.90it/s]


Validation Loss: 3.7127, Validation Accuracy: 1.36%

Epoch 2/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7373, Train Accuracy: 2.55%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.93it/s]


Validation Loss: 3.7901, Validation Accuracy: 1.36%

Epoch 3/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7621, Train Accuracy: 2.20%


Validation: 100%|██████████| 35/35 [00:05<00:00,  6.02it/s]


Validation Loss: 3.6977, Validation Accuracy: 1.63%

Epoch 4/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.32it/s]


Train Loss: 3.7461, Train Accuracy: 2.00%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.94it/s]


Validation Loss: 3.7766, Validation Accuracy: 0.82%

Epoch 5/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7572, Train Accuracy: 2.04%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.91it/s]


Validation Loss: 3.7766, Validation Accuracy: 1.18%

Epoch 6/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7499, Train Accuracy: 2.20%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.96it/s]


Validation Loss: 3.7227, Validation Accuracy: 1.54%

Epoch 7/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7402, Train Accuracy: 2.59%


Validation: 100%|██████████| 35/35 [00:06<00:00,  5.83it/s]


Validation Loss: 3.7726, Validation Accuracy: 1.27%

Epoch 8/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7458, Train Accuracy: 2.35%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.91it/s]


Validation Loss: 3.7471, Validation Accuracy: 1.36%

Epoch 9/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7492, Train Accuracy: 2.41%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.94it/s]


Validation Loss: 3.7498, Validation Accuracy: 1.54%

Epoch 10/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.26it/s]


Train Loss: 3.7499, Train Accuracy: 2.37%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.97it/s]


Validation Loss: 3.7044, Validation Accuracy: 1.81%

Epoch 11/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.27it/s]


Train Loss: 3.7468, Train Accuracy: 2.27%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.98it/s]


Validation Loss: 3.7390, Validation Accuracy: 1.36%

Epoch 12/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7563, Train Accuracy: 2.06%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.91it/s]


Validation Loss: 3.7434, Validation Accuracy: 1.27%

Epoch 13/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.29it/s]


Train Loss: 3.7441, Train Accuracy: 2.04%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.86it/s]


Validation Loss: 3.7440, Validation Accuracy: 1.36%

Epoch 14/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.34it/s]


Train Loss: 3.7505, Train Accuracy: 2.14%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.88it/s]


Validation Loss: 3.7295, Validation Accuracy: 1.09%

Epoch 15/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.33it/s]


Train Loss: 3.7449, Train Accuracy: 2.24%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.90it/s]


Validation Loss: 3.7669, Validation Accuracy: 1.18%

Epoch 16/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7457, Train Accuracy: 2.04%


Validation: 100%|██████████| 35/35 [00:06<00:00,  5.79it/s]


Validation Loss: 3.8533, Validation Accuracy: 1.18%

Epoch 17/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7503, Train Accuracy: 2.08%


Validation: 100%|██████████| 35/35 [00:05<00:00,  6.00it/s]


Validation Loss: 3.7170, Validation Accuracy: 1.81%

Epoch 18/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.29it/s]


Train Loss: 3.7586, Train Accuracy: 2.06%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.99it/s]


Validation Loss: 3.7128, Validation Accuracy: 1.45%

Epoch 19/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7339, Train Accuracy: 2.62%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.94it/s]


Validation Loss: 3.7749, Validation Accuracy: 1.54%

Epoch 20/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7511, Train Accuracy: 2.20%


Validation: 100%|██████████| 35/35 [00:05<00:00,  6.00it/s]


Validation Loss: 3.8005, Validation Accuracy: 1.72%

Epoch 21/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7473, Train Accuracy: 2.59%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.94it/s]


Validation Loss: 3.7480, Validation Accuracy: 1.72%

Epoch 22/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7519, Train Accuracy: 2.37%


Validation: 100%|██████████| 35/35 [00:06<00:00,  5.80it/s]


Validation Loss: 3.8162, Validation Accuracy: 1.00%

Epoch 23/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.25it/s]


Train Loss: 3.7374, Train Accuracy: 2.45%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.84it/s]


Validation Loss: 3.7951, Validation Accuracy: 0.91%

Epoch 24/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.29it/s]


Train Loss: 3.7347, Train Accuracy: 2.82%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.87it/s]


Validation Loss: 3.8105, Validation Accuracy: 0.91%

Epoch 25/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.24it/s]


Train Loss: 3.7579, Train Accuracy: 2.27%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.90it/s]


Validation Loss: 3.7041, Validation Accuracy: 1.54%

Epoch 26/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.25it/s]


Train Loss: 3.7437, Train Accuracy: 2.59%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.95it/s]


Validation Loss: 3.6747, Validation Accuracy: 1.27%

Epoch 27/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.23it/s]


Train Loss: 3.7430, Train Accuracy: 1.98%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.95it/s]


Validation Loss: 3.7775, Validation Accuracy: 1.27%

Epoch 28/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.27it/s]


Train Loss: 3.7484, Train Accuracy: 2.12%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.94it/s]


Validation Loss: 3.8235, Validation Accuracy: 1.27%

Epoch 29/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.27it/s]


Train Loss: 3.7506, Train Accuracy: 2.64%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.90it/s]


Validation Loss: 3.7866, Validation Accuracy: 1.54%

Epoch 30/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.27it/s]


Train Loss: 3.7634, Train Accuracy: 2.31%


Validation: 100%|██████████| 35/35 [00:06<00:00,  5.79it/s]


Validation Loss: 3.6933, Validation Accuracy: 1.45%

Epoch 31/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.32it/s]


Train Loss: 3.7560, Train Accuracy: 2.49%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.84it/s]


Validation Loss: 3.7427, Validation Accuracy: 1.54%

Epoch 32/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.32it/s]


Train Loss: 3.7488, Train Accuracy: 2.62%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.87it/s]


Validation Loss: 3.7439, Validation Accuracy: 1.27%

Epoch 33/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7540, Train Accuracy: 2.26%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.92it/s]


Validation Loss: 3.6802, Validation Accuracy: 1.45%

Epoch 34/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7418, Train Accuracy: 2.29%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.92it/s]


Validation Loss: 3.7794, Validation Accuracy: 1.09%

Epoch 35/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7530, Train Accuracy: 1.94%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.91it/s]


Validation Loss: 3.6864, Validation Accuracy: 1.36%

Epoch 36/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7483, Train Accuracy: 2.51%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.93it/s]


Validation Loss: 3.7329, Validation Accuracy: 1.18%

Epoch 37/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.28it/s]


Train Loss: 3.7446, Train Accuracy: 2.08%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.87it/s]


Validation Loss: 3.6712, Validation Accuracy: 1.00%

Epoch 38/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.32it/s]


Train Loss: 3.7464, Train Accuracy: 2.22%


Validation: 100%|██████████| 35/35 [00:06<00:00,  5.72it/s]


Validation Loss: 3.7436, Validation Accuracy: 1.54%

Epoch 39/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7478, Train Accuracy: 2.22%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.91it/s]


Validation Loss: 3.7151, Validation Accuracy: 1.45%

Epoch 40/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7509, Train Accuracy: 2.24%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.88it/s]


Validation Loss: 3.7370, Validation Accuracy: 1.54%

Epoch 41/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.33it/s]


Train Loss: 3.7693, Train Accuracy: 2.41%


Validation: 100%|██████████| 35/35 [00:05<00:00,  6.01it/s]


Validation Loss: 3.8191, Validation Accuracy: 0.91%

Epoch 42/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.27it/s]


Train Loss: 3.7501, Train Accuracy: 2.70%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.97it/s]


Validation Loss: 3.7699, Validation Accuracy: 1.27%

Epoch 43/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7452, Train Accuracy: 2.26%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.88it/s]


Validation Loss: 3.7297, Validation Accuracy: 1.45%

Epoch 44/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7448, Train Accuracy: 2.27%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.99it/s]


Validation Loss: 3.7429, Validation Accuracy: 1.00%

Epoch 45/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.27it/s]


Train Loss: 3.7380, Train Accuracy: 2.43%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.87it/s]


Validation Loss: 3.7241, Validation Accuracy: 1.54%

Epoch 46/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.34it/s]


Train Loss: 3.7494, Train Accuracy: 2.39%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.89it/s]


Validation Loss: 3.7079, Validation Accuracy: 0.91%

Epoch 47/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7628, Train Accuracy: 2.12%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.87it/s]


Validation Loss: 3.7325, Validation Accuracy: 1.36%

Epoch 48/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.30it/s]


Train Loss: 3.7532, Train Accuracy: 2.02%


Validation: 100%|██████████| 35/35 [00:06<00:00,  5.83it/s]


Validation Loss: 3.6588, Validation Accuracy: 1.36%

Epoch 49/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.32it/s]


Train Loss: 3.7458, Train Accuracy: 2.35%


Validation: 100%|██████████| 35/35 [00:05<00:00,  6.02it/s]


Validation Loss: 3.8021, Validation Accuracy: 1.00%

Epoch 50/50


Training: 100%|██████████| 161/161 [00:30<00:00,  5.31it/s]


Train Loss: 3.7416, Train Accuracy: 2.24%


Validation: 100%|██████████| 35/35 [00:05<00:00,  5.97it/s]

Validation Loss: 3.6905, Validation Accuracy: 1.54%





In [115]:
print("\nFinal Test Evaluation")
evaluate(model, test_loader, criterion, device, phase="Test")


Final Test Evaluation


Test: 100%|██████████| 35/35 [00:06<00:00,  5.67it/s]

Test Loss: 3.7527, Test Accuracy: 1.81%





In [116]:
times = measure_inference_time(model, test_loader, device)

Inference Time Measurement Results:
Total Inferences: 35
Average Time: 19.21 ms
Standard Deviation: 0.62 ms
Maximum Time: 19.35 ms
Minimum Time: 15.63 ms


In [117]:
from torch import profiler

dummy_input = torch.randn(32, 3, 224, 224).cuda()

# Profiling inference
with profiler.profile(
    activities=[
       profiler.ProfilerActivity.CPU,
        profiler.ProfilerActivity.CUDA,  # Include if using GPU
    ],
    on_trace_ready=profiler.tensorboard_trace_handler("./logs"),  # Optional logging
    record_shapes=True,
    with_stack=True
) as prof:
    with torch.no_grad():
        model(dummy_input)


# Print results
print(prof.key_averages().table(sort_by="cuda_time_total" if torch.cuda.is_available() else "cpu_time_total", row_limit=10))

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                      aten::convolution         1.21%     414.375us        37.06%      12.720ms     201.905us       0.000us         0.00%       8.115ms     128.811us            63  
                                     aten::_convolution        10.23%       3.510ms        35.85%      12.306ms     195.328us       0.000us         0.00%       8.115ms     128.811us            63  
         