In [24]:
!pip install pynvml



In [25]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

import matplotlib.pyplot as plt
import numpy as np

import copy
from collections import namedtuple
import time
import os
import random
import re

import cv2
from torch.utils.data import Dataset, DataLoader, Subset
from PIL import Image

from tqdm import tqdm
from pynvml import *
import pandas as pd

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [26]:
def print_gpu_utilization():
    if torch.cuda.is_available():
        device = torch.cuda.current_device()  # 현재 GPU 디바이스 정보
        allocated_memory = torch.cuda.memory_allocated(device) / 1024**3  # 메모리 사용량 (GB)
        reserved_memory = torch.cuda.memory_reserved(device) / 1024**3  # 예약된 메모리 (GB)
        print(f"Allocated Memory: {allocated_memory:.2f} GB")
        print(f"Reserved Memory: {reserved_memory:.2f} GB")
    else:
        print("No GPU available.")

In [27]:
def print_summary(result):
    print(f"Time: {result.metrics['train_runtime']:.2f}")
    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
    print_gpu_utilization()

In [28]:
size = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
batch_size = 32

In [29]:
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size, scale=(0.5, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(size),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

In [30]:
# CIFAR-10
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transforms)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transforms)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [31]:
VALID_RATIO = 0.7
n_train_examples = int(len(trainset) * VALID_RATIO)
n_valid_examples = len(trainset) - n_train_examples

train_data, valid_data = data.random_split(trainset, [n_train_examples, n_valid_examples])

In [32]:
valid_data = copy.deepcopy(valid_data)
valid_data.dataset.transform = test_transforms

In [33]:
len(train_data), len(valid_data), len(testset)

(35000, 15000, 10000)

In [34]:
sample_fraction = 0.2

# 무작위 인덱스 생성
train_indices = torch.randperm(len(trainset))[:int(len(trainset) * sample_fraction)]
valid_indices = torch.randperm(len(valid_data))[:int(len(valid_data) * sample_fraction)]
test_indices = torch.randperm(len(testset))[:int(len(testset) * sample_fraction)]

# 서브셋 생성
train_subset = Subset(trainset, train_indices)
valid_subset = Subset(valid_data, valid_indices)
test_subset = Subset(testset, test_indices)

In [35]:
len(train_subset), len(valid_subset), len(test_subset)

(10000, 3000, 2000)

In [36]:
train_iterator = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
valid_iterator = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)
test_iterator = DataLoader(test_subset, batch_size=batch_size, shuffle=False)

In [37]:
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample = False):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        if downsample:
            conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False)
            bn = nn.BatchNorm2d(out_channels)
            downsample = nn.Sequential(conv, bn)
        else:
            downsample = None
        self.downsample = downsample

    def forward(self, x):
        i = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)

        if self.downsample is not None:
            i = self.downsample(i)

        x += i
        x = self.relu(x)

        return x

In [38]:
class ResNet(nn.Module):
    def __init__(self, config, output_dim, zero_init_residual = False):
        super().__init__()

        block, n_blocks, channels = config
        self.in_channels = channels[0]
        assert len(n_blocks) == len(channels) == 4

        self.conv1 = nn.Conv2d(3, self.in_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self.get_resnet_layer(block, n_blocks[0], channels[0])
        self.layer2 = self.get_resnet_layer(block, n_blocks[1], channels[1], stride=2)
        self.layer3 = self.get_resnet_layer(block, n_blocks[2], channels[2], stride=2)
        self.layer4 = self.get_resnet_layer(block, n_blocks[3], channels[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.in_channels, output_dim)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)
                #elif isinstance(m, Bottleneck):
                    #nn.init.constant_(m.bn3.weight, 0)

    def get_resnet_layer(self, block, n_blocks, channels, stride=1):
        layers = []
        if self.in_channels != block.expansion * channels:
            downsample = True
        else:
            downsample = False
        layers.append(block(self.in_channels, channels, stride, downsample))
        for i in range(1, n_blocks):
            layers.append(block(block.expansion * channels, channels))

        self.in_channels = block.expansion * channels
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        h = x.view(x.shape[0], -1)
        x = self.fc(h)
        return x, h

In [39]:
ResNetConfig = namedtuple('ResNetConfig', ['block', 'n_blocks', 'channels'])

In [40]:
resnet18_config = ResNetConfig(block = BasicBlock, n_blocks = [2, 2, 2, 2], channels = [64, 128, 256, 512])

In [41]:
pretrained_model = models.resnet18(pretrained=True)



In [42]:
print(pretrained_model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [43]:
model = ResNet(resnet18_config, 10)

In [44]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kerne

In [45]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [47]:
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

pretrained_model = model.to(device)
criterion = criterion.to(device)

In [48]:
def calculate_accuracy(y_pred, y):
    top_pred = y_pred.argmax(1, keepdim=True)
    correct = top_pred.eq(y.view_as(top_pred)).sum()
    acc = correct.float() / y.shape[0]
    return acc

In [49]:
pattern = re.compile(r'key=(?P<key>\S+)\s+'
                     r'self_cpu_time=(?P<self_cpu_time>\S+)\s+'
                     r'cpu_time=(?P<cpu_time>\S+)\s+'
                     r'self_cuda_time=(?P<self_cuda_time>\S+)\s+'
                     r'cuda_time=(?P<cuda_time>\S+)\s+'
                     r'input_shapes=(?P<input_shapes>\S*)\s*'
                     r'cpu_memory_usage=(?P<cpu_memory_usage>\S*)\s*'
                     r'cuda_memory_usage=(?P<cuda_memory_usage>\S*)')

In [50]:
def train(model, train_loader, criterion, optimizer, device):
    start_time = time.monotonic()
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    # PyTorch Profiler 시작
    with profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        profile_memory=True,  # 메모리 사용량 추적
        record_shapes=True  # 텐서 크기 기록
    ) as prof:
        for inputs, labels in tqdm(train_loader, desc="Training"):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            with record_function("forward_pass"):
                outputs = model(inputs)

            with record_function("loss_computation"):
                loss = criterion(outputs[0], labels)

            with record_function("backward_pass"):
                loss.backward()

            with record_function("optimizer_step"):
                optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs[0], 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    end_time = time.monotonic()

    # 프로파일링 결과 요약 출력
    selected_keys = ["forward_pass", "loss_computation", "backward_pass", "optimizer_step"]

    # key_averages()로부터 얻은 평균값을 필터링
    filtered_averages = [avg for avg in prof.key_averages() if avg.key in selected_keys]
    extracted_data = []

    for avg in filtered_averages:
      avg_str = str(avg)
      match = pattern.search(avg_str)
      if match:
        extracted_data.append(match.groupdict())
    df = pd.DataFrame(extracted_data)
    print(df)
    #print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=20))
    # 훈련 후 평균 손실과 정확도 계산
    epoch_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total

    return epoch_loss, accuracy, start_time, end_time

In [51]:
def evaluate(model, data_loader, criterion, device, phase="Validation"):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc=f"{phase}"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs[0], labels)

            running_loss += loss.item()
            _, predicted = torch.max(outputs[0], 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(data_loader)
    accuracy = 100 * correct / total
    # print(f"{phase} Loss: {epoch_loss:.4f}, {phase} Accuracy: {accuracy:.2f}%")

    return epoch_loss, accuracy

In [52]:
torch.cuda.empty_cache()

In [53]:
free_memory, total_memory = torch.cuda.mem_get_info()
print(f"Free memory: {free_memory / 1024**2:.2f} MB")
print(f"Total memory: {total_memory / 1024**2:.2f} MB")

Free memory: 40026.81 MB
Total memory: 40513.81 MB


In [54]:
EPOCHS = 10
best_valid_loss = float('inf')
total_time = 0
for epoch in range(EPOCHS + 1):

    train_loss, train_acc, start_time, end_time = train(model, train_iterator, criterion, optimizer, device)

    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion, device)
    #if valid_loss < best_valid_loss:
        #best_valid_loss = valid_loss
        #torch.save(model.state_dict(), 'vgg19-model.pt')

    # end_time = time.monotonic()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    total_time += end_time - start_time

    print(f'Epoch: {epoch+1:02} | Epoch Train Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc:.2f}%')

print("Train finished")

Training: 100%|██████████| 313/313 [00:29<00:00, 10.77it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     703.625ms   11.568ms        0.000us    4.884ms   
1      forward_pass       0.000us    0.000us         3.015s    9.603ms   
2  loss_computation      30.271ms  485.389us        0.000us    5.574us   
3  loss_computation       0.000us    0.000us       46.469ms  148.462us   
4     backward_pass        3.611s   11.635ms        0.000us    1.754us   
5     backward_pass       0.000us    0.000us      548.928us    1.759us   
6    optimizer_step      15.550ms    3.128ms        0.000us  718.100us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216600290304>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202091302400>  
5                             0                0>  
6                           248         914

Validation: 100%|██████████| 94/94 [00:04<00:00, 19.92it/s]


Epoch: 01 | Epoch Train Time: 1m 0s
	Train Loss: 2.017 | Train Acc: 24.62%
	 Val. Loss: 1.994 |  Val. Acc: 30.37%


Training: 100%|██████████| 313/313 [00:27<00:00, 11.49it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     726.222ms    8.613ms        0.000us    4.883ms   
1      forward_pass       0.000us    0.000us         2.604s    8.320ms   
2  loss_computation      29.187ms  206.996us        0.000us    5.502us   
3  loss_computation       0.000us    0.000us       19.000ms   60.702us   
4     backward_pass        3.102s    9.961ms        0.000us    1.770us   
5     backward_pass       0.000us    0.000us      554.148us    1.770us   
6    optimizer_step      16.187ms    2.668ms        0.000us  718.352us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216602977280>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:05<00:00, 17.67it/s]


Epoch: 02 | Epoch Train Time: 0m 58s
	Train Loss: 1.781 | Train Acc: 34.19%
	 Val. Loss: 1.732 |  Val. Acc: 35.77%


Training: 100%|██████████| 313/313 [00:26<00:00, 12.02it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     704.554ms    8.408ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.549s    8.145ms   
2  loss_computation      27.051ms  197.959us        0.000us    5.491us   
3  loss_computation       0.000us    0.000us       18.584ms   59.373us   
4     backward_pass        2.985s    9.587ms        0.000us    1.737us   
5     backward_pass       0.000us    0.000us      543.746us    1.737us   
6    optimizer_step      15.212ms    2.654ms        0.000us  720.268us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216596554752>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0         463

Validation: 100%|██████████| 94/94 [00:05<00:00, 16.89it/s]


Epoch: 03 | Epoch Train Time: 0m 57s
	Train Loss: 1.654 | Train Acc: 38.69%
	 Val. Loss: 1.610 |  Val. Acc: 40.47%


Training: 100%|██████████| 313/313 [00:25<00:00, 12.12it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     701.464ms    8.342ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.531s    8.086ms   
2  loss_computation      27.041ms  197.578us        0.000us    5.500us   
3  loss_computation       0.000us    0.000us       18.498ms   59.100us   
4     backward_pass        2.992s    9.607ms        0.000us    1.772us   
5     backward_pass       0.000us    0.000us      554.525us    1.772us   
6    optimizer_step      15.248ms    2.619ms        0.000us  717.605us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216606188544>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:05<00:00, 16.37it/s]


Epoch: 04 | Epoch Train Time: 0m 56s
	Train Loss: 1.521 | Train Acc: 44.14%
	 Val. Loss: 1.446 |  Val. Acc: 46.63%


Training: 100%|██████████| 313/313 [00:26<00:00, 11.70it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     712.330ms    8.465ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.565s    8.195ms   
2  loss_computation      28.195ms  202.309us        0.000us    5.491us   
3  loss_computation       0.000us    0.000us       18.710ms   59.777us   
4     backward_pass        3.019s    9.694ms        0.000us    1.737us   
5     backward_pass       0.000us    0.000us      543.753us    1.737us   
6    optimizer_step      15.843ms    2.656ms        0.000us  715.493us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:05<00:00, 16.27it/s]


Epoch: 05 | Epoch Train Time: 0m 57s
	Train Loss: 1.383 | Train Acc: 50.15%
	 Val. Loss: 1.214 |  Val. Acc: 54.73%


Training: 100%|██████████| 313/313 [00:27<00:00, 11.25it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     709.614ms    8.446ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.562s    8.186ms   
2  loss_computation      26.617ms  195.221us        0.000us    5.506us   
3  loss_computation       0.000us    0.000us       18.395ms   58.769us   
4     backward_pass        3.009s    9.662ms        0.000us    1.773us   
5     backward_pass       0.000us    0.000us      554.880us    1.773us   
6    optimizer_step      15.804ms    2.709ms        0.000us  715.532us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:06<00:00, 15.59it/s]


Epoch: 06 | Epoch Train Time: 0m 58s
	Train Loss: 1.283 | Train Acc: 53.80%
	 Val. Loss: 1.178 |  Val. Acc: 57.97%


Training: 100%|██████████| 313/313 [00:28<00:00, 10.97it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     716.082ms    8.492ms        0.000us    4.883ms   
1      forward_pass       0.000us    0.000us         2.573s    8.222ms   
2  loss_computation      27.018ms  196.841us        0.000us    5.492us   
3  loss_computation       0.000us    0.000us       18.352ms   58.634us   
4     backward_pass        3.036s    9.749ms        0.000us    1.739us   
5     backward_pass       0.000us    0.000us      544.188us    1.739us   
6    optimizer_step      15.822ms    2.713ms        0.000us  715.474us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:06<00:00, 14.37it/s]


Epoch: 07 | Epoch Train Time: 0m 59s
	Train Loss: 1.186 | Train Acc: 57.64%
	 Val. Loss: 1.039 |  Val. Acc: 63.20%


Training: 100%|██████████| 313/313 [00:31<00:00,  9.98it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     723.742ms    8.662ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.626s    8.389ms   
2  loss_computation      27.341ms  199.952us        0.000us    5.495us   
3  loss_computation       0.000us    0.000us       18.865ms   60.271us   
4     backward_pass        3.020s    9.699ms        0.000us    1.770us   
5     backward_pass       0.000us    0.000us      554.113us    1.770us   
6    optimizer_step      15.531ms    2.969ms        0.000us  715.527us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0         926

Validation: 100%|██████████| 94/94 [00:06<00:00, 14.79it/s]


Epoch: 08 | Epoch Train Time: 1m 4s
	Train Loss: 1.099 | Train Acc: 60.68%
	 Val. Loss: 0.971 |  Val. Acc: 65.97%


Training: 100%|██████████| 313/313 [00:32<00:00,  9.70it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     726.585ms    8.625ms        0.000us    4.881ms   
1      forward_pass       0.000us    0.000us         2.614s    8.353ms   
2  loss_computation      27.267ms  200.095us        0.000us    5.549us   
3  loss_computation       0.000us    0.000us       18.955ms   60.558us   
4     backward_pass        3.068s    9.851ms        0.000us    1.752us   
5     backward_pass       0.000us    0.000us      548.424us    1.752us   
6    optimizer_step      15.488ms    2.820ms        0.000us  715.307us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0         463

Validation: 100%|██████████| 94/94 [00:06<00:00, 14.53it/s]


Epoch: 09 | Epoch Train Time: 1m 3s
	Train Loss: 1.033 | Train Acc: 63.65%
	 Val. Loss: 0.955 |  Val. Acc: 67.27%


Training: 100%|██████████| 313/313 [00:29<00:00, 10.53it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     712.816ms    8.594ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.607s    8.329ms   
2  loss_computation      27.186ms  198.613us        0.000us    5.502us   
3  loss_computation       0.000us    0.000us       18.575ms   59.346us   
4     backward_pass        3.082s    9.897ms        0.000us    1.772us   
5     backward_pass       0.000us    0.000us      554.501us    1.772us   
6    optimizer_step      15.873ms    2.764ms        0.000us  715.394us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:06<00:00, 14.75it/s]


Epoch: 10 | Epoch Train Time: 1m 1s
	Train Loss: 0.972 | Train Acc: 65.50%
	 Val. Loss: 0.981 |  Val. Acc: 65.00%


Training: 100%|██████████| 313/313 [00:32<00:00,  9.71it/s]


                key self_cpu_time   cpu_time self_cuda_time  cuda_time  \
0      forward_pass     733.168ms    8.756ms        0.000us    4.882ms   
1      forward_pass       0.000us    0.000us         2.652s    8.473ms   
2  loss_computation      28.480ms  204.793us        0.000us    5.494us   
3  loss_computation       0.000us    0.000us       18.905ms   60.399us   
4     backward_pass        3.060s    9.827ms        0.000us    1.738us   
5     backward_pass       0.000us    0.000us      543.975us    1.738us   
6    optimizer_step      16.163ms    2.855ms        0.000us  715.442us   

  input_shapes cpu_memory_usage cuda_memory_usage  
0                             0     216593343488>  
1                             0                0>  
2                             0           641024>  
3                             0                0>  
4                             0    -202100805632>  
5                             0                0>  
6                             0            

Validation: 100%|██████████| 94/94 [00:06<00:00, 14.67it/s]

Epoch: 11 | Epoch Train Time: 1m 3s
	Train Loss: 0.895 | Train Acc: 68.42%
	 Val. Loss: 0.851 |  Val. Acc: 69.97%
Train finished





In [55]:
free_memory, total_memory = torch.cuda.mem_get_info()
print(f"Free memory: {free_memory / 1024**2:.2f} MB")
print(f"Total memory: {total_memory / 1024**2:.2f} MB")

Free memory: 38534.81 MB
Total memory: 40513.81 MB


In [56]:
print("ResNet18")
print(f'Total Training Time: {int(total_time/60)}m {int(total_time%60)}s')

ResNet18
Total Training Time: 11m 0s


In [57]:
torch.save(model.state_dict(), 'trained_model.pth')

In [58]:
import os
model_file_size = os.path.getsize('trained_model.pth')  # 바이트 단위
model_file_size_MB = model_file_size / (1024 ** 2)  # MB로 변환
print(f"Saved model file size: {model_file_size_MB:.2f} MB")

Saved model file size: 42.73 MB


In [59]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name} - Size: {param.size()} - Number of elements: {param.numel()}")

conv1.weight - Size: torch.Size([64, 3, 7, 7]) - Number of elements: 9408
bn1.weight - Size: torch.Size([64]) - Number of elements: 64
bn1.bias - Size: torch.Size([64]) - Number of elements: 64
layer1.0.conv1.weight - Size: torch.Size([64, 64, 3, 3]) - Number of elements: 36864
layer1.0.bn1.weight - Size: torch.Size([64]) - Number of elements: 64
layer1.0.bn1.bias - Size: torch.Size([64]) - Number of elements: 64
layer1.0.conv2.weight - Size: torch.Size([64, 64, 3, 3]) - Number of elements: 36864
layer1.0.bn2.weight - Size: torch.Size([64]) - Number of elements: 64
layer1.0.bn2.bias - Size: torch.Size([64]) - Number of elements: 64
layer1.1.conv1.weight - Size: torch.Size([64, 64, 3, 3]) - Number of elements: 36864
layer1.1.bn1.weight - Size: torch.Size([64]) - Number of elements: 64
layer1.1.bn1.bias - Size: torch.Size([64]) - Number of elements: 64
layer1.1.conv2.weight - Size: torch.Size([64, 64, 3, 3]) - Number of elements: 36864
layer1.1.bn2.weight - Size: torch.Size([64]) - Numbe

In [60]:
total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {total_params}")

Total number of parameters: 11181642


In [61]:
from torchsummary import summary

summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,