In [1]:
%%time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms

# 모델 정의
class BaseCNN(nn.Module):
    def __init__(self, class_num):
        super(BaseCNN, self).__init__()

        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.GELU()
        self.pool1 = nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2)

        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.GELU()
        self.pool2 = nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2)

        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.GELU()
        self.pool3 = nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2)

        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.GELU()
        self.pool4 = nn.Conv2d(64, 64, kernel_size=5, stride=2, padding=2)

        self.conv5 = nn.Conv2d(64, 256, kernel_size=4)
        self.bn5 = nn.BatchNorm2d(256)
        self.relu5 = nn.GELU()

        self.fc6 = nn.Linear(256, 256)
        self.relu6 = nn.GELU()
        self.dropout6 = nn.Dropout(0.5)
        self.fc7 = nn.Linear(256, class_num)
        
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # 손실 함수(loss function) 및 최적화 알고리즘(optimizer) 설정
        self.loss_fn = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)
        _out = self.pool1(out)

        out = self.conv2(_out)
        out = self.bn2(out)
        out = _out + self.relu2(out)
        _out = self.pool2(out)

        out = self.conv3(_out)
        out = self.bn3(out)
        out = _out + self.relu3(out)
        _out = self.pool3(out)

        out = self.conv4(_out)
        out = self.bn4(out)
        out = _out + self.relu4(out)
        out = self.pool4(out)

        out = self.conv5(out)
        out = self.bn5(out)
        out = self.relu5(out)

        out = out.view(out.size(0), -1)
        out = self.fc6(out)
        out = self.relu6(out)
        out = self.dropout6(out)
        out = self.fc7(out)

        return out
    
    def set_data(self, data_path="datasets"):
        # Augmentation을 적용한 변환(transform) 객체 생성
        transform_train = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize((68, 68)),
            transforms.RandomCrop((64, 64), padding=4),
            transforms.RandomRotation(degrees=10),
            transforms.ToTensor(),
        ])

        # Augmentation을 적용하지 않은 변환(transform) 객체 생성
        transform_test = transforms.Compose([
            transforms.Grayscale(),
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
        ])

        # 데이터셋을 생성
        dataset_train = datasets.ImageFolder(data_path, transform=transform_train)
        dataset_test = datasets.ImageFolder(data_path, transform=transform_test)

        # 데이터셋을 train, test 데이터 셋으로 분리
        num_data = len(dataset_train)
        num_train = int(num_data * 0.8)  # 80%를 train 데이터셋으로 사용
        num_valid = int(num_data * 0.1)  # 10%를 validation 데이터셋으로 사용
        num_test = num_data - num_train - num_valid  # 나머지 10%를 test 데이터셋으로 사용
        train_set, valid_set, test_set = torch.utils.data.random_split(dataset_train, [num_train, num_valid, num_test])

        # 각 데이터셋을 DataLoader로 래핑(wrap)하기
        self.train_loader = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True)
        self.valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=128, shuffle=False)
        self.test_loader = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False)
   
    def test(self, dataloder=None):
        if dataloder is None:
            dataloder = self.test_loader
            
        size = len(dataloder.dataset)
        num_batches = len(dataloder)
        test_loss, correct = 0, 0

        with torch.no_grad():
            for inputs, labels in dataloder:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                pred = self(inputs)
                test_loss += self.loss_fn(pred, labels).item()
                correct += (pred.argmax(1) == labels).type(torch.float).sum().item()

        test_loss /= num_batches
        correct /= size
        print(f"Validation Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

        return test_loss

    def train(self):
        size = len(self.train_loader.dataset)
        for batch, (inputs, labels) in enumerate(self.train_loader):
            inputs, labels = inputs.to(self.device), labels.to(self.device)

            # Compute prediction and loss
            pred = self(inputs)
            loss = self.loss_fn(pred, labels)

            # Backpropagation
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if batch % 10 == 0:
                loss, current = loss.item(), (batch + 1) * len(inputs)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

        val_loss = self.test(self.valid_loader)
        return val_loss


CPU times: user 5.15 s, sys: 1.63 s, total: 6.79 s
Wall time: 4.94 s


In [2]:
%%time
# 모델 객체 생성
model = BaseCNN(410)
if torch.cuda.is_available():
    model.cuda()

model.set_data("datasets")

[2023-03-16 09:45:46.597] [info] Requesting resources for KT AI Accelerator from the server...
[2023-03-16 09:45:47.616] [info] Initializing the worker daemon for KT AI Accelerator
[2023-03-16 09:45:48.757] [info] [1/1] Connecting to resources on the server (192.168.110.1:24161)...
[2023-03-16 09:45:48.769] [info] Establishing links to the resources...
[2023-03-16 09:45:48.806] [info] KT AI Accelerator is ready to use.
CPU times: user 259 ms, sys: 107 ms, total: 365 ms
Wall time: 2.57 s


In [5]:
%%time
prev_loss = 1e+10

for t in range(20):
    print(f"Epoch {t+1}\n-------------------------------")
    val_loss = model.train()
    
    if val_loss < prev_loss:
        # 모델 저장
        torch.save(model, 'todohanja.pth')
        prev_loss = val_loss

Epoch 1
-------------------------------
loss: 0.086070  [  128/25047]
loss: 0.071183  [ 1408/25047]
loss: 0.020743  [ 2688/25047]
loss: 0.066931  [ 3968/25047]
loss: 0.044829  [ 5248/25047]
loss: 0.004871  [ 6528/25047]
loss: 0.025747  [ 7808/25047]
loss: 0.020665  [ 9088/25047]
loss: 0.026137  [10368/25047]
loss: 0.009955  [11648/25047]
loss: 0.016025  [12928/25047]
loss: 0.006475  [14208/25047]
loss: 0.040938  [15488/25047]
loss: 0.072696  [16768/25047]
loss: 0.025354  [18048/25047]
loss: 0.046908  [19328/25047]
loss: 0.026171  [20608/25047]
loss: 0.039478  [21888/25047]
loss: 0.028303  [23168/25047]
loss: 0.058439  [24448/25047]
Validation Error: 
 Accuracy: 98.4%, Avg loss: 0.059897 

Epoch 2
-------------------------------
loss: 0.005473  [  128/25047]
loss: 0.042589  [ 1408/25047]
loss: 0.064207  [ 2688/25047]
loss: 0.068610  [ 3968/25047]
loss: 0.010490  [ 5248/25047]
loss: 0.015436  [ 6528/25047]
loss: 0.034919  [ 7808/25047]
loss: 0.053301  [ 9088/25047]
loss: 0.032523  [10368

In [6]:
%%time
model.test()

Validation Error: 
 Accuracy: 98.4%, Avg loss: 0.055935 

CPU times: user 1.92 s, sys: 103 ms, total: 2.03 s
Wall time: 2.33 s


0.055935481088235976