# Import

In [1]:
import yaml # pip install pyyaml 설치 필요
import wandb
import torch
import torch.nn as nn
import os
from tqdm import tqdm
from model.CNN import CNN
from utils.checkpoints import save_checkpoint, load_checkpoint

device = torch.device('cuda' if torch.cuda.is_available else 'cpu')

In [2]:
device

device(type='cuda')

In [None]:
# config 파일 로드
def loadConfig(configFile):
    with open(configFile, encoding='utf-8') as f:
        config = yaml.safe_load(f)
    return config

configFile = 'config.yaml'

# 아래처럼 사용
# learning_rate = config['learning_rate']
# epochs = config['epochs']
# batch_size = config['batch_size']
# loss_function = config['loss_function']


# Settings

# DataLoader

In [3]:
import torch
from torch.utils.data import DataLoader, Dataset

In [4]:
BATCH_SIZE =  32 #한 배치당 32개 이미지데이터
EPOCHS = 40 # 전체 데이터 셋을 40번 반복
lr = 1e-2 # 학습률(learnign rate)

csv_file = ""
root_dir = "./data"

In [5]:
from torchvision import transforms
from utils.dataloader import CustomDataset

# 이미지 전처리 및 데이터셋 생성
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 원하는 크기로 리사이즈
    transforms.ToTensor(),           # 텐서로 변환
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 정규화
])

train_dataset = CustomDataset("", root_dir='./data', transform=transform)
val_dataset = CustomDataset("", root_dir='./data', transform=transform)
train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                                    batch_size = BATCH_SIZE,
                                                    shuffle = True, # 순서가 암기되는것을 막기위해.
                                                    )
val_loader = torch.utils.data.DataLoader(dataset = val_dataset,
                                                    batch_size = BATCH_SIZE,
                                                    shuffle = False, # 테스트 순서 유지.
                                                    ) 

In [6]:
len(train_dataset)

204

In [7]:
train_dataset[0][0].shape

torch.Size([3, 224, 224])

# Model Def

In [9]:
class AlexNet(nn.Module):
    def __init__(self, num_class=10):
        super(AlexNet, self).__init__()

        self.conv_layer1 = nn.Sequential(
            nn.Conv2d(1, 96, kernel_size=4),
            nn.ReLU(inplace=True),
            nn.Conv2d(96, 96, kernel_size=3),
            nn.ReLU(inplace=True)
        )
        self.conv_layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )
        self.conv_layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2)
        )

        self.fc_layer1 = nn.Sequential(
            nn.Dropout(),
            nn.Linear(6400, 800),
            nn.ReLU(inplace=True),
            nn.Linear(800, 10)
        )

    def forward(self, x):
        output = self.conv_layer1(x)
        output = self.conv_layer2(output)
        output = self.conv_layer3(output)
        output = torch.flatten(output, 1)
        output = self.fc_layer1(output)
        return output


# Model Load & Loss Function

# TRAIN CODE

In [10]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [11]:
def binary_acc(y_pred, y_test):

    y_pred_tag = torch.round(torch.sigmoid(y_pred))

    #checkpoint_filepath = 'checkpoint.pth'  # 체크포인트 저장 파일 경로
    correct_results_sum = (y_pred_tag == y_test).sum().float()
    acc = correct_results_sum/y_test.shape[0]
    acc = torch.round(acc * 100)
    
    return acc

In [15]:
config = {'epoches': EPOCHS, 'batch_size': BATCH_SIZE, 'learning_rate': lr}
wandb.init(project='my-test-project', config=config)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mkihoon090[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [16]:
checkpoint_interval = 5 ## 5에폭마다 체크포인트 저장
best_val_loss = float('inf')  # 초기값 무한대

# checkpoints 폴더가 없으면 생성
checkpoint_dir = 'checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(EPOCHS):
    train_loss = 0
    train_acc = 0
    val_loss = 0
    val_acc = 0
    ## Training
    print(f"Training on {epoch}")
    for images, labels in tqdm(train_loader, desc="train"):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        pred = model(images)
        
        # pred => [0.2, 0.3, 0.5, 0] 
        # label => [0] or [1] or [2] or [3] 
        
        loss = criterion(pred, labels)
        
        # pred => argmax()
        # label  => [0] or [1] 
        pred = torch.argmax(pred, dim=1)
        acc = binary_acc(pred, labels)

        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        train_acc += acc.item()
    
    train_loss = train_loss / len(train_dataset)
    train_acc = train_acc / len(train_dataset)
    print(f'Epoch {epoch+0:03}: | Loss: {train_loss:.5f} | Acc: {train_acc:.3f}')
    wandb.log({'accuracy': train_acc, 'loss': train_loss})
    
    ## Validation
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="val"):
            images, labels = images.to(device), labels.to(device)

            pred = model(images)
        
            loss = criterion(pred, labels)
            pred = torch.argmax(pred, dim=1)
            acc = binary_acc(pred, labels)

            val_loss += loss.item()
            val_acc += acc.item()

    val_loss = val_loss / len(val_dataset)
    val_acc = val_acc / len(val_dataset)
    print(f'Epoch {epoch+0:03}: | Loss: {val_loss:.5f} | Acc: {val_acc:.3f}')
    wandb.log({'accuracy': val_acc, 'loss': val_loss})

    # validation이 개선되었을 때만 체크포인트 저장
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint_filepath = os.path.join(checkpoint_dir, f'checkpoint_epoch_{epoch + 1}.pth')  # 에포크 번호 포함
        save_checkpoint(model, optimizer, epoch, val_loss, checkpoint_filepath)
        print(f"Checkpoint updated at epoch {epoch + 1} and saved as {checkpoint_filepath}")

# 최종 체크포인트 저장
save_checkpoint(model, optimizer, epoch, train_loss / len(train_loader), 'final_checkpoint.pth')


Training on 0


train: 100%|██████████| 7/7 [00:03<00:00,  2.28it/s]


Epoch 000: | Loss: 0.04759 | Acc: 0.863


val: 100%|██████████| 7/7 [00:02<00:00,  2.62it/s]


Epoch 000: | Loss: 0.04763 | Acc: 0.784
Checkpoint saved at checkpoints\checkpoint_epoch_1.pth
Checkpoint updated at epoch 1 and saved as checkpoints\checkpoint_epoch_1.pth
Training on 1


train: 100%|██████████| 7/7 [00:02<00:00,  2.37it/s]


Epoch 001: | Loss: 0.04760 | Acc: 0.809


val: 100%|██████████| 7/7 [00:02<00:00,  2.60it/s]


Epoch 001: | Loss: 0.04764 | Acc: 0.784
Training on 2


train: 100%|██████████| 7/7 [00:02<00:00,  2.46it/s]


Epoch 002: | Loss: 0.04759 | Acc: 0.809


val: 100%|██████████| 7/7 [00:03<00:00,  2.26it/s]


Epoch 002: | Loss: 0.04763 | Acc: 0.784
Checkpoint saved at checkpoints\checkpoint_epoch_3.pth
Checkpoint updated at epoch 3 and saved as checkpoints\checkpoint_epoch_3.pth
Training on 3


train: 100%|██████████| 7/7 [00:03<00:00,  2.26it/s]


Epoch 003: | Loss: 0.04758 | Acc: 0.941


val: 100%|██████████| 7/7 [00:02<00:00,  2.40it/s]


Epoch 003: | Loss: 0.04760 | Acc: 0.779
Checkpoint saved at checkpoints\checkpoint_epoch_4.pth
Checkpoint updated at epoch 4 and saved as checkpoints\checkpoint_epoch_4.pth
Training on 4


train: 100%|██████████| 7/7 [00:03<00:00,  2.33it/s]


Epoch 004: | Loss: 0.04757 | Acc: 0.858


val: 100%|██████████| 7/7 [00:02<00:00,  2.42it/s]


Epoch 004: | Loss: 0.04757 | Acc: 0.779
Checkpoint saved at checkpoints\checkpoint_epoch_5.pth
Checkpoint updated at epoch 5 and saved as checkpoints\checkpoint_epoch_5.pth
Training on 5


train: 100%|██████████| 7/7 [00:02<00:00,  2.35it/s]


Epoch 005: | Loss: 0.04759 | Acc: 0.858


val: 100%|██████████| 7/7 [00:03<00:00,  2.33it/s]


Epoch 005: | Loss: 0.04753 | Acc: 0.779
Checkpoint saved at checkpoints\checkpoint_epoch_6.pth
Checkpoint updated at epoch 6 and saved as checkpoints\checkpoint_epoch_6.pth
Training on 6


train: 100%|██████████| 7/7 [00:03<00:00,  2.23it/s]


Epoch 006: | Loss: 0.04755 | Acc: 0.907


val: 100%|██████████| 7/7 [00:02<00:00,  2.40it/s]


Epoch 006: | Loss: 0.04752 | Acc: 0.779
Checkpoint saved at checkpoints\checkpoint_epoch_7.pth
Checkpoint updated at epoch 7 and saved as checkpoints\checkpoint_epoch_7.pth
Training on 7


train: 100%|██████████| 7/7 [00:03<00:00,  2.25it/s]


Epoch 007: | Loss: 0.04755 | Acc: 0.882


val: 100%|██████████| 7/7 [00:02<00:00,  2.51it/s]


Epoch 007: | Loss: 0.04751 | Acc: 0.779
Checkpoint saved at checkpoints\checkpoint_epoch_8.pth
Checkpoint updated at epoch 8 and saved as checkpoints\checkpoint_epoch_8.pth
Training on 8


train: 100%|██████████| 7/7 [00:02<00:00,  2.34it/s]


Epoch 008: | Loss: 0.04755 | Acc: 0.819


val: 100%|██████████| 7/7 [00:02<00:00,  2.33it/s]


Epoch 008: | Loss: 0.04749 | Acc: 0.784
Checkpoint saved at checkpoints\checkpoint_epoch_9.pth
Checkpoint updated at epoch 9 and saved as checkpoints\checkpoint_epoch_9.pth
Training on 9


train: 100%|██████████| 7/7 [00:03<00:00,  2.33it/s]


Epoch 009: | Loss: 0.04754 | Acc: 0.809


val: 100%|██████████| 7/7 [00:02<00:00,  2.63it/s]


Epoch 009: | Loss: 0.04748 | Acc: 0.784
Checkpoint saved at checkpoints\checkpoint_epoch_10.pth
Checkpoint updated at epoch 10 and saved as checkpoints\checkpoint_epoch_10.pth
Training on 10


train: 100%|██████████| 7/7 [00:02<00:00,  2.36it/s]


Epoch 010: | Loss: 0.04760 | Acc: 0.618


val: 100%|██████████| 7/7 [00:02<00:00,  2.66it/s]


Epoch 010: | Loss: 0.04748 | Acc: 0.784
Training on 11


train: 100%|██████████| 7/7 [00:02<00:00,  2.49it/s]


Epoch 011: | Loss: 0.04761 | Acc: 0.853


val: 100%|██████████| 7/7 [00:02<00:00,  2.67it/s]


Epoch 011: | Loss: 0.04747 | Acc: 0.784
Checkpoint saved at checkpoints\checkpoint_epoch_12.pth
Checkpoint updated at epoch 12 and saved as checkpoints\checkpoint_epoch_12.pth
Training on 12


train: 100%|██████████| 7/7 [00:02<00:00,  2.43it/s]


Epoch 012: | Loss: 0.04756 | Acc: 0.833


val: 100%|██████████| 7/7 [00:02<00:00,  2.48it/s]


Epoch 012: | Loss: 0.04748 | Acc: 0.784
Training on 13


train: 100%|██████████| 7/7 [00:02<00:00,  2.50it/s]


Epoch 013: | Loss: 0.04762 | Acc: 0.809


val: 100%|██████████| 7/7 [00:02<00:00,  2.69it/s]


Epoch 013: | Loss: 0.04746 | Acc: 0.784
Checkpoint saved at checkpoints\checkpoint_epoch_14.pth
Checkpoint updated at epoch 14 and saved as checkpoints\checkpoint_epoch_14.pth
Training on 14


train: 100%|██████████| 7/7 [00:02<00:00,  2.38it/s]


Epoch 014: | Loss: 0.04760 | Acc: 0.912


val: 100%|██████████| 7/7 [00:02<00:00,  2.55it/s]


Epoch 014: | Loss: 0.04749 | Acc: 0.784
Training on 15


train: 100%|██████████| 7/7 [00:02<00:00,  2.37it/s]


Epoch 015: | Loss: 0.04758 | Acc: 0.912


val: 100%|██████████| 7/7 [00:02<00:00,  2.42it/s]


Epoch 015: | Loss: 0.04748 | Acc: 0.784
Training on 16


train: 100%|██████████| 7/7 [00:03<00:00,  2.25it/s]


Epoch 016: | Loss: 0.04759 | Acc: 0.887


val: 100%|██████████| 7/7 [00:02<00:00,  2.41it/s]


Epoch 016: | Loss: 0.04749 | Acc: 0.784
Training on 17


train: 100%|██████████| 7/7 [00:03<00:00,  2.24it/s]


Epoch 017: | Loss: 0.04757 | Acc: 0.833


val: 100%|██████████| 7/7 [00:02<00:00,  2.51it/s]


Epoch 017: | Loss: 0.04751 | Acc: 0.784
Training on 18


train: 100%|██████████| 7/7 [00:02<00:00,  2.48it/s]


Epoch 018: | Loss: 0.04756 | Acc: 0.858


val: 100%|██████████| 7/7 [00:02<00:00,  2.66it/s]


Epoch 018: | Loss: 0.04750 | Acc: 0.784
Training on 19


train: 100%|██████████| 7/7 [00:02<00:00,  2.47it/s]


Epoch 019: | Loss: 0.04763 | Acc: 0.838


val: 100%|██████████| 7/7 [00:02<00:00,  2.62it/s]


Epoch 019: | Loss: 0.04750 | Acc: 0.784
Training on 20


train: 100%|██████████| 7/7 [00:03<00:00,  2.29it/s]


Epoch 020: | Loss: 0.04759 | Acc: 0.858


val: 100%|██████████| 7/7 [00:02<00:00,  2.58it/s]


Epoch 020: | Loss: 0.04751 | Acc: 0.784
Training on 21


train: 100%|██████████| 7/7 [00:03<00:00,  2.33it/s]


Epoch 021: | Loss: 0.04759 | Acc: 0.873


val: 100%|██████████| 7/7 [00:02<00:00,  2.46it/s]


Epoch 021: | Loss: 0.04756 | Acc: 0.779
Training on 22


train: 100%|██████████| 7/7 [00:03<00:00,  2.31it/s]


Epoch 022: | Loss: 0.04758 | Acc: 0.828


val: 100%|██████████| 7/7 [00:02<00:00,  2.39it/s]


Epoch 022: | Loss: 0.04757 | Acc: 0.779
Training on 23


train: 100%|██████████| 7/7 [00:02<00:00,  2.36it/s]


Epoch 023: | Loss: 0.04759 | Acc: 0.858


val: 100%|██████████| 7/7 [00:02<00:00,  2.54it/s]


Epoch 023: | Loss: 0.04758 | Acc: 0.779
Training on 24


train: 100%|██████████| 7/7 [00:02<00:00,  2.40it/s]


Epoch 024: | Loss: 0.04757 | Acc: 0.912


val: 100%|██████████| 7/7 [00:02<00:00,  2.50it/s]


Epoch 024: | Loss: 0.04758 | Acc: 0.779
Training on 25


train: 100%|██████████| 7/7 [00:02<00:00,  2.40it/s]


Epoch 025: | Loss: 0.04759 | Acc: 0.887


val: 100%|██████████| 7/7 [00:02<00:00,  2.41it/s]


Epoch 025: | Loss: 0.04759 | Acc: 0.784
Training on 26


train: 100%|██████████| 7/7 [00:02<00:00,  2.34it/s]


Epoch 026: | Loss: 0.04758 | Acc: 0.814


val: 100%|██████████| 7/7 [00:02<00:00,  2.65it/s]


Epoch 026: | Loss: 0.04758 | Acc: 0.779
Training on 27


train: 100%|██████████| 7/7 [00:02<00:00,  2.44it/s]


Epoch 027: | Loss: 0.04758 | Acc: 0.858


val: 100%|██████████| 7/7 [00:02<00:00,  2.58it/s]


Epoch 027: | Loss: 0.04760 | Acc: 0.779
Training on 28


train: 100%|██████████| 7/7 [00:02<00:00,  2.46it/s]


Epoch 028: | Loss: 0.04760 | Acc: 0.838


val: 100%|██████████| 7/7 [00:03<00:00,  2.32it/s]


Epoch 028: | Loss: 0.04760 | Acc: 0.779
Training on 29


train: 100%|██████████| 7/7 [00:03<00:00,  2.21it/s]


Epoch 029: | Loss: 0.04758 | Acc: 0.858


val: 100%|██████████| 7/7 [00:02<00:00,  2.59it/s]


Epoch 029: | Loss: 0.04756 | Acc: 0.779
Training on 30


train: 100%|██████████| 7/7 [00:02<00:00,  2.34it/s]


Epoch 030: | Loss: 0.04758 | Acc: 0.853


val: 100%|██████████| 7/7 [00:02<00:00,  2.45it/s]


Epoch 030: | Loss: 0.04752 | Acc: 0.784
Training on 31


train: 100%|██████████| 7/7 [00:03<00:00,  2.22it/s]


Epoch 031: | Loss: 0.04758 | Acc: 0.863


val: 100%|██████████| 7/7 [00:02<00:00,  2.46it/s]


Epoch 031: | Loss: 0.04751 | Acc: 0.784
Training on 32


train: 100%|██████████| 7/7 [00:02<00:00,  2.34it/s]


Epoch 032: | Loss: 0.04761 | Acc: 0.833


val: 100%|██████████| 7/7 [00:02<00:00,  2.42it/s]


Epoch 032: | Loss: 0.04751 | Acc: 0.784
Training on 33


train: 100%|██████████| 7/7 [00:02<00:00,  2.38it/s]


Epoch 033: | Loss: 0.04758 | Acc: 0.838


val:  29%|██▊       | 2/7 [00:01<00:03,  1.49it/s]


KeyboardInterrupt: 