In [11]:
import argparse
import torch
from torch import nn, optim
import torch.nn.functional as F
from datetime import datetime
import os
import wandb
from pathlib import Path

In [12]:
import sys
BASE_PATH = "./"
sys.path.append(BASE_PATH)

In [13]:
CURRENT_FILE_PATH = "./"
CHECKPOINT_FILE_PATH = os.path.join(CURRENT_FILE_PATH, "checkpoints")
if not os.path.isdir(CHECKPOINT_FILE_PATH):
  os.makedirs(os.path.join(CURRENT_FILE_PATH, "checkpoints"))

In [14]:
import import_ipynb
from fashion_mnist_data import get_fashion_mnist_data, get_fashion_mnist_test_data
from trainer import ClassificationTrainer

In [15]:
class CNN_MNIST(nn.Module):
    def __init__(self, in_channels, n_output):
        super(CNN_MNIST, self).__init__()

        # 첫 번째 Convolutional Layer: 32개의 3x3 커널, 패딩 1
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)  # Batch Normalization
        # 두 번째 Convolutional Layer: 64개의 3x3 커널, 패딩 1
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)  # Batch Normalization
        # Max Pooling Layer: 2x2 커널, 스트라이드 2
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 첫 번째 Fully Connected Layer: 64 * 7 * 7 크기의 입력을 받아 128개의 뉴런 생성
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        # 두 번째 Fully Connected Layer: 128개의 뉴런에서 10개의 출력 뉴런 생성 (클래스 수)
        self.fc2 = nn.Linear(128, n_output)
        # Dropout Layer: 50%의 확률로 뉴런을 비활성화하여 과적합 방지
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # 첫 번째 Convolutional Layer를 통과하고 활성화 함수 및 Batch Normalization을 적용
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        # 두 번째 Convolutional Layer를 통과하고 활성화 함수 및 Batch Normalization을 적용
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        # 2D 이미지를 1D 벡터로 평탄화
        x = x.view(-1, 64 * 7 * 7)
        # 첫 번째 Fully Connected Layer를 통과하고 활성화 함수를 적용
        x = F.relu(self.fc1(x))
        # Dropout을 적용하여 과적합 방지
        x = self.dropout(x)
        # 두 번째 Fully Connected Layer를 통과하여 최종 출력 생성
        x = self.fc2(x)
        return x

In [16]:
def get_cnn_model():
    improved_cnn_mnist_model = CNN_MNIST(in_channels=1, n_output=10)
    return improved_cnn_mnist_model

In [18]:
def main(args):
    run_time_str = datetime.now().astimezone().strftime('%Y-%m-%d_%H-%M-%S')
    
    config = {
        'epochs': args.epochs,
        'batch_size': args.batch_size,
        'validation_intervals': args.validation_intervals,
        'learning_rate': args.learning_rate,
        'early_stop_patience': args.early_stop_patience
    }
    
    project_name = "fashion_mnist_cnn"
    wandb.init(
        mode="online" if args.wandb else "disabled",
        project=project_name,
        notes="fashion mnist with cnn",
        name=run_time_str,
        config=config
    )
    print(args)
    print(wandb.config)
    
    device = torch.device("cpu")
    print(f"Training on device {device}.")
    
    train_data_loader, validation_data_loader, mnist_transforms = get_fashion_mnist_data()
    model = get_cnn_model()
    model.to(device)
    mnist_transforms.to(device)
    wandb.watch(model)
    
    from torchinfo import summary
    summary(model=model, input_size=(1, 1, 28, 28))
    
    optimizer = optim.Adam(model.parameters(), lr=wandb.config.learning_rate)
    
    classification_trainer = ClassificationTrainer(
        project_name, model, optimizer, train_data_loader, validation_data_loader, mnist_transforms,
        run_time_str, wandb, device, CHECKPOINT_FILE_PATH
    )
    classification_trainer.train_loop()
    
    wandb.finish()

In [19]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--wandb", action=argparse.BooleanOptionalAction, default=True, help="True or False"
    )
    parser.add_argument(
        "-b", "--batch_size", type=int, default=2_048, help="Batch size (int, default: 2_048)"
    )
    
    parser.add_argument(
        "-e", "--epochs", type=int, default=10_000, help="Number of training epochs (int, default:10_000)"
    )
    
    parser.add_argument(
        "-r", "--learning_rate", type=float, default=1e-3, help="Learning rate (float, default: 1e-3)"
    )
    
    parser.add_argument(
        "-v", "--validation_intervals", type=int, default=10,
        help="Number of training epochs between validations (int, default: 10)"
    )
    
    parser.add_argument(
        "-p", "--early_stop_patience", type=int, default=10,
        help="Number of early stop patience (int, default: 10)"
    )
    
    args = parser.parse_args(args=[])
    main(args)

Namespace(wandb=True, batch_size=2048, epochs=10000, learning_rate=0.001, validation_intervals=10, early_stop_patience=10)
{'epochs': 10000, 'batch_size': 2048, 'validation_intervals': 10, 'learning_rate': 0.001, 'early_stop_patience': 10}
Training on device cpu.
Num Train Samples:  55000
Num Validation Samples:  5000
Sample Shape:  torch.Size([1, 28, 28])
Number of Data Loading Workers: 1
torch.Size([1, 28, 28, 55000])
mean : tensor([0.2859]), std : tensor([0.3529])
[Epoch   1] T_loss: 0.9103, T_accuracy: 67.6436 | V_loss: 0.5256, V_accuracy: 82.7800 | Early stopping is stated! | T_time: 00:00:26, T_speed: 0.038
[Epoch  10] T_loss: 0.2520, T_accuracy: 91.2036 | V_loss: 0.2362, V_accuracy: 91.1200 | V_loss decreased ( 0.526 -->  0.236). Saving model... | T_time: 00:04:13, T_speed: 0.040
[Epoch  20] T_loss: 0.1755, T_accuracy: 93.6636 | V_loss: 0.2080, V_accuracy: 92.7800 | V_loss decreased ( 0.236 -->  0.208). Saving model... | T_time: 00:08:26, T_speed: 0.040
[Epoch  30] T_loss: 0.129

0,1
Epoch,▁▂▂▃▃▄▄▅▆▆▇▇█
Training accuracy (%),▁▆▇▇▇████████
Training loss,█▃▂▂▂▁▁▁▁▁▁▁▁
Training speed (epochs/sec.),▃▇▇▇▇███▁▂▂▃▃
Validation accuracy (%),▁▇███████████
Validation loss,█▂▁▁▁▂▂▃▄▄▅▅▅

0,1
Epoch,120.0
Training accuracy (%),98.89455
Training loss,0.03018
Training speed (epochs/sec.),0.03857
Validation accuracy (%),92.9
Validation loss,0.38828
