<a href="https://colab.research.google.com/github/jeong1suk/DeepLearning/blob/main/05_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fashion-MNIST DNN Tutorial (CNN & Multi-Layer Perceptron (MLP))

# 외부 파일 가져오기 & requirements 설치

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
import sys
from datetime import datetime


In [None]:
# Project folder on the mounted Google Drive. It is appended to sys.path so
# that Python modules stored in that folder can be imported by later cells.
drive_project_root = '/content/drive/MyDrive/#fastcampus'
sys.path.append(drive_project_root)

In [None]:
#!pip install -r "/content/drive/MyDrive/#fastcampus/requirements.txt"

In [None]:
!pip install torch_optimizer

In [None]:
!pip install wandb

In [None]:
!pip install omegaconf

In [None]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from omegaconf import OmegaConf
from omegaconf import DictConfig

import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torch_optimizer import RAdam
from torch_optimizer import AdamP
from torch.utils.tensorboard import SummaryWriter

from torchvision.datasets import FashionMNIST
from torchvision import transforms
from torch.utils.data import random_split

import wandb

In [None]:
from data_utils import dataset_split

In [None]:
# The dataset is downloaded into a 'data' folder under the current working
# directory.
data_root = os.path.join(os.getcwd(), 'data')

# Preprocessing pipeline & dataset definition:
# convert each image to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])
fashion_mnist_dataset = FashionMNIST(
    root=data_root,
    train=True,
    transform=transform,
    download=True,
)

In [None]:
fashion_mnist_dataset[0][1]

In [None]:
'''
dset = random_split(
    fashion_mnist_dataset,
    [int(len(fashion_mnist_dataset)*0.7), len(fashion_mnist_dataset)-int(len(fashion_mnist_dataset)*0.7)]
)
# 이 코드로 해도 되지만, 랜덤성이 있기 때문에 util함수를 만들어서 split을 해준다.
'''

# DataLoader를 정의

In [None]:
# Split the training data into "train" and "val" subsets with the project
# helper (split=[0.9, 0.1] is presumably the train/val ratio -- confirm in
# data_utils.dataset_split).
datasets = dataset_split(fashion_mnist_dataset, split=[0.9, 0.1])
print(datasets)

train_dataset, val_dataset = datasets["train"], datasets["val"]

# Number of samples processed per step (one optimizer update per train batch).
train_batch_size = 100
val_batch_size = 100

# A DataLoader groups dataset samples into batches.
# Only the training loader shuffles; validation keeps a fixed order.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=1,
)
val_dataloader = torch.utils.data.DataLoader(
    dataset=val_dataset,
    batch_size=val_batch_size,
    shuffle=False,
    num_workers=1,
)

In [None]:
# Sanity check: print the first training batch and the shapes of its two
# elements (index 0 and index 1), then stop after that single batch.
for sample_batch in train_dataloader:
    print(sample_batch)
    print(sample_batch[0].shape, sample_batch[1].shape)
    break

# 모델 (Multi-layer Perceptron) (MLP) 정의


## 모델 MLPWithDropout 정의

In [None]:
# Define Model

class MLP(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Args:
        in_dim: Number of input features after flattening each sample.
        h1_dim: Width of the first hidden layer.
        h2_dim: Width of the second hidden layer.
        out_dim: Number of output units.
    """

    def __init__(self, in_dim: int, h1_dim: int, h2_dim: int, out_dim: int):
        super().__init__()
        self.linear1 = nn.Linear(in_dim, h1_dim)
        self.linear2 = nn.Linear(h1_dim, h2_dim)
        self.linear3 = nn.Linear(h2_dim, out_dim)
        # Activation is stored as an attribute so subclasses can reuse it.
        self.relu = F.relu

    def forward(self, input):
        """Flatten every dimension after the first and return the raw outputs.

        No softmax is applied here; the final linear layer's output is
        returned unchanged.
        """
        hidden = torch.flatten(input, start_dim=1)
        for hidden_layer in (self.linear1, self.linear2):
            hidden = self.relu(hidden_layer(hidden))
        return self.linear3(hidden)

class MLPWithDropout(MLP):
    """MLP variant that applies dropout after each hidden activation.

    Args:
        in_dim: Number of input features after flattening each sample.
        h1_dim: Width of the first hidden layer.
        h2_dim: Width of the second hidden layer.
        out_dim: Number of output units.
        dropout_prob: Drop probability used by both dropout layers.
    """

    def __init__(self, in_dim: int, h1_dim: int, h2_dim: int, out_dim: int, dropout_prob: float):
        super().__init__(in_dim, h1_dim, h2_dim, out_dim)
        self.dropout1 = nn.Dropout(p=dropout_prob)
        self.dropout2 = nn.Dropout(p=dropout_prob)

    def forward(self, input):
        """Flatten the input, run linear -> ReLU -> dropout twice, then project.

        As in the parent class, the raw output of the last linear layer is
        returned without a softmax.
        """
        hidden = torch.flatten(input, start_dim=1)
        stages = (
            (self.linear1, self.dropout1),
            (self.linear2, self.dropout2),
        )
        for linear, dropout in stages:
            hidden = dropout(self.relu(linear(hidden)))
        return self.linear3(hidden)

## CNN 모델 정의

In [None]:
# Hyper-parameters for the CNN class defined right after this config.
# "layer_1"/"layer_2" configure the two conv + max-pool stages, "fc_1".."fc_3"
# the fully connected head, and "dropout_prob" the dropout layer.
_cnn_cfg_dict = {
    "layer_1": {
        "conv2d_in_channels": 1,
        "conv2d_out_channels": 32,
        "conv2d_kernel_size": 3,
        "conv2d_padding": 1,
        "maxpool2d_kernel_size": 2,
        "maxpool2d_stride": 2,
    },
    "layer_2": {
        "conv2d_in_channels": 32,
        "conv2d_out_channels": 64,
        "conv2d_kernel_size": 3,
        "conv2d_padding": 0,
        "maxpool2d_kernel_size": 2,
        "maxpool2d_stride": 1,
    },
    "fc_1": {
        # Must equal the flattened output size of layer_2 (64 channels x 11 x 11
        # for 28x28 inputs with the settings above); re-check and update this
        # value whenever the conv/pool settings change.
        "in_features": 7744,
        "out_features": 512,
    },
    "fc_2": {
        "in_features": 512,
        "out_features": 128,
    },
    "fc_3": {
        "in_features": 128,
        "out_features": 10,
    },
    "dropout_prob": 0.25
}
# Wrap the plain dict so that fields can be read with attribute access
# (e.g. cfg.layer_1.conv2d_in_channels).
_cnn_cfg = OmegaConf.create(_cnn_cfg_dict)
# print(_cnn_cfg)
print(OmegaConf.to_yaml(_cnn_cfg))
# Optional: save the config to a YAML file / reload it later.
# with open("cnn_test.yaml", "w") as f:
    # OmegaConf.save(_cnn_cfg, f)
# print(_cnn_cfg.layer_1, _cnn_cfg["fc_1"])
# OmegaConf.load()

class CNN(nn.Module):
    """Two conv stages followed by a three-layer fully connected head.

    Each conv stage is Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d. The head is
    fc1 -> dropout -> fc2 -> fc3 and returns the raw output of fc3.

    Args:
        cfg: Config with ``layer_1``/``layer_2`` (conv and max-pool settings),
            ``fc_1``/``fc_2``/``fc_3`` (``in_features``/``out_features``) and
            ``dropout_prob``. ``cfg.fc_1.in_features`` must match the flattened
            output size of the second conv stage.
    """

    def __init__(self, cfg: DictConfig = _cnn_cfg):
        super().__init__()
        self.layer1 = self._conv_block(cfg.layer_1)
        self.layer2 = self._conv_block(cfg.layer_2)
        self.fc1 = nn.Linear(
            in_features=cfg.fc_1.in_features,
            out_features=cfg.fc_1.out_features,
        )
        self.fc2 = nn.Linear(
            in_features=cfg.fc_2.in_features,
            out_features=cfg.fc_2.out_features,
        )
        self.fc3 = nn.Linear(
            in_features=cfg.fc_3.in_features,
            out_features=cfg.fc_3.out_features,
        )
        # Plain element-wise dropout: this layer runs on the flattened
        # (batch, features) output of fc1. nn.Dropout2d is meant for 3-D/4-D
        # feature maps and emits a deprecation warning on 2-D input.
        self.dropout = nn.Dropout(cfg.dropout_prob)

    @staticmethod
    def _conv_block(layer_cfg):
        """Build one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=layer_cfg.conv2d_in_channels,
                out_channels=layer_cfg.conv2d_out_channels,
                kernel_size=layer_cfg.conv2d_kernel_size,
                padding=layer_cfg.conv2d_padding,
            ),
            nn.BatchNorm2d(num_features=layer_cfg.conv2d_out_channels),
            nn.ReLU(),
            nn.MaxPool2d(
                kernel_size=layer_cfg.maxpool2d_kernel_size,
                stride=layer_cfg.maxpool2d_stride,
            ),
        )

    def forward(self, x):
        """Run both conv stages, flatten per sample, and apply the FC head."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)  # flatten everything except the batch dim
        # NOTE(review): there is no activation between fc1, fc2 and fc3, so in
        # eval mode the head is a composition of linear maps. Left unchanged to
        # keep the model's behaviour; consider adding ReLU between the layers.
        out = self.fc1(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

# Instantiate once with the default config so the notebook displays the
# module structure.
CNN()

## Learning Rate Scheduler

In [None]:
# Warmup Scheduler
class WarmupLR(optim.lr_scheduler.LambdaLR):
    """Linear learning-rate warmup implemented on top of ``LambdaLR``.

    The learning-rate multiplier is ``step / warmup_end_steps`` while
    ``step < warmup_end_steps`` and ``1.0`` afterwards.

    Args:
        optimizer: Optimizer whose learning rate is scaled.
        warmup_end_steps: Number of scheduler steps the warmup lasts.
        last_epoch: Forwarded unchanged to ``LambdaLR``.
    """

    def __init__(
            self, optimizer: optim.Optimizer,
            warmup_end_steps:int,
            last_epoch: int = -1,
        ):
        # Guard against division by zero when warmup_end_steps is 0 or less.
        warmup_span = float(max(warmup_end_steps, 1))

        def lr_multiplier(step: int):
            if step >= warmup_end_steps:
                return 1.0
            return float(step) / warmup_span

        super().__init__(optimizer, lr_lambda=lr_multiplier, last_epoch=last_epoch)


# 모델 선언 및 손실 함수, 최적화(Optimization) 정의, Tensorboard Logger 정의

In [None]:
# gpu setup
# gpu = None  # force CPU
# Use CUDA device 0 when available; otherwise fall back to CPU so the notebook
# still runs on a runtime without a GPU.
gpu = 0 if torch.cuda.is_available() else None

# define model.
# model = MLP(28*28, 128, 64, 10)
# model = MLPWithDropout(28*28, 128, 64, 10, dropout_prob=0.3)
model = CNN(cfg=_cnn_cfg)
if gpu is not None:
    model.cuda(gpu)
model_name = type(model).__name__
print(model_name)

# define loss
loss_function = nn.CrossEntropyLoss()

# define optimizer
# `lr` is the single source of truth: it is used by the optimizer and also
# recorded in the run name and the wandb config.
lr = 1e-3
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# optimizer = torch.optim.SGD(model.parameters(), lr=lr)
optimizer = RAdam(model.parameters(), lr=lr)
# optimizer = AdamP(model.parameters(), lr=lr)
optimizer_name = type(optimizer).__name__

# define scheduler
scheduler = None
# scheduler = WarmupLR(optimizer, 1500)
scheduler_name = type(scheduler).__name__ if scheduler is not None else "no"

max_epoch = 50

# define tensorboard logger
# Several runs will be launched, so every run logs into its own sub-directory
# (named after the run) below the shared runs directory.
run_name = f"{datetime.now().isoformat(timespec='seconds')}-{model_name}-{optimizer_name}_optim_{lr}_lr_with_{scheduler_name}_scheduler"
run_dirname = "dnn-tutorial-fashion-mnist-runs"
# log_dir = f"runs/{run_name}"
# The last path component is run_name (not run_dirname again); otherwise all
# runs would write their event files and checkpoints into the same folder.
log_dir = os.path.join(drive_project_root, "runs", run_dirname, run_name)
writer = SummaryWriter(log_dir=log_dir)
log_interval = 100  # log training metrics every `log_interval` steps

# define wandb
project_name = "fashion_mnist_tutorials"
run_tags = [project_name]
wandb.init(
    project=project_name,
    name=run_name,
    tags=run_tags,
    config={"lr":lr, "model_name":model_name, "optimizer_name": optimizer_name, "scheduler_name":scheduler_name},
    reinit=True,
)
wandb.watch(model)

# set save model path
log_model_path = os.path.join(log_dir, "models")
os.makedirs(log_model_path, exist_ok=True)

In [None]:
# Code for inspecting the learning-rate schedule (uncomment to run; requires a scheduler to be set)
# for i in range(100):
#     print("step", i)
#     optimizer.step()
#     scheduler.step()
#     print(scheduler.get_last_lr())

# Early Stopping callback Object Class 정의

In [None]:
# With some modifications, source is from https://github.com/Bjarten/early-stopping-pytorch
class EarlyStopping:
    """
    Early stops the training if validation loss doesn't improve after a given patience.

    Call the instance once per validation round with the current validation
    loss and the model. The model is checkpointed on the first call and on
    every later call that is not counted as "no improvement"; ``early_stop``
    becomes True after ``patience`` consecutive non-improving calls.
    """
    def __init__(self, patience=7, verbose=False, delta=0, path="checkpoint.ckpt", trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                Default: False
            delta (float):  Minimum change in the monitored quantity to qualify as an improvement.
                Default: 0
            path (str): Path for the checkpoint to be saved to. The actual file
                name is prefixed with ``val_loss-<loss>-``.
                Default: 'checkpoint.ckpt'
            trace_func (function): trace print function.
                Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf instead of the np.Inf alias, which was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record ``val_loss``, checkpoint ``model`` on improvement, update ``early_stop``."""
        # Accept Python numbers as well as 0-dim tensors; storing a plain float
        # avoids keeping (possibly GPU) tensors alive inside this object.
        val_loss = float(val_loss)
        score = -val_loss  # higher score == lower loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.

        The whole model object is pickled with ``torch.save`` to
        ``<dir of path>/val_loss-<val_loss>-<file name of path>``.
        '''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')

        save_dir, filename = os.path.split(self.path)
        if save_dir:
            # Make sure the target directory exists before writing.
            os.makedirs(save_dir, exist_ok=True)
        torch.save(model, os.path.join(save_dir, f"val_loss-{val_loss}-{filename}"))
        self.val_loss_min = val_loss

In [None]:
%load_ext tensorboard
%tensorboard --logdir /content/drive/MyDrive/\#fastcampus/runs/dnn-tutorial-fashion-mnist-runs/

# define EarlyStopping.
early_stopper = EarlyStopping(
    patience=3, verbose=True, path=os.path.join(log_model_path, "model.ckpt")
)

# do train with validation
# Every epoch first validates the current model (so the untrained model is
# evaluated once before any update), then trains for one pass over the data.
train_step = 0
graph_logged = False  # the model graph only needs to be traced/logged once
for epoch in range(1, max_epoch+1):
    # valid step
    model.eval()
    val_loss_sum = 0.0      # sum of per-sample losses over the validation set
    val_correct_count = 0   # number of correctly classified validation samples
    val_sample_count = 0
    with torch.no_grad():
        for val_images, val_labels in tqdm(
            val_dataloader, position=0, leave=True, desc="validation"
        ):
            if gpu is not None:
                val_images = val_images.cuda(gpu)
                val_labels = val_labels.cuda(gpu)

            # Forward
            val_outputs = model(val_images)
            _, val_preds = torch.max(val_outputs, 1)

            # loss & acc
            # CrossEntropyLoss with its default reduction already returns the
            # batch mean, so multiply by the batch size to get a sample-weighted
            # sum; dividing by the batch size again would shrink the loss.
            n_val = val_labels.size(0)
            val_loss_sum += loss_function(val_outputs, val_labels).item() * n_val
            val_correct_count += torch.sum(val_preds == val_labels).item()
            val_sample_count += n_val

    # valid step logging
    val_epoch_loss = val_loss_sum / val_sample_count
    val_epoch_acc = val_correct_count / val_sample_count

    print(
        f"{epoch} epoch, {train_step} step: val_loss: {val_epoch_loss}, val_acc: {val_epoch_acc}"
    )

    # tensorboard log (images/outputs below come from the last validation batch)
    writer.add_scalar("Loss/val", val_epoch_loss, train_step)
    writer.add_scalar("Acc/val", val_epoch_acc, train_step)
    writer.add_images("Images/val", val_images, train_step)

    # wandb log
    wandb.log({
        "Loss/val": val_epoch_loss,
        "Acc/val": val_epoch_acc,
        "Images/val": wandb.Image(val_images),
        "Outputs/val": wandb.Histogram(val_outputs.detach().cpu().numpy()),
        "Preds/val": wandb.Histogram(val_preds.detach().cpu().numpy()),
        "Labels/val": wandb.Histogram(val_labels.data.detach().cpu().numpy()),
    }, step=train_step)

    # check model early stopping point & save model if model reached the best performance.
    early_stopper(val_epoch_loss, model)
    if early_stopper.early_stop:
        break

    # train step
    # Running statistics over the batches seen since the last training log.
    # They are reset only after logging (and at the start of each epoch), not
    # on every batch, so the logged values are true window averages.
    window_loss_sum = 0.0
    window_correct_count = 0
    window_sample_count = 0
    window_batch_count = 0
    model.train()

    for images, labels in tqdm(
        train_dataloader, position=0, leave=True, desc="training"
    ):

        if gpu is not None:
            images = images.cuda(gpu)
            labels = labels.cuda(gpu)

        # Forward
        # get predictions
        outputs = model(images)
        _, preds = torch.max(outputs, 1)

        # get loss
        loss = loss_function(outputs, labels)

        # Backpropagation
        # reset accumulated gradients
        optimizer.zero_grad()

        # Perform backward pass
        loss.backward()

        # Perform optimization
        optimizer.step()

        # Perform LR Scheduler work
        if scheduler is not None:
            scheduler.step()

        window_loss_sum += loss.item()
        window_correct_count += torch.sum(preds == labels).item()
        window_sample_count += labels.size(0)
        window_batch_count += 1

        if train_step % log_interval == 0:
            # Mean batch loss and sample accuracy over the current window.
            train_loss = window_loss_sum / window_batch_count
            train_acc = window_correct_count / window_sample_count

            print(
                f"{train_step}: train_loss: {train_loss}, train_acc: {train_acc}"
            )
            if scheduler is None:
                cur_lr = optimizer.param_groups[0]["lr"]
            else:
                cur_lr = scheduler.get_last_lr()[0]

            # tensorboard log
            writer.add_scalar("Loss/train", train_loss, train_step)
            writer.add_scalar("Acc/train", train_acc, train_step)
            writer.add_images("Images/train", images, train_step)
            # The LR changes over time when a scheduler is set, so log it too.
            writer.add_scalar("Learning Rate", cur_lr, train_step)
            if not graph_logged:
                # Tracing the model is expensive and the graph does not change
                # between steps, so it is written only once.
                writer.add_graph(model, images)
                graph_logged = True

            # wandb log
            wandb.log({
                "Loss/train": train_loss,
                "Acc/train": train_acc,
                "Images/train": wandb.Image(images),
                "Outputs/train": wandb.Histogram(outputs.detach().cpu().numpy()),
                "Preds/train": wandb.Histogram(preds.detach().cpu().numpy()),
                "Labels/train": wandb.Histogram(labels.data.detach().cpu().numpy()),
                "Learning Rate": cur_lr,
            }, step=train_step)

            window_loss_sum = 0.0
            window_correct_count = 0
            window_sample_count = 0
            window_batch_count = 0

        train_step += 1

In [None]:
# save model
# torch.save(model, os.path.join(log_model_path, "model.ckpt"))

In [None]:
log_model_path

In [None]:
# load models
# Checkpoints are written by EarlyStopping as "val_loss-<loss>-model.ckpt".
# The name below is specific to one training run: replace it with a file that
# actually exists in `log_model_path` (see the cell above).
# checkpoint_name = "val_loss-0.003235985990613699-model.ckpt"
checkpoint_name = "val_loss-0.0023227857891470194-model.ckpt"
# map_location="cpu" lets a checkpoint saved from a CUDA model be loaded on a
# runtime without a GPU; evaluation below runs on the CPU anyway.
# NOTE(review): the checkpoint is a fully pickled model object. Recent PyTorch
# releases default torch.load to weights_only=True, which rejects such files;
# pass weights_only=False there, and only for checkpoints you trust.
loaded_model = torch.load(
    os.path.join(log_model_path, checkpoint_name), map_location="cpu"
)
loaded_model.eval()
loaded_model.cpu()
print(loaded_model)

In [None]:
def softmax(x, axis=0):
    """Numerically stable softmax of ``x`` along ``axis``, computed with NumPy.

    The maximum along ``axis`` is subtracted before exponentiating so that
    large inputs do not overflow; this does not change the result.
    """
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exponentials = np.exp(shifted)
    normalizer = np.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / normalizer

In [None]:
test_batch_size = 100
# Use the same preprocessing (`transform`: ToTensor + Normalize) as for the
# training data; evaluating on un-normalized images would feed the model
# inputs on a different scale than it was trained on.
test_dataset = FashionMNIST(data_root, download=True, train=False, transform=transform)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=1)

test_labels_list = []   # ground-truth labels
test_preds_list = []    # predicted class index per sample
test_outputs_list = []  # per-class softmax scores per sample
loaded_model.eval()
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for test_images, test_labels in tqdm(test_dataloader, position=0, leave=True, desc="testing"):
        # forward
        test_outputs = loaded_model(test_images)
        _, test_preds = torch.max(test_outputs, 1)

        # Convert the raw model outputs to per-class scores that sum to 1.
        final_outs = softmax(test_outputs.numpy(), axis=1)
        test_outputs_list.extend(final_outs)
        test_preds_list.extend(test_preds.numpy())
        test_labels_list.extend(test_labels.numpy())


test_preds_list = np.array(test_preds_list)
test_labels_list = np.array(test_labels_list)

print(f"\nacc: {np.mean(test_preds_list == test_labels_list)*100}%")

In [None]:
# ROC Curve
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

n_class = 10

# Convert the collected per-sample score rows to one 2-D array once, instead
# of rebuilding it for every class inside the loop.
test_scores = np.asarray(test_outputs_list)

# One ROC curve per class: class i is the positive label and column i of the
# scores is its score.
fpr = {}
tpr = {}
threshold = {}
for i in range(n_class):
    fpr[i], tpr[i], threshold[i] = roc_curve(test_labels_list, test_scores[:, i], pos_label=i)

# plot
for i in range(n_class):
    plt.plot(fpr[i], tpr[i], linestyle="--", label=f"Class {i} vs Rest")
plt.title("Multi-class ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="best")
plt.show()

# NOTE(review): the curves above are one-vs-rest while this AUC uses the
# one-vs-one ("ovo") scheme -- confirm that the mix is intended.
print("auc_score: ", roc_auc_score(test_labels_list, test_scores, multi_class="ovo", average="macro"))