# Multi-layer Perceptron(MLP)

## Install Library & requirements

In [None]:
!pwd

In [1]:
import os
import sys
from datetime import datetime

drive_project_root = "/home/jmj3047/mj_MLP_prac"
sys.path.append(drive_project_root)


In [2]:
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
from torch_optimizer import RAdam
from torch_optimizer import AdamP

from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import FashionMNIST
from torchvision import transforms

import wandb

In [3]:
#!pwd
data_root = os.path.join(os.getcwd(), 'data')

#preprocessing & 데이터 셋 정의
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize([0.5],[0.5]) #mean, std
    ]
)

fashion_mnist_dataset = FashionMNIST(data_root, download=True, train=True, transform=transform)


## DataLoader 정의

In [4]:
from torch.utils.data import random_split
from data_utils import dataset_split

In [5]:
datasets= dataset_split(fashion_mnist_dataset, split=[0.9,0.1])

train_dataset = datasets['train']
val_dataset = datasets['val']

train_batch_size = 100
val_batch_size = 10

train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size = train_batch_size, shuffle=True, num_workers=1
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size = val_batch_size, shuffle=True, num_workers=1
)

In [6]:
for sample_batch in train_dataloader:
    print(sample_batch[0].shape, sample_batch[1].shape)
    break

#torch.Size([100, 1, 28, 28]) batchsize, channel, width, height

torch.Size([100, 1, 28, 28]) torch.Size([100])


## 모델(Multi-Layer Perceptron) (MLP) 정의
## 모델 MLPWithDropout 정의

In [7]:
import torch. nn.functional as F

In [8]:
#Define Model

class MLP(nn.Module):
    def __init__(self, in_dim=int, h1_dim = int, h2_dim = int, out_dim = int):
        super().__init__()
        self.linear1 = nn.Linear(in_dim, h1_dim)
        self.linear2 = nn.Linear(h1_dim, h2_dim)
        self.linear3 = nn.Linear(h2_dim, out_dim)
        self.relu = F.relu #activation 함수 정의

        pass

    def forward(self, input):
        x = torch.flatten(input, start_dim=1)
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        out = self.linear3(x)
        out = F.sigmoid(out) #binary classification은 softmax로 사용
        return out



class MLPWithDropout(MLP):
    def __init__(self, in_dim: int, h1_dim: int, h2_dim: int, out_dim: int, dropout_prob: float):
        super().__init__(in_dim, h1_dim, h2_dim, out_dim)
        self.dropout1 = nn.Dropout(dropout_prob)
        self.dropout2 = nn.Dropout(dropout_prob)
    
    def forward(self, input):
        x = torch.flatten(input, start_dim=1)
        x = self.relu(self.linear1(x))
        x = self.dropout1(x)
        x = self.relu(self.linear2(x))
        x = self.dropout2(x)
        out = self.linear3(x)
        # out = F.softmax(out)
        return out


## 모델 선언 및 손실함수, 최적화(Optimizer) 정의, Tensorboard Logger 정의

In [12]:
#define model
# model = MLP(28*28, 128, 64, 10)
model = MLPWithDropout(28*28, 128,64,10, dropout_prob=0.3)
model_name = type(model).__name__
print(model_name)

#define loss
loss_function = nn.CrossEntropyLoss()

#define optimizer
lr=1e-3
# optimizer = torch.optim.RAdam(model.parameters(), lr=lr)
# optimizer = torch.optim.SGD(model.parameters(), lr=lr)
# optimizer = torch.optim.Adam(model.parameters(), lr=lr)
optimizer = torch.optim.AdamP(model.parameters(), lr=lr)
optimizer_name = type(optimizer).__name__


#define scheduler
scheduler = None
scheduler_name = type(scheduler).__name__ if scheduler is not None else "no"

max_epoch = 10

#define tensorboard logger
run_name = f"{datetime.now().isoformat(timespec='seconds')}-{model_name}-{optimizer_name}_optim_{lr}_lr_with_{scheduler_name}_scheduler"
log_dir = f"runs/{run_name}"
writer = SummaryWriter(log_dir=log_dir)
# writer = SummaryWriter()
log_interval =100

#define wandb
project_name='fastcapmus_fashion_mnist_tutorials'
run_tags = [project_name]
wandb.init(
    project=project_name,
    name=run_name,
    tags=run_tags,
    config={"lr":lr, "model_name":model_name, "optimizer_name":optimizer_name, "scheduler_name": scheduler_name},
    reinit=True
)

# set save model path
log_model_path = os.path.join(log_dir, "models")
os.makedirs(log_model_path, exist_ok=True)

MLPWithDropout


AttributeError: module 'torch.optim' has no attribute 'AdamP'

## Early Stopping callback Object Class 정의

In [None]:
# With some modifications, source is from https://github.com/Bjarten/early-stopping-pytorch

class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.ckpt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.ckpt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None: #loss가 최소화 됐을때 save_checkpoint를 저장함
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta: #모델 성능이 더이상 개선되지 않는다 했을 때 early stopping이 됨
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience: #개선이 안됐다고 바로 하는게 아니라 조금 기다렸다가 stop함
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        
        filename = self.path.split('/')[-1]
        save_dir = os.path.dirname(self.path)
        torch.save(model, os.path.join(save_dir, f"val_loss-{val_loss}-{filename}")) #어떤 score에서 멈췄는지가 중요하기 때문에 그걸 print해줌
        self.val_loss_min = val_loss

In [None]:
%load_ext tensorboard
%tensorboard --logdir runs/ #여기다가 log를 쌓을거고 거기 있는걸 plot해달라는 의미

#define EarlyStopping
early_stopper = EarlyStopping(
    patience=3, verbose=True, path = os.path.join(log_model_path, "model.ckpt")
)

#do train with validation
train_step = 0
for epoch in range(1, max_epoch+1):
    #validation step
    with torch.no_grad(): # optimizer가 업데이트 하면 안됨
        val_loss = 0.0
        val_corrects = 0
        model.eval()

        for val_batch_idx, (val_images, val_labels) in enumerate(
            tqdm(val_dataloader, position=0, leave=True, desc = 'validation')
        ):
            #forward
            val_outputs = model(val_images)
            _, val_preds = torch.max(val_outputs, 1)
            
            #loss & acc
            val_loss += loss_function(val_outputs, val_labels) / val_outputs.shape[0] #이게 batch size. batch size만큼 평균을 내겠다는 뜻
            val_corrects += torch.sum(val_preds == val_labels.data) / val_outputs.shape[0]
        
        #valid step logging
        val_epoch_loss = val_loss / len(val_dataloader)
        val_epoch_acc = val_corrects/len(val_dataloader)
        print(
            f"{epoch} epoch, {train_step} step: val_loss: {val_epoch_loss}, val_acc: {val_epoch_acc}" 
        )

        #tensorboard log
        writer.add_scalar("Loss/val", val_epoch_loss, train_step)
        writer.add_scalar("Acc/val", val_epoch_acc, train_step)
        writer.add_images("Images/val", val_images, train_step)

        #wandb log
        wandb.log({
            "Loss/val":val_epoch_loss,
            "Acc/val": val_epoch_acc,
            "Images/val": wandb.Image(val_images),
            "Outputs/val": wandb.Histogram(val_outputs.detach().numpy()),
            "Preds/val": wandb.Histogram(val_preds.detach().numpy()),
            "Labels/val": wandb.Histogram(val_labels.data.detach().numpy()),
        }, step=train_step)

        # check early stopping point & save model if model reached the best performance
        early_stopper(val_epoch_loss, model)
        if early_stopper.early_stop:
            break
        
        #train step
        current_loss = 0
        current_corrects = 0
        model
                



    #train step
    for batch_idx, (images, labels) in enumerate(
         tqdm(train_dataloader, position=0, leave=True, desc = 'train')
    ):
        current_loss = 0.0
        current_corrects = 0

        #get predictions
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        # print(outputs)
        # print(preds)
        
        #get loss
        loss = loss_function(outputs, labels)


        ###### 여기까지가 forward ###

        #Backpropagation

        #optimitizer 초기화(zero화)
        optimizer.zero_grad()

        #perform backward pass
        loss.backward()

        #perfrom optimization
        optimizer.step()

        current_loss +=loss.item()
        current_corrects += torch.sum(preds == labels.data)

        if train_step % log_interval == 0:
            train_loss = current_loss / log_interval
            train_acc = current_corrects/log_interval
            print(
                f"{train_step}: train_loss: {train_loss}, train_acc: {train_acc}" 
            )

            #tensorboard log
            writer.add_scalar("Loss/train", train_step)
            writer.add_scalar("Acc/train", train_step)
            writer.add_images("Images/train", images, train_step)
            writer.add_graph(model, images)
            
            # wandb log
            wandb.log({
                "Loss/train": train_loss,
                "Acc/train": train_acc,
                "Images/train": wandb.Image(images),
                "Outputs/train": wandb.Histogram(outputs.detach().cpu().numpy()),
                "Preds/train": wandb.Histogram(preds.detach().cpu().numpy()),
                "Labels/train": wandb.Histogram(labels.data.detach().cpu().numpy()),
            }, step=train_step)

            current_loss = 0
            current_corrects = 0

        train_step += 1



In [None]:
# save model
# os.makedirs("./logs/models", exist_ok=True)
# torch.save(model, os.path.join(log_model_path, "model.ckpt"))

In [None]:
log_model_path

In [None]:
# load model
loaded_model = torch.load(os.path.join(log_model_path, "val_loss-0.03366972133517265-model.ckpt"))
loaded_model.eval()
print(loaded_model)

In [None]:
def softmax(x, axis=0):
    "numpy softmax"
    max = np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(x - max)
    sum = np.sum(e_x, axis = axis, keepdims=True)
    f_x = e_x / sum
    return f_x

In [None]:
test_batch_size = 100
test_dataset = FashionMNIST(data_root, download=True, train=False, transform=transforms.ToTensor())
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=1)

test_labels_list = []
test_preds_list = []
test_outputs_list = []


for i, (test_images, test_labels) in enumerate(tqdm(test_dataloader, position=0, leave=True, desc="testing")):
    #forward
    test_outputs = loaded_model(test_images)
    _, test_preds = torch.max(test_outputs, 1)

    final_outs = softmax(test_outputs.detach().numpy(), axis=1)
    test_outputs_list.extend(final_outs)
    test_preds_list.extend(test_preds.detach().numpy())
    test_labels_list.extend(test_preds.detach().numpy())

test_preds_list = np.array(test_preds_list)
test_labels_list = np.array(test_labels_list)

print(f"acc: {np.mean(test_preds_list==test_labels_list)*100}%")

In [None]:
#ROC curve
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score

fpr={}
tpr={}
thresh={}
n_class = 10

for i in range(n_class):
    fpr[i], tpr[i], thresh[i] = roc_curve(test_labels_list, np.array(test_outputs_list)[:,i], pos_label=i)

#print(fpr)

#plot
for i in range(n_class):
    plt.plot(fpr[i], tpr[i], linestyle = '--', label=f"class{i} vs Rest")
plt.title("Multi-class ROC Curve")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc = "best")
plt.show()

print(roc_auc_score(test_labels_list, test_outputs_list, multi_class='ovo', average='macro'))