# Baseline model for Project: Skin Disease Classification

## 1. Imoport libraries, Data and preprocessing data

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, models, transforms
import wandb
from torchinfo import summary
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer

In [11]:
train_dir = "/kaggle/input/skin-disease-dataset/Skin_Disease_Dataset/Train"
val_dir = "/kaggle/input/skin-disease-dataset/Skin_Disease_Dataset/Val"
test_dir = "/kaggle/input/skin-disease-dataset/Skin_Disease_Dataset/Test"

In [12]:
image_size = (224, 224)

In [13]:
batch_size = 32

In [14]:
data_transforms = {
    "train": transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize(image_size),
    ]),
    "val": transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        transforms.Resize(image_size),
    ]),
    "test": transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(image_size),
    ]),
}

In [15]:
train_set = datasets.ImageFolder(train_dir, 
                                 transform=data_transforms['train'])

val_set = datasets.ImageFolder(val_dir, 
                                 transform=data_transforms['val'])

test_set = datasets.ImageFolder(test_dir, 
                                 transform=data_transforms['test'])

### Dataloader

In [16]:
train_loader = DataLoader(train_set, 
                          batch_size=batch_size,
                          shuffle=True, 
                          num_workers=4)

val_loader = DataLoader(val_set, 
                          batch_size=batch_size,
                          shuffle=False, 
                          num_workers= 4)

test_loader = DataLoader(test_set, 
                          batch_size=batch_size,
                          shuffle=False, 
                          num_workers=4)

In [None]:
class_names = train_set.classes
class_names

## 2. Define model Architecture

In [18]:
class MLP(pl.LightningModule):
    def __init__(self, input_size, output_size, hidden_units):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Flatten(),  
            nn.Linear(input_size, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_units, output_size),
        )
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss_fn(y_pred, y)
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        loss = self.loss_fn(y_pred, y)
        acc = (y_pred.argmax(dim=-1) == y).float().mean()
        self.log("val_loss", loss)
        self.log("val_acc", acc)
        return {"val_loss": loss, "val_acc": acc}

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_pred = self(x)
        acc = (y_pred.argmax(dim=-1) == y).float().mean()
        self.log("test_acc", acc)
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer



In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP(input_size=(224 * 224 * 3), output_size=10, hidden_units=256).to(device)
summary(model)

Layer (type:depth-idx)                   Param #
MLP                                      --
├─Sequential: 1-1                        --
│    └─Flatten: 2-1                      --
│    └─Linear: 2-2                       38,535,424
│    └─ReLU: 2-3                         --
│    └─Dropout: 2-4                      --
│    └─Linear: 2-5                       65,792
│    └─ReLU: 2-6                         --
│    └─Dropout: 2-7                      --
│    └─Linear: 2-8                       2,570
├─CrossEntropyLoss: 1-2                  --
Total params: 38,603,786
Trainable params: 38,603,786
Non-trainable params: 0

### Wandb config

In [None]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api_key")
wandb.login(key=secret_value_0)

In [None]:
wandb.init(project='(Baseline)MLP_Skin_Disease', name="training_MLP_0001", config={
    "learning_rate": 1e-3,
    "batch_size": 32,
    "optim": "Adam",
    "epochs": 100
    }
)
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",  
    dirpath="/kaggle/working/",  
    filename="checkpoint_0001_Adam"
)

wandb_logger = WandbLogger()
trainer = Trainer(
    max_epochs=100,
    callbacks=[checkpoint_callback],  
    logger=wandb_logger 
)


## 3. Train the model

In [5]:
device

device(type='cuda')

In [None]:
trainer.fit(model, train_loader, val_loader)

## 4. Testing

In [19]:
checkpoint_path = "/kaggle/working/checkpoint_0001_Adam.ckpt"

# Load model từ checkpoint
model = MLP.load_from_checkpoint(
    checkpoint_path, 
    input_size=224 * 224 * 3,  # Kích thước đầu vào
    output_size=10,            # Số lớp đầu ra (10 nhãn)
    hidden_units=256           # Số hidden units (sửa theo cấu hình đã dùng)
)


In [20]:
trainer = Trainer()
trainer.test(model, dataloaders=test_loader)

  self.pid = os.fork()


Testing: |          | 0/? [00:00<?, ?it/s]

  self.pid = os.fork()


[{'test_acc': 0.3064901530742645}]