## Importing libraries
```



In [1]:
pip install pytorch-lightning --q

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install timm --q

Note: you may need to restart the kernel to use updated packages.


In [3]:
import argparse
import os
from pprint import pprint

import timm
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
#from pytorch_lightning.utilities.seed import seed_everything
from lightning_fabric.utilities.seed import seed_everything
from torch.utils.data import DataLoader,Dataset
from torchmetrics import Accuracy , F1Score , CohenKappa, Recall, Precision, AUROC, MatthewsCorrCoef, ConfusionMatrix
from torchvision.datasets import ImageFolder
from sklearn.model_selection import train_test_split
import pandas as pd
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import pytorch_lightning as pl



## Set Variables

### get_optimizer()-get_lr_scheduler_config() ----Variables

In [4]:
pl.seed_everything(42)

Seed set to 42


42

In [5]:
OPT = 'adam'  # adam, sgd, adamp eklenecek
WEIGHT_DECAY = 0.0001
MOMENTUM = 0.9  # only when OPT is sgd
BASE_LR = 0.001
LR_SCHEDULER = 'step'  # step, multistep, reduce_on_plateau
LR_DECAY_RATE = 0.1
LR_STEP_SIZE = 5  # only when LR_SCHEDULER is step
LR_STEP_MILESTONES = [10, 15]  # only when LR_SCHEDULER is multistep

### CustomDataset(Dataset)- getdataloaders() train_test_split ----Variables

In [6]:
image_column = 'image_id'
label_column =  'label'
img_size = 256
batch_size = 128
num_workers = 2
pin_memory = True
seed = 42

In [7]:
!pwd

/home/ccipfm/Downloads


### SimpleModel(LightningModule)---- Variable

In [8]:
model_name = "densenetblur121d"

pretrained = True
num_classes = 2

###Trainer get_trainer() -----Variable

In [9]:
max_epochs = 30
min_epochs = 5
use_gpu = True
checkpoint_dir = "checkpoints"
monitor_metric = "val/loss"

##Classes and Functions

In [10]:
def get_optimizer(parameters) -> torch.optim.Optimizer:
    if OPT == 'adam':
        optimizer = torch.optim.Adam(parameters, lr=BASE_LR, weight_decay=WEIGHT_DECAY)
    elif OPT == 'sgd':
        optimizer = torch.optim.SGD(
            parameters, lr=BASE_LR, weight_decay=WEIGHT_DECAY, momentum=MOMENTUM
        )
    else:
        raise NotImplementedError()

    return optimizer

In [11]:
def get_lr_scheduler_config(optimizer: torch.optim.Optimizer) -> dict:
    if LR_SCHEDULER == 'step':
        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer, step_size=LR_STEP_SIZE, gamma=LR_DECAY_RATE
        )
        lr_scheduler_config = {
            'scheduler': scheduler,
            'interval': 'epoch',
            'frequency': 1,
        }
    elif LR_SCHEDULER == 'multistep':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=LR_STEP_MILESTONES, gamma=LR_DECAY_RATE
        )
        lr_scheduler_config = {
            'scheduler': scheduler,
            'interval': 'epoch',
            'frequency': 1,
        }
    elif LR_SCHEDULER == 'reduce_on_plateau':
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='max', factor=0.1, patience=10, threshold=0.0001
        )
        lr_scheduler_config = {
            'scheduler': scheduler,
            'monitor': 'val/loss',
            'interval': 'epoch',
            'frequency': 1,
        }
    else:
        raise NotImplementedError

    return lr_scheduler_config

In [12]:
class CustomDataset(Dataset):
    def __init__(self, df, root_dir, img_size: int | tuple = 112,grayscale=True):
        self.df = df
        self.root_dir = root_dir
        # self.img_size = img_size
        # self.mean = [0.485, 0.456, 0.406]
        # self.std = [0.229, 0.224, 0.225]
        # self.transform = transforms.Compose([
        #     transforms.Resize(self.img_size),
        #     transforms.ToTensor(),
        #     transforms.Normalize(mean=self.mean, std=self.std),
        # ])
        if grayscale:
            self.transform = transforms.Compose([transforms.Grayscale(num_output_channels=3),
                                                      transforms.Resize((img_size, img_size)),
                                                      transforms.ToTensor()])
        else:
            self.transform = transforms.Compose([transforms.Resize((img_size, img_size)),
                                                      transforms.ToTensor()])
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, f"{self.df[image_column].iloc[idx]}.png")
        image = Image.open(img_name)
        image = self.transform(image)
        label = self.df[label_column].iloc[idx]
        return image, label

In [13]:
def get_data_loaders(train_df, val_df, test_df, root_dir, img_size, batch_size, num_workers, grayscale=True,pin_memory = True):
    train_loader = DataLoader(
        CustomDataset(train_df, root_dir, img_size=img_size, grayscale=grayscale),
        batch_size=batch_size,
        pin_memory = True,
        shuffle=False,
        drop_last=True,
        num_workers=num_workers
    )

    val_loader = DataLoader(
        CustomDataset(val_df, root_dir, img_size=img_size, grayscale=grayscale),
        batch_size=batch_size,
        pin_memory = True,
        shuffle=False,
        drop_last=False,
        num_workers=num_workers
    )

    test_loader = DataLoader(
        CustomDataset(test_df, root_dir, img_size=img_size, grayscale=grayscale),
        batch_size=batch_size,
        pin_memory = True,
        shuffle=False,
        drop_last=False,
        num_workers=num_workers
    )

    return train_loader, val_loader, test_loader

In [14]:
class SimpleModel(LightningModule):
    def __init__(
        self,
        model_name: str = 'resnet18',
        pretrained: bool = False,
        num_classes: int | None = None,
        class_weights: float | None = None,
    ):
        super().__init__()
        self.save_hyperparameters()
        self.model = timm.create_model(
            model_name=model_name, pretrained=pretrained, num_classes=num_classes
        )
        self.class_weights = class_weights

        self.train_loss = nn.CrossEntropyLoss(weight=class_weights)
        self.train_f1 = F1Score(task="binary")
        self.train_acc = Accuracy(task="binary")
        self.train_auc = AUROC(task="binary")
        #self.train_mcc = MatthewsCorrCoef(task="binary")
        self.train_recall = Recall(task="binary")
        self.train_prec = Precision(task="binary")
        self.train_kappa = CohenKappa(task="binary")


        self.val_loss = nn.CrossEntropyLoss(weight=class_weights)
        self.val_f1 = F1Score(task="binary")
        self.val_acc = Accuracy(task="binary")
        self.val_auc = AUROC(task="binary")
        #self.val_mcc = MatthewsCorrCoef(task="binary")
        self.val_recall = Recall(task="binary")
        self.val_prec = Precision(task="binary")
        self.val_kappa = CohenKappa(task="binary")


        self.test_loss = nn.CrossEntropyLoss(weight=class_weights)
        self.test_f1 = F1Score(task="binary")
        self.test_acc = Accuracy(task="binary")
        self.test_auc = AUROC(task="binary")
        #self.test_mcc = MatthewsCorrCoef(task="binary")
        self.test_recall = Recall(task="binary")
        self.test_prec = Precision(task="binary")
        self.test_kappa = CohenKappa(task="binary")

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, target = batch

        out = self(x)
        _, pred = out.max(1)

        loss = self.train_loss(out, target)
        f1 = self.train_f1(pred,target)
        acc = self.train_acc(pred, target)
        auc = self.train_auc(pred,target)
        #mcc = self.train_mcc(pred,target)
        recall = self.train_recall(pred,target)
        prec = self.train_prec(pred,target)
        kappa = self.train_kappa(pred,target)

        #self.log_dict({'train/loss': loss,'train/f1':f1,  'train/acc': acc,'train/auc':auc,'train/mcc':mcc,'train/recall':recall,'train/prec':prec,'train/kappa':kappa}, prog_bar=True,on_epoch=True,logger=True)
        self.log_dict({'train/loss': loss,'train/f1':f1,  'train/acc': acc,'train/auc':auc,'train/recall':recall,'train/prec':prec,'train/kappa':kappa}, prog_bar=True,on_epoch=True,logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        x, target = batch

        out = self(x)
        _, pred = out.max(1)

        loss = self.val_loss(out, target)
        f1 = self.val_f1(pred,target)
        acc = self.val_acc(pred, target)
        auc = self.val_auc(pred,target)
        #mcc = self.val_mcc(pred,target)
        recall = self.val_recall(pred,target)
        prec = self.val_prec(pred,target)
        kappa = self.val_kappa(pred,target)

        #self.log_dict({'val/loss': loss,'val/f1':f1,  'val/acc': acc,'val/auc':auc,'val/mcc':mcc,'val/recall':recall,'val/prec':prec,'val/kappa':kappa}, prog_bar=True,on_epoch=True,logger=True)
        self.log_dict({'val/loss': loss,'val/f1':f1,  'val/acc': acc,'val/auc':auc,'val/recall':recall,'val/prec':prec,'val/kappa':kappa}, prog_bar=True,on_epoch=True,logger=True)


    def test_step(self, batch, batch_idx):
        x, target = batch

        out = self(x)
        _, pred = out.max(1)

        loss = self.test_loss(out, target)
        f1 = self.test_f1(pred,target)
        acc = self.test_acc(pred, target)
        auc = self.test_auc(pred,target)
        #mcc = self.test_mcc(pred,target)
        recall = self.test_recall(pred,target)
        prec = self.test_prec(pred,target)
        kappa = self.test_kappa(pred,target)

        #self.log_dict({'test/loss': loss,'test/f1':f1,  'test/acc': acc,'test/auc':auc,'test/mcc':mcc,'test/recall':recall,'test/prec':prec,'test/kappa':kappa}, prog_bar=True,on_epoch=True,logger=True)
        self.log_dict({'test/loss': loss,'test/f1':f1,  'test/acc': acc,'test/auc':auc,'test/recall':recall,'test/prec':prec,'test/kappa':kappa}, prog_bar=True,on_epoch=True,logger=True)

    def predict_step(self, batch, batch_idx):
        x, target = batch

        out = self(x)
        _, pred = out.max(1)

        return pred, target

    def configure_optimizers(self):
        optimizer = get_optimizer(self.parameters())
        lr_scheduler_config = get_lr_scheduler_config(optimizer)
        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler_config}

In [15]:
"""def get_callbacks(checkpoint_dir, monitor_metric="val/loss"):
    early_stopping_callback = EarlyStopping(
        monitor=monitor_metric,
        patience=5,
        mode="min"
    )

    checkpoint_callback = ModelCheckpoint(
        dirpath=checkpoint_dir,
        filename="checkpoint/{epoch:02d}-{val/loss:.4f}-{val/f1:.4f}",
        save_top_k=1,
        verbose=True,
        monitor=monitor_metric,
        mode="min"
    )
    """

'def get_callbacks(checkpoint_dir, monitor_metric="val/loss"):\n    early_stopping_callback = EarlyStopping(\n        monitor=monitor_metric,\n        patience=5,\n        mode="min"\n    )\n\n    checkpoint_callback = ModelCheckpoint(\n        dirpath=checkpoint_dir,\n        filename="checkpoint/{epoch:02d}-{val/loss:.4f}-{val/f1:.4f}",\n        save_top_k=1,\n        verbose=True,\n        monitor=monitor_metric,\n        mode="min"\n    )\n    '

In [16]:
def get_trainer(max_epochs=10, min_epochs=5, use_gpu=True, checkpoint_dir="checkpoints", monitor_metric="val/loss",device=1):
    accelerator = "gpu" if use_gpu else "auto"
    device = 1
    precision = '16-mixed' or 'bf16-mixed'
    #callbacks = get_callbacks(checkpoint_dir, monitor_metric)

    trainer = pl.Trainer(
        max_epochs=max_epochs,
        min_epochs=min_epochs,
        accelerator=accelerator,
        devices=[2],
        callbacks=[ModelCheckpoint(
                            dirpath ="checkpoints",
                            filename="checkpoint/{epoch:02d}-{val/loss:.4f}-{val/f1:.4f}",
                            save_top_k =1,
                            verbose = True,
                            monitor = "val/f1",
                            mode = "max"),

                                    EarlyStopping(monitor='val/f1', patience=5, mode="max")]
                         ,
        precision=precision
    )
    return trainer

In [17]:
df = pd.read_csv('/media/ccipfm/primerData/MTLP_Dataset/MTLP_Dataset.csv',usecols=['image_id','label'])
root_dir = '/media/ccipfm/primerData/MTLP_Dataset/MTLP_Screenshots'

In [19]:
val_size = 0.15
test_size = 0.3

In [20]:
train_df, temp_df = train_test_split(df, test_size=val_size + test_size, random_state=seed,stratify=df[label_column]) #split dataframe to train,test,val
val_df, test_df = train_test_split(temp_df, test_size=test_size / (val_size + test_size), random_state=seed)

In [21]:
train_loader, val_loader, test_loader = get_data_loaders(train_df, val_df,test_df, root_dir, img_size=img_size, batch_size=batch_size, num_workers=num_workers, pin_memory = pin_memory)

In [22]:
class_weights = (1 - (df[label_column].value_counts().sort_index() / len(df))).values #calculate class weight
class_weights = torch.from_numpy(class_weights).float().to("cuda")

In [23]:
model = SimpleModel(
        model_name=model_name, pretrained=pretrained, num_classes=num_classes, class_weights=class_weights
    )

In [24]:
trainer = get_trainer(max_epochs=max_epochs, min_epochs=min_epochs, use_gpu=use_gpu, checkpoint_dir=checkpoint_dir, monitor_metric=monitor_metric)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [25]:
trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)

You are using a CUDA device ('NVIDIA RTX A4000') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
2024-08-07 16:23:42.623950: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-07 16:23:43.055863: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
/home/ccipfm/.loc

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0, global step 310: 'val/f1' reached 0.83848 (best 0.83848), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=00-val/loss=0.3125-val/f1=0.8385.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1, global step 620: 'val/f1' reached 0.87548 (best 0.87548), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=01-val/loss=0.2536-val/f1=0.8755.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2, global step 930: 'val/f1' reached 0.87665 (best 0.87665), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=02-val/loss=0.2610-val/f1=0.8766.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3, global step 1240: 'val/f1' reached 0.90353 (best 0.90353), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=03-val/loss=0.2081-val/f1=0.9035.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4, global step 1550: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5, global step 1860: 'val/f1' reached 0.93448 (best 0.93448), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=05-val/loss=0.1506-val/f1=0.9345.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 6, global step 2170: 'val/f1' reached 0.93709 (best 0.93709), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=06-val/loss=0.1574-val/f1=0.9371.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 7, global step 2480: 'val/f1' reached 0.93914 (best 0.93914), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=07-val/loss=0.1712-val/f1=0.9391.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 8, global step 2790: 'val/f1' reached 0.94005 (best 0.94005), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=08-val/loss=0.1848-val/f1=0.9401.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 9, global step 3100: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 10, global step 3410: 'val/f1' reached 0.94079 (best 0.94079), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=10-val/loss=0.1962-val/f1=0.9408.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 11, global step 3720: 'val/f1' reached 0.94103 (best 0.94103), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=11-val/loss=0.1981-val/f1=0.9410.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 12, global step 4030: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 13, global step 4340: 'val/f1' reached 0.94149 (best 0.94149), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=13-val/loss=0.2042-val/f1=0.9415.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 14, global step 4650: 'val/f1' reached 0.94178 (best 0.94178), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=14-val/loss=0.2076-val/f1=0.9418.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 15, global step 4960: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 16, global step 5270: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 17, global step 5580: 'val/f1' reached 0.94188 (best 0.94188), saving model to '/home/ccipfm/Downloads/checkpoints/checkpoint/epoch=17-val/loss=0.2088-val/f1=0.9419.ckpt' as top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 18, global step 5890: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 19, global step 6200: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 20, global step 6510: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 21, global step 6820: 'val/f1' was not in top 1


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 22, global step 7130: 'val/f1' was not in top 1


In [26]:
trainer.test(model, dataloaders=test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test/loss': 0.20670229196548462,
  'test/f1': 0.9431433081626892,
  'test/acc': 0.9528219103813171,
  'test/auc': 0.9528986811637878,
  'test/recall': 0.9535622596740723,
  'test/prec': 0.9339141845703125,
  'test/kappa': 0.9024006128311157}]

In [27]:
def evaluate_model(hist):
  preds = []
  target = []
  for i in range(len(hist)):
    preds.append(hist[i][0])
    target.append(hist[i][1])

  preds = torch.cat(preds)
  target = torch.cat(target)

  f1 = F1Score(task="binary", num_classes=num_classes)
  f1_score_neural = f1(preds, target)

  accuracy = Accuracy(task="binary", num_classes=num_classes)
  accuracy_neural = accuracy(preds, target)

  confmat = ConfusionMatrix(task="binary", num_classes=num_classes)
  conf_matrix_neural = confmat(preds, target)
  return f1_score_neural, accuracy_neural, conf_matrix_neural

In [28]:
hist = trainer.predict(model,dataloaders=test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Predicting: |          | 0/? [00:00<?, ?it/s]

In [29]:
with open("hist.txt", "w") as text_file:
    text_file.write(str(hist))

In [30]:
f1_score_neural,accuracy_neural,conf_matrix_neural = evaluate_model(hist)
print("Test Set F1-Score (Neural Network):", f1_score_neural)
print("Test Set Accuracy (Neural Network):", accuracy_neural)
print("Confusion Matrix (Neural Network):\n", conf_matrix_neural)


Test Set F1-Score (Neural Network): tensor(0.9439)
Test Set Accuracy (Neural Network): tensor(0.9528)
Confusion Matrix (Neural Network):
 tensor([[12070,   607],
        [  417,  8611]])


In [38]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [39]:
 %reload_ext tensorboard

In [40]:
%tensorboard --logdir=/path/to/logs --port=8888

Reusing TensorBoard on port 8888 (pid 1113806), started 13:40:08 ago. (Use '!kill 1113806' to kill it.)

In [41]:
!kill 154306

/bin/bash: line 1: kill: (154306) - No such process


##HyperParameter(TOP3)