In [77]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torchvision.transforms as T
from torchvision.transforms import InterpolationMode

import pytorch_lightning
from monai.transforms import (
    Activations,
)

from monai.data import Dataset, DataLoader
from pathlib import Path
import torch
import numpy as np
from pytorch_lightning.callbacks import ModelCheckpoint
from torch.nn import BCEWithLogitsLoss
from torchmetrics import F1Score
from torch.optim.lr_scheduler import SequentialLR, LambdaLR, StepLR, SequentialLR
import ssl

from random import shuffle
import os
import random

from torchmetrics.classification import BinaryAUROC

import glob
import cv2
from skimage.filters import threshold_otsu
from scipy.stats import kurtosis, skew

import scipy
import scipy.ndimage as ndi

from torch.nn import CrossEntropyLoss
from torchmetrics.classification import F1Score

In [78]:
class Net(pytorch_lightning.LightningModule):
    """LightningModule that wires a pre-built classifier into Lightning's training hooks.

    The model, optimizer, scheduler and both dataloaders are created by the caller
    and handed in; this class stores them, computes a cross-entropy loss in both
    steps and tracks a multiclass F1 score during validation.

    Args:
        model: Network called as ``model(images)``; its output is passed to
            ``CrossEntropyLoss`` together with the batch labels.
        optimizer: Optimizer returned unchanged from ``configure_optimizers``.
        scheduler: LR scheduler returned unchanged from ``configure_optimizers``.
        train_loader: Dataloader returned by ``train_dataloader``.
        val_loader: Dataloader returned by ``val_dataloader``.
        num_classes: Number of classes given to the F1 metric. Defaults to 34,
            the value that used to be hard-coded here.
    """

    def __init__(self, model, optimizer, scheduler, train_loader, val_loader, num_classes=34):
        super().__init__()
        self._model = model
        self._optimizer = optimizer
        self._scheduler = scheduler
        self.train_loader = train_loader
        self.val_loader = val_loader

        self.loss_function = CrossEntropyLoss()
        self.metric = F1Score(task='multiclass', num_classes=num_classes)

    def forward(self, x):
        """Run the wrapped model on ``x`` and return its raw output."""
        return self._model(x)

    def train_dataloader(self):
        """Return the training dataloader supplied at construction."""
        return self.train_loader

    def val_dataloader(self):
        """Return the validation dataloader supplied at construction."""
        return self.val_loader

    def configure_optimizers(self):
        """Return the externally built optimizer and scheduler in Lightning's dict form."""
        return {'optimizer': self._optimizer, 'lr_scheduler': self._scheduler}

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one ``(images, labels)`` batch."""
        images, labels = batch
        output = self.forward(images)
        loss = self.loss_function(output, labels)
        self.log_dict({"training_loss": loss})
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and update the F1 metric for one batch.

        Predictions are the arg-max over dimension 1 of the model output.
        """
        images, labels = batch
        output = self.forward(images)
        loss = self.loss_function(output, labels)
        preds = torch.argmax(output, dim=1)
        # Update the metric state, then log the Metric object itself (not the
        # per-batch value) so the epoch-level F1 is computed from the accumulated
        # state and the state is reset between epochs. The key stays "f1"
        # because the checkpoint callback in this file monitors that name.
        self.metric(preds, labels)
        self.log("f1", self.metric, on_step=False, on_epoch=True)
        self.log("val_loss", loss)
        return {"loss": loss}

In [79]:
# Class names in index order: the position of a name in this list is the
# integer target used for training.
class_labels = [
    'BC',
    'BC K.M.skab',
    'CP1',
    'CP3',
    'CP4',
    'CP6',
    'Kabeldon CDC420',
    'Kabeldon CDC440',
    'Kabeldon CDC460',
    'Kabeldon KSIP423',
    'Kabeldon KSIP433',
    'Kabeldon KSIP443',
    'Kabeldon KSIP463',
    'Kabeldon KSIP483',
    'Kombimodul 2M',
    'Kombimodul 3M',
    'Kombimodul 4M',
    'KSE09',
    'KSE12',
    'KSE15',
    'KSE18',
    'KSE21',
    'KSE27',
    'KSE36',
    'KSE45',
    'MEL1',
    'MEL2',
    'MEL3',
    'MEL4',
    'NU',
    'PK20',
    'PK35',
    'PK48',
    'SC',
]

# Reverse lookup: class name -> position in class_labels.
label_to_idx = dict(zip(class_labels, range(len(class_labels))))


In [80]:
class ImageDataset(Dataset):
    """Map-style dataset that reads an image from disk each time an item is requested.

    Note: ``Dataset`` is whichever name the file's imports leave in scope. In this
    file the later ``from monai.data import Dataset`` shadows the one imported from
    ``torch.utils.data``. The base ``__init__`` is not called; ``__len__`` and
    ``__getitem__`` are both overridden here.

    Args:
        file_list: Sequence of image paths, indexable by integer position.
        labels: Optional sequence aligned with ``file_list``. When ``None`` the
            dataset yields images only.
        transform: Optional callable applied to the RGB PIL image.
        label_to_idx: Optional mapping from a raw label to its integer class
            index. When ``None`` the raw labels are assumed to already be
            numeric and are converted to tensors as they are.

    Raises:
        ValueError: If ``labels`` is given but its length differs from ``file_list``.
    """

    def __init__(self, file_list, labels=None, transform=None, label_to_idx=None):
        # Fail at construction time instead of part-way through an epoch.
        if labels is not None and len(labels) != len(file_list):
            raise ValueError(
                f"file_list and labels must have the same length, "
                f"got {len(file_list)} and {len(labels)}"
            )
        self.file_list = file_list
        self.labels = labels
        self.transform = transform
        self.label_to_idx = label_to_idx

    def __len__(self):
        """Number of image files in the dataset."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Return ``(image, label_tensor)`` when labels exist, otherwise just ``image``."""
        # The context manager closes the file handle even if decoding raises;
        # convert() returns a new in-memory image, so it stays usable afterwards.
        with Image.open(self.file_list[idx]) as source:
            img = source.convert("RGB")
        if self.transform:
            img = self.transform(img)
        if self.labels is None:
            return img
        target = self.labels[idx]
        if self.label_to_idx is not None:
            target = self.label_to_idx[target]
        return img, torch.tensor(target)


In [81]:
def _tensor_and_normalize():
    """Build the shared tail of both pipelines: tensor conversion plus channel normalisation.

    The mean/std values are the ones commonly used with ImageNet-pretrained
    torchvision models. Fresh transform instances are created on every call.
    """
    return [
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: resize, then random crop/flip/rotation/blur/colour
# augmentations, then tensor conversion and normalisation.
train_transforms = T.Compose([
    T.Resize(256, interpolation=InterpolationMode.BICUBIC),
    T.RandomResizedCrop(224),
    T.RandomHorizontalFlip(),
    T.RandomVerticalFlip(),
    T.RandomRotation(20),
    T.GaussianBlur(kernel_size=(7, 13), sigma=(0.1, 1.0)),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    *_tensor_and_normalize(),
])

# Validation pipeline: resize and centre crop only, no random augmentation.
val_transforms = T.Compose([
    T.Resize(256, interpolation=InterpolationMode.BICUBIC),
    T.CenterCrop(224),
    *_tensor_and_normalize(),
])

In [82]:
def get_train_valid_split(
    base_dir='C:/Users/Christian/Desktop/N1_data/image_data',
    train_csv='C:/Users/Christian/Documents/Cand_merc/n1_masters_thesis/data/df_true2.csv',
    test_size=0.05,
    random_state=43,
    shuffle=False,
):
    '''
    Creates train/valid split for cnn.

    Reads the label CSV, prefixes every file name with ``base_dir`` and splits
    paths and labels with ``train_test_split``.

    Args:
        base_dir: Directory joined in front of each ``file_name`` value.
        train_csv: CSV file that must contain ``file_name`` and ``label`` columns.
        test_size: Forwarded to ``train_test_split`` as the validation share.
        random_state: Forwarded to ``train_test_split``.
        shuffle: Forwarded to ``train_test_split``. Defaults to False, the
            original behaviour.

    Return train_paths, valid_paths, train_labels, valid_labels.
    '''
    # .copy() makes the two-column frame independent of the frame read from
    # disk, so the column assignment below cannot raise SettingWithCopyWarning.
    df_train = pd.read_csv(train_csv)[['file_name', 'label']].copy()
    df_train['file_name'] = [
        os.path.join(base_dir, name) for name in df_train['file_name']
    ]

    all_image_paths = df_train['file_name'].values
    all_labels = df_train['label'].values

    # NOTE(review): with shuffle=False the split is positional (the validation
    # set is the tail of the CSV) and random_state has no effect. If the CSV is
    # ordered by label the validation set may miss classes -- confirm the row
    # order, or call with shuffle=True.
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        all_image_paths,
        all_labels,
        test_size=test_size,
        random_state=random_state,
        shuffle=shuffle,
    )
    return train_paths, val_paths, train_labels, val_labels

In [83]:
def create_dataloaders(train_paths, val_paths, train_labels, val_labels, batch_size=32, num_workers=0):
    """
    Returns train and valid dataloader.

    Wraps the path/label pairs in ``ImageDataset`` objects (training data gets
    ``train_transforms``, validation data gets ``val_transforms``, both use the
    module-level ``label_to_idx`` mapping) and builds one ``DataLoader`` each.
    Only the training loader shuffles.

    Note: ``DataLoader`` is whichever name the file's imports leave in scope; in
    this file the ``monai.data`` import comes after the ``torch.utils.data`` one.

    Args:
        train_paths: Image paths for training.
        val_paths: Image paths for validation.
        train_labels: Labels aligned with ``train_paths``.
        val_labels: Labels aligned with ``val_paths``.
        batch_size: Batch size for both loaders. Defaults to 32, the value that
            used to be hard-coded.
        num_workers: Worker count for both loaders. Defaults to 0, the value
            that used to be hard-coded.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    train_data = ImageDataset(train_paths, train_labels, transform=train_transforms, label_to_idx=label_to_idx)
    val_data = ImageDataset(val_paths, val_labels, transform=val_transforms, label_to_idx=label_to_idx)

    train_loader = DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    val_loader = DataLoader(dataset=val_data, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_loader, val_loader

In [84]:
def create_model_optimizer_scheduler(num_classes=34):
    """
    Returns model, optimizer and scheduler

    Builds a ConvNeXt-Base with its default pretrained weights, freezes every
    feature block except the last two, replaces the classifier head and creates
    an AdamW optimizer (separate learning rates for the unfrozen feature blocks
    and the head) together with a StepLR scheduler.

    Args:
        num_classes: Size of the final linear layer. Defaults to 34, the value
            that used to be hard-coded.

    Returns:
        Tuple ``(model, optimizer, scheduler)``. The model is moved to CUDA when
        ``torch.cuda.is_available()`` is true, otherwise it stays on the CPU.
    """

    model = models.convnext_base(weights="DEFAULT")

    # Freeze the whole backbone first, then re-enable gradients for the last
    # two entries of `features` only.
    for param in model.features.parameters():
        param.requires_grad = False

    trainable_blocks = model.features[-2:]
    for param in trainable_blocks.parameters():
        param.requires_grad = True

    # New head. The layer order is kept exactly as before so the parameter
    # names under `classifier.*` do not change.
    model.classifier = nn.Sequential(
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.BatchNorm1d(1024),
        nn.Linear(1024, 512),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(512, num_classes)
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Two parameter groups: a smaller learning rate for the unfrozen backbone
    # blocks than for the freshly initialised head.
    optimizer = torch.optim.AdamW([
        {'params': trainable_blocks.parameters(), 'lr': 1e-5},
        {'params': model.classifier.parameters(), 'lr': 1e-4}
    ])

    scheduler = StepLR(optimizer, step_size=5, gamma=0.7)

    return model, optimizer, scheduler

In [85]:
def train_model(model, optimizer, scheduler, train_loader, val_loader,
                max_epochs=5, accelerator='auto', checkpoint_dir='models/'):
    """
    Trains model

    Wraps the given objects in a ``Net`` module, fits it with a Lightning
    ``Trainer`` on a single device and keeps the checkpoint with the highest
    logged ``f1`` value.

    Args:
        model: Network to train.
        optimizer: Optimizer already bound to the model parameters.
        scheduler: LR scheduler already bound to ``optimizer``.
        train_loader: Training dataloader.
        val_loader: Validation dataloader.
        max_epochs: Epoch limit passed to the trainer. Defaults to 5, the value
            that used to be hard-coded.
        accelerator: Lightning accelerator name. Defaults to ``'auto'`` so a GPU
            is used when one is present; the previous hard-coded ``'cpu'``
            forced CPU training even on a CUDA machine. Pass ``'cpu'`` to get
            the old behaviour.
        checkpoint_dir: Directory for the best checkpoint. Defaults to
            ``'models/'``, the value that used to be hard-coded.

    Returns:
        The fitted ``Net`` instance.
    """
    net = Net(model, optimizer, scheduler, train_loader, val_loader)

    # Keep only the single best checkpoint, ranked by the "f1" value logged
    # during validation.
    best_f1_checkpoint = ModelCheckpoint(
        dirpath=checkpoint_dir,
        filename='{f1:.2f}_{epoch}',
        monitor='f1',
        mode='max',
        save_top_k=1,
    )

    trainer = pytorch_lightning.Trainer(
        devices=1,
        accelerator=accelerator,
        max_epochs=max_epochs,
        enable_checkpointing=True,
        num_sanity_val_steps=1,
        log_every_n_steps=16,
        callbacks=[best_f1_checkpoint],
    )
    trainer.fit(net)
    return net

In [86]:
# Seed the random number generators before anything stochastic runs (head
# initialisation, augmentation, batch shuffling) so that a Restart & Run All
# gives comparable results.
pytorch_lightning.seed_everything(43)

# 1) Split the labelled image list into training and validation parts.
train_paths, val_paths, train_labels, val_labels = get_train_valid_split()

# 2) Wrap both parts in dataloaders.
train_loader, val_loader = create_dataloaders(train_paths, val_paths, train_labels, val_labels)

# 3) Build the network together with its optimizer and LR scheduler.
model, optimizer, scheduler = create_model_optimizer_scheduler()

# 4) Fit; `net` is the trained Lightning module.
net = train_model(model, optimizer, scheduler, train_loader, val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type              | Params | Mode 
------------------------------------------------------------
0 | _model        | ConvNeXt          | 88.1 M | train
1 | loss_function | CrossEntropyLoss  | 0      | train
2 | metric        | MulticlassF1Score | 0      | train
------------------------------------------------------------
28.0 M    Trainable params
60.1 M    Non-trainable params
88.1 M    Total params
352.435   Total estimated model params size (MB)
389       Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\Christian\Documents\Cand_merc\n1_masters_thesis\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.


                                                                           

c:\Users\Christian\Documents\Cand_merc\n1_masters_thesis\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=5` in the `DataLoader` to improve performance.


Epoch 0:  87%|████████▋ | 218/251 [18:25<02:47,  0.20it/s, v_num=9]


Detected KeyboardInterrupt, attempting graceful shutdown ...


NameError: name 'exit' is not defined

                                                                   