Split Folder

In [43]:
import os
import shutil
import random
from collections import defaultdict

DATASET_DIR = "./dataset"
OUTPUT_DIR = "./splitted_dataset"
RATIOS = (0.7, 0.15, 0.15)
SEED = 1337

random.seed(SEED)

def get_patient_id(fname):
    return "_".join(fname.split("_")[:2])  # OAS1_0028_MR1

# --------------------------------------------------
# 1. Kumpulkan file per (class, patient)
# --------------------------------------------------
patients_per_class = defaultdict(lambda: defaultdict(list))

for cls in os.listdir(DATASET_DIR):
    cls_path = os.path.join(DATASET_DIR, cls)
    if not os.path.isdir(cls_path):
        continue

    for fname in os.listdir(cls_path):
        pid = get_patient_id(fname)
        patients_per_class[cls][pid].append(fname)

# --------------------------------------------------
# 2. Split patient secara stratified per class
# --------------------------------------------------
splits = {"train": [], "val": [], "test": []}

for cls, patient_dict in patients_per_class.items():
    patient_ids = list(patient_dict.keys())
    random.shuffle(patient_ids)

    n = len(patient_ids)

    if n < 2:
        print(f"[WARNING] Class '{cls}' hanya punya {n} patient. Tidak bisa split masuk akal.")
        splits["train"].extend([(cls, pid) for pid in patient_ids])
        continue

    n_train = max(1, int(RATIOS[0] * n))
    n_val   = max(1, int(RATIOS[1] * n)) if n >= 3 else 0

    if n_train + n_val >= n:
        n_train = n - 1
        n_val = 0

    train_ids = patient_ids[:n_train]
    val_ids   = patient_ids[n_train:n_train + n_val]
    test_ids  = patient_ids[n_train + n_val:]

    splits["train"].extend([(cls, pid) for pid in train_ids])
    splits["val"].extend([(cls, pid) for pid in val_ids])
    splits["test"].extend([(cls, pid) for pid in test_ids])

# --------------------------------------------------
# 3. Copy atau move file
# --------------------------------------------------
for split, items in splits.items():
    for cls, pid in items:
        out_dir = os.path.join(OUTPUT_DIR, split, cls)
        os.makedirs(out_dir, exist_ok=True)

        for fname in patients_per_class[cls][pid]:
            src = os.path.join(DATASET_DIR, cls, fname)
            dst = os.path.join(out_dir, fname)
            shutil.copy2(src, dst)  # ganti ke move() kalau mau cepat


# Importing & Configuration

In [44]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, f1_score
import copy
import timm
from tqdm import tqdm
from ultralytics import YOLO

In [45]:
DATASET_DIR = "./splitted_dataset"
BATCH_SIZE = 32
NUM_CLASSES = 4
EPOCH = 20
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Data Transformers

In [46]:
data_transforms = {
    "train": transforms.Compose([
        transforms.Resize((224,224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((224,224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
    ]),
}

# Data Loading

In [47]:
image_datasets = {x: datasets.ImageFolder(f"{DATASET_DIR}/{x}", data_transforms[x]) for x in ['train','val']}
dataloaders = {x: DataLoader(image_datasets[x],batch_size=BATCH_SIZE,shuffle=True, num_workers=2) for x in ['train','val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train','val']}
class_names = image_datasets['train'].classes

print(class_names)
print(dataset_sizes)

['Mild Dementia', 'Moderate Dementia', 'Non Demented', 'Very mild Dementia']
{'train': 60573, 'val': 12261}


# Preprocessing

In [48]:
y_train = image_datasets['train'].targets
class_counts = np.bincount(y_train)
total_samples = len(y_train)
class_weights = total_samples / (len(class_names) * class_counts)
class_weights = torch.FloatTensor(class_weights).to(DEVICE)

# Training

Training Function

In [49]:
def train_model(model:nn.Module,model_name:str,criterion:nn.CrossEntropyLoss, optimizer:torch.optim.Optimizer, num_epochs = EPOCH):
    best_f1 = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())
    history = {'train_loss':[],'val_f1':[]}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print("-"*10)

        for phase in ['train','val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            
            running_loss = 0.0
            all_preds = []
            all_labels = []

            pbar = tqdm(dataloaders[phase], desc=f'Epoch {epoch+1} {phase}')

            for inputs, labels in pbar:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs,labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                pbar.set_postfix({'loss':loss.item()})
                running_loss += loss.item() * inputs.size(0)
                preds = torch.argmax(outputs, dim = 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
            
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_f1 = f1_score(all_labels,all_preds,average='macro')

            print(f'{phase} Loss: {epoch_loss:.4f} F1: {epoch_f1:.4f}')

            if phase == 'val' and epoch_f1 > best_f1:
                best_f1 = epoch_f1
                best_model_wts = copy.deepcopy(model.state_dict())
                torch.save(model.state_dict(),f'best_models/{model_name}_best.pth')
                print(f'New best model is saved with F1 : {best_f1:.4f}')

    model.load_state_dict(best_model_wts)
    return model


In [50]:
model_to_test = ['resnetv2_50.a1h_in1k', 'convnext_tiny','xception']

In [None]:
for model_name in model_to_test:
    print(f"{'='*10}")
    print(f"{model_name}'s Training")
    print(f'{'='*10}')

    model = timm.create_model(model_name,pretrained=True,num_classes = NUM_CLASSES)
    model = model.to(DEVICE)

    criteration = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = torch.optim.Adam(model.parameters(),lr = 0.0001)
    train_model(model,model_name,criteration,optimizer,num_epochs=EPOCH)

resnetv2_50.a1h_in1k's Training
Epoch 1/20
----------


Epoch 1 train: 100%|██████████| 1893/1893 [04:54<00:00,  6.43it/s, loss=0.503]


train Loss: 0.9214 F1: 0.4735


Epoch 1 val: 100%|██████████| 384/384 [01:34<00:00,  4.04it/s, loss=5.43]


val Loss: 5.9803 F1: 0.0994
New best model is saved with F1 : 0.0994
Epoch 2/20
----------


Epoch 2 train: 100%|██████████| 1893/1893 [05:29<00:00,  5.75it/s, loss=0.435] 


train Loss: 0.3411 F1: 0.7495


Epoch 2 val: 100%|██████████| 384/384 [01:16<00:00,  4.99it/s, loss=10.5]


val Loss: 8.9595 F1: 0.0733
Epoch 3/20
----------


Epoch 3 train: 100%|██████████| 1893/1893 [06:01<00:00,  5.24it/s, loss=0.143]  


train Loss: 0.1544 F1: 0.8942


Epoch 3 val: 100%|██████████| 384/384 [01:08<00:00,  5.57it/s, loss=7.55]


val Loss: 9.4633 F1: 0.0936
Epoch 4/20
----------


Epoch 4 train: 100%|██████████| 1893/1893 [05:55<00:00,  5.33it/s, loss=0.082]  


train Loss: 0.0864 F1: 0.9297


Epoch 4 val: 100%|██████████| 384/384 [00:40<00:00,  9.37it/s, loss=11.6]


val Loss: 10.8881 F1: 0.0921
Epoch 5/20
----------


Epoch 5 train: 100%|██████████| 1893/1893 [05:59<00:00,  5.27it/s, loss=0.00879] 


train Loss: 0.0440 F1: 0.9716


Epoch 5 val: 100%|██████████| 384/384 [00:41<00:00,  9.24it/s, loss=13.2]


val Loss: 11.8324 F1: 0.0720
Epoch 6/20
----------


Epoch 6 train: 100%|██████████| 1893/1893 [06:07<00:00,  5.15it/s, loss=0.000732]


train Loss: 0.0375 F1: 0.9639


Epoch 6 val: 100%|██████████| 384/384 [00:45<00:00,  8.36it/s, loss=16.2]


val Loss: 13.3825 F1: 0.0960
Epoch 7/20
----------


Epoch 7 train: 100%|██████████| 1893/1893 [05:35<00:00,  5.64it/s, loss=0.0646]  


train Loss: 0.0212 F1: 0.9857


Epoch 7 val: 100%|██████████| 384/384 [01:28<00:00,  4.32it/s, loss=16.9]


val Loss: 15.6232 F1: 0.0774
Epoch 8/20
----------


Epoch 8 train: 100%|██████████| 1893/1893 [04:58<00:00,  6.35it/s, loss=0.01]    


train Loss: 0.0200 F1: 0.9877


Epoch 8 val: 100%|██████████| 384/384 [01:24<00:00,  4.56it/s, loss=18.5]


val Loss: 17.7066 F1: 0.1168
New best model is saved with F1 : 0.1168
Epoch 9/20
----------


Epoch 9 train: 100%|██████████| 1893/1893 [04:52<00:00,  6.47it/s, loss=0.0028]  


train Loss: 0.0169 F1: 0.9879


Epoch 9 val: 100%|██████████| 384/384 [00:33<00:00, 11.61it/s, loss=16.5]


val Loss: 16.9765 F1: 0.1017
Epoch 10/20
----------


Epoch 10 train: 100%|██████████| 1893/1893 [04:49<00:00,  6.55it/s, loss=0.0034]  


train Loss: 0.0246 F1: 0.9537


Epoch 10 val: 100%|██████████| 384/384 [00:30<00:00, 12.74it/s, loss=14.6]


val Loss: 17.7500 F1: 0.1006
Epoch 11/20
----------


Epoch 11 train: 100%|██████████| 1893/1893 [04:43<00:00,  6.67it/s, loss=0.0024]  


train Loss: 0.0070 F1: 0.9961


Epoch 11 val: 100%|██████████| 384/384 [00:31<00:00, 12.35it/s, loss=19]  


val Loss: 18.1486 F1: 0.0893
Epoch 12/20
----------


Epoch 12 train: 100%|██████████| 1893/1893 [04:45<00:00,  6.64it/s, loss=0.00154] 


train Loss: 0.0162 F1: 0.9909


Epoch 12 val: 100%|██████████| 384/384 [01:29<00:00,  4.28it/s, loss=16.8]


val Loss: 18.2435 F1: 0.0952
Epoch 13/20
----------


Epoch 13 train: 100%|██████████| 1893/1893 [04:50<00:00,  6.51it/s, loss=0.000288]


train Loss: 0.0075 F1: 0.9956


Epoch 13 val: 100%|██████████| 384/384 [01:56<00:00,  3.29it/s, loss=20.1]


val Loss: 19.7050 F1: 0.1116
Epoch 14/20
----------


Epoch 14 train: 100%|██████████| 1893/1893 [07:39<00:00,  4.12it/s, loss=0.00194] 


train Loss: 0.0111 F1: 0.9944


Epoch 14 val: 100%|██████████| 384/384 [01:29<00:00,  4.27it/s, loss=17.8]


val Loss: 19.6893 F1: 0.1132
Epoch 15/20
----------


Epoch 15 train: 100%|██████████| 1893/1893 [06:17<00:00,  5.02it/s, loss=0.0001]  


train Loss: 0.0081 F1: 0.9949


Epoch 15 val: 100%|██████████| 384/384 [00:45<00:00,  8.43it/s, loss=22]  


val Loss: 20.4493 F1: 0.1008
Epoch 16/20
----------


Epoch 16 train: 100%|██████████| 1893/1893 [06:10<00:00,  5.11it/s, loss=0.00213] 


train Loss: 0.0053 F1: 0.9957


Epoch 16 val: 100%|██████████| 384/384 [00:45<00:00,  8.44it/s, loss=24.1]


val Loss: 22.3011 F1: 0.1368
New best model is saved with F1 : 0.1368
Epoch 17/20
----------


Epoch 17 train: 100%|██████████| 1893/1893 [05:47<00:00,  5.44it/s, loss=1.66e-5] 


train Loss: 0.0112 F1: 0.9881


Epoch 17 val: 100%|██████████| 384/384 [00:37<00:00, 10.38it/s, loss=12.8]


val Loss: 21.2989 F1: 0.1034
Epoch 18/20
----------


Epoch 18 train: 100%|██████████| 1893/1893 [04:42<00:00,  6.69it/s, loss=0.000302]


train Loss: 0.0070 F1: 0.9943


Epoch 18 val: 100%|██████████| 384/384 [00:33<00:00, 11.39it/s, loss=23.8]


val Loss: 21.8475 F1: 0.0948
Epoch 19/20
----------


Epoch 19 train: 100%|██████████| 1893/1893 [04:44<00:00,  6.64it/s, loss=0.000981]


train Loss: 0.0039 F1: 0.9972


Epoch 19 val: 100%|██████████| 384/384 [01:24<00:00,  4.55it/s, loss=20]  


val Loss: 24.3406 F1: 0.1156
Epoch 20/20
----------


Epoch 20 train: 100%|██████████| 1893/1893 [04:41<00:00,  6.72it/s, loss=0.000159]


train Loss: 0.0076 F1: 0.9960


Epoch 20 val: 100%|██████████| 384/384 [00:30<00:00, 12.57it/s, loss=24.4]


val Loss: 23.3647 F1: 0.1092
convnext_tiny's Training
Epoch 1/20
----------


Epoch 1 train:  46%|████▌     | 869/1893 [04:38<05:17,  3.22it/s, loss=2.09] 

In [None]:
yolo_model = torch.hub.load('ultralytics/yolov5','yolov5s',pretrained = True)
train_model(model,'YOLO',criteration,optimizer,num_epochs=EPOCH)


Using cache found in C:\Users\Alvin/.cache\torch\hub\ultralytics_yolov5_master


[31m[1mrequirements:[0m Ultralytics requirement ['gitpython>=3.1.30'] not found, attempting AutoUpdate...
Collecting gitpython>=3.1.30
  Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython>=3.1.30)
  Downloading gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython>=3.1.30)
  Downloading smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)
Downloading gitpython-3.1.45-py3-none-any.whl (208 kB)
Downloading gitdb-4.0.12-py3-none-any.whl (62 kB)
Downloading smmap-5.0.2-py3-none-any.whl (24 kB)
Installing collected packages: smmap, gitdb, gitpython

   ---------------------------------------- 0/3 [smmap]
   ------------- -------------------------- 1/3 [gitdb]
   ------------- -------------------------- 1/3 [gitdb]
   ------------- -------------------------- 1/3 [gitdb]
   -------------------------- ------------- 2/3 [gitpython]
   -------------------------- ------------- 2/3 [gitpython]

YOLOv5  2025-12-18 Python-3.13.5 torch-2.10.0.dev20251211+cu130 CUDA:0 (NVIDIA GeForce RTX 5060 Laptop GPU, 8151MiB)

Downloading https://github.com/ultralytics/yolov5/releases/download/v7.0/yolov5s.pt to yolov5s.pt...
100%|██████████| 14.1M/14.1M [00:01<00:00, 10.4MB/s]

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Epoch 1/20
----------


Epoch 1 train:  19%|█▉        | 357/1891 [01:07<04:52,  5.25it/s, loss=0.00545] 


KeyboardInterrupt: 

# Evaluate

In [None]:
def evaluate_on_test_set(model:nn.Module,model_name):
    model.load_state_dict(torch.load(f'./best_models/{model_name}_best.pth'))
    model.eval()

    all_preds = []
    all_labels = []

    test_dataset = datasets.ImageFolder(f'{DATASET_DIR}/test',data_transforms['val'])
    test_loader = DataLoader(test_dataset,batch_size =BATCH_SIZE,shuffle=False)

    with torch.no_grad():
        for inputs,labels in test_loader:
            inputs = inputs.to(DEVICE)
            outputs = model(inputs)
            preds = torch.argmax(outputs,dim=1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
    print(classification_report(all_labels,all_preds, target_names=test_dataset.classes))

In [None]:
for model_name in model_to_test:
    evaluate_on_test_set(model,model_name)

                    precision    recall  f1-score   support

     Mild Dementia       1.00      1.00      1.00       751
 Moderate Dementia       1.00      1.00      1.00        74
      Non Demented       1.00      1.00      1.00     10084
Very mild Dementia       1.00      1.00      1.00      2060

          accuracy                           1.00     12969
         macro avg       1.00      1.00      1.00     12969
      weighted avg       1.00      1.00      1.00     12969



FileNotFoundError: [Errno 2] No such file or directory: './best_models/convnext_tiny_best.pth'

In [None]:
evaluate_on_test_set(yolo_model,'YOLO')