# Inputs

In [1]:
import os
import pandas as pd
import yaml

In [2]:
def _load_yaml(path):
    """Parse the YAML file at ``path`` with the safe loader and return the result.

    The file is opened inside a ``with`` block so the handle is closed as soon
    as parsing finishes, instead of staying open until garbage collection.
    """
    with open(path) as config_file:
        return yaml.load(config_file, Loader=yaml.SafeLoader)


# Each config file is read once; the first three keep only their top-level section.
DATASET_PARAMS = _load_yaml("configs/dataset.yaml")['DATASET']
PREPROCESSING_PARAMS = _load_yaml("configs/preprocessing.yaml")['PREPROCESSING']
EFFICIENTCAPSNET_PARAMS = _load_yaml("configs/efficientcapsnet.yaml")['CAPSNET']
# The training config is used as loaded, without selecting a sub-section.
TRAINING_PARAMS = _load_yaml("configs/training.yaml")

In [3]:
# Category names come straight from the dataset config.
categories = DATASET_PARAMS['CATEGORIES']

# The dataset root is resolved relative to the directory the notebook runs from.
BASE_DIR = os.getcwd()
DATASET_DIR = os.path.join(BASE_DIR, DATASET_PARAMS['DATA_PATH'])

# One sub-directory per split, all directly under the dataset root.
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    os.path.join(DATASET_DIR, split_name) for split_name in ("train", "val", "test")
)

In [4]:
import torch

In [5]:
# Display the installed PyTorch build so the environment is recorded next to the results.
torch.__version__

'2.6.0+cu126'

In [6]:
from src.utils import get_device
# Resolve the compute device once; DEVICE is reused further down when the metric objects are built.
DEVICE = get_device()

# Dataset

In [7]:
from src.utils import generate_filenames_df

In [8]:
# Build one filename table per split from its directory and the configured categories.
# These two objects are what the loaders further down receive.
# NOTE(review): TEST_DIR is defined above but no test-split table is built in the cells shown here.
train_filenames_df = generate_filenames_df(TRAIN_DIR, categories)
val_filenames_df = generate_filenames_df(VAL_DIR, categories)

## Preprocessing

In [9]:
from src.preprocessing import preprocess

# YAML yields a list; preprocess is given the size as a tuple.
target_input_size = tuple(PREPROCESSING_PARAMS['INPUT_SIZE'])

# Training transform: every augmentation option is forwarded from the
# preprocessing config. Each config key maps to the keyword argument with the
# same name in lower case.
train_transform = preprocess(
    target_input_size=target_input_size,
    **{
        option.lower(): PREPROCESSING_PARAMS[option]
        for option in (
            'ROTATION_RANGE',
            'WIDTH_SHIFT_RANGE',
            'HEIGHT_SHIFT_RANGE',
            'BRIGHTNESS_RANGE',
            'ZOOM_RANGE',
            'HORIZONTAL_FLIP',
            'VERTICAL_FLIP',
            'CHANNEL_SHIFT_RANGE',
            'FILL_MODE',
            'DROP_OUT',
            'SHEAR_RANGE',
        )
    },
)

# Validation transform: no augmentation options are passed, so preprocess uses
# its defaults (described by the original author as "only rescaling").
val_transform = preprocess(target_input_size=target_input_size)

In [10]:
from src.dataset import Dataset

# Wrap the training filename table together with the augmenting transform.
# NOTE(review): `dataset` is not referenced by any later cell shown here; the
# loaders below are built directly from the filename tables.
dataset = Dataset(dataframe=train_filenames_df, transform=train_transform)

## Train and validation loaders

In [11]:
from src.loader import Loader


def _build_loader(filenames_df, transform, shuffle):
    """Create a Loader for one split using the batch size and worker count from the configs."""
    # NOTE(review): NUM_WORKERS is read from the CapsNet config section, not the
    # dataset section — confirm that is the intended place for it.
    return Loader(
        filenames_df,
        batch_size=DATASET_PARAMS['BATCH_SIZE'],
        num_workers=EFFICIENTCAPSNET_PARAMS['NUM_WORKERS'],
        transform=transform,
        shuffle=shuffle,
    )


# Training batches are shuffled and augmented; validation batches keep their order.
train_loader = _build_loader(train_filenames_df, train_transform, True)
val_loader = _build_loader(val_filenames_df, val_transform, False)


# Model

### EfficientCapsNet

In [12]:
from src.model import EfficientCapsNet

# NOTE(review): the config value is passed exactly as loaded from YAML. The
# bare parentheses that used to surround it were not a tuple conversion —
# confirm whether tuple(...) is needed here, as is done for the preprocessing
# input size.
# NOTE(review): the DenseNet cell below rebinds `model`, so this instance is
# replaced before training when the notebook is run top to bottom.
model = EfficientCapsNet(input_size=EFFICIENTCAPSNET_PARAMS['INPUT_SIZE'])



### DenseNet

In [13]:
from src.densenet import DenseNet121

# Rebinds `model`: the EfficientCapsNet instance created in the previous cell is replaced.
# The class count is read from the CapsNet config section.
model = DenseNet121(num_classes=EFFICIENTCAPSNET_PARAMS['NUM_CLASSES'])




# Training

## Optimizer and metrics

In [14]:
# The DenseNet121 instance bound to `model` in the DenseNet cell above is
# reused here. This cell previously repeated that cell's import and
# construction verbatim, which built the network a second time for no benefit.
# Re-run the DenseNet cell first whenever freshly initialised weights are wanted.

# Optimizer: Adam over the parameters of the current `model`.
from torch.optim import Adam

optimizer = Adam(model.parameters(), lr=TRAINING_PARAMS['LEARNING_RATE'])

# Metrics supplied by torcheval
from torcheval.metrics import (
    MulticlassAccuracy,
    MulticlassF1Score,
    MulticlassPrecision,
    MulticlassRecall
)

# Metrics implemented in this project
from src.metrics import (
    MulticlassMCC,
    MulticlassSpecificity
)

# Read the shared settings once instead of repeating the lookups per metric.
num_classes = EFFICIENTCAPSNET_PARAMS['NUM_CLASSES']
configured_average = TRAINING_PARAMS['AVERAGE']

# Every metric object is created on DEVICE.
# Accuracy and F1 are pinned to macro averaging; precision, recall and
# specificity follow the averaging mode from the training config.
# NOTE(review): confirm the two hard-coded "macro" values are intentional.
metrics = {
    "mcc": MulticlassMCC(num_classes=num_classes, device=DEVICE),
    "accuracy": MulticlassAccuracy(num_classes=num_classes, average="macro", device=DEVICE),
    "f1_score": MulticlassF1Score(num_classes=num_classes, average="macro", device=DEVICE),
    "precision": MulticlassPrecision(num_classes=num_classes, average=configured_average, device=DEVICE),
    "recall": MulticlassRecall(num_classes=num_classes, average=configured_average, device=DEVICE),
    "specificity": MulticlassSpecificity(num_classes=num_classes, average=configured_average, device=DEVICE)
}

In [15]:
from src.train import train
from torch.nn import CrossEntropyLoss

# Run training with the loaders, optimizer and metrics built in the cells above.
# DEVICE is passed instead of calling get_device() a second time, so the
# training loop receives the same device object the metric objects were
# created on. Epoch count, logging interval and checkpoint settings all come
# from the training config.
history = train(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=CrossEntropyLoss(),
    optimizer=optimizer,
    num_epochs=TRAINING_PARAMS['NUM_EPOCHS'],
    device=DEVICE,
    metrics=metrics,
    print_every=TRAINING_PARAMS['PRINT_EVERY'],
    save_patience=TRAINING_PARAMS['SAVE_PATIENCE'],
    save_path=TRAINING_PARAMS['SAVE_PATH'],
    save_model=TRAINING_PARAMS['SAVE_MODEL'],
    save_metrics=TRAINING_PARAMS['SAVE_METRICS'],
)

Training...

Epoch 1/10


  num_correct = mask.new_zeros(num_classes).scatter_(0, target, mask, reduce="add")


Epoch 1/10, Batch 100/1474, Train Loss: 0.0567
Epoch 1/10, Batch 200/1474, Train Loss: 0.5447
Epoch 1/10, Batch 300/1474, Train Loss: 0.9477
Epoch 1 Train - Loss: 0.2082, Metrics: {mcc: 0.7912, accuracy: 0.8357, f1_score: 0.8470, precision: 0.8847, recall: 0.8357, specificity: 0.9362}
Epoch 1 Val - Loss: 1495.8138, Metrics: {mcc: 0.0000, accuracy: 0.3333, f1_score: 0.1330, precision: 0.0831, recall: 0.3333, specificity: 0.4164}
Model saved to artifacts/effcapsnet\epoch_1.pth

Epoch 2/10
Epoch 2/10, Batch 100/1474, Train Loss: 1.7607
Epoch 2/10, Batch 200/1474, Train Loss: 1.6171
Epoch 2/10, Batch 300/1474, Train Loss: 1.4806
Epoch 2 Train - Loss: 0.3355, Metrics: {mcc: 0.4151, accuracy: 0.6020, f1_score: 0.6021, precision: 0.6022, recall: 0.6020, specificity: 0.8067}
Epoch 2 Val - Loss: 485.5997, Metrics: {mcc: 0.0000, accuracy: 0.3333, f1_score: 0.1330, precision: 0.0831, recall: 0.3333, specificity: 0.4164}
Model saved to artifacts/effcapsnet\epoch_2.pth

Epoch 3/10
Epoch 3/10, Batch

KeyboardInterrupt: 