## Importing libraries

In [1]:
import random

import numpy as np
import pandas as pd
import torch
from scipy.special import softmax
from sklearn.metrics import (
    accuracy_score,
    cohen_kappa_score,
    confusion_matrix,
    mean_absolute_error,
)
from skorch import NeuralNetClassifier
from skorch.callbacks import EarlyStopping, LRScheduler
from skorch.dataset import ValidSplit
from torch import cuda, nn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchvision import models
from torchvision.transforms import Compose, ToTensor

from dlordinal.datasets import FGNet
from dlordinal.losses import (
    BetaCrossEntropyLoss,
    BinomialCrossEntropyLoss,
    EMDLoss,
    GeometricCrossEntropyLoss,
    TriangularCrossEntropyLoss,
    WKLoss,
)
from dlordinal.metrics import accuracy_off1, amae, mmae, ranked_probability_score

## Dataset
Download the `FGNet` dataset (train and test splits).

In [2]:
def _load_fgnet(train):
    """Build the FGNet split selected by ``train`` (True: train, False: test).

    Both splits share the same root folder, are downloaded on demand and
    only apply a ``ToTensor`` transform.
    """
    return FGNet(
        root="./datasets",
        download=True,
        train=train,
        transform=Compose([ToTensor()]),
    )


fgnet_train = _load_fgnet(train=True)
fgnet_test = _load_fgnet(train=False)

# Class metadata and labels, all taken from the training split.
classes = fgnet_train.classes
num_classes = len(classes)
targets = fgnet_train.targets

# Get CUDA device
device = "cpu"
if cuda.is_available():
    device = "cuda"
print(f"Using {device} device")

Files already downloaded and verified
Files already processed and verified
Files already split and verified
Files already downloaded and verified
Files already processed and verified
Files already split and verified
Using cpu device


## Metrics 
Metrics to evaluate different ordinal losses.

In [3]:
def calculate_metrics(y_true, y_pred):
    """Compute, print and return the evaluation metrics for one prediction set.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, one per sample.
    y_pred : array-like, one row per sample
        Per-class scores. Rows that already sum to 1 are used as
        probabilities; otherwise a softmax over axis 1 produces the
        probabilities handed to the ranked probability score.

    Returns
    -------
    dict
        Metric values keyed by "ACC", "1OFF", "MAE", "QWK", "AMAE", "MMAE"
        and "RPS".

    Notes
    -----
    Every metric and the confusion matrix are also printed.
    """
    row_sums = np.sum(y_pred, axis=1)
    probabilities = y_pred if np.allclose(row_sums, 1) else softmax(y_pred, axis=1)

    # Hard label per sample: the column with the highest score.
    predicted_labels = np.argmax(y_pred, axis=1)

    # amae, mmae and accuracy_off1 receive the raw score matrix, the
    # scikit-learn metrics receive the hard labels.
    metrics = {
        "ACC": accuracy_score(y_true, predicted_labels),
        "1OFF": accuracy_off1(y_true, y_pred),
        "MAE": mean_absolute_error(y_true, predicted_labels),
        "QWK": cohen_kappa_score(y_true, predicted_labels, weights="quadratic"),
        "AMAE": amae(y_true, y_pred),
        "MMAE": mmae(y_true, y_pred),
        "RPS": ranked_probability_score(y_true, probabilities),
    }

    for name, score in metrics.items():
        print(f"{name}: {score}")

    print(confusion_matrix(y_true, predicted_labels))

    return metrics

## Experiment
We briefly compare several (ordinal) losses using `PyTorch` and `Skorch`,
with ResNet18, a convolutional neural network pre-trained on ImageNet, as the model architecture.
Concretely, we compare the standard Cross Entropy (CE) Loss with several ordinal approaches from the `dlordinal` library:

- Cross Entropy (CE) Loss
- Squared Earth Mover's Distance (EMD) Loss [1]
- Weighted Kappa Loss [2]
- Binomial Cross Entropy Loss  [3]
- Triangular Cross Entropy Loss [4]
- Beta Cross Entropy Loss [5]
- Geometric Cross Entropy Loss [6]

[1] Hou, L., Yu, C. P., & Samaras, D. (2016). Squared earth mover's distance-based loss for training deep neural networks. arXiv preprint arXiv:1611.05916. 

[2] de La Torre, J., Puig, D., & Valls, A. (2018). Weighted kappa loss function for multi-class classification of ordinal data in deep learning. Pattern Recognition Letters, 105, 144-154.

[3] Liu, X., Fan, F., Kong, L., Diao, Z., Xie, W., Lu, J., & You, J. (2020). Unimodal regularized neuron stick-breaking for ordinal classification. Neurocomputing, 388, 34-44.

[4] Vargas, V. M., Gutiérrez, P. A., Barbero-Gómez, J., & Hervás-Martínez, C. (2023). Soft labelling based on triangular distributions for ordinal classification. Information Fusion, 93, 258-267.

[5] Vargas, V. M., Gutiérrez, P. A., & Hervás-Martínez, C. (2022). Unimodal regularisation based on beta distribution for deep ordinal regression. Pattern Recognition, 122, 108310.

[6] Haas, S., & Hüllermeier, E. (2023, September). Rectifying bias in ordinal observational data using unimodal label smoothing. In Joint European Conference on Machine Learning and Knowledge Discovery in Databases (pp. 3-18). Cham: Springer Nature Switzerland.



In [4]:
# Loss functions
# Ordinal losses that are configured with nothing but the number of classes,
# listed in the order in which they are evaluated.
ordinal_loss_classes = (
    EMDLoss,
    WKLoss,
    BinomialCrossEntropyLoss,
    TriangularCrossEntropyLoss,
    BetaCrossEntropyLoss,
)

# NOTE(review): six values, presumably one smoothing factor per class --
# confirm this still matches num_classes if the dataset changes.
geometric_alphas = [0.15, 0.35, 0.35, 0.35, 0.35, 0.15]

# Plain cross entropy first (the baseline), then the ordinal losses.
losses = [CrossEntropyLoss().to(device)]
losses.extend(
    loss_class(num_classes=num_classes).to(device)
    for loss_class in ordinal_loss_classes
)
losses.append(
    GeometricCrossEntropyLoss(
        num_classes=num_classes, alphas=geometric_alphas
    ).to(device)
)

# Evaluate each loss K times with different seeds to obtain a more robust result
K = 5

# Per-run metrics table, populated by the experiment loop.
result = pd.DataFrame()


def set_seed(seed):
    """Seed every random number generator used by the experiment.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (plus the CUDA generators when CUDA is available), then sets the cuDNN
    ``deterministic`` flag and turns ``benchmark`` off.

    Parameters
    ----------
    seed : int
        Value passed to every seeding call.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generator, then the current and all CUDA devices if present.
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        for seed_cuda in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            seed_cuda(seed)

    # Add deterministic settings
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# One record (metrics plus run identifiers) per trained model. The records are
# turned into `result` in a single step after the loop: regrowing a DataFrame
# with pd.concat on every run copies all previous rows each time and, starting
# from an empty frame, can trigger pandas' empty-entry concat deprecation.
run_records = []

for loss_fn in losses:
    loss_name = type(loss_fn).__name__

    for k in range(K):
        # Make results reproducible
        seed = k
        set_seed(seed)  # Use K different seeds

        # Initialize ResNet18 model with ImageNet weights and replace its
        # final layer with a fresh one that has `num_classes` outputs.
        model = models.resnet18(weights="IMAGENET1K_V1")
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        model.to(device)

        # Skorch estimator
        # NOTE(review): LRScheduler is left at its default `monitor`, while
        # EarlyStopping watches "valid_loss" -- confirm both are meant to
        # track the quantities they currently do.
        estimator = NeuralNetClassifier(
            module=model,
            criterion=loss_fn,
            optimizer=Adam,
            lr=0.001,
            max_epochs=30,
            train_split=ValidSplit(
                0.1, random_state=seed
            ),  # Use 10% of the data for validation
            callbacks=[
                EarlyStopping(patience=5, monitor="valid_loss"),
                LRScheduler(policy=ReduceLROnPlateau, patience=3, factor=0.5),
            ],
            device=device,
            batch_size=200,
        )

        print(f"#{k} {loss_name}")

        estimator.fit(
            X=fgnet_train, y=torch.tensor(fgnet_train.targets, dtype=torch.long)
        )

        # Evaluate on the held-out test split.
        test_probs = estimator.predict_proba(fgnet_test)

        metrics = calculate_metrics(np.array(fgnet_test.targets), test_probs)
        print("\n")

        # Same column order as before: metric columns, then iteration, then loss.
        run_records.append({**metrics, "iteration": k, "loss": loss_name})

# One row per (loss, iteration) run with a fresh 0..n-1 index.
result = pd.DataFrame(run_records)

#0 CrossEntropyLoss
  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m1.6403[0m       [32m0.2469[0m        [35m1.9822[0m  10.7012
      2        [36m0.6649[0m       [32m0.3086[0m        3.2406  11.3902
      3        [36m0.2415[0m       [32m0.4815[0m        [35m1.9782[0m  11.7711
      4        [36m0.0831[0m       0.4074        2.1255  28.2944
      5        [36m0.0255[0m       0.4321        3.1614  22.1527
      6        [36m0.0072[0m       0.4444        3.6414  30.5124
      7        [36m0.0045[0m       0.4815        3.5090  31.9690
Stopping since valid_loss has not improved in the last 5 epochs.
ACC: 0.5323383084577115
1OFF: 0.8756218905472637
MAE: 0.6019900497512438
QWK: 0.7598854786073238
AMAE: 0.6873376623376624
MMAE: 1.2857142857142858
RPS: 0.544454429576801
[[15  6  0  1  0  0]
 [ 5 41  0 14  0  0]
 [ 0 10  2 20  1  0]
 [ 0  1  3 37  1  0]
 [ 0  1  2 16 11  0]
 [ 0  0  0

## Result
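Displays the mean of each metric per loss over the K iterations. The best value in each column is highlighted in green: the highest for ACC, 1OFF and QWK, and the lowest for MAE, AMAE, MMAE and RPS.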

In [5]:
# Index a copy by (loss, iteration) instead of mutating `result` in place.
# With `inplace=True` the two columns are consumed on the first execution, so
# running this cell a second time raised a KeyError.
result_by_run = result.set_index(["loss", "iteration"])

# Mean of every metric per loss over its K runs.
mean_by_loss = result_by_run.groupby("loss").agg("mean")

# Highlight the best loss per metric: highest for the scores, lowest for the
# error measures. The styler is the last expression so the notebook renders it.
mean_by_loss.style.highlight_max(
    axis=0, subset=["ACC", "1OFF", "QWK"], color="green"
).highlight_min(axis=0, subset=["MAE", "AMAE", "MMAE", "RPS"], color="green")

Unnamed: 0_level_0,ACC,1OFF,MAE,QWK,AMAE,MMAE,RPS
loss,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
BetaCrossEntropyLoss,0.578109,0.959204,0.464677,0.852328,0.533016,0.885714,0.337343
BinomialCrossEntropyLoss,0.565174,0.951244,0.488557,0.83659,0.540188,0.928571,0.400021
CrossEntropyLoss,0.507463,0.864677,0.671642,0.723803,0.745231,1.350649,0.543212
EMDLoss,0.553234,0.922388,0.544279,0.796439,0.617395,1.030476,0.438839
GeometricCrossEntropyLoss,0.585075,0.945274,0.476617,0.842377,0.523716,0.817316,0.354377
TriangularCrossEntropyLoss,0.58408,0.944279,0.482587,0.835471,0.535823,0.849351,0.341951
WKLoss,0.447761,0.861692,0.775124,0.58514,0.907316,1.92987,0.652706
