In [None]:
!pip install ucimlrepo
!pip install imbalanced-ensemble
!pip install lightgbm
!pip install pytorch_tabnet



In [None]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
cardiotocography = fetch_ucirepo(id=193)

# data (as pandas dataframes)
X = cardiotocography.data.features
y = cardiotocography.data.targets

# metadata
print(cardiotocography.metadata)

# variable information
print(cardiotocography.variables)


{'uci_id': 193, 'name': 'Cardiotocography', 'repository_url': 'https://archive.ics.uci.edu/dataset/193/cardiotocography', 'data_url': 'https://archive.ics.uci.edu/static/public/193/data.csv', 'abstract': 'The dataset consists of measurements of fetal heart rate (FHR) and uterine contraction (UC) features on cardiotocograms classified by expert obstetricians.', 'area': 'Health and Medicine', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 2126, 'num_features': 21, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['CLASS', 'NSP'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2000, 'last_updated': 'Fri Mar 15 2024', 'dataset_doi': '10.24432/C51S4N', 'creators': ['D. Campos', 'J. Bernardes'], 'intro_paper': None, 'additional_info': {'summary': '2126 fetal cardiotocograms (CTGs) were automatically processed and the respective diagnostic features measured. The CTGs were also classified

In [None]:
X.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,...,Width,Min,Max,Nmax,Nzeros,Mode,Mean,Median,Variance,Tendency
0,120,0.0,0.0,0.0,0.0,0.0,0.0,73,0.5,43,...,64,62,126,2,0,120,137,121,73,1
1,132,0.006,0.0,0.006,0.003,0.0,0.0,17,2.1,0,...,130,68,198,6,1,141,136,140,12,0
2,133,0.003,0.0,0.008,0.003,0.0,0.0,16,2.1,0,...,130,68,198,5,1,141,135,138,13,0
3,134,0.003,0.0,0.008,0.003,0.0,0.0,16,2.4,0,...,117,53,170,11,0,137,134,137,13,1
4,132,0.007,0.0,0.008,0.0,0.0,0.0,16,2.4,0,...,117,53,170,9,0,137,136,138,11,1


In [None]:
y.head()

Unnamed: 0,CLASS,NSP
0,9,2
1,6,1
2,6,1
3,6,1
4,2,1


## Data exploration

In [None]:
y = y["NSP"]

There is some data imbalance here.

In [None]:
print("Number of normal: ", (y == 1).sum())
print("Number of suspect: ", (y == 2).sum())
print("Number of pathological: ", (y == 3).sum())

Number of normal:  1655
Number of suspect:  295
Number of pathological:  176


## Feature selection

In [None]:
print("The features: ")
X.columns

The features: 


Index(['LB', 'AC', 'FM', 'UC', 'DL', 'DS', 'DP', 'ASTV', 'MSTV', 'ALTV',
       'MLTV', 'Width', 'Min', 'Max', 'Nmax', 'Nzeros', 'Mode', 'Mean',
       'Median', 'Variance', 'Tendency'],
      dtype='object')

Below we can see that the features 'LB', 'AC', 'FM', 'UC', 'DL', 'DS', 'DP', 'ASTV', 'MSTV', 'ALTV', 'MLTV' don't seem to show high correlation which is a good sign. The only highly correlated features are 'mode', 'mean' and 'median' which is expected. Highly correlated features can cause unstable training for regression models.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

matrix = X.corr()

plt.figure(figsize=(8,6))
sns.heatmap(matrix, annot=False, cmap="coolwarm", linewidths=0.5)
plt.title("Correlation Heatmap")
plt.show()

I am using L1 regularization for feature selection with multinomial logistic regression. I am no doctor but ChatGPT is and according to him the coefficients below makes sense.

However, I suspect that the "suspect" class may be difficult to predict for a classifier because it lies between the normal and pathological categories and shares features with both.... I think one way around this may be to train a binary classifier that predicts "normal" or "pathological", and classify as suspect if the predicted probability is close to the threshold probability.

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# features = ['LB', 'AC', 'FM', 'UC', 'DL', 'DS', 'DP', 'ASTV', 'MSTV', 'ALTV', 'MLTV']
# X_to_be_scaled = X[features]
X_to_be_scaled = X

# normalize
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_to_be_scaled)

# logistic regression with L1 penalty
log_reg_l1 = LogisticRegression(
    penalty="l1",
    solver="saga",
    multi_class="multinomial",
    C=0.1, # smaller C means stronger regularization
    max_iter=1000,
    random_state=42
)
log_reg_l1.fit(X_scaled, y)

# feature importance
coef = log_reg_l1.coef_



In [None]:
print("coef for normal model: \n", coef[0])
print()
print("coef for normal suspect: \n", coef[1])
print()
print("coef for normal pathological: \n", coef[2])

coef for normal model: 
 [ 0.          2.19435103 -0.21961309  0.58176449  0.          0.
 -1.01593461 -1.07726337  0.02787448 -0.39027387  0.          0.
 -0.04341775 -0.19201244  0.          0.          0.          0.
  0.         -0.44504368  0.        ]

coef for normal suspect: 
 [ 0.          0.          0.          0.         -0.1840667   0.
  0.          0.         -0.09760563  0.          0.          0.
  0.02591755  0.          0.21369675  0.          0.          1.17065434
  0.          0.          0.        ]

coef for normal pathological: 
 [ 0.18140649  0.          0.03600714  0.          0.0988348   0.05994576
  0.21378486  0.69812008  0.          0.5235395   0.          0.
  0.          0.         -0.16950175  0.         -0.62663459  0.
 -0.08829534  0.45970532  0.        ]


## Experiments

These are some quick experiments with simple models so I have not taken any proper measures to handle the data imbalance or prevent overfitting on the training data with a validation set.

In [None]:
from sklearn.model_selection import train_test_split

# X = X[features]

# we should ensure that the train-test split split is stratified
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

y_train -= 1
y_test -= 1


### Linear models

As expected the f1-score for the "suspect" class is a little weaker for logistic regression.

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

log_reg_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("logreg", LogisticRegression(
        penalty="l1", solver="saga", multi_class="multinomial", max_iter=5000
    ))
])

log_reg_pipeline.fit(X_train, y_train)
y_pred_lr = log_reg_pipeline.predict(X_test)

print(classification_report(y_test, y_pred_lr))



              precision    recall  f1-score   support

           0       0.94      0.95      0.95       332
           1       0.62      0.68      0.65        59
           2       0.89      0.69      0.77        35

    accuracy                           0.89       426
   macro avg       0.82      0.77      0.79       426
weighted avg       0.89      0.89      0.89       426



### Tree models and ensembles

As expected with most classical ML problems with tabular data, tree based models perform a little better than logistic/ linear regression because they are non-linear.

In [None]:
tree = DecisionTreeClassifier(
    criterion="gini",
    max_depth=None,
    random_state=42
)

tree.fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)

print(classification_report(y_test, y_pred_tree))

              precision    recall  f1-score   support

           0       0.94      0.95      0.95       332
           1       0.73      0.64      0.68        59
           2       0.81      0.83      0.82        35

    accuracy                           0.90       426
   macro avg       0.82      0.81      0.82       426
weighted avg       0.90      0.90      0.90       426



Since the data is not noisy, boosting methods likely outperform bagging methods.

In [None]:
from lightgbm import LGBMClassifier
from sklearn.metrics import classification_report

lgbm = LGBMClassifier(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=-1,
    random_state=43,
    class_weight="balanced"   # helps with imbalance
)

lgbm.fit(X_train, y_train)
y_pred = lgbm.predict(X_test)
print("\nClassification Report:\n", classification_report(y_test, y_pred))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001072 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1216
[LightGBM] [Info] Number of data points in the train set: 1700, number of used features: 20
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.98      0.97       332
           1       0.85      0.80      0.82        59
           2       0.89      0.94      0.92        35

    accuracy                           0.95       426
   macro avg       0.91      0.91      0.90       426
weighted avg       0.95      0.95      0.95       426



In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Base estimator: XGBoost
xgb = XGBClassifier(
    n_estimators=200,        # boosting rounds per base model
    learning_rate=0.05,
    max_depth=20,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=43,
    use_label_encoder=False,
    eval_metric="logloss",
    scale_pos_weight=1
)

xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)

# Report
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Parameters: { "scale_pos_weight", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)



Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.98      0.96       332
           1       0.86      0.71      0.78        59
           2       0.91      0.91      0.91        35

    accuracy                           0.94       426
   macro avg       0.91      0.87      0.89       426
weighted avg       0.93      0.94      0.93       426



SelfPacedEnsembleClassifier is specially designed to handle class imbalance.

In [None]:
from imbens.ensemble import SelfPacedEnsembleClassifier

SPE = SelfPacedEnsembleClassifier(random_state=41)
SPE.fit(X_train, y_train)

y_pred = SPE.predict(X_test)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.97      0.97       332
           1       0.82      0.78      0.80        59
           2       0.89      0.94      0.92        35

    accuracy                           0.94       426
   macro avg       0.89      0.90      0.89       426
weighted avg       0.94      0.94      0.94       426



In [None]:
from imbens.ensemble import SMOTEBoostClassifier

SB = SMOTEBoostClassifier(random_state=41)
SB.fit(X_train, y_train)

y_pred = SB.predict(X_test)
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.82      0.89       332
           1       0.46      0.85      0.60        59
           2       0.88      0.83      0.85        35

    accuracy                           0.83       426
   macro avg       0.77      0.83      0.78       426
weighted avg       0.88      0.83      0.84       426



## Clustering

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

knn_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier(
        n_neighbors=5,
        metric="minkowski",
        weights="uniform"
    ))
])

knn_pipeline.fit(X_train, y_train)
y_pred_knn = knn_pipeline.predict(X_test)

print("\nKNN Classification Report:\n", classification_report(y_test, y_pred_knn))



KNN Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.96      0.93       332
           1       0.61      0.47      0.53        59
           2       0.88      0.66      0.75        35

    accuracy                           0.87       426
   macro avg       0.80      0.70      0.74       426
weighted avg       0.86      0.87      0.86       426



### Neural networks

TabNet

In [None]:
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import classification_report

# Convert data
X_train_np = X_train.values if hasattr(X_train, "values") else X_train
y_train_np = y_train.values if hasattr(y_train, "values") else y_train
X_test_np = X_test.values if hasattr(X_test, "values") else X_test
y_test_np = y_test.values if hasattr(y_test, "values") else y_test

# Initialize TabNet on GPU
tabnet = TabNetClassifier(
    n_d=8, n_a=8, n_steps=3,
    gamma=1.3, n_independent=2, n_shared=2,
    momentum=0.3,
    seed=42,
    verbose=1,
    device_name="cuda"
)

# Train
tabnet.fit(
    X_train_np, y_train_np,
    eval_set=[(X_test_np, y_test_np)],
    eval_metric=['accuracy'],
    max_epochs=200,
    patience=20,
    batch_size=32,
    virtual_batch_size=32,
    num_workers=4
)

# Predict
y_pred = tabnet.predict(X_test_np)

# Report
print("\nTabNet Classification Report:\n", classification_report(y_test_np, y_pred))


epoch 0  | loss: 0.71267 | val_0_accuracy: 0.7277  |  0:00:01s
epoch 1  | loss: 0.47532 | val_0_accuracy: 0.73944 |  0:00:03s
epoch 2  | loss: 0.4108  | val_0_accuracy: 0.723   |  0:00:05s
epoch 3  | loss: 0.38797 | val_0_accuracy: 0.79343 |  0:00:06s
epoch 4  | loss: 0.33988 | val_0_accuracy: 0.78638 |  0:00:09s
epoch 5  | loss: 0.32473 | val_0_accuracy: 0.80516 |  0:00:11s
epoch 6  | loss: 0.30798 | val_0_accuracy: 0.79577 |  0:00:12s
epoch 7  | loss: 0.32647 | val_0_accuracy: 0.8216  |  0:00:14s
epoch 8  | loss: 0.30647 | val_0_accuracy: 0.80516 |  0:00:16s
epoch 9  | loss: 0.31835 | val_0_accuracy: 0.80047 |  0:00:17s
epoch 10 | loss: 0.2919  | val_0_accuracy: 0.79343 |  0:00:19s
epoch 11 | loss: 0.29214 | val_0_accuracy: 0.80751 |  0:00:21s
epoch 12 | loss: 0.32868 | val_0_accuracy: 0.81221 |  0:00:23s
epoch 13 | loss: 0.29238 | val_0_accuracy: 0.82394 |  0:00:25s
epoch 14 | loss: 0.30241 | val_0_accuracy: 0.84272 |  0:00:26s
epoch 15 | loss: 0.28692 | val_0_accuracy: 0.83099 |  0

In [None]:
import torch
import torch.nn.functional as F

# focal loss
class BinaryFocalLoss(torch.nn.Module):
    def __init__(self, alpha=1, gamma=2):
        super(BinaryFocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        # Using the functional version of BCEWithLogitsLoss so that we can get the unreduced
        # losses (for the Hadamard product)
        bce_loss = F.binary_cross_entropy_with_logits(inputs, targets.unsqueeze(1), reduction="none")

        # Apply sigmoid to get probabilities
        probs = torch.sigmoid(inputs)
        # For targets=1, we use p, for targets=0, we use 1-p
        pt = probs * targets + (1 - probs) * (1 - targets)

        # Focusing term
        focal_weight = (1 - pt).pow(self.gamma)

        # Apply alpha weighting
        if self.alpha is not None:
            # Alpha for positive samples, 1-alpha for negative samples
            alpha_weight = self.alpha * targets + (1 - self.alpha) * (1 - targets)
            focal_weight = focal_weight * alpha_weight

        focal_loss = focal_weight * bce_loss

        return torch.mean(focal_loss)

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    def __init__(self, alpha=None, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha  # tensor of shape [num_classes] or None
        self.gamma = gamma

    def forward(self, inputs, targets):
        # inputs: [batch_size, num_classes]
        # targets: [batch_size] (class indices)

        ce_loss = F.cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-ce_loss)  # equivalent to softmax prob of the true class

        focal_loss = ((1 - pt) ** self.gamma) * ce_loss

        if self.alpha is not None:
            at = self.alpha.gather(0, targets)
            focal_loss = at * focal_loss

        return focal_loss.mean()

Testing with 3 classes

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report

y_nn = y.map({1: 0, 2: 1, 3: 2})

X_temp, X_test, y_temp, y_test = train_test_split(
    X, y_nn,
    test_size=0.2,
    stratify=y_nn,
    random_state=40
)

X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp,
    test_size=0.2,
    stratify=y_temp,
    random_state=40
)

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_t = torch.tensor(y_train.values, dtype=torch.long)  # labels as integers

X_val_t = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_t = torch.tensor(y_val.values, dtype=torch.long)

X_test_t = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_t = torch.tensor(y_test.values, dtype=torch.long)

train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# using class weights
class_counts = torch.bincount(y_train_t)
class_weights = 1.0 / class_counts
class_weights = class_weights / class_weights.sum()

# the model
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            # nn.Linear(hidden_dim, hidden_dim),
            # nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )
    def forward(self, x):
        return self.net(x)

input_dim = X_train.shape[1]   # number of features
hidden_dim = 12
output_dim = len(set(y_train)) # number of classes

model = MLP(input_dim, hidden_dim, output_dim)

# loss and optimizer
# criterion = nn.CrossEntropyLoss(
#     weight=class_weights
# )
criterion = FocalLoss(alpha=class_weights, gamma=2)
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# training loop
epochs = 60
min_val_loss = float('inf')
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

    # validation
    val_output = model(X_val_t) # if the val set were bigger i would use a dataloader for this
    val_loss = criterion(val_output, y_val_t)
    print(f"Validation Loss: {val_loss.item():.4f}")
    if val_loss.item() < min_val_loss:
        min_val_loss = val_loss.item()
        print("   Saving model...")
        # torch.save(model.state_dict(), 'best_model.pth')

# evaluation
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.numpy())
        all_labels.extend(y_batch.numpy())

print(classification_report(all_labels, all_preds))


Epoch [1/60], Loss: 0.0643
Validation Loss: 0.0622
   Saving model...
Epoch [2/60], Loss: 0.0571
Validation Loss: 0.0565
   Saving model...
Epoch [3/60], Loss: 0.0510
Validation Loss: 0.0508
   Saving model...
Epoch [4/60], Loss: 0.0448
Validation Loss: 0.0458
   Saving model...
Epoch [5/60], Loss: 0.0380
Validation Loss: 0.0412
   Saving model...
Epoch [6/60], Loss: 0.0339
Validation Loss: 0.0376
   Saving model...
Epoch [7/60], Loss: 0.0303
Validation Loss: 0.0350
   Saving model...
Epoch [8/60], Loss: 0.0282
Validation Loss: 0.0325
   Saving model...
Epoch [9/60], Loss: 0.0248
Validation Loss: 0.0305
   Saving model...
Epoch [10/60], Loss: 0.0239
Validation Loss: 0.0287
   Saving model...
Epoch [11/60], Loss: 0.0226
Validation Loss: 0.0278
   Saving model...
Epoch [12/60], Loss: 0.0212
Validation Loss: 0.0258
   Saving model...
Epoch [13/60], Loss: 0.0199
Validation Loss: 0.0251
   Saving model...
Epoch [14/60], Loss: 0.0188
Validation Loss: 0.0242
   Saving model...
Epoch [15/60], 

Simplifying to 2 classes, the performance looks much better.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import classification_report

# y_nn = y[(y == 1) | (y == 3)].map({1: 0, 3: 1})
# X_nn = X[(y == 1) | (y == 3)]

y_nn = y.map({1: 0, 2: 0.5, 3: 1})
X_nn = X

X_temp, X_test, y_temp, y_test = train_test_split(
    X_nn, y_nn,
    test_size=0.2,
    stratify=y_nn,
    random_state=40
)

X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp,
    test_size=0.2,
    stratify=y_temp,
    random_state=40
)

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_t = torch.tensor(y_train.values, dtype=torch.float32)  # labels as float because we have 0.5 now

X_val_t = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_t = torch.tensor(y_val.values, dtype=torch.float32)

X_test_t = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_t = torch.tensor(y_test.values, dtype=torch.float32)

train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# using class weights
# temp_y = torch.tensor(y_train[(y_train == 0) | (y_train == 1)].values, dtype=torch.long)
# class_counts = torch.bincount(temp_y)
# class_weights = 1.0 / class_counts
# class_weights = class_weights / class_weights.sum()

# the model
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            # nn.Linear(hidden_dim, hidden_dim),
            # nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )
    def forward(self, x):
        return self.net(x)

input_dim = X_train.shape[1]   # number of features
hidden_dim = 32
output_dim = 1 # binary classifier
# output_dim = len(set(y_train)) # number of classes

model = MLP(input_dim, hidden_dim, output_dim)

# loss and optimizer
# criterion = nn.CrossEntropyLoss(
#     weight=class_weights
# )
# criterion = BinaryFocalLoss(
#     # alpha=class_weights,
#     gamma=2
# )
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# training loop
epochs = 30
min_val_loss = float('inf')
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

    model.eval()
    # validation
    val_output = model(X_val_t) # if the val set were bigger i would use a dataloader for this
    val_loss = criterion(val_output, y_val_t.unsqueeze(1))
    print(f"Validation Loss: {val_loss.item():.4f}")
    if val_loss.item() < min_val_loss:
        min_val_loss = val_loss.item()
        print("   Saving model...")
        # torch.save(model.state_dict(), 'best_model.pth')


Epoch [1/30], Loss: 0.6517
Validation Loss: 0.5960
   Saving model...
Epoch [2/30], Loss: 0.5331
Validation Loss: 0.4798
   Saving model...
Epoch [3/30], Loss: 0.4240
Validation Loss: 0.3848
   Saving model...
Epoch [4/30], Loss: 0.3406
Validation Loss: 0.3223
   Saving model...
Epoch [5/30], Loss: 0.2882
Validation Loss: 0.2824
   Saving model...
Epoch [6/30], Loss: 0.2564
Validation Loss: 0.2553
   Saving model...
Epoch [7/30], Loss: 0.2347
Validation Loss: 0.2371
   Saving model...
Epoch [8/30], Loss: 0.2110
Validation Loss: 0.2230
   Saving model...
Epoch [9/30], Loss: 0.1981
Validation Loss: 0.2135
   Saving model...
Epoch [10/30], Loss: 0.1902
Validation Loss: 0.2079
   Saving model...
Epoch [11/30], Loss: 0.1839
Validation Loss: 0.2036
   Saving model...
Epoch [12/30], Loss: 0.1816
Validation Loss: 0.2009
   Saving model...
Epoch [13/30], Loss: 0.1832
Validation Loss: 0.1984
   Saving model...
Epoch [14/30], Loss: 0.1717
Validation Loss: 0.1976
   Saving model...
Epoch [15/30], 

In [None]:
model.eval()
test_output = model(X_test_t)
test_probs = torch.sigmoid(test_output).squeeze()

# Convert to predicted classes with thresholds
test_pred = torch.where(test_probs < 0.33, 0,
                      torch.where(test_probs > 0.67, 1, 0.5))

# Calculate accuracy
correct = (test_pred == y_test_t).sum().item()
accuracy = correct / len(y_test_t)



print(f"Accuracy: {accuracy:.4f}")
print(classification_report(2 * test_pred, 2 * y_test_t))

Accuracy: 0.8568
              precision    recall  f1-score   support

         0.0       0.95      0.92      0.93       344
         1.0       0.42      0.57      0.49        44
         2.0       0.71      0.66      0.68        38

    accuracy                           0.86       426
   macro avg       0.70      0.71      0.70       426
weighted avg       0.87      0.86      0.86       426



Trying to classify "suspect" below. However I think the model only learn the hard boundary between pathological and normal...

In [None]:
y_suspect = y[y == 2]
X_suspect = scaler.transform(X[y == 2])

X_suspect_t = torch.tensor(X_suspect, dtype=torch.float32)
y_suspect_t = torch.tensor(y_suspect.values, dtype=torch.long)

model.eval()
sussy_output = model(X_suspect_t)
sussy_probs = torch.softmax(sussy_output, dim=1)

sussy_probs[:10]


tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], grad_fn=<SliceBackward0>)

## K-fold cross validation
Since XGBoost and LightGBM seem to perform the best, I will do 5-fold cross validation on them.

In [None]:
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

rf = Pipeline([
      ("impute", SimpleImputer(strategy="median")),
      ("clf", RandomForestClassifier(
          n_estimators=300, class_weight="balanced_subsample",
          random_state=43, n_jobs=-1
      ))
])

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)

y_pred = cross_val_predict(rf, X, y, cv=kf)

print("\nClassification Report (5-fold CV on training set):\n")
print(classification_report(y, y_pred))


Classification Report (5-fold CV on training set):

              precision    recall  f1-score   support

           1       0.95      0.98      0.97      1655
           2       0.87      0.73      0.79       295
           3       0.95      0.89      0.92       176

    accuracy                           0.94      2126
   macro avg       0.92      0.87      0.89      2126
weighted avg       0.94      0.94      0.94      2126



In [None]:
from lightgbm import LGBMClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.metrics import classification_report

lgbm = LGBMClassifier(
    n_estimators=110,
    learning_rate=0.05,
    max_depth=-1,
    random_state=43,
    class_weight="balanced"
)

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)

y_pred = cross_val_predict(lgbm, X, y, cv=kf)

print("\nClassification Report (5-fold CV on training set):\n")
print(classification_report(y, y_pred))


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000353 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1204
[LightGBM] [Info] Number of data points in the train set: 1700, number of used features: 20
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000761 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1212
[LightGBM] [Info] Number of data points in the train set: 1701, number of used features: 20
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Auto-choosing row-

In [None]:
from xgboost import XGBClassifier

y_xgb = y.map({1: 0, 2: 1, 3: 2}).values

# Compute class weights (balanced: inverse frequency)
class_counts = np.bincount(y_xgb)
total = len(y_xgb)
class_weights = {i: total / (len(class_counts) * count) for i, count in enumerate(class_counts)}

xgb = XGBClassifier(
    n_estimators=120,        # boosting rounds per base model
    learning_rate=0.05,
    max_depth=20,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=43,
    use_label_encoder=False,
    eval_metric="logloss",
    scale_pos_weight=class_weights
)

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=69)
y_pred = cross_val_predict(xgb, X, y_xgb, cv=kf)

print("\nClassification Report (5-fold CV on training set):\n")
print(classification_report(y_xgb, y_pred))


Classification Report (5-fold CV on training set):

              precision    recall  f1-score   support

           0       0.96      0.99      0.97      1655
           1       0.90      0.80      0.85       295
           2       0.96      0.91      0.94       176

    accuracy                           0.95      2126
   macro avg       0.94      0.90      0.92      2126
weighted avg       0.95      0.95      0.95      2126

