## Import Library


In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier


## Load Dataset

In [2]:
df = pd.read_csv("../data/lymphography.csv")

X = df.drop("class", axis=1)
y = df["class"]


## Define Models (Best Params)

In [3]:
models = {
    "KNN": KNeighborsClassifier(n_neighbors=7, weights="distance"),
    "SVM": SVC(C=10, kernel="rbf", gamma="scale"),
    "DT": DecisionTreeClassifier(max_depth=10, min_samples_split=5, min_samples_leaf=2),
    "MLP": MLPClassifier(hidden_layer_sizes=(100,), activation="relu", max_iter=500),
    "RF": RandomForestClassifier(
        n_estimators=200, max_depth=30,
        min_samples_split=2, min_samples_leaf=1,
        random_state=42
    )
}


## Metric Function (with GM)

In [4]:
def geometric_mean(recall_per_class):
    return np.prod(recall_per_class) ** (1 / len(recall_per_class))


## Cross-Validation Evaluation

In [6]:
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

results = []

for name, model in models.items():
    acc, prec, rec, f1, gm = [], [], [], [], []
    
    for train_idx, test_idx in skf.split(X, y):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        acc.append(accuracy_score(y_test, y_pred))
        prec.append(precision_score(y_test, y_pred, average="macro"))
        rec.append(recall_score(y_test, y_pred, average="macro"))
        f1.append(f1_score(y_test, y_pred, average="macro"))
        
        recall_classes = recall_score(y_test, y_pred, average=None)
        gm.append(geometric_mean(recall_classes))
    
    results.append([
        name,
        np.mean(acc),
        np.mean(prec),
        np.mean(rec),
        np.mean(f1),
        np.mean(gm)
    ])


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])

## Results Table

In [7]:
columns = ["Model", "Accuracy", "Precision", "Recall", "F1-score", "GM"]
results_df = pd.DataFrame(results, columns=columns)

results_df

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-score,GM
0,KNN,0.778571,0.636219,0.639187,0.628459,0.323613
1,SVM,0.831905,0.770403,0.765079,0.759476,0.587989
2,DT,0.724286,0.553534,0.528307,0.525304,0.164448
3,MLP,0.825238,0.763042,0.761607,0.755014,0.5808
4,RF,0.859048,0.77624,0.763889,0.757957,0.533742


## Save Metrics

In [8]:
results_df.to_csv("../results/metrics.csv", index=False)


## Conclusion
- Models were evaluated using multiple performance metrics
- Random Forest achieved the most balanced and robust performance
- Geometric Mean helped handle class imbalance
- Results were saved for further analysis and explainability
