In [2]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

from tabpfn import TabPFNClassifier

# -----------------------------
# CONFIG
# -----------------------------
train_path = "Classification_Combined_Data/S1_S2_train_data.csv"
test_path  = "Classification_Combined_Data/S1_S2_test_data.csv"

RANDOM_STATE = 42

# Raw rating -> binary target: every rating other than "Not Drowsy" is
# merged into a single "drowsy" class.
label_map = {
    'Not Drowsy': 'alert',
    'Slight': 'drowsy',
    'Moderate': 'drowsy',
    'Very': 'drowsy'
}

# -----------------------------
# 1) Load
# -----------------------------
df_train = pd.read_csv(train_path)
df_test  = pd.read_csv(test_path)

# Only rows whose raw label has a mapping are kept. Deriving the list from
# label_map keeps a single source of truth, so .map() below can never
# produce NaN for a kept row.
keep = list(label_map)
df_train = df_train[df_train["Label"].isin(keep)].copy()
df_test  = df_test[df_test["Label"].isin(keep)].copy()

df_train["MappedLabel"] = df_train["Label"].map(label_map)
df_test["MappedLabel"]  = df_test["Label"].map(label_map)

# -----------------------------
# 2) Encode labels
# -----------------------------
# The encoder is fitted on the training labels only and reused for the test set.
le = LabelEncoder()
y_train = le.fit_transform(df_train["MappedLabel"])
y_test  = le.transform(df_test["MappedLabel"])

# Integer codes of every class seen during training. Passing them explicitly
# to the metrics keeps the confusion matrix / report shape stable (and
# aligned with le.classes_) even if a class is absent from y_test or y_pred.
class_ids = np.arange(len(le.classes_))

# -----------------------------
# 3) Features (no scaling needed for TabPFN)
# -----------------------------
# Target and identifier/bookkeeping columns are not used as features.
exclude_cols = ["Label", "MappedLabel", "ID", "Study", "window_start"]
feature_cols = [c for c in df_train.columns if c not in exclude_cols]

# Selecting the training column list on the test frame guarantees identical
# column order (pandas raises KeyError if the test file lacks a column).
X_train = df_train[feature_cols].to_numpy()
X_test  = df_test[feature_cols].to_numpy()

# -----------------------------
# 4) Fit TabPFN on GPU
# -----------------------------
clf = TabPFNClassifier(
    device="cuda",              # <-- forces GPU
    random_state=RANDOM_STATE,
    # If you hit VRAM issues, try lower precision:
    # inference_precision="fp16",
)

clf.fit(X_train, y_train)

# -----------------------------
# 5) Evaluate on TEST
# -----------------------------
# Inference is the expensive step, so it is run once: hard predictions are
# taken as the arg-max of the class probabilities instead of calling
# predict() as a second full forward pass.
# NOTE(review): assumes predict() is the plain arg-max of predict_proba()
# (no tuned decision thresholds) - confirm for the installed TabPFN version.
y_proba = clf.predict_proba(X_test)  # shape (n, n_classes)
y_pred = clf.classes_[np.argmax(y_proba, axis=1)]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Macro F1:", f1_score(y_test, y_pred, labels=class_ids, average="macro"))
print("Weighted F1:", f1_score(y_test, y_pred, labels=class_ids, average="weighted"))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=le.classes_))


Accuracy: 0.7380835380835381
Macro F1: 0.49224288902235735
Weighted F1: 0.6597825968924417

Confusion Matrix:
[[  43  492]
 [  41 1459]]

Classification Report:
              precision    recall  f1-score   support

       alert       0.51      0.08      0.14       535
      drowsy       0.75      0.97      0.85      1500

    accuracy                           0.74      2035
   macro avg       0.63      0.53      0.49      2035
weighted avg       0.69      0.74      0.66      2035



In [1]:
import torch
# Environment check: report the installed PyTorch build and whether a CUDA
# device is usable from this kernel.
print(torch.__version__)
cuda_available = torch.cuda.is_available()
print("cuda:", cuda_available)
# get_device_name() raises when no CUDA device is present, so it is only
# queried when CUDA is actually available; otherwise the cell still completes.
print("gpu:", torch.cuda.get_device_name(0) if cuda_available else "none")


2.5.1+cu121
cuda: True
gpu: NVIDIA GeForce RTX 4090


In [13]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

from tabicl import TabICLClassifier

# -----------------------------
# CONFIG
# -----------------------------
train_path = "Classification_Combined_Data/S1_S2_train_data.csv"
test_path  = "Classification_Combined_Data/S1_S2_test_data.csv"

RANDOM_STATE = 42

# Raw rating -> binary target: every rating other than "Not Drowsy" is
# merged into a single "drowsy" class.
label_map = {
    "Not Drowsy": "alert",
    "Slight": "drowsy",
    "Moderate": "drowsy",
    "Very": "drowsy",
}
# Rows are kept only when their raw label has a mapping; deriving the list
# from label_map means the two can never drift apart.
keep = list(label_map)

# -----------------------------
# 1) Load + map labels
# -----------------------------
df_train = pd.read_csv(train_path)
df_test  = pd.read_csv(test_path)

df_train = df_train[df_train["Label"].isin(keep)].copy()
df_test  = df_test[df_test["Label"].isin(keep)].copy()

df_train["MappedLabel"] = df_train["Label"].map(label_map)
df_test["MappedLabel"]  = df_test["Label"].map(label_map)

# -----------------------------
# 2) Encode labels (for metrics)
# -----------------------------
# The encoder is fitted on the training labels only and reused for the test set.
le = LabelEncoder()
y_train = le.fit_transform(df_train["MappedLabel"])
y_test  = le.transform(df_test["MappedLabel"])

# Integer codes of every class seen during training. Passing them explicitly
# to the metrics keeps the confusion matrix / report shape stable (and
# aligned with le.classes_) even if a class is absent from y_test or y_pred.
class_ids = np.arange(len(le.classes_))

# -----------------------------
# 3) Features
# -----------------------------
# Target and identifier/bookkeeping columns are not used as features; only
# the ones actually present in the training frame are listed.
exclude_cols = [c for c in ["Label", "MappedLabel", "ID", "Study", "window_start"] if c in df_train.columns]
feature_cols = [c for c in df_train.columns if c not in exclude_cols]

# float32 conversion fails loudly if a non-numeric column slipped through.
X_train = df_train[feature_cols].to_numpy(dtype=np.float32)
X_test  = df_test[feature_cols].to_numpy(dtype=np.float32)

# -----------------------------
# 4) Fit TabICL on GPU
# -----------------------------
clf = TabICLClassifier(
    device="cuda",        # forces GPU (or use None for auto)
    use_amp="auto",       # mixed precision if supported
    n_estimators=8,       # default is 8; more = slower, often slightly better
    random_state=RANDOM_STATE,
)

# Optional speedup if you will call predict multiple times with same train set:
# clf.fit(X_train, y_train, kv_cache=True)
clf.fit(X_train, y_train)

# -----------------------------
# 5) Predict + evaluate
# -----------------------------
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Macro F1:", f1_score(y_test, y_pred, labels=class_ids, average="macro"))
print("Weighted F1:", f1_score(y_test, y_pred, labels=class_ids, average="weighted"))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=le.classes_))


INFO: You are downloading 'tabicl-classifier-v2-20260212.ckpt', the latest best-performing version, used in our TabICLv2 paper.

Checkpoint 'tabicl-classifier-v2-20260212.ckpt' not cached.
 Downloading from Hugging Face Hub (jingang/TabICL).



To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Accuracy: 0.7454545454545455
Macro F1: 0.5234286840658546
Weighted F1: 0.6776799124604455

Confusion Matrix:
[[  64  471]
 [  47 1453]]

Classification Report:
              precision    recall  f1-score   support

       alert       0.58      0.12      0.20       535
      drowsy       0.76      0.97      0.85      1500

    accuracy                           0.75      2035
   macro avg       0.67      0.54      0.52      2035
weighted avg       0.71      0.75      0.68      2035



In [1]:
import sys

# Make the vendored RealMLP sources importable. The path is relative to the
# kernel's working directory. The membership check keeps this cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
REALMLP_DIR = "external/realmlp"
if REALMLP_DIR not in sys.path:
    sys.path.append(REALMLP_DIR)

from mlp import Standalone_RealMLP_TD_S_Classifier

In [2]:
import os
import random
import numpy as np
import pandas as pd
import torch

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

from mlp import Standalone_RealMLP_TD_S_Classifier

# -----------------------------
# Reproducibility (optional)
# -----------------------------
# The classifier below is constructed without a random_state argument, so
# the global Python / NumPy / PyTorch RNGs are seeded instead.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# -----------------------------
# Load + preprocess
# -----------------------------
train_path = "Classification_Combined_Data/S1_S2_train_data.csv"
test_path  = "Classification_Combined_Data/S1_S2_test_data.csv"

# Raw rating -> binary target: every rating other than "Not Drowsy" is
# merged into a single "drowsy" class.
label_map = {
    "Not Drowsy": "alert",
    "Slight": "drowsy",
    "Moderate": "drowsy",
    "Very": "drowsy",
}
# Rows are kept only when their raw label has a mapping; deriving the list
# from label_map means the two can never drift apart.
keep = list(label_map)

df_train = pd.read_csv(train_path)
df_test  = pd.read_csv(test_path)

df_train = df_train[df_train["Label"].isin(keep)].copy()
df_test  = df_test[df_test["Label"].isin(keep)].copy()

df_train["MappedLabel"] = df_train["Label"].map(label_map)
df_test["MappedLabel"]  = df_test["Label"].map(label_map)

# The encoder is fitted on the training labels only and reused for the test set.
le = LabelEncoder()
y_train = le.fit_transform(df_train["MappedLabel"])
y_test  = le.transform(df_test["MappedLabel"])

# Integer codes of every class seen during training. Passing them explicitly
# to the metrics keeps the confusion matrix / report shape stable (and
# aligned with le.classes_) even if a class is absent from y_test or y_pred.
class_ids = np.arange(len(le.classes_))

# Target and identifier/bookkeeping columns are not used as features.
exclude_cols = ["Label", "MappedLabel", "ID", "Study", "window_start"]
feature_cols = [c for c in df_train.columns if c not in exclude_cols]

# float32 conversion fails loudly if a non-numeric column slipped through.
X_train = df_train[feature_cols].to_numpy(dtype=np.float32)
X_test  = df_test[feature_cols].to_numpy(dtype=np.float32)

# -----------------------------
# Train RealMLP (GPU)
# -----------------------------
clf = Standalone_RealMLP_TD_S_Classifier(device="cuda")  # <- no random_state arg
clf.fit(X_train, y_train)

# -----------------------------
# Evaluate
# -----------------------------
y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Macro F1:", f1_score(y_test, y_pred, labels=class_ids, average="macro"))
print("Weighted F1:", f1_score(y_test, y_pred, labels=class_ids, average="weighted"))
# sep="" stops print() from inserting a space after the header's newline,
# which would shift the first row of the matrix / report by one column.
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids), sep="")
print("\nClassification Report:\n", classification_report(y_test, y_pred, labels=class_ids, target_names=le.classes_), sep="")


Accuracy: 0.7267813267813268
Macro F1: 0.5126008863525147
Weighted F1: 0.6658137292093002

Confusion Matrix:
 [[  65  470]
 [  86 1414]]

Classification Report:
               precision    recall  f1-score   support

       alert       0.43      0.12      0.19       535
      drowsy       0.75      0.94      0.84      1500

    accuracy                           0.73      2035
   macro avg       0.59      0.53      0.51      2035
weighted avg       0.67      0.73      0.67      2035



