In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.metrics import classification_report, confusion_matrix
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
import seaborn as sns

In [None]:
import matplotlib as mpl
import pylab  # unused below; kept in case other cells reference it — NOTE(review): confirm and drop

# Global plotting defaults for the notebook. Statement order matters: the
# style sheet is applied before the axes/font/tick overrides so that those
# overrides win over whatever the style sheet sets.
mpl.rcParams['lines.linewidth'] = 2
mpl.rcParams['lines.color'] = 'r'

# The seaborn style sheets were renamed to "seaborn-v0_8-*" in Matplotlib 3.6
# and the old names were removed in 3.8. Pick whichever name this
# installation provides; if neither exists, keep the current style.
for _style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break

plt.rc('figure', figsize=(8, 8))

mpl.rcParams.update({
    'font.family': 'serif',
    'font.weight': 'semibold',
    'ytick.major.pad': 15,
    'xtick.major.pad': 15,
    'axes.labelsize': 20,
    'axes.linewidth': 4,
    'axes.edgecolor': 'black',
    'axes.titlesize': 20,
    'xtick.labelsize': 20,
    'ytick.labelsize': 20,
    'legend.fontsize': 15,
})

In [None]:
def plot_cm(y_true, y_pred, labels, class_names=None, normalize='true', png_path=None, show=False):
    """Compute a confusion matrix and draw it as an annotated heatmap.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        labels: Label values fixing the row/column order of the matrix.
        class_names: Optional tick labels, one per entry of ``labels``. When
            ``None`` or empty, ``labels`` themselves are used.
        normalize: Forwarded to ``confusion_matrix`` (``'true'`` by default).
        png_path: If given, the figure is saved to this path.
        show: If true, the figure is displayed via ``plt.show()``.

    Returns:
        The confusion matrix returned by ``confusion_matrix``.
    """
    cm = confusion_matrix(y_true, y_pred, labels=labels, normalize=normalize)

    # Explicit emptiness check: truth-testing a NumPy array of names raises.
    if class_names is None or len(class_names) == 0:
        tick_labels = labels
    else:
        tick_labels = class_names

    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(
            cm,
            annot=True,
            # Fractions get two decimals; un-normalised matrices hold integer counts.
            fmt=".2f" if normalize else "d",
            cmap="Blues",
            xticklabels=tick_labels,
            yticklabels=tick_labels,
            ax=ax,  # draw on the axes created above, not on whatever is current
        )
        ax.set_xlabel("Predicted Label")
        ax.set_ylabel("True Label")
        if png_path:
            fig.savefig(png_path, bbox_inches='tight')
        if show:
            plt.show()
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    return cm

In [None]:
def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)

In [None]:
def preprocess_data(data, label_column="label"):
    """Split a DataFrame into standardised features and encoded labels.

    Args:
        data: DataFrame holding the feature columns plus the label column.
        label_column: Name of the label column. Defaults to ``"label"``;
            pass e.g. ``"Cognitive_Load_Label"`` for files that use that name.

    Returns:
        A tuple ``(X, y, label_encoder)``: ``X`` is the output of
        ``StandardScaler.fit_transform`` on every column except the label,
        ``y`` is the label column encoded by ``LabelEncoder``, and
        ``label_encoder`` is the fitted encoder.

    Raises:
        KeyError: If ``label_column`` is not a column of ``data``.
    """
    features = data.drop(columns=[label_column])

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(data[label_column])

    # NOTE(review): the scaler is fitted on the full dataset, i.e. before any
    # train/test split, so test-set statistics influence the scaled training
    # features. Consider fitting scalers on the training split only.
    X = StandardScaler().fit_transform(features)

    return X, y, label_encoder

In [None]:
def split_data(X, y, test_size=0.20, random_state=42, stratify=None):
    """Split features and labels into train and test subsets.

    Args:
        X: Feature matrix.
        y: Labels aligned with ``X``.
        test_size: Forwarded to ``train_test_split`` (0.20 by default).
        random_state: Seed forwarded to ``train_test_split``.
        stratify: Forwarded to ``train_test_split``. Pass ``y`` to keep the
            class proportions equal in both subsets; the default ``None``
            performs a plain shuffled split.

    Returns:
        The tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        stratify=stratify,
    )
    return X_train, X_test, y_train, y_test

In [None]:
def analyze_labels(data, label_column):
    """Print the number of distinct values in a column and their frequencies.

    Args:
        data: DataFrame containing ``label_column``.
        label_column: Name of the column to summarise.
    """
    column = data[label_column]
    n_classes = len(column.unique())
    per_class = column.value_counts()

    print(f"Number of unique classes: {n_classes}")
    print("\nClasses and their sample counts:")
    print(per_class)

In [None]:
# Read the EEG feature CSV into `data`, the DataFrame the cells below operate on.
data = load_data("EEGfeatures_with_labels.csv")
print("Data loaded successfully.")

In [None]:
# Alternative input file (PPG features): uncomment to load it in place of the EEG file.
#data = load_data("PPGfeatures_with_labels.csv")
#print("Data loaded successfully.")

In [None]:
# Preview the first five rows of the loaded table.
data.head(5)

In [None]:
# List the column names of the loaded table.
print(data.columns)

In [None]:
# Build the scaled feature matrix X and encoded labels y; keep the fitted
# encoder so encoded values can be mapped back to the original labels.
X, y, label_encoder = preprocess_data(data)
print("Data preprocessed successfully.")

In [None]:
# Preview the first encoded labels rather than dumping the whole array.
y[:10]

In [None]:
# Shape of the scaled feature matrix returned by preprocess_data.
X.shape

In [None]:
# Shape of the encoded label array returned by preprocess_data.
y.shape

In [None]:
# Report how many distinct labels the original (un-encoded) label column has
# and how many samples each one covers.
# Variant for files whose label column is named 'Cognitive_Load_Label':
#analyze_labels(data, 'Cognitive_Load_Label') 
analyze_labels(data, 'label') 

In [None]:
# NOTE(review): repeats the earlier X.shape cell; X has not been reassigned
# in between, so this cell can be removed.
X.shape

In [None]:
# Hold out a test set using split_data's defaults (test_size=0.20, random_state=42).
X_train, X_test, y_train, y_test = split_data(X, y)
print("Data split successfully.")

In [None]:
# Shapes of the train and test feature matrices.
print(X_train.shape, X_test.shape)

In [None]:
# Shapes of the train and test label arrays.
print(y_train.shape, y_test.shape)

In [None]:
# Show the container types of the four split outputs.
print(type(X_train), type(y_train), type(X_test), type(y_test))

In [None]:
def train_models(X_train, X_test, y_train, y_test, class_map, save_dir="cm_outputs", random_state=0):
    """Fit a fixed set of classifiers and evaluate each on the test split.

    For every model this prints a text classification report, saves a
    row-normalised confusion-matrix heatmap as a PNG in ``save_dir`` and
    collects the metrics.

    Args:
        X_train, X_test: Feature matrices used for fitting and evaluation.
        y_train, y_test: Labels for the two splits; their values are expected
            to be the keys of ``class_map``.
        class_map: Mapping from label value to display name. Its sorted keys
            fix the class order in the reports and confusion matrices.
        save_dir: Directory for the PNGs; created if missing.
        random_state: Seed passed to every estimator that accepts one.

    Returns:
        Dict keyed by model name (``"NB"``, ``"RF"``, ``"MLP"``, ``"DT"``,
        ``"SVM"``). Each value is a dict with ``"report"`` (the
        ``classification_report`` output as a dict), ``"cm"`` (the matrix
        returned by ``plot_cm``) and ``"cm_path"`` (path of the saved PNG).
    """
    os.makedirs(save_dir, exist_ok=True)

    # Class order and names do not depend on the model, so build them once.
    labels = sorted(class_map)
    class_names = [class_map[i] for i in labels]
    # Passing `labels` together with `target_names` keeps the report valid
    # when a class is missing from y_test / y_pred; without it
    # classification_report raises on the length mismatch.
    report_kwargs = {"labels": labels, "target_names": class_names}

    models = {
        "NB": GaussianNB(),
        "RF": RandomForestClassifier(n_estimators=400, n_jobs=-1, random_state=random_state),
        "MLP": Pipeline([
            ("scaler", StandardScaler()),
            ("clf", MLPClassifier(hidden_layer_sizes=(128, 64), activation="relu", max_iter=400, random_state=random_state))
        ]),
        "DT": DecisionTreeClassifier(random_state=random_state),
        # NOTE(review): probability=True makes fit slower (internal
        # cross-validation) but predict_proba is never called here —
        # consider dropping it.
        "SVM": Pipeline([
            ("scaler", StandardScaler()),
            ("clf", SVC(kernel="linear", probability=True, random_state=random_state))
        ])
    }

    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        print(f"\n=== {name} ===")
        print(classification_report(y_test, y_pred, **report_kwargs))

        cm_path = os.path.join(save_dir, f"{name.lower()}_cm_norm.png")
        cm = plot_cm(y_test, y_pred, labels=labels, class_names=class_names, normalize='true', png_path=cm_path)

        results[name] = {
            "report": classification_report(y_test, y_pred, output_dict=True, **report_kwargs),
            "cm": cm,
            "cm_path": cm_path
        }

    return results

In [None]:
# LabelEncoder numbers classes in sorted order of the original labels. For
# string labels that order is alphabetical ("High" < "Low" < "Medium"), so a
# hand-written 0/1/2 -> Low/Medium/High map would mislabel every class.
# In that case take the display names from the fitted encoder instead.
ORDINAL_CLASS_NAMES = ["Low", "Medium", "High"]
if np.issubdtype(label_encoder.classes_.dtype, np.number):
    # assumes numeric labels increase from low to high load — TODO confirm against the dataset
    class_map = dict(enumerate(ORDINAL_CLASS_NAMES))
else:
    class_map = {code: str(name) for code, name in enumerate(label_encoder.classes_)}

results = train_models(X_train, X_test, y_train, y_test, class_map)

# Collect the F1 score of every dict-valued report entry (each class plus the
# averaged rows); scalar entries such as "accuracy" are skipped.
f1_scores = {
    model_name: {
        str(row): metrics["f1-score"]
        for row, metrics in result["report"].items()
        if isinstance(metrics, dict)
    }
    for model_name, result in results.items()
}
f1_df = pd.DataFrame(f1_scores).T
# The table holds per-class and averaged F1, not only the macro average.
print("\nF1 Scores (per class and averages):")
print(f1_df)