In [2]:
import torch
# Sanity check: confirm PyTorch can reach a CUDA device before any training.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)  # expected: True

visible_gpus = torch.cuda.device_count()
print(visible_gpus)  # expected: at least 1

True
1


In [3]:
import pandas as pd
import numpy as np
import logging
import os
import shutil
import time
import multiprocessing
import json
from autogluon.tabular import TabularPredictor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split

In [None]:
import pandas as pd
import shutil
import os
import time
import torch
from sklearn.model_selection import StratifiedKFold
from autogluon.tabular import TabularPredictor
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from concurrent.futures import ThreadPoolExecutor

# --- Experiment configuration ---
FILENAME = "XAPI"        # tag embedded in each fold's model directory name
DATA_PATH = "xapi.csv"   # CSV file holding the features and the label column
TARGET = "Class"         # name of the label column
KFOLD = 10               # number of folds for cross-validation
MAX_THREADS = 1          # number of worker threads used to run folds

# --- Data ---
# Read the CSV and strip every space out of the column names.
df = pd.read_csv(DATA_PATH).rename(columns=lambda name: name.replace(" ", ""))

# Features are every column except the label; the label is kept as its own Series.
X = df.drop(TARGET, axis="columns")
y = df[TARGET]

# 1 when a CUDA device is usable, otherwise 0 (also used in output directory names).
gpu_available = int(torch.cuda.is_available())

# Shuffled, seeded stratified splitter: each row lands in exactly one test fold.
skf = StratifiedKFold(
    random_state=42,
    shuffle=True,
    n_splits=KFOLD,
)

# Container for collected results.
results = list()

# Define function to train and evaluate each fold
def run_experiment(fold, train_index, test_index):
    """Train an AutoGluon predictor on one cross-validation fold and score it.

    Reads the module-level ``X``, ``y``, ``TARGET``, ``FILENAME`` and
    ``gpu_available``. Side effects: deletes any existing model directory for
    this fold, trains a new predictor there, and writes the fold's metrics to
    ``test_results_fold{fold}.csv``.

    Args:
        fold: Fold number, used in the output directory and CSV file names.
        train_index: Positional row indices of the training rows.
        test_index: Positional row indices of the held-out test rows.

    Returns:
        dict with the fold number, the weighted test metrics and the
        wall-clock runtime in minutes.
    """
    start_time = time.time()

    # Rebuild the training frame (features + label) from the positional indices.
    df_train = X.iloc[train_index].copy()
    df_train[TARGET] = y.iloc[train_index]

    # The held-out features are passed to the predictor without the label.
    X_test = X.iloc[test_index]
    y_test = y.iloc[test_index]

    # Define output directory for this fold
    path = f"GPU_{gpu_available}_{FILENAME}_DL_VALIDATION_FOLD{fold}"
    if os.path.exists(path):
        shutil.rmtree(path)  # Remove previous results

    # Create AutoGluon predictor
    predictor = TabularPredictor(
        label=TARGET,
        path=path,
        problem_type="multiclass",
    )

    # Fit model
    predictor.fit(
        df_train,
        num_bag_folds=10,
        verbosity=0,
        num_gpus=1 if gpu_available else 0,
        excluded_model_types=['RF', 'KNN', 'GBM', 'XGB', 'CAT', 'XT', 'LR'],
        presets="best_quality"
    )

    # Evaluate model
    y_pred = predictor.predict(X_test)
    y_prob = predictor.predict_proba(X_test)

    test_acc = accuracy_score(y_test, y_pred)
    test_prec = precision_score(y_test, y_pred, average='weighted')
    test_rec = recall_score(y_test, y_pred, average='weighted')
    test_f1 = f1_score(y_test, y_pred, average='weighted')

    # Align the one-hot truth matrix with the probability columns so that each
    # column of both matrices refers to the same class, in the same order.
    y_test_onehot = (
        pd.get_dummies(y_test)
        .reindex(columns=y_prob.columns, fill_value=0)
        .astype(int)
    )
    test_roc_auc = roc_auc_score(y_test_onehot, y_prob, average='weighted', multi_class='ovr')

    # Save results. 'runtime_minutes' must be stored here: the completion
    # message below reads it, and it was previously missing (KeyError).
    result = {
        'fold': fold,
        'test_accuracy': test_acc,
        'test_precision': test_prec,
        'test_recall': test_rec,
        'test_f1': test_f1,
        'test_roc_auc': test_roc_auc,
        'runtime_minutes': (time.time() - start_time) / 60,
    }

    # Save results to CSV
    result_df = pd.DataFrame([result])
    result_df.to_csv(f"test_results_fold{fold}.csv", index=False)

    print(f"\n Fold {fold} completed in {result['runtime_minutes']:.2f} minutes!")

    return result

# Run experiments in parallel, keeping each fold's future so that failures
# inside a worker are not lost.
fold_futures = {}
with ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y), start=1):
        fold_futures[fold] = executor.submit(run_experiment, fold, train_idx, test_idx)

# executor.submit() stores any exception on the future instead of raising it,
# so report failures explicitly. All futures are finished once the `with`
# block has exited, so exception() returns immediately here.
failed_folds = {}
for fold, future in fold_futures.items():
    error = future.exception()
    if error is not None:
        failed_folds[fold] = error
        print(f"\n Fold {fold} raised {type(error).__name__}: {error}")

# Combine results from all folds (read back from the per-fold CSV files)
all_results = [
    pd.read_csv(f"test_results_fold{fold}.csv")
    for fold in range(1, KFOLD + 1)
]

final_results = pd.concat(all_results, ignore_index=True)
final_results.to_csv("final_test_results.csv", index=False)

print("\n All experiments completed! Results saved in `final_test_results.csv`")


In [None]:
# ==========================
# WILCOXON SIGNED-RANK TEST
# ==========================
# Tests, per metric, whether the per-fold scores differ from the single
# reference value reported by Yu et al. (one-sample signed-rank test on the
# differences "fold score - reference").

from scipy.stats import wilcoxon
import pandas as pd
import numpy as np

# Per-fold test metrics written by the cross-validation cell.
RESULTS_CSV = "final_test_results.csv"

fold_results = pd.read_csv(RESULTS_CSV)
if len(fold_results) < 2:
    raise ValueError(
        f"{RESULTS_CSV} holds {len(fold_results)} fold(s); the signed-rank "
        "test needs one score per fold from several folds."
    )

# One array of per-fold scores per metric. A single aggregate number per
# metric would give the test only one observation, which is meaningless.
df_metrics_by_fold = {
    metric: fold_results[f"test_{metric}"].to_numpy(dtype=float)
    for metric in ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
}

# Reference values from Yu et al.
yu_et_al_metrics = {
    'accuracy': 0.7646,
    'precision': 0.6165,
    'recall': 0.6277,
    'f1': 0.6216,
    'roc_auc': None  # Not reported
}

# List of per-metric result rows
wilcoxon_results = []

# Perform Wilcoxon Signed-Rank Test for each metric
for metric in ['accuracy', 'precision', 'recall', 'f1']:
    if yu_et_al_metrics[metric] is not None:
        # Per-fold differences against the fixed reference value
        differences = df_metrics_by_fold[metric] - yu_et_al_metrics[metric]
        # Compute Wilcoxon test
        stat, p_value = wilcoxon(differences, method='approx')

        # Compute descriptive statistics
        median_diff = np.median(differences)
        mean_diff = np.mean(differences)
        std_diff = np.std(differences)

        # Store results
        wilcoxon_results.append({
            'metric': metric,
            'p-value': p_value,
            'median_difference': median_diff,
            'mean_difference': mean_diff,
            'std_difference': std_diff
        })

# Convert results to DataFrame
df_wilcoxon = pd.DataFrame(wilcoxon_results)
df_wilcoxon