In [838]:
pip install tabpfn

12911.45s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Collecting tabpfn
  Downloading tabpfn-2.0.7-py3-none-any.whl.metadata (25 kB)
Collecting torch<3,>=2.1 (from tabpfn)
  Downloading torch-2.6.0-cp310-none-macosx_11_0_arm64.whl.metadata (28 kB)
Collecting einops<0.9,>=0.2.0 (from tabpfn)
  Downloading einops-0.8.1-py3-none-any.whl.metadata (13 kB)
Collecting huggingface-hub<1,>=0.0.1 (from tabpfn)
  Downloading huggingface_hub-0.29.3-py3-none-any.whl.metadata (13 kB)
Collecting fsspec>=2023.5.0 (from huggingface-hub<1,>=0.0.1->tabpfn)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting networkx (from torch<3,>=2.1->tabpfn)
  Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)
Collecting sympy==1.13.1 (from torch<3,>=2.1->tabpfn)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch<3,>=2.1->tabpfn)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading tabpfn-2.0.7-py3-none-any.whl (127 kB)
Downloading einops-0.8.1-py3

In [915]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc

# Set the global font to be DejaVu Sans, size 10 (or any other sans-serif font of your choice!)
rc('font',**{'family':'sans-serif','sans-serif':['DejaVu Sans'],'size':20})

# Set the font used for MathJax - more on this later
rc('mathtext',**{'default':'regular'})


from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score,ConfusionMatrixDisplay, balanced_accuracy_score, precision_score, recall_score, confusion_matrix, roc_curve, RocCurveDisplay,roc_auc_score
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import cross_val_score, LeaveOneOut, GridSearchCV, cross_val_predict
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from numpy import sort
from sklearn.ensemble import IsolationForest

from tabpfn import TabPFNClassifier

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE,BorderlineSMOTE,ADASYN
from imblearn.under_sampling import RandomUnderSampler
from imblearn.ensemble import BalancedRandomForestClassifier, EasyEnsembleClassifier


In [1508]:


# load data
DataControls = pd.read_csv('DataFromControls.csv')
DataParkinsons = pd.read_csv('DataFromParkinsons.csv')
DataRBD = pd.read_csv('DataFromRBD.csv')

# Function to identify and remove outliers using IsolationForest


In [1509]:
# define data groups

selectedCols = ['MeanAmplitude','MeanSpeed', 'amplitudeDecay','velocityDecay']

X1 = DataControls[selectedCols].values.astype(float)
y1 = [0]*len(X1)

X2 = DataRBD[selectedCols].values.astype(float)
y2 = [1]*len(X2)


X3 = DataParkinsons[selectedCols].values.astype(float)
y3 = [1]*len(X3)


def remove_outliers(data):
    iso = IsolationForest(contamination=0.1)
    yhat = iso.fit_predict(data)
    mask = yhat != -1
    return data[mask, :]

# Remove outliers from each group
X1 = remove_outliers(X1)
X2 = remove_outliers(X2)
X3 = remove_outliers(X3)

#for binary classification of RBD (1) vs healthy controls (0)
XCR = np.concatenate([X1, X2], axis=0)
yCR = np.array([0]*len(X1)+[1]*len(X2))

#for binary classification of PD (1) vs healthy controls (0)
XCP = np.concatenate([X1,X3], axis=0)
yCP = np.array([0]*len(X1)+[1]*len(X3))

#for binary classification of RBD (0) vs PD (1) 
XRP = np.concatenate([X2, X3], axis=0)
yRP = np.array([0]*len(X2)+[1]*len(X3))

X = np.concatenate([X1, X2, X3], axis=0)
y = np.array(y1+y2+y3)


unique, counts = np.unique(yRP, return_counts=True)
print('original',dict(zip(unique, counts)))

original {np.int64(0): np.int64(19), np.int64(1): np.int64(16)}


We train different binary models to classify the groups. We use the following models:
- Logistic Regression
- Support Vector Machines
- Random Forest 
- Gradient Boosting


The results of this analysis might vary from trial to trial as the SMOTE algorithm adds randomness to the data. Repeat the analysis many times to obtain an average. 

In [1506]:
# Binary classification of PD (1) vs healthy controls (0)

#Apply SMOTE and RandomUnderSampler
over = SMOTE(random_state=1)
under = RandomUnderSampler(random_state=42)
steps = [('o', over), ('u', under)]
pipe = Pipeline(steps=steps)

# Define subjects
subjects = DataControls['ID'].tolist() + DataParkinsons['ID'].tolist()

# Create a dictionary to map each sample to its subject
sample_to_subject = {i: subjects[i] for i in range(len(subjects))}

# Define a function to get unique subjects
def get_unique_subjects(y):
    return list(set([sample_to_subject[i] for i in range(len(y))]))

# Define a function to get indices for a given subject
def get_subject_indices(subject, y):
    return [i for i in range(len(y)) if sample_to_subject[i] == subject]

# Get unique subjects
unique_subjects = get_unique_subjects(yCP)


#different models used 

# model, scale = LogisticRegression(class_weight='balanced'), True  ##need to scale data 
# model, scale = svm.SVC(class_weight='balanced'), True  ##need to scale data 
model, scale = RandomForestClassifier(n_estimators=5,class_weight='balanced'  ), False ##no need to scale data 
# model, scale = EasyEnsembleClassifier(n_estimators=20), False ##no need to scale data
# model, scale =   XGBClassifier(objective= 'binary:logistic',seed=42,n_jobs=-1,nthread=1,early_stopping_rounds=None,eval_metric='logloss',use_label_encoder=False,verbosity=0,class_weight='balanced'), False

# model, scale =  XGBClassifier(
#     n_estimators=50,
#     objective='binary:logistic',
#     scale_pos_weight=(len(yCP) - np.sum(yCP)) / np.sum(yCP),
#     max_delta_step=1,
# ), False


# Initialize list to store predictions
predictions = []
true_labels = []
accuracies = []
# Perform Leave-One-Subject-Out CV

selected_cols = [0,1,2,3]
selected_cols = [1,3]
for subject in unique_subjects:

    test_indices = get_subject_indices(subject, yCP)
    train_indices = [i for i in range(len(yCP)) if i not in test_indices]

    #test data from one subject
    XCP = np.array(XCP)
    X_test, y_test = XCP[:,selected_cols][test_indices,:], yCP[test_indices]
    # Apply SMOTE to the training data
    #SMOTE to augment the data
    
    # X_train, y_train = SMOTE().fit_resample(XCP[:,selected_cols][train_indices,:], yCP[train_indices])
    X_train, y_train = pipe.fit_resample(XCP[:,selected_cols][train_indices,:], yCP[train_indices])
    # X_train, y_train = XCP[train_indices], yCP[train_indices]
    ## Scale the data
    if scale:
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    
        # Fit the model
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    
    predictions.extend(y_pred)
    true_labels.extend(y_test)
     # Calculate accuracy

    accuracies.append(accuracy_score(y_test, y_pred))

    # print(len(y_test), subject, accuracy_score(y_test, y_pred))


# Calculate the accuracy
accuracy = balanced_accuracy_score(true_labels, predictions)
f1score = f1_score(true_labels, predictions, average = 'weighted')
recall = recall_score(true_labels, predictions, average = 'weighted')
precision = precision_score(true_labels, predictions, average='weighted')
tn, fp, fn, tp = confusion_matrix(true_labels, predictions).ravel()
sensitivity = tp /(tp+fn)
specificity = tn / (tn+fp)
roc_auc = roc_auc_score(true_labels, predictions)
print(f"LOO Subject CV Accuracy: {np.mean(accuracies):.2f}")
print(f"LOO Subject CV Accuracy: {accuracy:.2f}")
print(f"LOO Subject CV F1-score: {f1score:.2f}")
print(f"LOO Subject CV recall: {recall:.2f}")
print(f"LOO Subject CV precision: {precision:.2f}")
print(f"LOO Subject CV sensitivity: {sensitivity:.2f}")
print(f"LOO Subject CV specificity: {specificity :.2f}")
print(f"LOO Subject CV Area under the ROC: {roc_auc :.2f}")
print(confusion_matrix(true_labels, predictions))

# roc_display = RocCurveDisplay.from_predictions(true_labels, predictions)
# roc_display = RocCurveDisplay(fpr=roc_auc[0], tpr=roc_auc[1], roc_auc=roc_auc[2])




LOO Subject CV Accuracy: 0.85
LOO Subject CV Accuracy: 0.82
LOO Subject CV F1-score: 0.84
LOO Subject CV recall: 0.83
LOO Subject CV precision: 0.85
LOO Subject CV sensitivity: 0.81
LOO Subject CV specificity: 0.84
LOO Subject CV Area under the ROC: 0.82
[[36  7]
 [ 3 13]]


Results -- binary classification of PD (1) vs healthy controls (0)

RandomForestClassifier(class_weight='balanced', n_estimators=5)
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.85
LOO Subject CV Accuracy: 0.82
LOO Subject CV F1-score: 0.84
LOO Subject CV recall: 0.83
LOO Subject CV precision: 0.85
LOO Subject CV sensitivity: 0.81
LOO Subject CV specificity: 0.84
LOO Subject CV Area under the ROC: 0.82
[[36  7]
 [ 3 13]]


LogisticRegression(class_weight='balanced')
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.78
LOO Subject CV Accuracy: 0.79
LOO Subject CV F1-score: 0.79
LOO Subject CV recall: 0.78
LOO Subject CV precision: 0.82
LOO Subject CV sensitivity: 0.81
LOO Subject CV specificity: 0.77
LOO Subject CV Area under the ROC: 0.79
[[33 10]
 [ 3 13]]


XGBClassifier
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.73
LOO Subject CV Accuracy: 0.74
LOO Subject CV F1-score: 0.74
LOO Subject CV recall: 0.73
LOO Subject CV precision: 0.78
LOO Subject CV sensitivity: 0.75
LOO Subject CV specificity: 0.72
LOO Subject CV Area under the ROC: 0.74
[[31 12]
 [ 4 12]]

svm(class_weight='balanced')
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.73
LOO Subject CV Accuracy: 0.68
LOO Subject CV F1-score: 0.72
LOO Subject CV recall: 0.71
LOO Subject CV precision: 0.74
LOO Subject CV sensitivity: 0.62
LOO Subject CV specificity: 0.74
LOO Subject CV Area under the ROC: 0.68
[[32 11]
 [ 6 10]]


In [1649]:
# Binary classification of RBD (1) vs healthy controls (0)
    

#Apply SMOTE and RandomUnderSampler
over = SMOTE(random_state=1)
under = RandomUnderSampler(random_state=42)
steps = [('o', over), ('u', under)]
pipe = Pipeline(steps=steps)

# Define subjects
subjects = DataControls['ID'].tolist() + DataRBD['ID'].tolist()

# Create a dictionary to map each sample to its subject
sample_to_subject = {i: subjects[i] for i in range(len(subjects))}

# Define a function to get unique subjects
def get_unique_subjects(y):
    return list(set([sample_to_subject[i] for i in range(len(y))]))

# Define a function to get indices for a given subject
def get_subject_indices(subject, y):
    return [i for i in range(len(y)) if sample_to_subject[i] == subject]

# Get unique subjects
unique_subjects = get_unique_subjects(yCR)


# model, scale = LogisticRegression(class_weight='balanced'), True ##need to scale data 
# model, scale = svm.SVC(class_weight='balanced'), True  ##need to scale data 
model, scale = RandomForestClassifier(n_estimators=20, class_weight='balanced'), False ##no need to scale data 
# model, scale = EasyEnsembleClassifier(n_estimators=20), False ##no need to scale data

# model, scale =  XGBClassifier(
#     n_estimators=30,
#     objective='binary:logistic',
#     scale_pos_weight=(len(yCR) - np.sum(yCR)) / np.sum(yCR),
#     max_delta_step=1,
# ), False

# Initialize list to store predictions
predictions = []
true_labels = []
accuracies = []
# Perform Leave-One-Subject-Out CV

selected_cols = [0,1,2,3]
selected_cols = [1,3] 
for subject in unique_subjects:

    test_indices = get_subject_indices(subject, yCR)
    train_indices = [i for i in range(len(yCR)) if i not in test_indices]

    #test data from one subject
    XCR = np.array(XCR)
    X_test, y_test = XCR[:,selected_cols][test_indices,:], yCR[test_indices]
    # Apply SMOTE to the training data
    #SMOTE to augment the data
    
    # X_train, y_train = SMOTE().fit_resample(XCR[:,selected_cols][train_indices,:], yCR[train_indices])
    X_train, y_train = pipe.fit_resample(XCR[:,selected_cols][train_indices,:], yCR[train_indices])
    # X_train, y_train = XCR[:,selected_cols][train_indices,:], yCP[train_indices]
    ## Scale the data
    if scale:
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    
        # Fit the model
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    
    predictions.extend(y_pred)
    true_labels.extend(y_test)
     # Calculate accuracy

    accuracies.append(accuracy_score(y_test, y_pred))

    # print(len(y_test), subject, accuracy_score(y_test, y_pred))


# Calculate the accuracy
accuracy = balanced_accuracy_score(true_labels, predictions)
f1score = f1_score(true_labels, predictions, average = 'weighted')
recall = recall_score(true_labels, predictions, average = 'weighted')
precision = precision_score(true_labels, predictions, average='weighted')
tn, fp, fn, tp = confusion_matrix(true_labels, predictions).ravel()
sensitivity = tp /(tp+fn)
specificity = tn / (tn+fp)
roc_auc = roc_auc_score(true_labels, predictions)
print(f"LOO Subject CV Accuracy: {np.mean(accuracies):.2f}")
print(f"LOO Subject CV Accuracy: {accuracy:.2f}")
print(f"LOO Subject CV F1-score: {f1score:.2f}")
print(f"LOO Subject CV recall: {recall:.2f}")
print(f"LOO Subject CV precision: {precision:.2f}")
print(f"LOO Subject CV sensitivity: {sensitivity:.2f}")
print(f"LOO Subject CV specificity: {specificity :.2f}")
print(f"LOO Subject CV Area under the ROC: {roc_auc :.2f}")
print(confusion_matrix(true_labels, predictions))

# roc_display = RocCurveDisplay.from_predictions(true_labels, predictions)
# roc_display = RocCurveDisplay(fpr=roc_auc[0], tpr=roc_auc[1], roc_auc=roc_auc[2])




LOO Subject CV Accuracy: 0.80
LOO Subject CV Accuracy: 0.78
LOO Subject CV F1-score: 0.79
LOO Subject CV recall: 0.79
LOO Subject CV precision: 0.80
LOO Subject CV sensitivity: 0.74
LOO Subject CV specificity: 0.81
LOO Subject CV Area under the ROC: 0.78
[[35  8]
 [ 5 14]]


Results Binary classification of RBD (1) vs healthy controls (0)

RandomForestClassifier(class_weight='balanced', n_estimators=10)
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.79
LOO Subject CV Accuracy: 0.78
LOO Subject CV F1-score: 0.79
LOO Subject CV recall: 0.79
LOO Subject CV precision: 0.80
LOO Subject CV sensitivity: 0.74
LOO Subject CV specificity: 0.81
LOO Subject CV Area under the ROC: 0.78
[[35  8]
 [ 5 14]]


LogisticRegression(class_weight='balanced')
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.74
LOO Subject CV Accuracy: 0.71
LOO Subject CV F1-score: 0.73
LOO Subject CV recall: 0.73
LOO Subject CV precision: 0.75
LOO Subject CV sensitivity: 0.68
LOO Subject CV specificity: 0.74
LOO Subject CV Area under the ROC: 0.71
[[32 11]
 [ 6 13]]


XGBClassifier
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.73
LOO Subject CV Accuracy: 0.72
LOO Subject CV F1-score: 0.72
LOO Subject CV recall: 0.71
LOO Subject CV precision: 0.75
LOO Subject CV sensitivity: 0.74
LOO Subject CV specificity: 0.70
LOO Subject CV Area under the ROC: 0.72
[[30 13]
 [ 5 14]]

svm(class_weight='balanced')
SelectedCols = ['MeanSpeed','velocityDecay']
LOO Subject CV Accuracy: 0.83
LOO Subject CV Accuracy: 0.77
LOO Subject CV F1-score: 0.81
LOO Subject CV recall: 0.81
LOO Subject CV precision: 0.81
LOO Subject CV sensitivity: 0.68
LOO Subject CV specificity: 0.86
LOO Subject CV Area under the ROC: 0.77
[[37  6]
 [ 6 13]]

In [1752]:
# Binary classification of RBD (0) vs PD (1)

#Apply SMOTE and RandomUnderSampler
over = SMOTE(random_state=1)
under = RandomUnderSampler(random_state=42)
steps = [('o', over), ('u', under)]
pipe = Pipeline(steps=steps)

# Define subjects
subjects = DataControls['ID'].tolist() + DataRBD['ID'].tolist()

# Create a dictionary to map each sample to its subject
sample_to_subject = {i: subjects[i] for i in range(len(subjects))}

# Define a function to get unique subjects
def get_unique_subjects(y):
    return list(set([sample_to_subject[i] for i in range(len(y))]))

# Define a function to get indices for a given subject
def get_subject_indices(subject, y):
    return [i for i in range(len(y)) if sample_to_subject[i] == subject]

# Get unique subjects
unique_subjects = get_unique_subjects(yRP)


# model, scale = LogisticRegression(class_weight='balanced'), True  ##need to scale data 
# model, scale = svm.SVC(class_weight='balanced'), True  ##need to scale data 
model, scale = RandomForestClassifier(n_estimators=30, class_weight='balanced'), False ##no need to scale data 
# model, scale = EasyEnsembleClassifier(n_estimators=20), False ##no need to scale data
# model, scale =   XGBClassifier(objective= 'binary:logistic',seed=42,n_jobs=-1,nthread=1,early_stopping_rounds=None,eval_metric='logloss',use_label_encoder=False,verbosity=0,class_weight='balanced'), False

# model, scale =  XGBClassifier(
#     n_estimators=5,
#     objective='binary:logistic',
#     scale_pos_weight=(len(yCR) - np.sum(yCR)) / np.sum(yCR),
#     max_delta_step=1,
# ), False

# Initialize list to store predictions
predictions = []
true_labels = []
accuracies = []
# Perform Leave-One-Subject-Out CV

selected_cols = [0,1,2,3]
selected_cols = [0,1] 
for subject in unique_subjects:

    test_indices = get_subject_indices(subject, yRP)
    train_indices = [i for i in range(len(yRP)) if i not in test_indices]

    #test data from one subject
    XCR = np.array(XRP)
    X_test, y_test = XRP[:,selected_cols][test_indices,:], yRP[test_indices]
    # Apply SMOTE to the training data
    #SMOTE to augment the data
    
    X_train, y_train = SMOTE().fit_resample(XRP[:,selected_cols][train_indices,:], yRP[train_indices])
    # X_train, y_train = pipe.fit_resample(XRP[:,selected_cols][train_indices,:], yRP[train_indices])
    # X_train, y_train = XRP[:,selected_cols][train_indices,:], yRP[train_indices]
    ## Scale the data
    if scale:
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    
        # Fit the model
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    
    predictions.extend(y_pred)
    true_labels.extend(y_test)
     # Calculate accuracy

    accuracies.append(accuracy_score(y_test, y_pred))

    # print(len(y_test), subject, accuracy_score(y_test, y_pred))


# Calculate the accuracy
accuracy = balanced_accuracy_score(true_labels, predictions)
f1score = f1_score(true_labels, predictions, average = 'weighted')
recall = recall_score(true_labels, predictions, average = 'weighted')
precision = precision_score(true_labels, predictions, average='weighted')
tn, fp, fn, tp = confusion_matrix(true_labels, predictions).ravel()
sensitivity = tp /(tp+fn)
specificity = tn / (tn+fp)
roc_auc = roc_auc_score(true_labels, predictions)
print(f"LOO Subject CV Accuracy: {np.mean(accuracies):.2f}")
print(f"LOO Subject CV Accuracy: {accuracy:.2f}")
print(f"LOO Subject CV F1-score: {f1score:.2f}")
print(f"LOO Subject CV recall: {recall:.2f}")
print(f"LOO Subject CV precision: {precision:.2f}")
print(f"LOO Subject CV sensitivity: {sensitivity:.2f}")
print(f"LOO Subject CV specificity: {specificity :.2f}")
print(f"LOO Subject CV Area under the ROC: {roc_auc :.2f}")
print(confusion_matrix(true_labels, predictions))

# roc_display = RocCurveDisplay.from_predictions(true_labels, predictions)
# roc_display = RocCurveDisplay(fpr=roc_auc[0], tpr=roc_auc[1], roc_auc=roc_auc[2])





LOO Subject CV Accuracy: 0.83
LOO Subject CV Accuracy: 0.84
LOO Subject CV F1-score: 0.83
LOO Subject CV recall: 0.83
LOO Subject CV precision: 0.85
LOO Subject CV sensitivity: 0.94
LOO Subject CV specificity: 0.74
LOO Subject CV Area under the ROC: 0.84
[[14  5]
 [ 1 15]]


Results Binary classification of RBD (0) vs PD (1)

RandomForestClassifier(class_weight='balanced', n_estimators=20)
SelectedCols = ['MeanAmplitude','MeanSpeed']
LOO Subject CV Accuracy: 0.83
LOO Subject CV Accuracy: 0.84
LOO Subject CV F1-score: 0.83
LOO Subject CV recall: 0.83
LOO Subject CV precision: 0.85
LOO Subject CV sensitivity: 0.94
LOO Subject CV specificity: 0.74
LOO Subject CV Area under the ROC: 0.84
[[14  5]
 [ 1 15]]


LogisticRegression(class_weight='balanced')
SelectedCols = ['MeanAmplitude','MeanSpeed']
LOO Subject CV Accuracy: 0.71
LOO Subject CV Accuracy: 0.77
LOO Subject CV F1-score: 0.77
LOO Subject CV recall: 0.77
LOO Subject CV precision: 0.78
LOO Subject CV sensitivity: 0.81
LOO Subject CV specificity: 0.74
LOO Subject CV Area under the ROC: 0.77
[[14  5]
 [ 3 13]]


svm(class_weight='balanced')
SelectedCols = ['MeanAmplitude','MeanSpeed']
LOO Subject CV Accuracy: 0.71
LOO Subject CV Accuracy: 0.78
LOO Subject CV F1-score: 0.77
LOO Subject CV recall: 0.77
LOO Subject CV precision: 0.79
LOO Subject CV sensitivity: 0.88
LOO Subject CV specificity: 0.68
LOO Subject CV Area under the ROC: 0.78
[[13  6]
 [ 2 14]]

XGBClassifier
SelectedCols = ['MeanAmplitude','MeanSpeed']
LOO Subject CV Accuracy: 0.77
LOO Subject CV Accuracy: 0.82
LOO Subject CV F1-score: 0.80
LOO Subject CV recall: 0.80
LOO Subject CV precision: 0.86
LOO Subject CV sensitivity: 1.00
LOO Subject CV specificity: 0.63
LOO Subject CV Area under the ROC: 0.82
[[12  7]
 [ 0 16]]
