# B 02 Lazy Classifier: PTB Dataset


## 1. Imports

In [1]:
import numpy as np
import pandas as pd
from lazypredict.Supervised import LazyClassifier


In [3]:
print("\n" + "="*80)
print("Evaluating MIT-trained 5-class XGBoost mapped to binary on full PTB (train+val)")
print("="*80)

# Load PTB split (both train and test)
# validation is left out to compare 
ptb_dir = "/home/christianm/Projects/Repos/heartbeat_classification/data/processed/ptb"

X_ptb_train_sm = pd.read_csv(f"{ptb_dir}/X_ptb_train_sm.csv").values
y_ptb_train_sm = pd.read_csv(f"{ptb_dir}/y_ptb_train_sm.csv").iloc[:, 0].values.astype(int)
X_ptb_test = pd.read_csv(f"{ptb_dir}/X_ptb_test.csv").values
y_ptb_test = pd.read_csv(f"{ptb_dir}/y_ptb_test.csv").iloc[:, 0].values.astype(int)
X_ptb_val = pd.read_csv(f"{ptb_dir}/X_ptb_val.csv").values
y_ptb_val = pd.read_csv(f"{ptb_dir}/y_ptb_val.csv").iloc[:, 0].values.astype(int)

print("PTB dataset")
print(f"\tTraining size: {X_ptb_train_sm.shape}, {y_ptb_train_sm.shape}")
print(f"\tTraining SMOTE size: {X_ptb_train_sm.shape}, {y_ptb_train_sm.shape}")
print(f"\tTest Size: {X_ptb_test.shape}, {y_ptb_test.shape}")
# print(f"\tVal Size: {X_ptb_val.shape}, {y_ptb_val.shape}")


Evaluating MIT-trained 5-class XGBoost mapped to binary on full PTB (train+val)
PTB dataset
	Training size: (13438, 187), (13438,)
	Training SMOTE size: (13438, 187), (13438,)
	Test Size: (2909, 187), (2909,)


In [4]:
#LazyClassifier on PTB SMOTE training data, test data
clf = LazyClassifier(verbose=1,ignore_warnings=True, custom_metric=None)

models,predictions = clf.fit(X_ptb_train_sm, X_ptb_test, y_ptb_train_sm, y_ptb_test)

  0%|          | 0/32 [00:00<?, ?it/s]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.7954623581986937, 'Balanced Accuracy': 0.8184384013185002, 'ROC AUC': 0.8184384013185002, 'F1 Score': 0.804798913632478, 'Time taken': 10.613378047943115}
{'Model': 'BaggingClassifier', 'Accuracy': 0.9432794774836714, 'Balanced Accuracy': 0.9458961681087763, 'ROC AUC': 0.9458961681087763, 'F1 Score': 0.9441267104960197, 'Time taken': 19.013993501663208}
{'Model': 'BernoulliNB', 'Accuracy': 0.6132691646613957, 'Balanced Accuracy': 0.6432341515098005, 'ROC AUC': 0.6432341515098005, 'F1 Score': 0.6332544740242653, 'Time taken': 0.1459810733795166}
{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.7848057751804743, 'Balanced Accuracy': 0.7859806345282241, 'ROC AUC': 0.7859806345282241, 'F1 Score': 0.7930685368623994, 'Time taken': 9.205572605133057}
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.9082158817463045, 'Balanced Accuracy': 0.8938739772794161, 'ROC AUC': 0.893873977279416, 'F1 Score': 0.9089292288858235, 'Time taken': 2.71457719802

In [5]:
#results for different models sorted by accuracy
models

Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
XGBClassifier,0.98,0.97,0.97,0.98,5.15
LGBMClassifier,0.97,0.97,0.97,0.97,0.5
ExtraTreesClassifier,0.98,0.97,0.97,0.98,1.59
RandomForestClassifier,0.97,0.97,0.97,0.97,10.98
BaggingClassifier,0.94,0.95,0.95,0.94,19.01
SVC,0.91,0.92,0.92,0.91,9.13
KNeighborsClassifier,0.9,0.91,0.91,0.9,0.22
DecisionTreeClassifier,0.91,0.89,0.89,0.91,2.71
LabelPropagation,0.87,0.89,0.89,0.87,4.52
LabelSpreading,0.87,0.89,0.89,0.87,9.75


In [6]:
#models sorted by F1 score instead of accuracy
print(models.sort_values(by='F1 Score', ascending=False))

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
ExtraTreesClassifier               0.98               0.97     0.97      0.98   
XGBClassifier                      0.98               0.97     0.97      0.98   
LGBMClassifier                     0.97               0.97     0.97      0.97   
RandomForestClassifier             0.97               0.97     0.97      0.97   
BaggingClassifier                  0.94               0.95     0.95      0.94   
SVC                                0.91               0.92     0.92      0.91   
DecisionTreeClassifier             0.91               0.89     0.89      0.91   
KNeighborsClassifier               0.90               0.91     0.91      0.90   
ExtraTreeClassifier                0.89               0.88     0.88      0.89   
LabelPropagation                   0.87               0.89     0.89      0.87   
LabelSpreading              