# Base Algorithms Performances

## Libraries

In [15]:
import time
import os
os.chdir("../input")

import numpy as np
import pandas as pd

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")

## Helper Functions

In [2]:
def train_and_evaluate_classifier(clf, X, y, cv=5):
    start = time.time()
    
    y_pred = cross_val_predict(clf, X, y, cv=cv)

    if hasattr(clf, "predict_proba"):
        y_proba = cross_val_predict(clf, X, y, cv=cv, method="predict_proba")[:, 1]
    else:
        y_proba = None

    end = time.time()
    elapsed_time = end - start

    metrics = {
        'Accuracy': accuracy_score(y, y_pred),
        'Precision': precision_score(y, y_pred, zero_division=0),
        'Recall': recall_score(y, y_pred, zero_division=0),
        'F1-Score': f1_score(y, y_pred, zero_division=0),
        'AUC-ROC': roc_auc_score(y, y_proba) if y_proba is not None else None,
        'AUC-PR': average_precision_score(y, y_proba) if y_proba is not None else None,
        'Training Time (s)': elapsed_time
    }

    return metrics

## Data Reading & Preprocessing

In [3]:
df = pd.read_csv("heart.csv")

In [4]:
X = df.drop("output", axis=1)
y = df["output"]

## Training

In [16]:
results_df = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC-ROC', 'AUC-PR', 'Training Time (s)'])

classifiers = {
    "XGBoost": xgb.XGBClassifier(),
    "LightGBM": lgb.LGBMClassifier(),
    "CatBoost": CatBoostClassifier(verbose=0),
    "AdaBoost": AdaBoostClassifier(),
    "GradientBoosting": GradientBoostingClassifier(),
    "SVM": SVC(probability=True),
    "k-NN": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Logistic Regression": LogisticRegression()
}

for name, clf in classifiers.items():
    metrics = train_and_evaluate_classifier(clf, X, y, cv=5)
    results_df.loc[name] = metrics

results_df.sort_index(inplace=True)

results_df.reset_index(drop=False, inplace=True)
results_df.index.name = "#"

[LightGBM] [Info] Number of positive: 132, number of negative: 110
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 246
[LightGBM] [Info] Number of data points in the train set: 242, number of used features: 13
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.545455 -> initscore=0.182322
[LightGBM] [Info] Start training from score 0.182322
[LightGBM] [Info] Number of positive: 132, number of negative: 110
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 239
[LightGBM] [Info] Number of data points in the train set: 242, number of used features: 13
[LightGBM] [Info] [binary:BoostFro

## Evaluation

In [None]:
results_df

Unnamed: 0_level_0,index,Accuracy,Precision,Recall,F1-Score,AUC-ROC,AUC-PR,Training Time (s)
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,AdaBoost,0.785479,0.787356,0.830303,0.80826,0.852394,0.831548,0.303673
1,CatBoost,0.831683,0.839286,0.854545,0.846847,0.90549,0.908784,5.212208
2,GradientBoosting,0.811881,0.806818,0.860606,0.832845,0.882433,0.889629,0.623605
3,LightGBM,0.808581,0.809249,0.848485,0.828402,0.881116,0.891522,0.719569
4,Logistic Regression,0.825083,0.814607,0.878788,0.845481,0.900132,0.909625,0.045545
5,Random Forest,0.808581,0.812865,0.842424,0.827381,0.905599,0.913284,0.452898
6,SVM,0.643564,0.630137,0.836364,0.71875,0.736978,0.762513,0.068166
7,XGBoost,0.788779,0.802395,0.812121,0.807229,0.883531,0.889905,0.703178
8,k-NN,0.643564,0.657459,0.721212,0.687861,0.651823,0.645021,0.014637
