# Base Algorithms Performances

## Libraries

In [1]:
import time
import os
os.chdir("../input")

import numpy as np
import pandas as pd

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier, RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

import warnings
warnings.filterwarnings("ignore")

## Helper Functions

In [2]:
def train_and_evaluate_classifier(clf, X, y, cv=5):
    start = time.time()
    
    y_pred = cross_val_predict(clf, X, y, cv=cv)

    if hasattr(clf, "predict_proba"):
        y_proba = cross_val_predict(clf, X, y, cv=cv, method="predict_proba")[:, 1]
    else:
        y_proba = None

    end = time.time()
    elapsed_time = end - start

    metrics = {
        'Accuracy': accuracy_score(y, y_pred),
        'Precision': precision_score(y, y_pred, zero_division=0),
        'Recall': recall_score(y, y_pred, zero_division=0),
        'F1-Score': f1_score(y, y_pred, zero_division=0),
        'AUC-ROC': roc_auc_score(y, y_proba) if y_proba is not None else None,
        'AUC-PR': average_precision_score(y, y_proba) if y_proba is not None else None,
        'Training Time (s)': elapsed_time
    }

    return metrics

## Data Reading & Preprocessing

In [3]:
df = pd.read_csv("processed_heart.csv")

In [4]:
X = df.drop("output", axis=1)
y = df["output"]

## Training

In [5]:
results_df = pd.DataFrame(columns=['Accuracy', 'Precision', 'Recall', 'F1-Score', 'AUC-ROC', 'AUC-PR', 'Training Time (s)'])

classifiers = {
    "XGBoost": xgb.XGBClassifier(),
    "LightGBM": lgb.LGBMClassifier(),
    "CatBoost": CatBoostClassifier(verbose=0),
    "AdaBoost": AdaBoostClassifier(),
    "GradientBoosting": GradientBoostingClassifier(),
    "SVM": SVC(probability=True),
    "k-NN": KNeighborsClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Logistic Regression": LogisticRegression()
}

for name, clf in classifiers.items():
    metrics = train_and_evaluate_classifier(clf, X, y, cv=5)
    results_df.loc[name] = metrics

results_df.sort_index(inplace=True)

results_df.reset_index(drop=False, inplace=True)
results_df.index.name = "#"

[LightGBM] [Info] Number of positive: 132, number of negative: 110
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 239
[LightGBM] [Info] Number of data points in the train set: 242, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.545455 -> initscore=0.182322
[LightGBM] [Info] Start training from score 0.182322
[LightGBM] [Info] Number of positive: 132, number of negative: 110
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 232
[LightGBM] [Info] Number of data points in the train set: 242, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.545455 -> initscore=0.182322
[LightGBM] [Info] Start training from score 0.182322
[LightGBM] [Info] Number of po

## Evaluation

In [6]:
results_df

Unnamed: 0_level_0,index,Accuracy,Precision,Recall,F1-Score,AUC-ROC,AUC-PR,Training Time (s)
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,AdaBoost,0.788779,0.788571,0.836364,0.811765,0.822486,0.777471,0.252304
1,CatBoost,0.811881,0.817647,0.842424,0.829851,0.885419,0.884108,5.299788
2,GradientBoosting,0.805281,0.797753,0.860606,0.827988,0.867106,0.862184,0.410484
3,LightGBM,0.782178,0.792899,0.812121,0.802395,0.859332,0.865042,0.652417
4,Logistic Regression,0.821782,0.80663,0.884848,0.843931,0.887484,0.887077,0.045349
5,Random Forest,0.805281,0.80814,0.842424,0.824926,0.88094,0.876625,0.46392
6,SVM,0.653465,0.637615,0.842424,0.725849,0.737242,0.761519,0.06478
7,XGBoost,0.782178,0.789474,0.818182,0.803571,0.8574,0.858712,0.695703
8,k-NN,0.643564,0.657459,0.721212,0.687861,0.652152,0.644282,0.018963
