In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Load the heart-failure clinical records CSV from its Kaggle input path.
DATA_PATH = "/kaggle/input/heart-failure-clinical-data/heart_failure_clinical_records_dataset.csv"
dataset = pd.read_csv(DATA_PATH)

# Preview the first five rows.
dataset.head(5)

In [None]:
# Print the frame's column names, non-null counts and dtypes.
dataset.info()

In [None]:
# Show summary statistics for the columns of the frame.
dataset.describe()

In [None]:
# Expose the target under the short name `op`, appended as a new column.
dataset = dataset.assign(op = dataset["DEATH_EVENT"])

In [None]:
# Remove the original target column; its values were copied into `op`.
# Reassigning (instead of inplace=True) with errors="ignore" keeps this cell
# safe to re-run: a second execution is a no-op rather than a KeyError.
dataset = dataset.drop(columns = ["DEATH_EVENT"], errors = "ignore")

In [None]:
# Partition the feature columns (everything except the `op` target) into
# two-valued columns (`biColumns`) and all remaining columns (`rest`).
biColumns, rest = [], []
for col in dataset.columns:
    if col == "op":
        # The target is not a feature; leave it out of both lists.
        continue
    distinct_values = len(dataset[col].unique())
    bucket = biColumns if distinct_values == 2 else rest
    bucket.append(col)

rest

In [None]:
# Count plot for every two-valued feature, split by outcome (`op`).
# The grid is sized from the number of columns so that none are silently
# skipped: zip() stops at the shorter of (columns, axes), which with a fixed
# 2x2 grid would drop any column beyond the fourth.
n_cols = 2
n_rows = max(1, (len(biColumns) + n_cols - 1) // n_cols)  # ceiling division
fig, axis = plt.subplots(n_rows, n_cols, figsize = (30, 7.5 * n_rows), squeeze = False)
for col, axes in zip(biColumns, axis.flat):
    sns.countplot(data = dataset, x = col, ax = axes, fill = True, hue = "op")

# Hide grid cells left over when the column count does not fill the grid.
for axes in axis.flat[len(biColumns):]:
    axes.set_visible(False)

In [None]:
# Kernel density estimate for every non-binary feature, split by outcome (`op`).
# The grid is sized from the number of columns so that none are silently
# skipped: zip() stops at the shorter of (columns, axes), which with a fixed
# 2x2 grid would drop any column beyond the fourth.
n_cols = 2
n_rows = max(1, (len(rest) + n_cols - 1) // n_cols)  # ceiling division
fig, axis = plt.subplots(n_rows, n_cols, figsize = (30, 7.5 * n_rows), squeeze = False)
for col, axes in zip(rest, axis.flat):
    sns.kdeplot(data = dataset, x = col, ax = axes, fill = True, hue = "op")

# Hide grid cells left over when the column count does not fill the grid.
for axes in axis.flat[len(rest):]:
    axes.set_visible(False)

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of `dataset`.
# plt.subplots returns a (figure, axes) pair; unpack both and draw on the axes
# explicitly instead of relying on matplotlib's implicit "current axes".
fig, ax = plt.subplots(figsize = (30, 15))
sns.heatmap(dataset.corr(), annot = True, ax = ax)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Separate the features from the `op` target by name rather than by position.
features = dataset.drop(columns = ["op"])
target = dataset["op"]

# 80/20 split with a fixed seed so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(features, target, train_size = 0.80, random_state = 2)

# Learn the min/max of each feature from the training data ONLY, then apply
# that same transformation to both splits. Fitting a second scaler on the test
# set would scale the two splits inconsistently and use test-set statistics.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import xgboost as xgb
from lightgbm import LGBMClassifier as lgbm
from catboost import CatBoostClassifier
import warnings
# Suppress all warnings from this point on (no category or module filter is given).
warnings.filterwarnings('ignore')

# Seed shared by every stochastic estimator so a fresh "Run All" reproduces the
# same scores (same value as the random_state used for the train/test split).
SEED = 2

# Candidate classifiers, keyed by the label used in printed output and plots.
models = {
    "adaboost": AdaBoostClassifier(n_estimators = 150, random_state = SEED),
    "XgBoost": xgb.XGBClassifier(random_state = SEED),
    "catBoost": CatBoostClassifier(logging_level="Silent", random_seed = SEED),
    "LightGBM": lgbm(random_state = SEED),
    "bagging": BaggingClassifier(n_estimators = 150, random_state = SEED),
    "randomForest": RandomForestClassifier(n_estimators = 150, random_state = SEED),
    "gBoost": GradientBoostingClassifier(random_state = SEED),
    "decisionTree": DecisionTreeClassifier(max_depth = 5, criterion = "gini", random_state = SEED),
    "Xtree": ExtraTreeClassifier(random_state = SEED),
    "SVM": SVC(),
    "logisticRegression": LogisticRegression(),
    "KNeighour": KNeighborsClassifier(),
}

# Fit each model on the training split and score it on the held-out split.
# `accuracy_scores` and `predicted` are kept in the same order as `models`.
accuracy_scores = []
predicted = []
for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # Metrics take (y_true, y_pred). Round to two decimals instead of int():
    # int() truncates, so float error such as 56.99999... would report 56.
    accuracy_scores.append(round(float(accuracy_score(y_test, y_pred)) * 100, 2))
    predicted.append(y_pred)

for name, score in zip(models, accuracy_scores):
    print (' \n ', name, ' accuracy : ', score, ' %  ')

In [None]:
# Bar chart of the test accuracy of each model, one bar per entry in `models`.
plt.figure(figsize = (25, 8))
model_names = list(models.keys())
ax = sns.barplot(x = model_names, y = accuracy_scores)

# Write each bar's value just above its top edge, centred horizontally.
for bar in ax.patches:
    left, bottom = bar.get_xy()
    bar_width = bar.get_width()
    bar_height = bar.get_height()
    label_position = (left + bar_width/2, bottom + bar_height*1.02)
    ax.annotate(f'{round(bar_height,2)}%', label_position, ha='center', fontsize = 'x-large')

# Report the first model that reaches the highest accuracy.
best_score = max(accuracy_scores)
best_model = model_names[accuracy_scores.index(best_score)]
print (best_model, " : " ,best_score, " %")

In [None]:
from sklearn.metrics import classification_report

# Print one classification report per model, pairing each model name with the
# predictions stored for it in `predicted`.
# classification_report expects (y_true, y_pred); with the arguments reversed,
# precision and recall are swapped and the support counts describe the
# predictions instead of the real labels.
for name, y_hat in zip(models, predicted):
    print (' \n \n ', name, ' : \n \n', classification_report(y_test, y_hat))

In [None]:
# Display the working frame: the feature columns plus the `op` target.
dataset