In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score

In [None]:
# Load the heart-disease CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv("/kaggle/input/heart-disease-diagnosis-dataset/dataset_heart.csv")

In [None]:
# Preview the first rows to check the column names and value formats.
df.head()

In [None]:
# Count missing values per column.
df.isna().sum()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Summary statistics for the numeric columns.
df.describe()

In [None]:
# Columns treated as numerical features when plotting distributions.
numerical = [
    'age',
    'resting blood pressure',
    'serum cholestoral',
    'max heart rate',
    'oldpeak',
]

In [None]:
def plots(df, x, y):
    """Draw a four-panel overview of column ``x`` against grouping column ``y``.

    Panels, left to right:
      1. histogram (with KDE) of ``x`` over the whole frame,
      2. the same histogram split by ``y`` through ``hue``,
      3. bar chart of the mean of ``x`` within each ``y`` group, with value labels,
      4. pie chart of the number of rows per ``y`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains both columns.
    x : str
        Name of the column to plot (it is capitalised for the figure title).
    y : str
        Name of the column used for grouping (also capitalised for the title).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()`` and then closed.
    """
    # Compute each aggregate once and reuse it for both the data and the labels.
    group_means = df.groupby(y)[x].mean()
    class_counts = df[y].value_counts()

    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20, 10))

    sns.histplot(df, x=x, kde=True, ax=axes[0])
    axes[0].set_title("Distribution of {}".format(x))

    sns.histplot(df, x=x, hue=y, kde=True, ax=axes[1])
    axes[1].set_title("Distribution of {} by {}".format(x, y))

    sns.barplot(x=group_means.index, y=group_means, ax=axes[2])
    for container in axes[2].containers:
        axes[2].bar_label(container, size=15, color='black')
    axes[2].set_title("Mean {} by {}".format(x, y))

    axes[3].pie(class_counts,
                labels=class_counts.index,
                autopct="%0.2f%%")
    axes[3].set_title("Share of rows per {}".format(y))

    plt.suptitle("{} histplot distribution, barplots grouped by {} and pie chart".format(x.capitalize(), y.capitalize()),
                size=20)
    plt.tight_layout()
    plt.show()
    # This function is called once per column in a loop; release the figure
    # so open figures do not accumulate.
    plt.close(fig)

# General histograms, grouped histograms, grouped barplots, pie charts

In [None]:
# One four-panel figure per numerical column, grouped by the target column.
for column in numerical:
    plots(df, column, 'heart disease')

In [None]:
# Recode the target column from the raw labels 1/2 to 0/1.
# NOTE(review): presumably 1 = absence and 2 = presence of heart disease;
# confirm against the dataset description.
mapping = {
    1: 0,
    2: 1
}

# Only recode while the raw label 2 is still present. Without this guard,
# re-running the cell would map the already-recoded column again
# (0 -> NaN, 1 -> 0) and silently corrupt the target.
if (df['heart disease'] == 2).any():
    df['heart disease'] = df['heart disease'].map(mapping)

In [None]:
# The last column is the target; every column before it is a feature.
# Both are taken as plain NumPy arrays.
x, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [None]:
# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
)

# Training pipeline

In [None]:
def training(model,name):
    """Fit ``model`` on the training split and print its test-set accuracy.

    Reads the notebook-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``, so the train/test split cell must have been run first.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is fitted in place.
    name : str
        Label printed in front of the accuracy.

    Returns
    -------
    estimator
        The same ``model`` object, now fitted.
    """
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    # accuracy_score expects (y_true, y_pred). The value is the same in either
    # order, but the documented order avoids surprises if the metric is swapped
    # for an asymmetric one.
    acc = round(accuracy_score(y_test, y_pred) * 100, 2)
    print("{}: {}%".format(name, acc))
    return model

# Defining models and tuning their parameters

In [None]:
# Candidate classifiers. The estimators with randomised training get the same
# fixed seed as the train/test split, so reported accuracies are reproducible
# across kernel restarts.
rfc = RandomForestClassifier(max_depth=45, random_state=42)
svc = SVC(C=0.5)
abc = AdaBoostClassifier(learning_rate=0.1, random_state=42)
knn = KNeighborsClassifier(n_neighbors=15)
lgr = LogisticRegression()
xgb = XGBClassifier(random_state=42)

# `models` and `names` are parallel lists: names[k] is the display label of models[k].
models = [rfc, svc, abc, knn, lgr, xgb]
names = ['Random Forest', 'SVC', 'Ada Boost', 'K-Nearest Neighbors', 'Logistic Regression', 'XGB']

# Results

In [None]:
# Fit every model in turn (each call prints its accuracy) and keep the fitted estimators.
trained = [training(model, label) for model, label in zip(models, names)]