In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [None]:
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# from sklearn.impute import SimpleImputer


In [None]:
# Read the heart-disease records from the local CSV file.
df = pd.read_csv('./data/heart.csv')

# Preview the first rows to check that the file parsed as expected.
df.head()


Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [None]:
# List the column labels; later cells address columns by position, so the
# order shown here matters.
df.columns


Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

In [None]:
# x: feature matrix built from every column except the last one
# y: target vector taken from the last column

x = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()


In [None]:
# Count the missing values in each column

df.isnull().sum()

# Every count is zero, so no imputation is required


Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [None]:
# THIS BLOCK IS USED IN CASE WE HAVE ANY NULL VALUES.

# Replace the null values of a feature with the mean of that feature's non-null values

# imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# imputer.fit(x[:, [9]])
# x[:, [9]] = imputer.transform(x[:, [9]])


In [None]:
# Encoding categorical data
#
# One-hot encode the categorical feature columns, addressed by their position
# in x (1, 2, 6, 8, 10). All remaining columns are passed through unchanged and
# are placed after the encoded columns in the result.
#
# sparse_threshold=0 forces a dense result: OneHotEncoder emits sparse output
# by default, and np.array() applied to a SciPy sparse matrix yields a 0-d
# object array instead of a 2-D feature matrix.
#
# NOTE: this cell overwrites x with the encoded matrix, so re-run the cell that
# builds x from df before executing this cell a second time.

ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [1, 2, 6, 8, 10])],
    remainder='passthrough',
    sparse_threshold=0,
)
x = np.array(ct.fit_transform(x))


In [None]:
def trainAndTest(testSize, x, y, scaledColumns=slice(14, 20), randomState=0):
    """Train an XGBoost classifier on one train/test split and report accuracy.

    The data is split with ``train_test_split``, the columns selected by
    ``scaledColumns`` are standardised (the scaler is fitted on the training
    portion only and then applied to the test portion), an ``XGBClassifier``
    is fitted on the training portion, and its accuracy on the test portion
    is measured with ``accuracy_score``.

    Parameters
    ----------
    testSize : float
        Forwarded to ``train_test_split`` as ``test_size``. The label in the
        returned string is computed as ``(1 - testSize) * 100``, so it is only
        meaningful when this is a fraction of the dataset.
    x : numpy.ndarray
        2-D feature matrix; it is indexed as ``x_train[:, scaledColumns]``.
    y : array-like
        Target values, one per row of ``x``.
    scaledColumns : slice or sequence of int, optional
        Column selector for the features to standardise. Defaults to
        ``slice(14, 20)``, the range that was previously hard-coded.
    randomState : int, optional
        Seed forwarded to ``train_test_split`` as ``random_state``.
        Defaults to ``0``, the value that was previously hard-coded.

    Returns
    -------
    str
        ``"{:.0f}-{:.4f}"`` filled with the training percentage and the test
        accuracy, in that order.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=testSize, random_state=randomState)

    # Fit the scaler on the training rows only so that no statistics from the
    # test rows influence the transformation.
    sc = StandardScaler()
    x_train[:, scaledColumns] = sc.fit_transform(x_train[:, scaledColumns])
    x_test[:, scaledColumns] = sc.transform(x_test[:, scaledColumns])

    classifier = XGBClassifier(use_label_encoder=False, eval_metric="logloss")
    classifier.fit(x_train, y_train)

    y_pred = classifier.predict(x_test)
    accuracy_score_ = accuracy_score(y_test, y_pred)

    return "{:.0f}-{:.4f}".format((1 - testSize) * 100, accuracy_score_)


In [None]:
# Hold out 20% of the samples as the test set; the fixed seed keeps the
# split reproducible between runs.

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
    test_size=0.2,
)


In [None]:
# Feature scaling
#
# Standardise columns 14-19. The scaler is fitted on the training rows only
# and the same transformation is then applied to the test rows.
# NOTE(review): these are presumably the passthrough numeric features that
# follow the one-hot columns — verify if the encoder's output width changes.

sc = StandardScaler().fit(x_train[:, 14:20])
x_train[:, 14:20] = sc.transform(x_train[:, 14:20])
x_test[:, 14:20] = sc.transform(x_test[:, 14:20])


In [None]:
# Fit an XGBoost classifier on the training split

classifier = XGBClassifier(
    eval_metric="logloss",
    use_label_encoder=False,
)
classifier.fit(x_train, y_train)


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              eval_metric='logloss', gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

In [None]:

# Predict labels for the test rows with the trained classifier

y_pred = classifier.predict(x_test)


In [None]:

# Accuracy of the XGBoost classifier on the test split


print("XG Boost - ", accuracy_score(y_test, y_pred))

# Conclusion

# The accuracy is reported as 86.4% when the classifier is trained with
# XGBoost.
# NOTE(review): the saved output of this cell shows 0.8410, which does not
# match the 86.4% stated here. The output may come from an earlier run with
# different settings — re-run the notebook from a fresh kernel and update
# whichever figure is stale.


XG Boost -  0.840958605664488
