<h1>Heart Failure Prediction - AutoML</h1>

In [1]:
!pip install scikit-learn==0.23.2



In [3]:
import sklearn
# Print the scikit-learn version the kernel actually imports, to verify the pin above took effect.
print(sklearn.__version__)

0.23.2


In [4]:
# Read the heart-disease dataset into a DataFrame.
# 'heart.csv' is a relative path, so the file must sit in the notebook's working directory.
import pandas as pd
df = pd.read_csv('heart.csv')

In [5]:
# Preview the first two rows with a dark table theme; the Styler is the
# cell's last expression so that Jupyter renders it.
df.head(2).style.set_properties(
    **{
        "background-color": "black",
        "color": "white",
        "border-color": "black",
        "font-size": "10pt",
        'width': 200,
    }
)

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1


In [6]:
from pycaret.classification import *

In [7]:
'''setup()-function initializes the environment in pycaret and creates the transformation pipeline 
to prepare the data for modeling and deployment.'''
# One keyword per line so every preprocessing switch is easy to scan:
# fixed session seed, mean imputation for numeric columns, non-interactive
# mode, outlier removal and feature normalisation.
s = setup(
    data=df,
    target='HeartDisease',
    session_id=123,
    numeric_imputation='mean',
    silent=True,
    remove_outliers=True,
    normalize=True,
)

Unnamed: 0,Description,Value
0,session_id,123
1,Target,HeartDisease
2,Target Type,Binary
3,Label Encoded,
4,Original Data,"(918, 12)"
5,Missing Values,False
6,Numeric Features,5
7,Categorical Features,6
8,Ordinal Features,False
9,High Cardinality Features,False


In [13]:
# Compare the candidate classification models.
# compare_models() trains and cross-validates the available estimators,
# displays a score grid with one row per model, and returns the top-ranked
# estimator. (Fitting and scoring one specific estimator is the job of
# create_model(), used further down.)
# round=4 limits the displayed metrics to four decimal places.
best_model = compare_models(round=4)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.8638,0.9239,0.889,0.8657,0.8767,0.7244,0.7257,0.095
et,Extra Trees Classifier,0.8638,0.9215,0.8859,0.868,0.8762,0.7247,0.7266,0.075
ridge,Ridge Classifier,0.8637,0.0,0.8796,0.8721,0.8748,0.7251,0.727,0.005
lr,Logistic Regression,0.8621,0.9244,0.8886,0.8623,0.8744,0.7213,0.7232,0.504
lda,Linear Discriminant Analysis,0.8621,0.9267,0.8766,0.8719,0.8731,0.7218,0.7237,0.008
gbc,Gradient Boosting Classifier,0.8605,0.921,0.8742,0.8715,0.8723,0.7186,0.7197,0.034
knn,K Neighbors Classifier,0.8589,0.9049,0.877,0.8686,0.8716,0.7148,0.7173,0.01
lightgbm,Light Gradient Boosting Machine,0.8523,0.9184,0.8766,0.8576,0.8662,0.7013,0.7033,0.366
nb,Naive Bayes,0.8457,0.9197,0.8281,0.8803,0.8482,0.692,0.6985,0.005
xgboost,Extreme Gradient Boosting,0.8442,0.9152,0.8621,0.8564,0.8583,0.6852,0.6871,0.146


In [14]:
# Show the estimator returned by compare_models(), including its hyperparameters.
print(best_model)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=-1, oob_score=False, random_state=123, verbose=0,
                       warm_start=False)


In [15]:
# List the estimators PyCaret makes available; the ID column holds the
# short names (e.g. "rf") that create_model() accepts.
models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Logistic Regression,sklearn.linear_model._logistic.LogisticRegression,True
knn,K Neighbors Classifier,sklearn.neighbors._classification.KNeighborsCl...,True
nb,Naive Bayes,sklearn.naive_bayes.GaussianNB,True
dt,Decision Tree Classifier,sklearn.tree._classes.DecisionTreeClassifier,True
svm,SVM - Linear Kernel,sklearn.linear_model._stochastic_gradient.SGDC...,True
rbfsvm,SVM - Radial Kernel,sklearn.svm._classes.SVC,False
gpc,Gaussian Process Classifier,sklearn.gaussian_process._gpc.GaussianProcessC...,False
mlp,MLP Classifier,sklearn.neural_network._multilayer_perceptron....,False
ridge,Ridge Classifier,sklearn.linear_model._ridge.RidgeClassifier,True
rf,Random Forest Classifier,sklearn.ensemble._forest.RandomForestClassifier,True


In [19]:
# Fit a Random Forest ("rf") and score it with 5-fold cross-validation;
# metrics in the resulting grid are rounded to 4 decimals.
rf = create_model("rf",fold = 5, round = 4)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8852,0.9526,0.8806,0.9077,0.8939,0.769,0.7694
1,0.877,0.9065,0.9254,0.8611,0.8921,0.7497,0.7523
2,0.7951,0.8989,0.7612,0.85,0.8031,0.5908,0.5947
3,0.8607,0.9402,0.9242,0.8356,0.8777,0.7167,0.7217
4,0.8678,0.9326,0.9091,0.8571,0.8824,0.7317,0.7334
Mean,0.8572,0.9262,0.8801,0.8623,0.8698,0.7116,0.7143
SD,0.0321,0.0203,0.0616,0.0243,0.0339,0.0629,0.062


In [20]:
# Search for better Random Forest hyperparameters, selecting the candidate by AUC.
# No `fold` is passed here, so the fold count falls back to PyCaret's default
# rather than the 5 folds used in create_model() above.
tuned_rf = tune_model(rf, optimize = 'AUC',round = 4)

Unnamed: 0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,0.8197,0.9434,0.7941,0.871,0.8308,0.6387,0.6418
1,0.9016,0.9532,0.9118,0.9118,0.9118,0.8007,0.8007
2,0.8525,0.8453,0.9412,0.8205,0.8767,0.6952,0.7054
3,0.918,0.9665,0.9091,0.9375,0.9231,0.8354,0.8359
4,0.7377,0.869,0.6667,0.8148,0.7333,0.4803,0.4897
5,0.8525,0.9253,0.8788,0.8529,0.8657,0.7021,0.7025
6,0.8852,0.9502,0.9091,0.8824,0.8955,0.7683,0.7687
7,0.918,0.9589,0.9697,0.8889,0.9275,0.8336,0.8378
8,0.8689,0.9491,0.9091,0.8571,0.8824,0.7345,0.7361
9,0.9,0.9562,0.9394,0.8857,0.9118,0.7966,0.7984


In [21]:
# Display the tuned model's hyperparameters as a table.
plot_model(tuned_rf, plot = 'parameter')

Unnamed: 0,Parameters
bootstrap,True
ccp_alpha,0.0
class_weight,balanced_subsample
criterion,entropy
max_depth,4
max_features,log2
max_leaf_nodes,
max_samples,
min_impurity_decrease,0.0002
min_impurity_split,


In [22]:
# Open PyCaret's interactive widget for switching between diagnostic plots of the tuned model.
# NOTE: the widget only renders in a live kernel; a static export shows just its repr.
evaluate_model(tuned_rf)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

In [23]:
# Score the tuned model. No `data` argument is given, so PyCaret should use the
# hold-out sample set aside by setup() — confirm against the PyCaret docs for the installed version.
# The output is a one-row metrics table followed by per-row predictions (Label, Score).
predict_model(tuned_rf)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.8841,0.9334,0.9295,0.8735,0.9006,0.7618,0.7639


Unnamed: 0,Age,RestingBP,Cholesterol,MaxHR,Oldpeak,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,...,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up,HeartDisease,Label,Score
0,-1.830509,-1.268915,-1.773372,-0.490732,0.095737,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1,1,0.9027
1,-1.194126,-0.129872,-0.153288,0.494292,-0.834112,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9229
2,0.078639,-1.268915,0.098725,0.179084,-0.834112,1.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9492
3,1.669596,0.439649,-0.783321,-1.121148,0.560661,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1,1,0.9470
4,-1.406254,0.439649,0.341737,1.991529,-0.834112,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,0.608958,-0.129872,-0.639313,-0.490732,-0.834112,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1,1,0.7943
272,0.290767,-0.699394,-1.773372,0.415490,-0.834112,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1,1,0.7716
273,-1.618381,-0.699394,0.701756,-0.333129,-0.834112,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9536
274,-1.512317,-0.129872,0.161728,-0.687737,-0.834112,1.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9510


In [24]:
''' The finalize_model() function fits the model onto the complete dataset including the 
test/hold-out sample (30% in this case). '''
# After this step the hold-out sample is no longer unseen data for final_rf.
final_rf= finalize_model(tuned_rf)

In [25]:
# Show the finalized estimator and the hyperparameters it carries over from tuning.
print(final_rf)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                       class_weight='balanced_subsample', criterion='entropy',
                       max_depth=4, max_features='log2', max_leaf_nodes=None,
                       max_samples=None, min_impurity_decrease=0.0002,
                       min_impurity_split=None, min_samples_leaf=5,
                       min_samples_split=9, min_weight_fraction_leaf=0.0,
                       n_estimators=130, n_jobs=-1, oob_score=False,
                       random_state=123, verbose=0, warm_start=False)


In [26]:
# NOTE(review): finalize_model() refits on the complete dataset, hold-out sample included,
# so the metrics printed by this cell are measured on rows the model has already been
# trained on and are optimistic. Treat the predict_model(tuned_rf) scores as the honest
# hold-out estimate; use this cell only as a sanity check of the finalized model.
predict_model(final_rf)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.8986,0.9438,0.9359,0.8902,0.9125,0.792,0.7934


Unnamed: 0,Age,RestingBP,Cholesterol,MaxHR,Oldpeak,Sex_M,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,...,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up,HeartDisease,Label,Score
0,-1.830509,-1.268915,-1.773372,-0.490732,0.095737,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1,1,0.9135
1,-1.194126,-0.129872,-0.153288,0.494292,-0.834112,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9339
2,0.078639,-1.268915,0.098725,0.179084,-0.834112,1.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9523
3,1.669596,0.439649,-0.783321,-1.121148,0.560661,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1,1,0.9271
4,-1.406254,0.439649,0.341737,1.991529,-0.834112,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
271,0.608958,-0.129872,-0.639313,-0.490732,-0.834112,1.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1,1,0.8091
272,0.290767,-0.699394,-1.773372,0.415490,-0.834112,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1,1,0.7853
273,-1.618381,-0.699394,0.701756,-0.333129,-0.834112,0.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9633
274,-1.512317,-0.129872,0.161728,-0.687737,-0.834112,1.0,0.0,1.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0,0,0.9431


In [28]:
'''Save_model() allows you to save the model along with entire transformation pipeline for later use.'''
# Persist the finalized model under the given name. The call is the cell's last
# expression, so its return value (starting with the fitted Pipeline) is displayed.
# NOTE(review): the model name contains spaces and a hard-coded date — confirm this is intended.
save_model(final_rf,'Final RF Model 24NOV2021')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=False, features_todrop=[],
                                       id_columns=[],
                                       ml_usecase='classification',
                                       numerical_features=[],
                                       target='HeartDisease',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='not_available',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None,
                                 numeri...
                  RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                         class_weight='balanced_subsample',
                                         criterion='entropy', max_depth=4,
            