In [88]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

In [89]:
df = pd.read_csv('../data/heart_disease.csv')
df.head()

Unnamed: 0,MaxHeartRate,oldpeak,Cholerstoral,Age,MajorVessels,RestingBloodPressure,ThaliumStressResult,ChestPainType,target
0,0.017197,1.087338,-0.2649,0.948726,0.0,0.757525,6.0,1.0,0
1,-1.821905,0.397182,0.760415,1.392002,3.0,1.61122,3.0,4.0,2
2,-0.902354,1.346147,-0.342283,1.392002,2.0,-0.6653,7.0,4.0,1
3,1.637359,2.122573,0.063974,-1.932564,0.0,-0.09617,3.0,3.0,0
4,0.980537,0.310912,-0.825922,-1.489288,0.0,-0.09617,3.0,2.0,0


In [90]:
X = df.drop('target', axis=1)
y = df['target']

In [91]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=42)

In [92]:
lr = LogisticRegression(max_iter=1000, random_state=42)
lr.fit(X_train, y_train)

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,1000


In [93]:
y_pred_lr = lr.predict(X_test)
y_pred_proba_lr = lr.predict_proba(X_test)

In [94]:
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
print("Logistic Regression Precision:", precision_score(y_test, y_pred_lr, average='macro'))
print("Logistic Regression Recall:", recall_score(y_test, y_pred_lr, average='macro'))
print("Logistic Regression F1 Score:", f1_score(y_test, y_pred_lr, average='macro'))
print("Logistic Regression ROC AUC Score:", roc_auc_score(y_test, y_pred_proba_lr, multi_class='ovr'))

Logistic Regression Accuracy: 0.5081967213114754
Logistic Regression Precision: 0.2664285714285714
Logistic Regression Recall: 0.30391351943076084
Logistic Regression F1 Score: 0.2704120879120879
Logistic Regression ROC AUC Score: 0.8171509207790646


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [95]:
joblib.dump(lr, '../models/logistic_regression_model.pkl')

['../models/logistic_regression_model.pkl']

In [96]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [97]:
y_pred_dt = dt.predict(X_test)
y_pred_proba_dt = dt.predict_proba(X_test)

In [98]:
print("Decision Tree Classifier Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Decision Tree Classifier Precision:", precision_score(y_test, y_pred_dt, average='macro'))
print("Decision Tree Classifier Recall:", recall_score(y_test, y_pred_dt, average='macro'))
print("Decision Tree Classifier F1 Score:", f1_score(y_test, y_pred_dt, average='macro'))
print("Decision Tree Classifier ROC AUC Score:", roc_auc_score(y_test, y_pred_proba_dt, multi_class='ovr'))

Decision Tree Classifier Accuracy: 0.5081967213114754
Decision Tree Classifier Precision: 0.48695402298850576
Decision Tree Classifier Recall: 0.3483032293377121
Decision Tree Classifier F1 Score: 0.3716054391105101
Decision Tree Classifier ROC AUC Score: 0.6084512105755948


In [99]:
joblib.dump(dt, '../models/decision_tree_classifier_model.pkl')

['../models/decision_tree_classifier_model.pkl']

In [100]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [101]:
y_pred_rf = rf.predict(X_test)
y_pred_proba_rf = rf.predict_proba(X_test)

In [102]:
print("Random Forest Classifier Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Random Forest Classifier Precision:", precision_score(y_test, y_pred_rf, average='macro'))
print("Random Forest Classifier Recall:", recall_score(y_test, y_pred_rf, average='macro'))
print("Random Forest Classifier F1 Score:", f1_score(y_test, y_pred_rf, average='macro'))
print("Random Forest Classifier ROC AUC Score:", roc_auc_score(y_test, y_pred_proba_rf, multi_class='ovr'))

Random Forest Classifier Accuracy: 0.5245901639344263
Random Forest Classifier Precision: 0.24451127819548873
Random Forest Classifier Recall: 0.27088122605363985
Random Forest Classifier F1 Score: 0.253527815468114
Random Forest Classifier ROC AUC Score: 0.7678330652533402


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [103]:
joblib.dump(rf, '../models/random_forest_classifier_model.pkl')

['../models/random_forest_classifier_model.pkl']

In [104]:
svm = SVC(probability=True, random_state=42)
svm.fit(X_train, y_train)

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,


In [105]:
y_pred_svm = svm.predict(X_test)
y_pred_proba_svm = svm.predict_proba(X_test)

In [106]:
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("SVM Precision:", precision_score(y_test, y_pred_svm, average='macro'))
print("SVM Recall:", recall_score(y_test, y_pred_svm, average='macro'))
print("SVM F1 Score:", f1_score(y_test, y_pred_svm, average='macro'))
print("SVM ROC AUC Score:", roc_auc_score(y_test, y_pred_proba_svm, multi_class='ovr'))

SVM Accuracy: 0.5573770491803278
SVM Precision: 0.3444155844155844
SVM Recall: 0.328024083196497
SVM F1 Score: 0.3147691953493726
SVM ROC AUC Score: 0.8060896083889991


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [107]:
joblib.dump(svm, '../models/svm_model.pkl')

['../models/svm_model.pkl']