# Classification Models Comparison

#### import required packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### load the data 

In [2]:
df = pd.read_csv('heart_disease.csv')
print(df.head())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  


#### data cleansing process

In [3]:
# decide x and y
x = df.drop(['target', 'trestbps', 'chol', 'fbs', 'restecg'], axis=1)
y = df['target']

In [20]:
# split the data into train and test
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=1234567)

#### model building

#### Logistic Regression

In [21]:
def logistic_regression():
    from sklearn.linear_model import LogisticRegressionCV

    # create the model
    model = LogisticRegressionCV(max_iter=1000)

    # fit the model
    model.fit(x_train, y_train)
    
    evaluate_model(model, 'Logistic Regression')

#### naive bayes

In [22]:
def nb():
    from sklearn.naive_bayes import GaussianNB

    # create the model
    model = GaussianNB()

    # fit the model
    model.fit(x_train, y_train)
    
    evaluate_model(model, 'Naive Bayes')

#### svm

In [23]:
def svm():
    from sklearn.svm import SVC
    
    # create the model
    model = SVC(C=2.0)

    # fit the model
    model.fit(x_train, y_train)
    
    evaluate_model(model, 'SVM')

#### KNN

In [24]:
def knn():
    from sklearn.neighbors import KNeighborsClassifier
    
    # create the model
    model = KNeighborsClassifier()

    # fit the model
    model.fit(x_train, y_train)
    
    evaluate_model(model, 'KNN')

#### decision tree

In [25]:
def decision_tree():
    from sklearn.tree import DecisionTreeClassifier
    
    # create the model
    model = DecisionTreeClassifier()

    # fit the model
    model.fit(x_train, y_train)
    
    evaluate_model(model, 'Decision Tree')

#### random forest

In [26]:
def random_forest():
    from sklearn.ensemble import RandomForestClassifier
    
    # create the model
    model = RandomForestClassifier(n_estimators=100)

    # fit the model
    model.fit(x_train, y_train)
    
    evaluate_model(model, 'Random Forest')

#### evaluation

In [27]:
def evaluate_model(model, name):
    # predict the values for x_test
    y_prediction = model.predict(x_test)
    
    from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
    
    print(f"---- {name} evaluation ----")
    
    # accuracy
    print(f"accuracy of the model = {accuracy_score(y_test, y_prediction) * 100: 0.2f} %")
    
    # classification_report
    print(classification_report(y_test, y_prediction))
    
    print(f"---- {name} evaluation ----")

In [28]:
nb()

---- Naive Bayes evaluation ----
accuracy of the model =  86.89 %
              precision    recall  f1-score   support

           0       0.89      0.74      0.81        23
           1       0.86      0.95      0.90        38

    accuracy                           0.87        61
   macro avg       0.88      0.84      0.85        61
weighted avg       0.87      0.87      0.87        61

---- Naive Bayes evaluation ----


In [29]:
svm()

---- SVM evaluation ----
accuracy of the model =  68.85 %
              precision    recall  f1-score   support

           0       0.64      0.39      0.49        23
           1       0.70      0.87      0.78        38

    accuracy                           0.69        61
   macro avg       0.67      0.63      0.63        61
weighted avg       0.68      0.69      0.67        61

---- SVM evaluation ----


In [30]:
decision_tree()

---- Decision Tree evaluation ----
accuracy of the model =  83.61 %
              precision    recall  f1-score   support

           0       0.81      0.74      0.77        23
           1       0.85      0.89      0.87        38

    accuracy                           0.84        61
   macro avg       0.83      0.82      0.82        61
weighted avg       0.83      0.84      0.83        61

---- Decision Tree evaluation ----


In [31]:
random_forest()

---- Random Forest evaluation ----
accuracy of the model =  81.97 %
              precision    recall  f1-score   support

           0       0.77      0.74      0.76        23
           1       0.85      0.87      0.86        38

    accuracy                           0.82        61
   macro avg       0.81      0.80      0.81        61
weighted avg       0.82      0.82      0.82        61

---- Random Forest evaluation ----


In [32]:
logistic_regression()

---- Logistic Regression evaluation ----
accuracy of the model =  86.89 %
              precision    recall  f1-score   support

           0       0.94      0.70      0.80        23
           1       0.84      0.97      0.90        38

    accuracy                           0.87        61
   macro avg       0.89      0.83      0.85        61
weighted avg       0.88      0.87      0.86        61

---- Logistic Regression evaluation ----


In [33]:
knn()

---- KNN evaluation ----
accuracy of the model =  75.41 %
              precision    recall  f1-score   support

           0       0.72      0.57      0.63        23
           1       0.77      0.87      0.81        38

    accuracy                           0.75        61
   macro avg       0.74      0.72      0.72        61
weighted avg       0.75      0.75      0.75        61

---- KNN evaluation ----
