In [31]:
import pandas as pd
import numpy as np
from pandas import DataFrame
from sklearn import svm
from sklearn.model_selection import *
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score

In [32]:
# Load the cleaned cardio dataset and discard every row with a missing value.
cardio_data = pd.read_csv(r'cardio_cleaned.csv').dropna()
cardio_data_labels = cardio_data.columns.tolist()

# Feature matrix: the first 15 columns; target: the 'cardio' column.
# NOTE(review): assumes 'cardio' is not one of the first 15 columns --
# otherwise the label leaks into X. TODO confirm against the CSV header.
feature_columns = cardio_data_labels[:15]
X = cardio_data[feature_columns]
y = cardio_data['cardio']

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [33]:
#evaluating model
def evaluate(C, X_test, y_test, n_splits=10):
    """Print hold-out and cross-validated metrics for a fitted classifier.

    Parameters
    ----------
    C : estimator
        Classifier that the caller has already fitted. It must provide
        ``predict`` and be accepted by ``cross_val_score``.
    X_test : array-like
        Held-out feature matrix.
    y_test : array-like
        Labels matching ``X_test`` (pandas Series/column or NumPy array).
    n_splits : int, default 10
        Number of cross-validation folds.

    Returns
    -------
    None
        The mean cross-validation score, the hold-out accuracy and the
        classification report are printed rather than returned.
    """
    # Flatten once so the same 1-D label vector feeds every metric, whether
    # the caller passed a pandas object or a plain array.
    y_true = np.ravel(y_test)
    y_pred = C.predict(X_test)

    # random_state is deliberately not passed: with shuffle=False it has no
    # effect, and recent scikit-learn versions raise ValueError for that
    # combination. Unshuffled folds are deterministic on their own.
    k_fold = KFold(n_splits=n_splits, shuffle=False)

    # NOTE(review): the cross-validation below runs on the held-out split
    # only, so each fold is trained on a subset of X_test rather than on the
    # training data -- confirm this is the intended protocol.
    cv_scores = cross_val_score(C, X_test, y_true, cv=k_fold)

    print("Cross-validation score means: ", cv_scores.mean())
    print("Accuracy: ", accuracy_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))

In [15]:
# Support-vector classifier (SVC) with gamma='scale'; no kernel is passed,
# so the library's default kernel is used.
# Construct and fit in a single expression, since fit() returns the estimator.
C = svm.SVC(gamma='scale').fit(X_train, y_train)

evaluate(C, X_test, y_test)

Accuracy:  0.6026017933264736
              precision    recall  f1-score   support

         0.0       0.58      0.78      0.66      6884
         1.0       0.65      0.42      0.51      6722

   micro avg       0.60      0.60      0.60     13606
   macro avg       0.62      0.60      0.59     13606
weighted avg       0.61      0.60      0.59     13606



In [16]:
#training random forest classifier
# A fixed random_state makes the bootstrap samples and per-split feature
# sub-sampling repeatable, so the reported metrics can be reproduced on
# re-run (42 matches the seed used for train_test_split).
C = RandomForestClassifier(n_estimators = 100, random_state = 42)
C.fit(X_train, y_train) #train classifier

evaluate(C, X_test, y_test)

Accuracy:  0.5587240923122152
              precision    recall  f1-score   support

         0.0       0.56      0.60      0.58      6884
         1.0       0.56      0.52      0.54      6722

   micro avg       0.56      0.56      0.56     13606
   macro avg       0.56      0.56      0.56     13606
weighted avg       0.56      0.56      0.56     13606



In [17]:
# Gaussian Naive Bayes with default settings.
# Construct and fit in a single expression, since fit() returns the estimator.
C = GaussianNB().fit(X_train, y_train)

evaluate(C, X_test, y_test)

Accuracy:  0.6003968837277671
              precision    recall  f1-score   support

         0.0       0.58      0.79      0.67      6884
         1.0       0.66      0.40      0.50      6722

   micro avg       0.60      0.60      0.60     13606
   macro avg       0.62      0.60      0.58     13606
weighted avg       0.62      0.60      0.58     13606



In [46]:
#training supervised MLP
# One hidden layer of 100 units, optimised with L-BFGS. The fixed
# random_state pins the random weight initialisation so that the reported
# metrics can be reproduced on re-run (42 matches the train_test_split seed).
C = MLPClassifier(solver = 'lbfgs', hidden_layer_sizes = (100,), random_state = 42)
C.fit(X_train, y_train) #train classifier

evaluate(C, X_test, y_test)

Cross-validation score means:  0.5948834118511475
Accuracy:  0.6002498897545201
              precision    recall  f1-score   support

         0.0       0.58      0.78      0.66      6884
         1.0       0.65      0.42      0.51      6722

   micro avg       0.60      0.60      0.60     13606
   macro avg       0.61      0.60      0.59     13606
weighted avg       0.61      0.60      0.59     13606



In [19]:
# k-nearest-neighbours classifier voting over the 3 closest training points.
# Construct and fit in a single expression, since fit() returns the estimator.
C = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

evaluate(C, X_test, y_test)

Accuracy:  0.5365280023519036
              precision    recall  f1-score   support

         0.0       0.54      0.55      0.54      6884
         1.0       0.53      0.53      0.53      6722

   micro avg       0.54      0.54      0.54     13606
   macro avg       0.54      0.54      0.54     13606
weighted avg       0.54      0.54      0.54     13606



In [20]:
# Logistic regression fitted with the L-BFGS solver.
# Construct and fit in a single expression, since fit() returns the estimator.
C = LogisticRegression(solver='lbfgs').fit(X_train, y_train)

evaluate(C, X_test, y_test)

Accuracy:  0.5998824048214023
              precision    recall  f1-score   support

         0.0       0.57      0.81      0.67      6884
         1.0       0.67      0.38      0.48      6722

   micro avg       0.60      0.60      0.60     13606
   macro avg       0.62      0.60      0.58     13606
weighted avg       0.62      0.60      0.58     13606

