In [398]:
import matplotlib.pyplot as plt
import numpy as np
import csv

from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [399]:
# Read "heart.csv" into a feature matrix X and a target vector y.
# The first row is consumed as the header; in every remaining row the last
# field is taken as the target and all preceding fields as features.
#
# The context manager guarantees the handle is closed even if parsing raises,
# and newline='' is the mode the csv module asks for so that newlines embedded
# in quoted fields are handled by the reader itself.
with open("heart.csv", 'r', newline='') as file:
  csv_reader = csv.reader(file)

  header = next(csv_reader)  # column names; kept for later inspection

  X, y = [], []
  for features in csv_reader:
    if not features:
      # csv.reader yields [] for blank lines (e.g. a trailing empty line);
      # popping from it would raise IndexError, so skip such rows.
      continue
    y.append(features.pop(-1))
    X.append(features)

# Fields arrive as strings; convert both containers to float arrays.
X = np.array(X, dtype=float)
y = np.array(y, dtype=float)

In [400]:
# Repeated stratified hold-out evaluation: for each of `iterations` runs, draw
# a fresh 80/20 split, fit logistic regression and a 5-nearest-neighbours
# classifier on the training part, and store the test labels and both models'
# predictions for scoring in a later cell.
lst_y_pred_logR, lst_y_pred_KNN, lst_y_test = [], [], []
iterations = 10
SEED = 42  # base seed; run k uses SEED + k so splits differ but are reproducible

for run in range(iterations):
  # Without random_state every execution shuffles differently, so the
  # reported metrics could not be reproduced on a kernel restart.
  X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.20,
                                                    shuffle=True,
                                                    stratify=y,
                                                    random_state=SEED + run)

  model_logR = LogisticRegression(max_iter=1000, fit_intercept=True)
  model_logR.fit(X_train, y_train)

  # NOTE(review): KNN is distance-based and X is passed in unscaled here;
  # consider standardising the features before comparing it against LogR.
  model_KNN = KNeighborsClassifier(n_neighbors=5)
  model_KNN.fit(X_train, y_train)

  y_pred_logR = model_logR.predict(X_test)
  y_pred_KNN = model_KNN.predict(X_test)

  lst_y_pred_logR.append(y_pred_logR)
  lst_y_pred_KNN.append(y_pred_KNN)
  lst_y_test.append(y_test)

In [401]:
# Average MSE, RMSE and accuracy of both classifiers over all stored runs,
# then print the averages plus the confusion matrices of the first run.
#
# The number of runs is taken from the stored results rather than from the
# `iterations` variable, so this cell stays consistent with whatever the
# training cell actually produced.
n_runs = len(lst_y_test)

mse_logR = 0
mse_KNN = 0

rmse_logR = 0
rmse_KNN = 0

accuracy_logR = 0
accuracy_KNN = 0

for i in range(n_runs):
  y_true = lst_y_test[i]

  # Compute each run's MSE once and reuse it for the RMSE.
  run_mse_logR = mean_squared_error(y_true, lst_y_pred_logR[i])
  run_mse_KNN = mean_squared_error(y_true, lst_y_pred_KNN[i])

  mse_logR += run_mse_logR
  mse_KNN += run_mse_KNN

  # RMSE is the square root of the per-run MSE. The `squared=False` keyword
  # of mean_squared_error is deprecated and rejected by newer scikit-learn
  # releases, so take the root explicitly to work on any version.
  rmse_logR += np.sqrt(run_mse_logR)
  rmse_KNN += np.sqrt(run_mse_KNN)

  accuracy_logR += accuracy_score(y_true, lst_y_pred_logR[i])
  accuracy_KNN += accuracy_score(y_true, lst_y_pred_KNN[i])


# NOTE(review): if the labels are 0/1, MSE is simply 1 - accuracy, so the
# MSE/RMSE lines carry no information beyond the accuracy lines.
print("LogR Error with MSE = ", mse_logR/n_runs)
print("KNN Error with MSE  = ", mse_KNN/n_runs, '\n\n')

print("LogR Error with RMSE = ", rmse_logR/n_runs)
print("KNN Error with RMSE  = ", rmse_KNN/n_runs, '\n\n')

print("LogR Accuracy = ", accuracy_logR/n_runs)
print("KNN Accuracy  = ", accuracy_KNN/n_runs, '\n\n')

# Confusion matrices are shown for the first run only, not aggregated.
print("LogR Confusion Matrix (results over iteration 0)\n", confusion_matrix(lst_y_test[0], lst_y_pred_logR[0]), '\n')
print("KNN Confusion Matrix (results over iteration 0)\n", confusion_matrix(lst_y_test[0], lst_y_pred_KNN[0]))

LogR Error with MSE =  0.15901639344262294
KNN Error with MSE  =  0.34590163934426227 


LogR Error with RMSE =  0.394450867610488
KNN Error with RMSE  =  0.585992525469997 


LogR Accuracy =  0.840983606557377
KNN Accuracy  =  0.6540983606557377 


LogR Confusion Matrix (results over iteration 0)
 [[21  7]
 [ 2 31]] 

KNN Confusion Matrix (results over iteration 0)
 [[16 12]
 [ 8 25]]
