In [2]:
# Pandas
import pandas as pd

# Sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix

In [3]:
# Putting data into DataFrame
df = pd.read_csv('heart.csv', sep=',')

# Convert categorical values into numerical categorical values (Sex, ChestPainType, RestingECG, ExerciseAngina, St_Slope)
df['Sex'] = pd.factorize(df['Sex'])[0]
df['ChestPainType'] = pd.factorize(df['ChestPainType'])[0]
df['RestingECG'] = pd.factorize(df['RestingECG'])[0]
df['ExerciseAngina'] = pd.factorize(df['ExerciseAngina'])[0]
df['ST_Slope'] = pd.factorize(df['ST_Slope'])[0]

# Normalized Data
df_norm = pd.DataFrame(preprocessing.normalize(df), columns = df.columns)
df_norm["HeartDisease"] = df["HeartDisease"]

In [4]:
# Function to run model and display information

def run_model(model, data, norm):
    
    X_train, X_test, y_train, y_test = train_test_split(data.drop("HeartDisease", 1), data.HeartDisease, test_size = 0.2)
    X_train = X_train.astype('float')
    X_test = X_test.astype('float')
    y_train = y_train.astype('float')
    y_test = y_test.astype('float')
    
    print("- Model: ", model, "\n")
    
    if norm == True:
        print("- Data: Normalized", "\n")
    else:
        print("- Data: Not normalized", "\n")
    
    model.fit(X_train, y_train)
    score = model.score(X_test, y_test)
    print("- Accuracy score: ", score, "\n")

    predictions = model.predict(X_test)
    print("- Classification Report: \n\n", classification_report(y_test, predictions))

    matrix = confusion_matrix(y_test, predictions)
    print("- Confusion Matrix: \n")
    print("\tTrue Positive: ", matrix[1][0], "\tTrue Negative: ", matrix[0][0])
    print("\tFalse Positive: ", matrix[0][1], "\tFalse Negative: ", matrix[1][0])
    
    print("\n-------------------------------------------------------\n")
    
    return score

In [5]:
# Running models: Logistic Regression & K-Neighbor & Support Vector Machine

run_model(LogisticRegression(max_iter = 40000), df, False)
run_model(LogisticRegression(max_iter = 40000), df_norm, True)

run_model(KNeighborsClassifier(n_neighbors = 9), df, False)
run_model(KNeighborsClassifier(n_neighbors = 9), df_norm, True)

run_model(SVC(max_iter = 40000), df, False)
temp = run_model(SVC(max_iter = 40000), df_norm, True) # To not show the returned score for visual cleanliness

- Model:  LogisticRegression(max_iter=40000) 

- Data: Not normalized 

- Accuracy score:  0.8641304347826086 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.85      0.86      0.86        86
         1.0       0.88      0.87      0.87        98

    accuracy                           0.86       184
   macro avg       0.86      0.86      0.86       184
weighted avg       0.86      0.86      0.86       184

- Confusion Matrix: 

	True Positive:  13 	True Negative:  74
	False Positive:  12 	False Negative:  13

-------------------------------------------------------

- Model:  LogisticRegression(max_iter=40000) 

- Data: Normalized 

- Accuracy score:  0.7119565217391305 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.65      0.70      0.67        79
         1.0       0.76      0.72      0.74       105

    accuracy                           0.71       184
   macro avg     

In [15]:
# Function to calculate average accuracy score

def avg_accuracy (model, data, norm, iteration):
    
    score = 0
    for i in range(iteration):
        score = score + run_model(model, data, norm)
    
    return score / iteration

def print_avg_accuracy (model, norm, iteration, score):
    print("- Model: ", model, "\n")
    
    if norm == True:
        print("- Data: Normalized", "\n")
    else:
        print("- Data: Not normalized", "\n")
    
    print("- Iterations: ", iteration, '\n')
    
    print("- Average accuracy score: ", score)
    
    print("\n-------------------------------------------------------\n")

lr_unnorm_score = avg_accuracy(LogisticRegression(max_iter = 40000), df, False, 20)
lr_norm_score = avg_accuracy(LogisticRegression(max_iter = 40000), df, True, 20)
knc_unnorm_score = avg_accuracy(KNeighborsClassifier(n_neighbors = 9), df, False, 20)
knc_norm_score = avg_accuracy(KNeighborsClassifier(n_neighbors = 9), df, True, 20)
svc_unnorm_score = avg_accuracy(SVC(max_iter = 40000), df, False, 20)
svc_norm_score = avg_accuracy(SVC(max_iter = 40000), df, True, 20)

- Model:  LogisticRegression(max_iter=40000) 

- Data: Not normalized 

- Accuracy score:  0.842391304347826 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.84      0.78      0.81        78
         1.0       0.85      0.89      0.87       106

    accuracy                           0.84       184
   macro avg       0.84      0.83      0.84       184
weighted avg       0.84      0.84      0.84       184

- Confusion Matrix: 

	True Positive:  12 	True Negative:  61
	False Positive:  17 	False Negative:  12

-------------------------------------------------------

- Model:  LogisticRegression(max_iter=40000) 

- Data: Not normalized 

- Accuracy score:  0.8586956521739131 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.88      0.80      0.84        85
         1.0       0.84      0.91      0.87        99

    accuracy                           0.86       184
   macro avg  

- Accuracy score:  0.8858695652173914 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.89      0.86      0.88        86
         1.0       0.88      0.91      0.89        98

    accuracy                           0.89       184
   macro avg       0.89      0.88      0.89       184
weighted avg       0.89      0.89      0.89       184

- Confusion Matrix: 

	True Positive:  9 	True Negative:  74
	False Positive:  12 	False Negative:  9

-------------------------------------------------------

- Model:  LogisticRegression(max_iter=40000) 

- Data: Not normalized 

- Accuracy score:  0.8641304347826086 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.88      0.83      0.85        86
         1.0       0.85      0.90      0.88        98

    accuracy                           0.86       184
   macro avg       0.87      0.86      0.86       184
weighted avg       0.86      0.86

- Accuracy score:  0.8532608695652174 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.81      0.85      0.83        78
         1.0       0.88      0.86      0.87       106

    accuracy                           0.85       184
   macro avg       0.85      0.85      0.85       184
weighted avg       0.85      0.85      0.85       184

- Confusion Matrix: 

	True Positive:  15 	True Negative:  66
	False Positive:  12 	False Negative:  15

-------------------------------------------------------

- Model:  LogisticRegression(max_iter=40000) 

- Data: Normalized 

- Accuracy score:  0.8532608695652174 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.87      0.84      0.85        92
         1.0       0.84      0.87      0.86        92

    accuracy                           0.85       184
   macro avg       0.85      0.85      0.85       184
weighted avg       0.85      0.85  

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Not normalized 

- Accuracy score:  0.7663043478260869 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.76      0.74      0.75        87
         1.0       0.77      0.79      0.78        97

    accuracy                           0.77       184
   macro avg       0.77      0.76      0.77       184
weighted avg       0.77      0.77      0.77       184

- Confusion Matrix: 

	True Positive:  20 	True Negative:  64
	False Positive:  23 	False Negative:  20

-------------------------------------------------------

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Not normalized 

- Accuracy score:  0.6358695652173914 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.55      0.60      0.57        75
         1.0       0.71      0.66      0.68       109

    accuracy                           0.64       184
   macro av

- Confusion Matrix: 

	True Positive:  33 	True Negative:  50
	False Positive:  30 	False Negative:  33

-------------------------------------------------------

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Normalized 

- Accuracy score:  0.7119565217391305 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.70      0.70      0.70        89
         1.0       0.72      0.73      0.72        95

    accuracy                           0.71       184
   macro avg       0.71      0.71      0.71       184
weighted avg       0.71      0.71      0.71       184

- Confusion Matrix: 

	True Positive:  26 	True Negative:  62
	False Positive:  27 	False Negative:  26

-------------------------------------------------------

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Normalized 

- Accuracy score:  0.717391304347826 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.

- Accuracy score:  0.7771739130434783 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.78      0.70      0.74        83
         1.0       0.77      0.84      0.81       101

    accuracy                           0.78       184
   macro avg       0.78      0.77      0.77       184
weighted avg       0.78      0.78      0.78       184

- Confusion Matrix: 

	True Positive:  16 	True Negative:  58
	False Positive:  25 	False Negative:  16

-------------------------------------------------------

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Normalized 

- Accuracy score:  0.7391304347826086 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.71      0.73      0.72        85
         1.0       0.76      0.75      0.76        99

    accuracy                           0.74       184
   macro avg       0.74      0.74      0.74       184
weighted avg       0.74      0.74 

- Accuracy score:  0.7336956521739131 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.67      0.68      0.67        74
         1.0       0.78      0.77      0.78       110

    accuracy                           0.73       184
   macro avg       0.72      0.72      0.72       184
weighted avg       0.73      0.73      0.73       184

- Confusion Matrix: 

	True Positive:  25 	True Negative:  50
	False Positive:  24 	False Negative:  25

-------------------------------------------------------

- Model:  SVC(max_iter=40000) 

- Data: Not normalized 

- Accuracy score:  0.6847826086956522 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.61      0.63      0.62        75
         1.0       0.74      0.72      0.73       109

    accuracy                           0.68       184
   macro avg       0.67      0.68      0.67       184
weighted avg       0.69      0.68      0.69   

- Accuracy score:  0.7282608695652174 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.72      0.71      0.72        89
         1.0       0.73      0.75      0.74        95

    accuracy                           0.73       184
   macro avg       0.73      0.73      0.73       184
weighted avg       0.73      0.73      0.73       184

- Confusion Matrix: 

	True Positive:  24 	True Negative:  63
	False Positive:  26 	False Negative:  24

-------------------------------------------------------

- Model:  SVC(max_iter=40000) 

- Data: Normalized 

- Accuracy score:  0.7445652173913043 

- Classification Report: 

               precision    recall  f1-score   support

         0.0       0.72      0.71      0.71        82
         1.0       0.77      0.77      0.77       102

    accuracy                           0.74       184
   macro avg       0.74      0.74      0.74       184
weighted avg       0.74      0.74      0.74       

In [16]:
print_avg_accuracy(LogisticRegression(max_iter = 40000), False, 20, lr_unnorm_score)
print_avg_accuracy(LogisticRegression(max_iter = 40000), True, 20, lr_norm_score)
print_avg_accuracy(KNeighborsClassifier(n_neighbors = 9), False, 20, knc_unnorm_score)
print_avg_accuracy(KNeighborsClassifier(n_neighbors = 9), True, 20, knc_norm_score)
print_avg_accuracy(SVC(max_iter = 40000), False, 20, svc_unnorm_score)
print_avg_accuracy(SVC(max_iter = 40000), True, 20, svc_norm_score)

- Model:  LogisticRegression(max_iter=40000) 

- Data: Not normalized 

- Iterations:  20 

- Average accuracy score:  0.8404891304347828

-------------------------------------------------------

- Model:  LogisticRegression(max_iter=40000) 

- Data: Normalized 

- Iterations:  20 

- Average accuracy score:  0.8521739130434783

-------------------------------------------------------

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Not normalized 

- Iterations:  20 

- Average accuracy score:  0.7054347826086957

-------------------------------------------------------

- Model:  KNeighborsClassifier(n_neighbors=9) 

- Data: Normalized 

- Iterations:  20 

- Average accuracy score:  0.7016304347826088

-------------------------------------------------------

- Model:  SVC(max_iter=40000) 

- Data: Not normalized 

- Iterations:  20 

- Average accuracy score:  0.7304347826086959

-------------------------------------------------------

- Model:  SVC(max_iter=40000) 

- Data: N