In [36]:
#Import relevant libraries for use within the program
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split # Import train_test_split function from scikit learn
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [37]:
# Configure printing to 3 decimal places:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

### Loading the data

In [38]:
# Load the dataset from the csv file
path = './heart.csv'
dataframe = pd.read_csv(path) #Dataframe having the csv file of heart.csv dataset

In [39]:
# Print out the first 10 records to ensure the data loaded correctly
dataframe.head(10)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
5,58,0,0,100,248,0,0,122,0,1.0,1,0,2,1
6,58,1,0,114,318,0,2,140,0,4.4,0,3,1,0
7,55,1,0,160,289,0,0,145,1,0.8,1,1,3,0
8,46,1,0,120,249,0,0,144,0,0.8,2,0,3,0
9,54,1,0,122,286,0,0,116,1,3.2,1,2,2,0


In [40]:
dataframe.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,54.434,0.696,0.942,131.612,246.0,0.149,0.53,149.114,0.337,1.072,1.385,0.754,2.324,0.513
std,9.072,0.46,1.03,17.517,51.593,0.357,0.528,23.006,0.473,1.175,0.618,1.031,0.621,0.5
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,48.0,0.0,0.0,120.0,211.0,0.0,0.0,132.0,0.0,0.0,1.0,0.0,2.0,0.0
50%,56.0,1.0,1.0,130.0,240.0,0.0,1.0,152.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,275.0,0.0,1.0,166.0,1.0,1.8,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


## Removing outliers

In [41]:
def rem_outlier(dataframe, feature): # Defining a function that accepts a dataset and one of it's properties to remove outliers from that property (Age, cholesterol and so on)
#     Set the upper and lower bounds to +/- 3 standard devations
    lower_bound = dataframe[feature].mean() - 3*dataframe[feature].std()
    upper_bound = dataframe[feature].mean() + 3*dataframe[feature].std()
    
#     Remove all datapoints above and below 3 standard deviations
    dataframe.loc[dataframe[feature] < lower_bound , feature] = lower_bound
    dataframe.loc[dataframe[feature] > upper_bound, feature] = upper_bound

def clean(df):
#     Remove outliers from properties inside for loop
    for var in ['trestbps', 'chol', 'thalach', 'age']:
        rem_outlier(df, var)

## Training (Radial basis function)

In [42]:
# Get the inputs (x values) from the dataframe
x = dataframe.drop(['target'], axis=1).values

# Get the latent variable (y value) from the dataframe
y = dataframe['target'].values.copy()

In [43]:
# Split data for supervised learning
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state = 0)

In [44]:
# Initialise the SVM
svc = SVC()
# Train the algorithm on training data-
svc.fit(X_train, y_train)

SVC()

## Prediction test and score

In [45]:
# Get the predictions based on the testing data
predictions = svc.predict(X_test)

In [46]:
# Print the classification report, accuracy, and confusion martix
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))

              precision    recall  f1-score   support

           0       0.76      0.70      0.73       123
           1       0.74      0.80      0.77       134

    accuracy                           0.75       257
   macro avg       0.75      0.75      0.75       257
weighted avg       0.75      0.75      0.75       257

0.7509727626459144
[[ 86  37]
 [ 27 107]]


## Training (linear)

In [47]:
# Initialise the SVM
svc = SVC(kernel='linear')
# Train the algorithm on training data-
svc.fit(X_train, y_train)

SVC(kernel='linear')

## Prediction test and score

In [48]:
# Get the predictions based on the testing data
predictions = svc.predict(X_test)

In [49]:
# Print the classification report, accuracy, and confusion martix
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))

              precision    recall  f1-score   support

           0       0.89      0.79      0.84       123
           1       0.82      0.91      0.87       134

    accuracy                           0.85       257
   macro avg       0.86      0.85      0.85       257
weighted avg       0.86      0.85      0.85       257

0.8521400778210116
[[ 97  26]
 [ 12 122]]


## Training (5th degree polynomial)

In [79]:
# Initialise the SVM
svc = SVC(kernel='poly', degree=5)
# Train the algorithm on training data-
svc.fit(X_train, y_train)

SVC(degree=5, kernel='poly')

## Prediction test and score

In [80]:
# Get the predictions based on the testing data
predictions = svc.predict(X_test)

In [81]:
# Print the classification report, accuracy, and confusion martix
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))

              precision    recall  f1-score   support

           0       0.73      0.77      0.75       123
           1       0.78      0.74      0.76       134

    accuracy                           0.75       257
   macro avg       0.76      0.76      0.75       257
weighted avg       0.76      0.75      0.75       257

0.754863813229572
[[95 28]
 [35 99]]


## Training (sigmoid)

In [82]:
# Initialise the SVM
svc = SVC(kernel='sigmoid')
# Train the algorithm on training data-
svc.fit(X_train, y_train)

SVC(kernel='sigmoid')

## Prediction test and score

In [83]:
# Get the predictions based on the testing data
predictions = svc.predict(X_test)

In [84]:
# Print the classification report, accuracy, and confusion martix
print(classification_report(y_test, predictions))
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))

              precision    recall  f1-score   support

           0       0.54      0.50      0.52       123
           1       0.57      0.61      0.59       134

    accuracy                           0.56       257
   macro avg       0.55      0.55      0.55       257
weighted avg       0.56      0.56      0.56       257

0.556420233463035
[[61 62]
 [52 82]]
