In [1]:
import tensorflow as tf

import matplotlib.pyplot as plt
import pandas as pd

from sklearn.neural_network import MLPClassifier
import mglearn

In [2]:
# Load the heart CSV from the working directory and preview its first 5 rows.
dataset_path = 'heart.csv'
dataset = pd.read_csv(filepath_or_buffer=dataset_path)

dataset.head(n=5)



Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# Column labels of the loaded frame (for a DataFrame, .columns is what .keys() returns).
column_names = dataset.columns
column_names

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'],
      dtype='object')

In [4]:
# Train/test split: a seeded random 80% of the rows forms the training set,
# and every row whose index label was not sampled forms the 20% test set.
train_dataset = dataset.sample(random_state=0, frac=0.8)
test_dataset = dataset.drop(index=train_dataset.index)

In [5]:
# Separate the labels (the 'target' column) from the feature columns.
# The original note on this cell says 1 = healthy heart and 0 = unhealthy heart;
# NOTE(review): confirm that encoding against the dataset's documentation.
#
# The labels are looked up in the untouched `dataset` frame by row label and the
# feature frames are rebuilt with drop(), so this cell can be re-run safely.
# The previous in-place pop('target') raised KeyError on a second execution
# because the column had already been removed from the frames.
# The lookup relies on unique row labels, which the default index produced by
# read_csv (no index_col given) provides.
train_labels = dataset.loc[train_dataset.index, 'target']
test_labels = dataset.loc[test_dataset.index, 'target']

# errors='ignore' keeps the drop a no-op when 'target' is already gone.
train_dataset = train_dataset.drop(columns='target', errors='ignore')
test_dataset = test_dataset.drop(columns='target', errors='ignore')



In [6]:
# Peek at the first 5 training labels.
train_labels.head(n=5)

225    0
152    1
228    0
201    0
52     1
Name: target, dtype: int64

In [7]:
# Summary statistics of the training features: count, mean, std, min,
# quartiles and max (the quartiles listed are describe()'s defaults).
train_dataset.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
count,242.0,242.0,242.0,242.0,242.0,242.0,242.0,242.0,242.0,242.0,242.0,242.0,242.0
mean,54.128099,0.68595,0.96281,131.396694,247.181818,0.144628,0.528926,149.797521,0.330579,0.991322,1.413223,0.68595,2.322314
std,8.904992,0.465098,1.031989,17.352331,51.235037,0.352454,0.516522,22.935873,0.471396,1.162148,0.606595,0.985649,0.614012
min,34.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0
25%,48.0,0.0,0.0,120.0,212.0,0.0,0.0,134.5,0.0,0.0,1.0,0.0,2.0
50%,54.5,1.0,1.0,130.0,241.5,0.0,1.0,153.0,0.0,0.6,1.0,0.0,2.0
75%,60.75,1.0,2.0,140.0,274.0,0.0,1.0,167.75,1.0,1.6,2.0,1.0,3.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,195.0,1.0,6.2,2.0,4.0,3.0


In [8]:
# Build a multi-layer perceptron with two hidden layers of 100 units each
# (fixed seed for reproducibility) and fit it on the training split.
# fit() returns the estimator itself, so construction and training are chained.
# NOTE(review): the features are passed in unscaled; scikit-learn's MLP guide
# recommends standardising inputs. Left as-is here so the recorded results
# stay valid — consider adding a scaler and re-running.
mlp = MLPClassifier(hidden_layer_sizes=[100, 100], random_state=0).fit(
    train_dataset, train_labels
)
mlp


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=[100, 100], learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=0, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [9]:
# Mean accuracy of the fitted model on each split, reported to two decimals.
train_accuracy = mlp.score(train_dataset, train_labels)
test_accuracy = mlp.score(test_dataset, test_labels)

print('Accuracy on training set: {:.2f}'.format(train_accuracy))
print('Accuracy on test set: {:.2f}'.format(test_accuracy))


Accuracy on training set: 0.81
Accuracy on test set: 0.77
