# Classifier training

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
import scipy.stats as sm
import sklearn

# Loading the data

In [4]:
# Column labels of the heart-disease table: 13 features followed by the `target` label.
# NOTE(review): `names` is not passed to read_csv below, so the column labels
# come from the CSV's own header row.
names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target',
]
df = pd.read_csv('data/heart.csv')

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
# Feature matrix: every column except the last one.
X = df.iloc[:, :-1].to_numpy()
# Label vector: the last column of the frame.
y = df.iloc[:, -1].to_numpy()

## Split data
Split the data into training and test sets (80% training, 20% test).

In [36]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Fixing random_state makes the split,
# and therefore every score computed from it, reproducible after a kernel
# restart; without it each run shuffles the rows differently.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
print("X_train:", X_train.shape, "X_test:", X_test.shape)
print("y_train:", y_train.shape, "y_test:", y_test.shape)

X_train: (242, 13) X_test: (61, 13)
y_train: (242,) y_test: (61,)


## Scale data

In [37]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both the training and the test features.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Training the classifiers
## Neural Network

In [54]:
import tensorflow as tf
from tensorflow import keras

# Feed-forward network declared as a list of layers:
#   13 inputs -> Dense(6, relu) -> Dense(13, relu) -> Dense(1, sigmoid)
# NOTE(review): the first layer sets `bias_initializer`, not
# `kernel_initializer` — confirm that initialising the biases is what was intended.
classifier = keras.Sequential([
    # first hidden layer; `input_dim` also declares the input width
    keras.layers.Dense(6, bias_initializer='uniform', activation='relu', input_dim=13),
    # second hidden layer
    keras.layers.Dense(13, activation='relu'),
    # output layer: a single sigmoid neuron (this is a binary classification problem)
    keras.layers.Dense(1, activation='sigmoid'),
])

In [6]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy reported as a metric.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the training set: 100 epochs, mini-batches of 10 samples.
classifier.fit(X_train, y_train, epochs=100, batch_size=10)

NameError: name 'classifier' is not defined

In [87]:
# Flatten the model's test-set predictions to 1-D and round them to the nearest integer.
y_pred = np.round(classifier.predict(X_test).ravel())

# Show the true labels and the predictions one after the other for a quick visual comparison.
print(y_test)
print(y_pred)

[0 1 0 0 1 1 0 1 1 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 1 0 0 0 1 1 1 1 1 0 1 0 1
 1 1 0 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 0]
[0 0 0 1 1 1 0 1 1 0 0 0 1 1 0 1 1 1 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 1 1 1
 0 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 0]


In [57]:
# Wrap both label vectors in named Series so the table axes come out labelled.
y_actu = pd.Series(y_test, name='Actual')
y_pred = pd.Series(y_pred, name='Predicted')

# Cross-tabulate: one row per actual class, one column per predicted class.
df_confusion = pd.crosstab(index=y_actu, columns=y_pred)
print(df_confusion)

Predicted  0.0  1.0
Actual             
0           22    2
1            6   31


In [58]:
# Test accuracy in percent: share of test rows whose prediction equals the true label.
accuracy = 100 * (y_test == y_pred).sum() / len(y_test)
print("Test accuracy:", accuracy)

# Same measurement on the training rows, to compare against the test figure.
y_train_pred = np.round(classifier.predict(X_train).ravel())
accuracy_train = 100 * (y_train == y_train_pred).sum() / len(y_train)
print("Train accuracy:", accuracy_train)

Test accuracy: 86.88524590163935
Train accuracy: 91.32231404958678


## Decision Tree classifier

In [82]:
def get_accuracy(y_true=None, y_hat=None):
    """Print the accuracy score and the confusion matrix of a set of predictions.

    Parameters
    ----------
    y_true : array-like, optional
        Ground-truth labels. When omitted, the notebook-level ``y_test`` is used.
    y_hat : array-like, optional
        Predicted labels. When omitted, the notebook-level ``y_pred`` is used.

    Returns
    -------
    None
        The results are printed, not returned.

    Notes
    -----
    NOTE(review): scikit-learn documents ``confusion_matrix`` as rows = true
    labels, columns = predicted labels — confirm the printed heading wording
    matches that orientation.
    """
    # Import the metric functions explicitly: a bare `import sklearn` does not
    # guarantee that the `sklearn.metrics` submodule has been loaded.
    from sklearn.metrics import accuracy_score, confusion_matrix

    # Fall back to the notebook globals so existing `get_accuracy()` calls keep working.
    if y_true is None:
        y_true = y_test
    if y_hat is None:
        y_hat = y_pred

    print("Sklearn accuracy:", accuracy_score(y_true, y_hat))
    print("\nConfusion matrix (predicted vs actual)")
    print(pd.DataFrame(confusion_matrix(y_true, y_hat)))

In [84]:
from sklearn.tree import DecisionTreeClassifier

# Create a decision tree classifier. A fixed random_state makes the fitted
# tree, and therefore the reported scores, reproducible between runs.
classifier = DecisionTreeClassifier(random_state=0)

# Fit the model
classifier.fit(X_train, y_train)

# Predict the response for the test dataset
y_pred = classifier.predict(X_test)

# Get accuracy results
get_accuracy()

Sklearn accuracy: 0.7704918032786885

Confusion matrix (predicted vs actual)
    0   1
0  18   7
1   7  29


## Support Vector Machine

In [85]:
from sklearn.svm import SVC

# Create an SVM classifier with a linear kernel
classifier = SVC(kernel='linear')

# Fit the model. The previous `predict(classifier)` call relied on a helper
# that is not defined anywhere in this notebook, so the cell failed on a
# fresh kernel; fit and predict explicitly, as the other classifier cells do.
classifier.fit(X_train, y_train)

# Predict the response for the test dataset
y_pred = classifier.predict(X_test)

# Get accuracy results
get_accuracy()

Sklearn accuracy: 0.8360655737704918

Confusion matrix (predicted vs actual)
    0   1
0  19   6
1   4  32


## Logistic regression

In [86]:
from sklearn.linear_model import LogisticRegression

# `multi_class='multinomial'` was dropped: the target is binary, and the
# parameter is deprecated in recent scikit-learn releases. Leaving it at its
# default fits an ordinary binary logistic regression.
classifier = LogisticRegression(random_state=0, solver='lbfgs')

# Fit the model
classifier.fit(X_train, y_train)

# Predict the response for the test dataset
y_pred = classifier.predict(X_test)

# Get accuracy results
get_accuracy()

Sklearn accuracy: 0.8360655737704918

Confusion matrix (predicted vs actual)
    0   1
0  19   6
1   4  32
