# Medical data classification using MLP

# Load dataset

In [35]:
from scipy.io import loadmat
path="./ML/dataset/"
data = loadmat(path+'pima_dataset.mat')
X=data['norm_data']
y=data['target']
X.shape

(768, 8)

In [36]:
import numpy as np
import math
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [37]:
from sklearn.neural_network import MLPClassifier

In [38]:
from sklearn.metrics import *

# Classification performance measure function

In [39]:
def classification_performance(conf_matrix):
    accureacy=(conf_matrix[0][0]+conf_matrix[1][1])/(conf_matrix[0][0]+conf_matrix[0][1]+conf_matrix[1][0]+conf_matrix[1][1])
    recall=(conf_matrix[0][0])/(conf_matrix[0][0]+conf_matrix[1][0])
    specificity=(conf_matrix[1][1])/(conf_matrix[0][1]+conf_matrix[1][1])
    precision=(conf_matrix[0][0])/(conf_matrix[0][0]+conf_matrix[0][1])
    F1_score=(2*precision*recall)/(precision+recall)
    GM=math.sqrt(recall*specificity)
    FPR=1-specificity
    return accureacy,recall,specificity,precision,F1_score,GM,FPR

In [47]:
kf = KFold(n_splits=10)
iter=0;
for train, test in kf.split(X):
    
    print("Fold:%d" % (iter))
    #print("%s %s" % (train, test))
    X_train=X[train]
    y_train=y[train]
    X_test=X[test]
    y_test=y[test]
    hidden_nodes=X_train.shape[1]
    clf = MLPClassifier(solver='sgd', alpha=1e-5,hidden_layer_sizes=(hidden_nodes, 2), random_state=1, max_iter=200)
    clf.fit(X_train, y_train)
    y_pred=clf.predict(X_test)
    conf_matrix=confusion_matrix(y_test, y_pred)
    acc, recall, spec, precision, F1, GM, FPR=classification_performance(conf_matrix)
    print("Accuracy: %0.2f, Recall=%0.2f, specificity=%0.2f, precision=%0.2f, F1=%0.2f, GM=%0.2f, FPR=%0.2f" % (acc, recall, spec, precision, F1, GM, FPR))
    iter=iter+1

Fold:0


  y = column_or_1d(y, warn=True)
  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.58, Recall=0.58, specificity=nan, precision=1.00, F1=0.74, GM=nan, FPR=nan
Fold:1


  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.71, Recall=0.71, specificity=nan, precision=1.00, F1=0.83, GM=nan, FPR=nan
Fold:2


  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.56, Recall=0.56, specificity=nan, precision=1.00, F1=0.72, GM=nan, FPR=nan
Fold:3


  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.61, Recall=0.61, specificity=nan, precision=1.00, F1=0.76, GM=nan, FPR=nan
Fold:4


  y = column_or_1d(y, warn=True)


Accuracy: 0.64, Recall=0.64, specificity=0.00, precision=0.98, F1=0.78, GM=0.00, FPR=1.00
Fold:5


  y = column_or_1d(y, warn=True)


Accuracy: 0.60, Recall=0.61, specificity=0.00, precision=0.98, F1=0.75, GM=0.00, FPR=1.00
Fold:6


  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.82, Recall=0.82, specificity=nan, precision=1.00, F1=0.90, GM=nan, FPR=nan
Fold:7


  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.68, Recall=0.68, specificity=nan, precision=1.00, F1=0.81, GM=nan, FPR=nan
Fold:8


  after removing the cwd from sys.path.
  y = column_or_1d(y, warn=True)


Accuracy: 0.68, Recall=0.68, specificity=nan, precision=1.00, F1=0.81, GM=nan, FPR=nan
Fold:9
Accuracy: 0.61, Recall=0.61, specificity=nan, precision=1.00, F1=0.75, GM=nan, FPR=nan


  after removing the cwd from sys.path.


In [48]:
conf_matrix

array([[46,  0],
       [30,  0]], dtype=int64)

In [42]:
y_train.shape

(219, 1)

In [45]:
from sklearn import svm
clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
clf.score(X_test, y_test)

  y = column_or_1d(y, warn=True)


1.0

In [47]:
clf = MLPClassifier(solver='sgd', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(5, 2), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='sgd',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [49]:
clf.predict(X_test)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint8)