In [66]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
# Fix the random seed for reproducibility.
# numpy is imported in this cell under the alias `np`; the bare name `numpy`
# is never bound, so the seed call has to go through `np`.
seed = 7
np.random.seed(seed)
from keras.optimizers import SGD

In [67]:
# Load the breast cancer CSV file into a numeric array.
# Preprocessing: rows with missing data have already been removed.
data = np.loadtxt(
    'wisconsin_breast_cancer_NN.csv',
    delimiter=",",
)

In [68]:
# Split the loaded array column-wise:
#   columns 0-8 -> feature matrix
#   column 9    -> class label to be predicted
x = data[:, :9]
y = data[:, 9]

In [69]:
# Hold out part of the data for testing; random_state is pinned so the split
# is repeatable across runs.
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20, so prefer `sklearn.model_selection` and fall back only on old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

In [70]:
# Build the network: 9 inputs -> 12 ReLU units -> 8 ReLU units -> 1 sigmoid unit.
# All layers use the 'uniform' weight initialiser.
model = Sequential([
    Dense(12, input_dim=9, init='uniform', activation='relu'),
    Dense(8, init='uniform', activation='relu'),
    Dense(1, init='uniform', activation='sigmoid'),
])

In [71]:
# Compile the model with binary cross-entropy loss and an accuracy metric.
# NOTE(review): the SGD instance below is constructed but never handed to
# compile(), which selects 'adam' by name -- confirm which optimizer is intended.
optimizer = SGD(lr=0.01, momentum=0.02)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [72]:
# Train on the training split: 150 epochs, mini-batches of 10, no progress output.
model.fit(
    x_train,
    y_train,
    batch_size=10,
    nb_epoch=150,
    verbose=0,
)

<keras.callbacks.History at 0x7f8e9fccaa10>

In [73]:
# Evaluate on the training split and report the second metric as a percentage.
scores = model.evaluate(x_train, y_train)
metric_name = model.metrics_names[1]
metric_pct = scores[1] * 100
print("%s: %.2f%%" % (metric_name, metric_pct))



In [74]:
# Model accuracy is 97.51% on the training set

In [75]:
# predict() returns the probability of each test sample being class 1.
proba_cancer = model.predict(x_test)
# Threshold the probabilities at 0.5, then cast the boolean mask to 0/1 integers.
y_pred_class = (proba_cancer > 0.5).astype(int)


In [76]:
from sklearn import metrics
# Accuracy of the thresholded predictions on the test split.
# The call form of print works on both Python 2 and Python 3; the bare
# print statement is a SyntaxError on Python 3.
print(metrics.accuracy_score(y_test, y_pred_class))

0.959064327485


In [77]:
# Confusion matrix for the test split (rows: actual class, columns: predicted class).
# print() call form keeps this cell valid on Python 3 as well as Python 2.
print(metrics.confusion_matrix(y_test, y_pred_class))

[[102   5]
 [  2  62]]


In [78]:
confusion = metrics.confusion_matrix(y_test, y_pred_class)
# Name the four cells of the matrix:
#   [0,0] true negatives   [0,1] false positives
#   [1,0] false negatives  [1,1] true positives
TN, FP = confusion[0, 0], confusion[0, 1]
FN, TP = confusion[1, 0], confusion[1, 1]

In [79]:
# Sensitivity of the neural network model: TP / (TP + FN).
# float() forces true division on Python 2; print() call form also runs on Python 3.
print(TP / float(TP + FN))

0.96875


In [80]:
# Specificity: TN / (TN + FP).
# float() forces true division on Python 2; print() call form also runs on Python 3.
print(TN / float(TN + FP))

0.953271028037


In [81]:
# Precision (positive predictive value): when the model predicts cancer,
# how often it is right -- TP / (TP + FP).
# float() forces true division on Python 2; print() call form also runs on Python 3.
print(TP / float(TP + FP))

0.925373134328


In [82]:
# Negative predictive value: TN / (TN + FN).
# float() forces true division on Python 2; print() call form also runs on Python 3.
print(TN / float(TN + FN))

0.980769230769


In [83]:
from sklearn.metrics import roc_auc_score
# Area under the ROC curve, computed from the predicted probabilities
# (not the thresholded 0/1 labels). Left as the cell's last expression so
# the notebook displays the value.
roc_auc_score(y_true=y_test, y_score=proba_cancer)

0.99357476635514019