## Neural Networks with TensorFlow -- Binary Classification for Breast Cancer

#### Import libraries and breast cancer dataset

In [37]:
import numpy as np
import pandas as pd

import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import metrics

import os
import sys
import shutil

from sklearn.datasets import load_breast_cancer

#### Load breast cancer dataset

In [38]:
# Load the scikit-learn breast cancer dataset (features in data.data, labels in data.target)
data = load_breast_cancer()

# data.target_names is ['malignant', 'benign'], i.e. 0 == malignant and 1 == benign.
# Flip the labels so that malignant becomes the positive class (1).
target = 1 - data.target

# MUST reshape target (y) to be a column vector (list of lists) for TensorFlow.
# Reshape the *flipped* labels: reshaping data.target here would silently
# throw away the inversion done above.
target = target.reshape(-1, 1)

In [39]:
# Class labels, in the order of their integer encoding in data.target
[label for label in data.target_names]

['malignant', 'benign']

In [40]:
# Names of the feature columns, in the same order as the columns of data.data
[feature for feature in data.feature_names]

['mean radius',
 'mean texture',
 'mean perimeter',
 'mean area',
 'mean smoothness',
 'mean compactness',
 'mean concavity',
 'mean concave points',
 'mean symmetry',
 'mean fractal dimension',
 'radius error',
 'texture error',
 'perimeter error',
 'area error',
 'smoothness error',
 'compactness error',
 'concavity error',
 'concave points error',
 'symmetry error',
 'fractal dimension error',
 'worst radius',
 'worst texture',
 'worst perimeter',
 'worst area',
 'worst smoothness',
 'worst compactness',
 'worst concavity',
 'worst concave points',
 'worst symmetry',
 'worst fractal dimension']

#### Split the dataset into train and test sets

In [26]:
# Split the dataset into train and test sets.
# random_state pins the shuffle so a fresh "Run All" reproduces the same split;
# stratify keeps the class ratio the same in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, target, random_state=42, stratify=target)

#### Apply standard scaler

In [29]:
# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so no test-set statistics leak into training.
ss = StandardScaler().fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

#### Examine features

In [30]:
# Peek at the first five (scaled) training rows, all feature columns
X_train[:5, :]

array([[-0.10866029, -0.43633778, -0.1629593 , -0.22848008,  0.2200272 ,
        -0.49442708, -0.55323932, -0.45239241, -0.29839028, -0.29942571,
        -0.62098286, -0.52150393, -0.70008294, -0.4536807 , -1.31276428,
        -0.97038837, -0.48289931, -0.44842986, -0.96091682, -0.87672706,
        -0.25585074, -0.38517446, -0.3303219 , -0.32741978, -0.55032765,
        -0.75138443, -0.4596545 , -0.36165306, -0.60956607, -0.80740465],
       [ 2.33955594,  0.04206598,  2.53096201,  2.47641853,  2.60278348,
         3.29174724,  4.25358297,  3.49660385,  2.73436154,  1.04935334,
         3.17266736,  0.53725695,  3.88663289,  3.15873166, -0.19578116,
         3.28419994,  2.21539182,  1.98354227,  1.26469637,  0.42164823,
         2.50675536,  0.34733023,  2.94639079,  2.57682546,  1.65068655,
         2.71659888,  3.18346447,  2.66911501,  1.87899686,  0.72950295],
       [-0.08580646, -0.8723215 , -0.13769343, -0.18579696, -0.59558092,
        -0.73227477, -0.99402033, -1.02005735, -1

In [33]:
# Peek at the first five (scaled) test rows, all feature columns
X_test[:5, :]

array([[-1.52731161, -0.62487128, -1.50867737, -1.19980949,  0.53676821,
        -0.59597787, -1.12715058, -1.28045002,  0.63278231,  1.13303154,
         0.44774997,  1.48144251,  0.16634476, -0.25511412,  2.98266775,
        -0.34846636, -1.00941026, -1.89086571, -0.25437295,  1.04958436,
        -1.24044027, -0.50453349, -1.26800126, -0.96866185,  0.59402435,
        -0.7734774 , -1.26645761, -1.73558713, -0.72776067,  0.22752253],
       [-0.66857906, -1.13155507, -0.63721205, -0.64694771, -1.11244467,
        -0.16731452, -0.27839791, -0.5889171 , -1.15595238,  0.6508182 ,
        -0.81888188, -1.05990187, -0.81845329, -0.63749387, -0.03626094,
         0.70848668,  0.23616954,  0.49626953,  0.22680779,  1.08136924,
        -0.77178387, -1.35476215, -0.75916928, -0.69654613, -1.10277345,
         0.13663041, -0.02059059, -0.13717976, -0.47032312,  0.77839715],
       [-0.52288591, -0.36563772, -0.55602992, -0.54802669, -0.71291907,
        -0.7397615 , -0.63690316, -0.66962651,  0

#### Create model

In [35]:
# Start from an empty default graph so re-running this cell does not
# accumulate duplicate ops/variables from a previous run.
tf.reset_default_graph()

# Graph-level seed so weight initialization is repeatable across runs.
# Must be set after the reset, because it is stored on the default graph.
tf.set_random_seed(42)

# shape None == allow dynamic number of rows; X_train.shape[1] == number of features
X = tf.placeholder(dtype=tf.float32, shape=(None, X_train.shape[1]), name='inputs')
# Targets are a column vector, matching y_hat's (None, 1) output below.
# NOTE: `shape=(None)` is just `None` (no shape constraint at all); pinning the
# shape makes a mistakenly 1-D target fail at feed time instead of silently
# broadcasting against the (None, 1) predictions inside the loss.
y = tf.placeholder(dtype=tf.float32, shape=(None, 1), name='targets')

# First hidden layer
# X == features coming in, 30 == number of neurons (chosen equal to the number of features)
h1 = tf.layers.dense(X, 30, name='input_layer_1', activation=tf.nn.relu)

# Second hidden layer
# h1 == features coming in, 26 == number of neurons == arbitrary
h2 = tf.layers.dense(h1, 26, name='input_layer_2', activation=tf.nn.relu)

# Last/output layer always has 1 neuron for binary classification problems;
# sigmoid squashes it to a probability in (0, 1)
y_hat = tf.layers.dense(h2, 1, name='y_hat', activation=tf.nn.sigmoid)
# Give the output a stable name so it can be looked up by name at export time
final_output = tf.identity(y_hat, name='classifications')

# Binary cross-entropy between the targets and the predicted probabilities
loss = tf.losses.log_loss(y, y_hat)

# Define training operation (Adam with learning rate 0.01)
training_op = tf.train.AdamOptimizer(.01).minimize(loss)

#### Train model and save/export it as a SavedModel for TensorFlow Serving

In [36]:
# Settings for this cell
EXPORT_DIR = './models'   # where the SavedModel is written
N_EPOCHS = 1000           # full-batch training steps
LOG_EVERY = 100           # print losses every this many epochs

# Initialize
init = tf.global_variables_initializer()

# SavedModelBuilder will not write into an existing export directory, so remove
# any previous export first. Check for the directory itself rather than for
# saved_model.pb, so a partial/empty leftover directory is cleaned up too.
try:
    if os.path.isdir(EXPORT_DIR):
        shutil.rmtree(EXPORT_DIR)
except OSError as e:
    print('OSError: ', e.strerror)

with tf.Session() as sess:
    init.run()
    # Train model (full batch: every step sees the whole training set)
    for epoch in range(N_EPOCHS):
        sess.run(training_op, feed_dict={X: X_train, y: y_train})
        if (epoch % LOG_EVERY == 0):
            training_loss = sess.run(loss, feed_dict={X: X_train, y: y_train})
            test_loss = sess.run(loss, feed_dict={X: X_test, y: y_test})
            print('epoch:',epoch,' | training loss:', training_loss, ' | test loss:', test_loss)

    # Get the tensors needed for serving (looked up by the names given at graph build time)
    graph = tf.get_default_graph()
    inputs = graph.get_tensor_by_name('inputs:0')
    classifications = graph.get_tensor_by_name('classifications:0')

    # Create tensors info needed for serving
    model_input = tf.saved_model.utils.build_tensor_info(inputs)
    model_output = tf.saved_model.utils.build_tensor_info(classifications)

    # Build signature definition needed for serving.
    # This is a Predict signature: the model takes a raw float feature tensor and
    # returns a raw probability tensor. A Classify signature would instead need
    # serialized tf.Example inputs and 'classes'/'scores' outputs.
    signature_definition = tf.saved_model.signature_def_utils.build_signature_def(
        inputs={'inputs': model_input},
        outputs={'outputs': model_output},
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

    builder = tf.saved_model.builder.SavedModelBuilder(EXPORT_DIR)

    builder.add_meta_graph_and_variables(
        sess, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                signature_definition
        })

    # Save the model so it can be served with a TF model server
    builder.save()

    # Use model to classify test data (predicted probabilities, one per row)
    classifications_on_test_data = sess.run(y_hat, feed_dict={X: X_test})

    # Check model accuracy: probabilities above 0.5 count as class 1
    classes = (classifications_on_test_data > .5).astype(int)

    print('\nAccuracy Score:',metrics.accuracy_score(y_test, classes))

epoch: 0  | training loss: 0.61675286  | test loss: 0.6248219
epoch: 100  | training loss: 0.00030252174  | test loss: 0.24269032
epoch: 200  | training loss: 6.68025e-05  | test loss: 0.29234645
epoch: 300  | training loss: 3.0104544e-05  | test loss: 0.3077362
epoch: 400  | training loss: 1.7146984e-05  | test loss: 0.3185355
epoch: 500  | training loss: 1.1088368e-05  | test loss: 0.32272467
epoch: 600  | training loss: 7.753676e-06  | test loss: 0.32627067
epoch: 700  | training loss: 5.713634e-06  | test loss: 0.32928047
epoch: 800  | training loss: 4.371246e-06  | test loss: 0.3319225
epoch: 900  | training loss: 3.4393101e-06  | test loss: 0.33427384
INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: ./models/saved_model.pb

Accuracy Score: 0.965034965034965
