In [17]:
# Adrian Marinovich
# Springboard - Data Science Career Track 
# Smiles dataset
# Neural networks

In [1]:
import numpy as np
import os
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.datasets import fetch_mldata
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import SGDClassifier

# make output stable
np.random.seed(42)

# setup plots
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

  from ._conv import register_converters as _register_converters


In [2]:
X = np.load('smile/smile_X.npy')
y = np.load('smile/smile_y.npy')

X_train_scaled = np.load('smile/X_train_scaled.npy')
X_test_scaled = np.load('smile/X_test_scaled.npy')
y_train = np.load('smile/y_train.npy')
y_test = np.load('smile/y_test.npy')

In [3]:
# make validation set
X_valid, X_train = X_train_scaled[:100], X_train_scaled[100:]
y_valid, y_train = y_train[:100], y_train[100:]

In [4]:
# Using high-level API tf.learn

feature_cols = [tf.feature_column.numeric_column("X", shape=[64 * 64])]
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                     feature_columns=feature_cols)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_train}, y=y_train, num_epochs=40, batch_size=50, shuffle=True)
dnn_clf.train(input_fn=input_fn)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpkb52x1ck', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa0027e2588>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpkb52x1ck/model.ckpt.
INFO:ten

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f9fe527abe0>

In [5]:
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_test_scaled}, y=y_test, shuffle=False)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-19-06:43:06
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpkb52x1ck/model.ckpt-720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-19-06:43:06
INFO:tensorflow:Saving dict for global step 720: accuracy = 0.8817734, average_loss = 0.7981251, global_step = 720, loss = 81.0097
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 720: /tmp/tmpkb52x1ck/model.ckpt-720


In [6]:
eval_results

{'accuracy': 0.8817734,
 'average_loss': 0.7981251,
 'loss': 81.0097,
 'global_step': 720}

In [7]:
y_pred_iter = dnn_clf.predict(input_fn=test_input_fn)
y_pred = list(y_pred_iter)
y_pred[0]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpkb52x1ck/model.ckpt-720
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'logits': array([ 74.191154,  75.94948 , -41.133698, -59.252388, -33.749897,
        -58.961876, -32.31364 , -19.380312, -41.753387, -82.995255],
       dtype=float32),
 'probabilities': array([0.14700042, 0.8529996 , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       dtype=float32),
 'class_ids': array([1]),
 'classes': array([b'1'], dtype=object)}

In [18]:
# plain TensorFlow
n_inputs = 64*64  # set to cropped image
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [19]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

In [20]:
def neuron_layer(X, n_neurons, name, activation=None):
    with tf.name_scope(name):
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs)
        init = tf.truncated_normal((n_inputs, n_neurons), stddev=stddev)
        W = tf.Variable(init, name="kernel")
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(Z)
        else:
            return Z

In [21]:
with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, name="hidden1",
                           activation=tf.nn.relu)
    hidden2 = neuron_layer(hidden1, n_hidden2, name="hidden2",
                           activation=tf.nn.relu)
    logits = neuron_layer(hidden2, n_outputs, name="outputs")

In [22]:
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                              logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

In [23]:
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [24]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [25]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [26]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [27]:
n_epochs = 40
batch_size = 50

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "./my_model_final.ckpt")

0 Batch accuracy: 0.9 Validation accuracy: 0.8
1 Batch accuracy: 0.98 Validation accuracy: 0.84
2 Batch accuracy: 0.98 Validation accuracy: 0.84
3 Batch accuracy: 1.0 Validation accuracy: 0.83
4 Batch accuracy: 1.0 Validation accuracy: 0.86
5 Batch accuracy: 1.0 Validation accuracy: 0.87
6 Batch accuracy: 1.0 Validation accuracy: 0.88
7 Batch accuracy: 1.0 Validation accuracy: 0.88
8 Batch accuracy: 1.0 Validation accuracy: 0.87
9 Batch accuracy: 1.0 Validation accuracy: 0.88
10 Batch accuracy: 1.0 Validation accuracy: 0.87
11 Batch accuracy: 1.0 Validation accuracy: 0.88
12 Batch accuracy: 1.0 Validation accuracy: 0.88
13 Batch accuracy: 1.0 Validation accuracy: 0.87
14 Batch accuracy: 1.0 Validation accuracy: 0.87
15 Batch accuracy: 1.0 Validation accuracy: 0.88
16 Batch accuracy: 1.0 Validation accuracy: 0.86
17 Batch accuracy: 1.0 Validation accuracy: 0.87
18 Batch accuracy: 1.0 Validation accuracy: 0.86
19 Batch accuracy: 1.0 Validation accuracy: 0.86
20 Batch accuracy: 1.0 Valida

In [None]:
print("Predicted classes:", y_pred)
print("Actual classes:   ", y_test[:20])

In [34]:
# https://github.com/ageron/handson-ml/blob/master/tensorflow_graph_in_jupyter.py

from __future__ import absolute_import, division, print_function, unicode_literals

# This module defines the show_graph() function to visualize a TensorFlow graph within Jupyter.

# As far as I can tell, this code was originally written by Alex Mordvintsev at:
# https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb

# The original code only worked on Chrome (because of the use of <link rel="import"...>, but the version below
# uses Polyfill (copied from this StackOverflow answer: https://stackoverflow.com/a/41463991/38626)
# so that it can work on other browsers as well.

import numpy as np
import tensorflow as tf
from IPython.display import clear_output, Image, display, HTML

def strip_consts(graph_def, max_const_size=32):
    """Strip large constant values from graph_def."""
    strip_def = tf.GraphDef()
    for n0 in graph_def.node:
        n = strip_def.node.add() 
        n.MergeFrom(n0)
        if n.op == 'Const':
            tensor = n.attr['value'].tensor
            size = len(tensor.tensor_content)
            if size > max_const_size:
                tensor.tensor_content = b"<stripped %d bytes>"%size
    return strip_def

def show_graph(graph_def, max_const_size=32):
    """Visualize TensorFlow graph."""
    if hasattr(graph_def, 'as_graph_def'):
        graph_def = graph_def.as_graph_def()
    strip_def = strip_consts(graph_def, max_const_size=max_const_size)
    code = """
        <script src="//cdnjs.cloudflare.com/ajax/libs/polymer/0.3.3/platform.js"></script>
        <script>
          function load() {{
            document.getElementById("{id}").pbtxt = {data};
          }}
        </script>
        <link rel="import" href="https://tensorboard.appspot.com/tf-graph-basic.build.html" onload=load()>
        <div style="height:600px">
          <tf-graph-basic id="{id}"></tf-graph-basic>
        </div>
    """.format(data=repr(str(strip_def)), id='graph'+str(np.random.rand()))

    iframe = """
        <iframe seamless style="width:1200px;height:620px;border:0" srcdoc="{}"></iframe>
    """.format(code.replace('"', '&quot;'))
    display(HTML(iframe))

In [44]:
show_graph(tf.get_default_graph())