# T81-558: Applications of Deep Neural Networks
**Class 14: High Performance TensorFlow**
* Instructor: [Jeff Heaton](https://sites.wustl.edu/jeffheaton/), School of Engineering and Applied Science, [Washington University in St. Louis](https://engineering.wustl.edu/Programs/Pages/default.aspx)
* For more information visit the [class website](https://sites.wustl.edu/jeffheaton/t81-558/).

# Helpful Resources

* [Amazon Web Services (AWS)](https://aws.amazon.com/)
* [Installing a Python Jupyter Notebook Server on AWS](https://gist.github.com/iamatypeofwalrus/5183133)
* [Installing AWS for GPU TensorFlow Usage](https://gist.github.com/iamatypeofwalrus/5183133)
* [How to Use Tensorflow with a GPU](https://www.tensorflow.org/versions/r0.12/how_tos/using_gpu/index.html)
* [CIFAR-10 Tutorial on TensorFlow](https://www.tensorflow.org/versions/r0.12/tutorials/deep_cnn/index.html)
* [The CIFAR-10 Dataset](https://www.cs.toronto.edu/~kriz/cifar.html)
* [All AWS Instances](https://aws.amazon.com/ec2/instance-types/)
* [AWS Instance Prices](https://aws.amazon.com/ec2/pricing/on-demand/)
* [Amazon AWS GPU Instances](https://aws.amazon.com/ec2/instance-types/p2/)
* [Install CUDA on Mac](http://stackoverflow.com/questions/38710339/library-not-loaded-rpath-libcudart-7-5-dylib-tensorflow-error-on-mac)
* [Set Up TensorFlow GPU for Mac](https://www.tensorflow.org/versions/r0.12/get_started/os_setup.html#optional-setup-gpu-for-mac)
* [Very Helpful Guide for TF/GPU](https://gist.github.com/Mistobaan/dd32287eeb6859c6668d)
* [NVIDIA Suggested Hardware](http://www.nvidia.com/object/gpu-accelerated-applications-tensorflow-configurations.html)

In [1]:

from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# One-hot encode a text column (i.e. [1,0,0],[0,1,0],[0,0,1] for red,green,blue)
def encode_text_dummy(df,name):
    """Replace column `name` of `df` with one indicator column per distinct value.

    Each new column is named "<name>-<value>". The frame is modified in place
    (the original column is dropped) and nothing is returned.
    """
    one_hot = pd.get_dummies(df[name])
    for category, indicator in one_hot.items():
        df["{}-{}".format(name, category)] = indicator
    df.drop(name, axis=1, inplace=True)

# Label-encode a text column (i.e. [1],[2],[3] for red,green,blue).
def encode_text_index(df,name):
    """Overwrite column `name` of `df` with integer codes from a LabelEncoder.

    The frame is modified in place. Returns the encoder's `classes_`, i.e. the
    original values in the order of their assigned codes.
    """
    encoder = preprocessing.LabelEncoder()
    codes = encoder.fit_transform(df[name])
    df[name] = codes
    return encoder.classes_

# Standardize a numeric column to z-scores
def encode_numeric_zscore(df,name,mean=None,sd=None):
    """Overwrite column `name` of `df` with (value - mean) / sd.

    `mean` and `sd` default to the column's own mean and standard deviation
    when not supplied. The frame is modified in place; nothing is returned.
    """
    column = df[name]
    center = column.mean() if mean is None else mean
    spread = column.std() if sd is None else sd
    df[name] = (column - center) / spread

# Fill every missing value in the specified column with that column's median
def missing_median(df, name):
    """Replace missing entries of column `name` in `df` with the column median.

    The frame is modified in place; nothing is returned.
    """
    column = df[name]
    df[name] = column.fillna(column.median())

# Convert a Pandas dataframe to the x,y inputs that TensorFlow needs
def to_xy(df,target):
    """Split `df` into a feature matrix and a target matrix.

    Args:
        df: DataFrame holding the feature columns and the target column.
        target: name of the target column; every other column is a feature.

    Returns:
        A tuple (x, y). `x` is a float32 array of all non-target columns.
        `y` is a 2-D array with the target as its only column: int32 when the
        target dtype is int64/int32 (classification), float32 otherwise
        (regression).
    """
    feature_names = [column for column in df.columns if column != target]

    # df[target].dtypes is a single dtype for a Series; if the label is
    # duplicated it is a Series of dtypes, in which case the first one is used.
    target_type = df[target].dtypes
    if isinstance(target_type, pd.Series):
        target_type = target_type.iloc[0]

    # `DataFrame.as_matrix` was removed in pandas 1.0; selecting the columns
    # and taking `.values` yields the same 2-D arrays on old and new pandas.
    x = df[feature_names].values.astype(np.float32)

    # Encode to int for classification, float otherwise. TensorFlow likes 32 bits.
    if target_type in (np.int64, np.int32):
        # Classification
        return x, df[[target]].values.astype(np.int32)
    # Regression
    return x, df[[target]].values.astype(np.float32)

# Human-readable elapsed-time string
def hms_string(sec_elapsed):
    """Format a duration given in seconds as "H:MM:SS.ss"."""
    seconds_per_hour = 60 * 60
    hours = int(sec_elapsed / seconds_per_hour)
    minutes = int((sec_elapsed % seconds_per_hour) / 60)
    seconds = sec_elapsed % 60
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)

# Regression chart, we will see more of this chart in the next class.
def chart_regression(pred,y):
    """Plot predictions against expected values, ordered by expected value.

    Args:
        pred: array of model predictions (flattened before plotting).
        y: array of expected values, same number of elements as `pred`.

    Both series are sorted by the expected value so the 'expected' line is
    monotonic and the 'prediction' line can be compared against it visually.
    """
    # Use the `y` argument; the previous version read a global `y_test`,
    # which fails (or silently plots the wrong data) for any other caller.
    frame = pd.DataFrame({'pred' : pred.flatten(), 'y' : y.flatten()})
    frame = frame.sort_values(by=['y'])
    plt.plot(frame['y'].tolist(),label='expected')
    plt.plot(frame['pred'].tolist(),label='prediction')
    plt.ylabel('output')
    plt.legend()
    plt.show()

In [30]:
# Test TF
import tensorflow as tf

# Build a tiny graph: multiply a 2x3 constant by a 3x2 constant.
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
# Create a session with log_device_placement set to True. The `with` block
# closes the session when the cell finishes, so it does not stay open and
# hold resources while later cells run.
with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    # Runs the op.
    print(sess.run(c))

[[ 22.  28.]
 [ 49.  64.]]


In [1]:
import tensorflow as tf
# Print the version string of the TensorFlow package loaded in this kernel.
print("Tensor Flow Version: {}".format(tf.__version__))

Tensor Flow Version: 0.12.0-rc0


# When GPUs Help



* GPU: Elapsed time: 0:32:00.36
* CPU: Elapsed time: 1:13:44.48

In [None]:
#  Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

"""This showcases how simple it is to build image classification networks.
It follows description from this TensorFlow tutorial:
    https://www.tensorflow.org/versions/master/tutorials/mnist/pros/index.html#deep-mnist-for-experts
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from sklearn import metrics
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import learn
import time
import os

# Set the TF_CPP_MIN_LOG_LEVEL environment variable for this process.
# NOTE(review): presumably "2" hides TensorFlow's native INFO/WARNING log
# lines; it is set after `import tensorflow` above — confirm it still takes
# effect at this point.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Wall-clock reference taken when this cell starts; main() subtracts it from
# the current time to report the total elapsed time.
start_time = time.time()


def max_pool_2x2(tensor_in):
  """Apply max pooling with a 2x2 window and 2x2 stride, 'SAME' padding."""
  window = [1, 2, 2, 1]
  step = [1, 2, 2, 1]
  pooled = tf.nn.max_pool(tensor_in, ksize=window, strides=step, padding='SAME')
  return pooled


def conv_model(feature, target, mode):
  """2-layer convolution model.

  Builds: conv(32, 5x5, relu) -> 2x2 max pool -> conv(64, 5x5, relu) ->
  2x2 max pool -> fully connected(1024, relu) with dropout -> 10 logits.

  Args:
    feature: input tensor; reshaped here to [-1, 28, 28, 1].
    target: class labels; cast to int32 and one-hot encoded to depth 10.
    mode: compared against `tf.contrib.learn.ModeKeys.TRAIN` to decide
      whether dropout is active.

  Returns:
    A tuple (predictions, loss, train_op): the argmax of the logits over
    axis 1, the softmax cross-entropy loss, and the SGD training op.
  """
  # Convert the target to a one-hot tensor of shape (batch_size, 10) and
  # with a on-value of 1 for each one-hot vector of length 10.
  target = tf.one_hot(tf.cast(target, tf.int32), 10, 1, 0)

  # Reshape feature to 4d tensor with 2nd and 3rd dimensions being
  # image width and height final dimension being the number of color channels.
  feature = tf.reshape(feature, [-1, 28, 28, 1])

  # First conv layer will compute 32 features for each 5x5 patch
  with tf.variable_scope('conv_layer1'):
    h_conv1 = layers.convolution(feature, 32, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    h_pool1 = max_pool_2x2(h_conv1)

  # Second conv layer will compute 64 features for each 5x5 patch.
  with tf.variable_scope('conv_layer2'):
    h_conv2 = layers.convolution(h_pool1, 64, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    h_pool2 = max_pool_2x2(h_conv2)
    # reshape tensor into a batch of vectors
    # NOTE(review): 7 * 7 * 64 assumes the convolutions keep the 28x28
    # spatial size so that the two 2x2 pools give 28 -> 14 -> 7 — confirm
    # against the default padding of layers.convolution.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

  # Densely connected layer with 1024 neurons.
  # Dropout (keep_prob=0.5) is only enabled when mode is TRAIN.
  h_fc1 = layers.dropout(
      layers.fully_connected(
          h_pool2_flat, 1024, activation_fn=tf.nn.relu), keep_prob=0.5,
      is_training=mode == tf.contrib.learn.ModeKeys.TRAIN)

  # Compute logits (1 per class) and compute loss.
  logits = layers.fully_connected(h_fc1, 10, activation_fn=None)
  loss = tf.contrib.losses.softmax_cross_entropy(logits, target)

  # Create a tensor for training op.
  # Plain SGD with a fixed learning rate of 0.001, stepping the global step.
  train_op = layers.optimize_loss(
      loss, tf.contrib.framework.get_global_step(), optimizer='SGD',
      learning_rate=0.001)

  # Predicted class index per example, plus the loss and the training op.
  return tf.argmax(logits, 1), loss, train_op


def main(unused_args):
  """Train a linear baseline and then the conv net on MNIST, and report timing.

  Args:
    unused_args: not used by this function.

  Prints a placeholder accuracy line after each model is fitted, then the
  time elapsed since the module-level `start_time`, formatted by
  `hms_string`.
  """
  # Download and load the MNIST dataset.
  mnist = learn.datasets.load_dataset('mnist')
  train_images = mnist.train.images
  train_labels = mnist.train.labels

  # --- Linear classifier baseline: 1000 steps, batches of 100 ---
  linear_model = learn.LinearClassifier(
      feature_columns=learn.infer_real_valued_columns_from_input(train_images),
      n_classes=10)
  linear_model.fit(train_images, train_labels.astype(np.int32),
                   batch_size=100, steps=1000)
  # NOTE(review): evaluation is disabled, so the printed accuracy is a
  # hard-coded placeholder. The disabled call was
  #   metrics.accuracy_score(
  #       mnist.test.labels, list(classifier.predict(mnist.test.images)))
  linear_score = 0
  print('Accuracy: {0:f}'.format(linear_score))

  # --- Convolutional network: 20000 steps, batches of 100 ---
  conv_net = learn.Estimator(model_fn=conv_model)
  conv_net.fit(train_images, train_labels,
               batch_size=100, steps=20000)
  # Same placeholder as above: the accuracy computation is disabled here too.
  conv_score = 0
  print('Accuracy: {0:f}'.format(conv_score))

  # Total wall-clock time since the cell started running.
  print("Elapsed time: {}".format(hms_string(time.time() - start_time)))


# Only start the run when this code executes as the main module.
# NOTE(review): tf.app.run() presumably dispatches to the main() defined
# above — confirm against the TensorFlow documentation.
if __name__ == '__main__':
  tf.app.run()

# When a GPU Might Not Help



In [2]:
import tensorflow.contrib.learn as skflow
from sklearn import metrics
import numpy as np
import pandas as pd
import time
from IPython.display import display, HTML 

# Number of input features; also used as the number of regression outputs.
FEATURE_SIZE = 1000

# Time the whole cell: data creation, training, prediction and scoring.
start_time = time.time()
# A single sample holding the values 0..FEATURE_SIZE-1, i.e. shape (1, FEATURE_SIZE).
x = np.array([range(FEATURE_SIZE)]).astype(np.float32)
print(x.shape)
# Fit regression DNN model.
# Six hidden layers; the output layer has one unit per input feature.
regressor = skflow.DNNRegressor(
    feature_columns=skflow.infer_real_valued_columns_from_input(x),
    hidden_units=[5000,2500,1000,500,250,25],
    label_dimension=FEATURE_SIZE)
# Wrap the estimator in SKCompat; fit/predict below go through the wrapper.
regressor = skflow.SKCompat(regressor)
# x is used as both input and label, so the network learns to reproduce x.
regressor.fit(x=x,y=x,steps=1000)
pred = regressor.predict(x)

# predict() returned a mapping here; the predicted values are under 'scores'.
pred = pred['scores']

# Root-mean-squared error between the predictions and the training input.
score = np.sqrt(metrics.mean_squared_error(pred,x))
print("Fold score (RMSE): {}".format(score))
# Turn off scientific notation for any NumPy arrays printed after this point.
np.set_printoptions(suppress=True)

# hms_string is defined in the helper cell at the top of the notebook.
elapsed_time = time.time() - start_time
print("Elapsed time: {}".format(hms_string(elapsed_time)))

(1, 1000)
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_num_ps_replicas': 0, '_master': '', '_is_chief': True, 'keep_checkpoint_every_n_hours': 10000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x114d63dd8>, '_environment': 'local', '_task_id': 0, 'save_summary_steps': 100, '_evaluation_master': '', '_task_type': None, 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, 'save_checkpoints_secs': 600, 'tf_random_seed': None, 'save_checkpoints_steps': None, 'keep_checkpoint_max': 5}
INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_1:fraction_of_zero_values is illegal; using dnn/hiddenlayer_1_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_