# TensorFlow Example: Classification

In [1]:

# This notebook modified by Adam Smith

# Original version copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0

"""An Example of a DNNClassifier for the Iris dataset."""

import argparse
import tensorflow as tf

import iris_data


  from ._conv import register_converters as _register_converters


In [2]:
# Hyperparameters are declared as command-line style options. An empty
# argument list is parsed here, so the defaults below are what the notebook
# uses; the same parser can read real command-line arguments when appropriate.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--batch_size',
    type=int,
    default=100,
    help='batch size',
)
parser.add_argument(
    '--train_steps',
    type=int,
    default=1000,
    help='number of training steps',
)
args = parser.parse_args(args=[])

First, we load the data into pandas DataFrames.

In [3]:
# Fetch the data: load_data() returns a (features, labels) pair for the
# training split followed by a (features, labels) pair for the test split.
(train_x, train_y), (test_x, test_y) = iris_data.load_data()


Let's look at the data. 

In [4]:
# Check which container type holds the training features.
type(train_x)

pandas.core.frame.DataFrame

In [5]:
# Preview the first rows of the training features.
train_x.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth
0,6.4,2.8,5.6,2.2
1,5.0,2.3,3.3,1.0
2,4.9,2.5,4.5,1.7
3,4.9,3.1,1.5,0.1
4,5.7,3.8,1.7,0.3


In [6]:
# Check which container type holds the training labels.
type(train_y)

pandas.core.series.Series

In [7]:
# Preview the first training labels.
train_y.head()

0    2
1    1
2    2
3    0
4    0
Name: Species, dtype: int64

The train/test split is 80/20 (`.size` counts cells, i.e. rows × 4 feature columns, so the frames have 120 and 30 rows):

In [8]:
# NOTE: DataFrame.size counts cells (rows x columns), not rows. Assuming both
# frames share the same columns, the ratio of the two values still equals the
# row ratio; use len(train_x), len(test_x) to get the row counts directly.
train_x.size, test_x.size

(480, 120)

We now instantiate the classifier. This object is specific to discrete classification. We need to specify:

- features

- network structure

- Number of classes

- (Optionally) Lots of other stuff (activation function, optimization methods, etc.)

Defaults worth knowing: 

- Activation function is ReLU

- "Dropout" regularization is not used

In [9]:

# Feature columns describe how the model should interpret its input.
# Every column of the training data becomes one numeric feature.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train_x.keys()
]

# Build a DNN classifier with two hidden layers of 10 units each.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    # Two hidden layers of 10 nodes each.
    hidden_units=[10, 10],
    # The model must choose between 3 classes.
    n_classes=3,
    # Optionally, set the directory where model information will be saved;
    # when it is omitted, a temporary directory is used.
    # model_dir='models/iris',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/h0/cnbyp_4x2f1c3jrqv08jnjgw0000gn/T/tmpr4ddz47t', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x18147ae5c0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [10]:
# Confirm which estimator class was built.
type(classifier)

tensorflow.python.estimator.canned.dnn.DNNClassifier

We are now ready to __train__!

We pass the input to the classifier as a function.

That function takes no arguments and returns a `tf.data.Dataset` object. 

In [11]:
def _train_input():
    """Zero-argument input function handed to the estimator for training."""
    return iris_data.train_input_fn(train_x, train_y, args.batch_size)


# Run the configured number of training steps; the cell displays the value
# returned by train().
classifier.train(input_fn=_train_input, steps=args.train_steps)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/h0/cnbyp_4x2f1c3jrqv08jnjgw0000gn/T/tmpr4ddz47t/model.ckpt.
INFO:tensorflow:loss = 128.72586, step = 1
INFO:tensorflow:global_step/sec: 639.092
INFO:tensorflow:loss = 17.5421, step = 101 (0.157 sec)
INFO:tensorflow:global_step/sec: 864.358
INFO:tensorflow:loss = 10.320238, step = 201 (0.116 sec)
INFO:tensorflow:global_step/sec: 807.891
INFO:tensorflow:loss = 6.2506757, step = 301 (0.124 sec)
INFO:tensorflow:global_step/sec: 789.054
INFO:tensorflow:loss = 5.9085226, step = 401 (0.127 sec)
INFO:tensorflow:global_step/sec: 858.398
INFO:tensorflow:loss = 7.9417143, step = 501 (0.116 sec)
INFO:tensorflow:global_step/sec: 878.179
INFO:tensorflow:loss = 7.0496063, step = 601 (0.114 sec)
INFO:tensorflow

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x18147ae4a8>

In [12]:
# Reference copy of the training input function found in iris_data.py.
def train_input_fn(features, labels, batch_size):
    """Build the input pipeline used for training.

    Args:
        features: Feature table; it is converted with ``dict(features)`` so
            each column becomes one named entry.
        labels: Labels aligned with ``features``.
        batch_size: Number of examples per batch.

    Returns:
        The dataset after shuffling (buffer of 1000), repeating (no count
        given) and batching.
    """
    # Pair the per-column feature dict with the labels and slice example-wise.
    feature_label_pairs = (dict(features), labels)
    examples = tf.data.Dataset.from_tensor_slices(feature_label_pairs)

    # Shuffle, then repeat, then batch -- in that order.
    shuffled = examples.shuffle(buffer_size=1000)
    repeated = shuffled.repeat()
    return repeated.batch(batch_size=batch_size)

The most straightforward way to construct input functions is directly from a dataframe (you can also do this from a NumPy array):

```python
import pandas as pd
# pandas input_fn.
my_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=pd.DataFrame({"x": x_data}),
    y=pd.Series(y_data),
    ...)
```

You can see this and other examples here:
 https://www.tensorflow.org/versions/r1.3/get_started/input_fn

In [13]:
# List the names of the variables held by the trained estimator.
classifier.get_variable_names()

['dnn/hiddenlayer_0/bias',
 'dnn/hiddenlayer_0/bias/t_0/Adagrad',
 'dnn/hiddenlayer_0/kernel',
 'dnn/hiddenlayer_0/kernel/t_0/Adagrad',
 'dnn/hiddenlayer_1/bias',
 'dnn/hiddenlayer_1/bias/t_0/Adagrad',
 'dnn/hiddenlayer_1/kernel',
 'dnn/hiddenlayer_1/kernel/t_0/Adagrad',
 'dnn/logits/bias',
 'dnn/logits/bias/t_0/Adagrad',
 'dnn/logits/kernel',
 'dnn/logits/kernel/t_0/Adagrad',
 'global_step']

In [14]:
# We can inspect the weights and biases of the resulting model,
# e.g. the kernel of the first hidden layer:
classifier.get_variable_value('dnn/hiddenlayer_0/kernel')

array([[ 0.7102564 , -0.00301552,  0.2513975 , -0.67328316, -0.21836346,
         0.4909714 ,  0.35639822, -0.36489418,  0.11256502, -0.44818056],
       [ 1.2686031 ,  0.36127937,  0.4128691 , -0.8769065 ,  0.50733244,
         1.0569389 , -0.38552013, -0.39856014, -0.00377   , -0.6275349 ],
       [ 0.21148968, -0.56791836, -0.37399024,  0.5805862 ,  0.08956772,
         0.22763884, -0.5008432 ,  0.8895586 , -0.34642193, -0.00462624],
       [ 0.21686938,  0.18809235, -0.6485498 ,  0.5600173 , -0.3405326 ,
        -0.48366976, -0.1870741 ,  1.0098534 ,  0.2903242 ,  0.14058575]],
      dtype=float32)

In [15]:
def _eval_input():
    """Zero-argument input function handed to the estimator for evaluation."""
    return iris_data.eval_input_fn(test_x, test_y, args.batch_size)


# evaluate() returns a dict of metrics; 'accuracy' is the one reported here.
eval_result = classifier.evaluate(input_fn=_eval_input)

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-04-03-21:45:41
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/h0/cnbyp_4x2f1c3jrqv08jnjgw0000gn/T/tmpr4ddz47t/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-04-03-21:45:42
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.93333334, average_loss = 0.06211599, global_step = 1000, loss = 1.8634797

Test set accuracy: 0.933



In [16]:
# eval_result maps each statistic's name to its value; print one pair per line.
for metric_name, metric_value in eval_result.items():
    print(metric_name, ": ", metric_value)

accuracy :  0.93333334
average_loss :  0.06211599
loss :  1.8634797
global_step :  1000
