In [1]:
# February, 2018
# https://github.com/itsergiu/Tensorflow/tree/master/iris_data
# https://www.tensorflow.org/get_started/get_started_for_beginners
# https://github.com/tensorflow/models/blob/master/samples/core/get_started/iris_data.py

The author does not assume, and hereby disclaims, any liability to any party for any loss, damage, or disruption caused by errors or omissions, whether such errors or omissions result from negligence, accident, or any other cause. The software is provided "as is", without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement.

The purpose of this application is to provide the TensorFlow iris_data.py code in the format of a Jupyter Notebook.

This may help you to speed up your tests and understanding.

In [17]:
# Check library version
import sys
import tensorflow as tf
print(sys.version)
# `tf.__version__` is available in every TensorFlow release, whereas the
# `tf.VERSION` alias was removed in TensorFlow 2.x and would make this version
# check itself fail with an AttributeError there.
print("Tensorflow version ", tf.__version__) # Program requires at least TensorFlow v1.4
# Upgrading or installing Tensorflow with Anaconda:
# https://www.tensorflow.org/install/install_windows#installing_with_anaconda
# Alternatively you may run in the Anaconda prompt: pip install tensorflow

3.6.1 |Anaconda custom (64-bit)| (default, May 11 2017, 13:25:24) [MSC v.1900 64 bit (AMD64)]
Tensorflow version  1.5.0


In [3]:
import pandas as pd
import tensorflow as tf

# Remote locations of the Iris training and test CSV files.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Names applied to the CSV columns, in file order; 'Species' is the column
# that load_data() separates out as the label by default.
CSV_COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

# Human-readable species names, looked up by predicted class id.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]

def maybe_download():
    """Fetch the Iris CSV files through the Keras download helper.

    Each file is requested with `tf.keras.utils.get_file`, passing the last
    path component of its URL as the file name and the URL as the origin.

    Returns:
        A `(train_path, test_path)` tuple holding what `get_file` returned
        for `TRAIN_URL` and `TEST_URL`, in that order.
    """
    local_paths = []
    for url in (TRAIN_URL, TEST_URL):
        file_name = url.rsplit('/', 1)[-1]
        local_paths.append(tf.keras.utils.get_file(file_name, url))

    train_path, test_path = local_paths
    return train_path, test_path

def load_data(y_name='Species'):
    """Download the Iris CSV files and split each into features and labels.

    Args:
        y_name: Name of the column to separate out as the label.

    Returns:
        `(train_x, train_y), (test_x, test_y)`. For each file, the first item
        is the DataFrame with the `y_name` column removed and the second is
        that removed column.
    """
    def _read_split(csv_path):
        # header=0 makes pandas replace the file's own first row with
        # CSV_COLUMN_NAMES rather than treating it as data.
        frame = pd.read_csv(csv_path, names=CSV_COLUMN_NAMES, header=0)
        labels = frame.pop(y_name)  # pop() also removes the column from frame
        return frame, labels

    train_path, test_path = maybe_download()
    train_split = _read_split(train_path)
    test_split = _read_split(test_path)
    return train_split, test_split


def train_input_fn(features, labels, batch_size):
    """Build the dataset that feeds examples to the model during training.

    Args:
        features: Anything `dict()` accepts, mapping feature names to values.
        labels: Label values sliced alongside the features.
        batch_size: Number of examples per batch.

    Returns:
        The dataset of `(features, label)` slices after shuffling with a
        buffer of 1000, repeating (no count given), and batching.
    """
    feature_dict = dict(features)
    examples = tf.data.Dataset.from_tensor_slices((feature_dict, labels))

    shuffled = examples.shuffle(1000)
    repeated = shuffled.repeat()
    return repeated.batch(batch_size)


def eval_input_fn(features, labels, batch_size):
    """Build the dataset used for evaluation or prediction.

    Args:
        features: Anything `dict()` accepts, mapping feature names to values.
        labels: Label values, or None when only features are available.
        batch_size: Number of examples per batch; must not be None.

    Returns:
        The batched dataset. Its elements are feature slices alone when
        `labels` is None, and `(features, label)` slices otherwise.

    Raises:
        AssertionError: If `batch_size` is None.
    """
    feature_dict = dict(features)

    # Without labels the dataset carries only the features; with labels each
    # element pairs the features with their label.
    tensors = feature_dict if labels is None else (feature_dict, labels)
    examples = tf.data.Dataset.from_tensor_slices(tensors)

    assert batch_size is not None, "batch_size must not be None"
    return examples.batch(batch_size)


# The remainder of this file contains a simple example of a csv parser,
#     implemented using the `Dataset` class.

# `tf.decode_csv` sets the types of the outputs to match the examples given in
#     the `record_defaults` argument: one default per CSV column, in the
#     order of CSV_COLUMN_NAMES.
CSV_TYPES = [
    [0.0],  # SepalLength
    [0.0],  # SepalWidth
    [0.0],  # PetalLength
    [0.0],  # PetalWidth
    [0],    # Species
]

def _parse_line(line):
    """Turn one line of CSV text into a `(features, label)` pair.

    Args:
        line: The CSV text to decode.

    Returns:
        A tuple `(features, label)`. `features` is a dict mapping every name
        in `CSV_COLUMN_NAMES` except 'Species' to its decoded field, and
        `label` is the field that was stored under 'Species'.
    """
    # Split the line into one value per column.
    columns = tf.decode_csv(line, record_defaults=CSV_TYPES)

    # Key each value by its column name.
    by_name = {name: value for name, value in zip(CSV_COLUMN_NAMES, columns)}

    # Remove the label so that only the features remain in the dict.
    label = by_name.pop('Species')
    return by_name, label


def csv_input_fn(csv_path, batch_size):
    """Build a training dataset that reads and parses a CSV file directly.

    Args:
        csv_path: Path handed to `tf.data.TextLineDataset`.
        batch_size: Number of examples per batch.

    Returns:
        The dataset obtained by skipping the first line, mapping
        `_parse_line` over the remaining lines, then shuffling with a buffer
        of 1000, repeating (no count given), and batching.
    """
    # Read the file line by line, leaving out its first line.
    text_lines = tf.data.TextLineDataset(csv_path)
    data_lines = text_lines.skip(1)

    # Convert every remaining line into a (features, label) pair.
    parsed = data_lines.map(_parse_line)

    return parsed.shuffle(1000).repeat().batch(batch_size)


In [4]:
# Call load_data() to download and parse both CSV files, then unpack each
# split into its feature table and its label column.
train_split, test_split = load_data()
train_feature, train_label = train_split
test_feature, test_label = test_split

In [5]:
# Create one numeric feature column per measurement, keyed by the
# measurement's name.
my_feature_columns = [
    tf.feature_column.numeric_column(key=feature_name)
    for feature_name in ('SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth')
]

In [6]:
# Instantiate the premade DNNClassifier Estimator: a neural network that
# classifies examples into one of 3 classes, configured here with
# hidden_units=[10, 10] and the feature columns defined above.
hidden_layer_sizes = [10, 10]
classifier = tf.estimator.DNNClassifier(
    n_classes=3,
    hidden_units=hidden_layer_sizes,
    feature_columns=my_feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\SIATCO~1.SLA\\AppData\\Local\\Temp\\tmp852m4dl6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002192C4F6C50>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [7]:
# Instantiating the DNNClassifier only wired up the network; no data has
# flowed through it yet. Calling the Estimator's train method does the
# actual training.
batch_size = 10
train_steps = 100 # You may increase to 1000 to 10000


def _train_batches():
    """Input function for training: batches drawn from the training split."""
    return train_input_fn(train_feature, train_label, batch_size)


# Kept as the last expression so the cell still displays the returned value.
classifier.train(input_fn=_train_batches, steps=train_steps)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\SIATCO~1.SLA\AppData\Local\Temp\tmp852m4dl6\model.ckpt.
INFO:tensorflow:loss = 18.084215, step = 1
INFO:tensorflow:Saving checkpoints for 100 into C:\Users\SIATCO~1.SLA\AppData\Local\Temp\tmp852m4dl6\model.ckpt.
INFO:tensorflow:Loss for final step: 3.6797957.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x2192c4f6400>

In [8]:
# Every Estimator provides an evaluate method, called much like train.
# To assess the model fairly, the examples used for evaluation must differ
# from those used for training, so the input function below serves batches
# from the test set rather than the training set.
def _test_batches():
    """Input function for evaluation: batches drawn from the test split."""
    return eval_input_fn(test_feature, test_label, batch_size)


eval_result = classifier.evaluate(input_fn=_test_batches)
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(accuracy=eval_result['accuracy']))

INFO:tensorflow:Starting evaluation at 2018-02-23-09:10:18
INFO:tensorflow:Restoring parameters from C:\Users\SIATCO~1.SLA\AppData\Local\Temp\tmp852m4dl6\model.ckpt-100
INFO:tensorflow:Finished evaluation at 2018-02-23-09:10:18
INFO:tensorflow:Saving dict for global step 100: accuracy = 0.93333334, average_loss = 0.36960983, global_step = 100, loss = 3.6960983

Test set accuracy: 0.933



In [9]:
# Now let's use the trained model to make some predictions on unlabeled
# examples; that is, on examples that contain features but not a label.
# For now, four unlabeled examples are provided by hand, one per row:
unlabeled_examples = [
    # (SepalLength, SepalWidth, PetalLength, PetalWidth)
    (5.1, 3.3, 1.7, 0.5),
    (5.9, 3.0, 4.2, 1.5),
    (6.9, 3.1, 5.4, 2.1),
    (5.0, 3.0, 2.0, 1.0),
]

# Transpose the rows into one list of values per feature name.
predict_x = {
    name: list(column)
    for name, column in zip(
        ('SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'),
        zip(*unlabeled_examples))
}

In [10]:
# Every Estimator provides a predict method. As in premade_estimator.py, it
# is given an input function that serves the unlabeled examples (labels=None).
def _unlabeled_batches():
    """Input function for prediction: the hand-written examples, no labels."""
    return eval_input_fn(predict_x, labels=None, batch_size=10)


predictions = classifier.predict(input_fn=_unlabeled_batches)

In [11]:
# The predict method returns a Python iterable that yields one dictionary of
# prediction results per example. Each dictionary contains several keys; the
# 'probabilities' key holds three floating-point values, each being the
# probability that the input example is a particular Iris species.
# Collect the results into a list and display them:

list_predictions = [prediction for prediction in predictions]
list_predictions

INFO:tensorflow:Restoring parameters from C:\Users\SIATCO~1.SLA\AppData\Local\Temp\tmp852m4dl6\model.ckpt-100


[{'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logits': array([ 3.0198143, -1.3653761, -0.8638741], dtype=float32),
  'probabilities': array([0.96802115, 0.01206204, 0.01991684], dtype=float32)},
 {'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object),
  'logits': array([-0.60536885,  0.6419004 ,  0.04566121], dtype=float32),
  'probabilities': array([0.15629055, 0.54402   , 0.29968944], dtype=float32)},
 {'class_ids': array([2], dtype=int64),
  'classes': array([b'2'], dtype=object),
  'logits': array([-0.9931713 ,  0.62524873,  2.102529  ], dtype=float32),
  'probabilities': array([0.03552673, 0.17923632, 0.78523695], dtype=float32)},
 {'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logits': array([ 1.8591244, -0.7018992, -0.6718559], dtype=float32),
  'probabilities': array([0.86444867, 0.0667576 , 0.06879365], dtype=float32)}]

In [12]:
# The same prediction dictionaries, printed one at a time
for prediction in list_predictions:
    print('\n', prediction)


 {'logits': array([ 3.0198143, -1.3653761, -0.8638741], dtype=float32), 'probabilities': array([0.96802115, 0.01206204, 0.01991684], dtype=float32), 'class_ids': array([0], dtype=int64), 'classes': array([b'0'], dtype=object)}

 {'logits': array([-0.60536885,  0.6419004 ,  0.04566121], dtype=float32), 'probabilities': array([0.15629055, 0.54402   , 0.29968944], dtype=float32), 'class_ids': array([1], dtype=int64), 'classes': array([b'1'], dtype=object)}

 {'logits': array([-0.9931713 ,  0.62524873,  2.102529  ], dtype=float32), 'probabilities': array([0.03552673, 0.17923632, 0.78523695], dtype=float32), 'class_ids': array([2], dtype=int64), 'classes': array([b'2'], dtype=object)}

 {'logits': array([ 1.8591244, -0.7018992, -0.6718559], dtype=float32), 'probabilities': array([0.86444867, 0.0667576 , 0.06879365], dtype=float32), 'class_ids': array([0], dtype=int64), 'classes': array([b'0'], dtype=object)}


In [13]:
# Compare each prediction with the species expected for that example.
expected = ['Setosa', 'Versicolor', 'Virginica', 'Ask a botanist']
template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'

for prediction, expected_name in zip(list_predictions, expected):
    # 'class_ids' holds the predicted class; the same index selects its
    # probability and its name in SPECIES.
    class_id = prediction['class_ids'][0]
    confidence = 100 * prediction['probabilities'][class_id]
    print(template.format(SPECIES[class_id], confidence, expected_name))


Prediction is "Setosa" (96.8%), expected "Setosa"

Prediction is "Versicolor" (54.4%), expected "Versicolor"

Prediction is "Virginica" (78.5%), expected "Virginica"

Prediction is "Setosa" (86.4%), expected "Ask a botanist"


In [14]:
# This document provides a short introduction to machine learning.
# Because premade_estimator.py relies on high-level APIs, much of the mathematical complexity in machine learning is hidden.
# If you intend to become more proficient in machine learning, we recommend ultimately learning more about gradient descent,
# batching, and neural networks.

In [15]:
# Enjoy it!