## Getting started with Graph Execution
link: https://www.tensorflow.org/get_started/get_started_for_beginners

In [2]:
import tensorflow as tf
import pandas as pd

ImportError: No module named 'pandas'

## Create the train/test feature/labels

In [3]:
# Remote locations of the Iris training and test splits.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Column headers applied when parsing the CSVs: four numeric features
# followed by the label column.
CSV_COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

In [4]:
def load_data(label_name = 'Species'):
    """Download the Iris train/test CSVs and split each into features and labels.

    Args:
        label_name: Name of the column to separate out as the label
            (defaults to 'Species').

    Returns:
        ((train_features, train_label), (test_features, test_label)).
        Each *_features value is the parsed table with the label column
        removed; each *_label value is that removed column.
    """
    # Keep a local copy of the training CSV. get_file returns the local
    # pathname, which is printed so the cache location is visible.
    train_path = tf.keras.utils.get_file(fname=TRAIN_URL.rsplit('/', 1)[-1], origin=TRAIN_URL)
    print(train_path)

    # names= supplies the column headers; header=0 tells pandas that the
    # file's own first row is a header line to be replaced, not data.
    train_features = pd.read_csv(filepath_or_buffer=train_path,
                                 names=CSV_COLUMN_NAMES,
                                 header=0)
    # pop() removes the label column in place and returns it, so
    # train_features is left holding only the feature columns.
    train_label = train_features.pop(label_name)

    # Same download / parse / split sequence for the test set.
    test_path = tf.keras.utils.get_file(TEST_URL.rsplit('/', 1)[-1], TEST_URL)
    test_features = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
    test_label = test_features.pop(label_name)

    return (train_features, train_label), (test_features, test_label)

In [None]:
# load_data() downloads and parses both CSVs and returns one
# (features, label) pair per split; unpack the two pairs here.
(train_feature, train_label), (test_feature, test_label) = load_data(label_name = 'Species')

In [None]:
# Show the container type of the training features and their first four rows.
print(type(train_feature), train_feature[:4], sep='\n')

In [None]:
# Show the container type of the training labels and their first four entries.
print(type(train_label), train_label[:4], sep='\n')

## Define a tensorflow estimator class
To define a tensorflow estimator class, feature_columns need to be created according to the dataset.

In [5]:
# Build one numeric feature column per column of the training features.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train_feature.keys()
]

# With the four Iris feature columns this is the same as writing:
# my_feature_columns = [
#     tf.feature_column.numeric_column(key='SepalLength'),
#     tf.feature_column.numeric_column(key='SepalWidth'),
#     tf.feature_column.numeric_column(key='PetalLength'),
#     tf.feature_column.numeric_column(key='PetalWidth')
# ]

NameError: name 'train_feature' is not defined

In [None]:
# The estimator defined below takes these feature columns as input, so
# inspect the list's type and contents first.
print(type(my_feature_columns), my_feature_columns, sep='\n')

In [None]:
# Deep neural network classifier over the feature columns built above.
# hidden_units gives the size of each hidden layer; n_classes is the
# number of distinct label values to predict.
classifier = tf.estimator.DNNClassifier(
    feature_columns = my_feature_columns,
    hidden_units = [10, 10],
    n_classes = 3,
)

In [40]:
# Inspect the estimator object that was just created.
print(type(classifier), classifier, sep='\n')

<class 'tensorflow.python.estimator.canned.dnn.DNNClassifier'>
<tensorflow.python.estimator.canned.dnn.DNNClassifier object at 0x0000022ADB77D0F0>


In [57]:
def train_input_fn(features, labels, batch_size):
    """Build the shuffled, endlessly repeating, batched input for training.

    Args:
        features: Object convertible with dict() into a mapping of
            feature name -> values (e.g. the features table from load_data).
        labels: Labels aligned row-for-row with `features`.
        batch_size: Number of examples per batch.

    Returns:
        The get_next() result of a one-shot iterator over the batched dataset.
    """
    # tf.data.Dataset is TensorFlow's high-level API for reading data and
    # reshaping it into the form the train method consumes. Slice the
    # (features, labels) pair into one dataset element per example.
    sliced = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    print(type(sliced), sliced, sep='\n')

    # shuffle: training works best on randomly ordered examples; a buffer
    #   larger than the number of examples (120 in the original notes)
    #   means the data is well shuffled.
    # repeat: called without arguments, it gives the train method an
    #   unlimited supply of (now shuffled) examples.
    # batch: group the stream into batches of `batch_size`.
    shuffled = sliced.shuffle(1000)
    repeated = shuffled.repeat()
    batched = repeated.batch(batch_size)
    print(type(batched), batched, sep='\n')

    # Hand back the read end of the pipeline.
    return batched.make_one_shot_iterator().get_next()

In [58]:
# Call the input function once directly to see the dataset descriptions it prints.
a = train_input_fn(features = train_feature, labels = train_label, batch_size = 100)

<class 'tensorflow.python.data.ops.dataset_ops.TensorSliceDataset'>
<TensorSliceDataset shapes: ({SepalWidth: (), SepalLength: (), PetalLength: (), PetalWidth: ()}, ()), types: ({SepalWidth: tf.float64, SepalLength: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>
<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>
<BatchDataset shapes: ({SepalWidth: (?,), SepalLength: (?,), PetalWidth: (?,), PetalLength: (?,)}, (?,)), types: ({SepalWidth: tf.float64, SepalLength: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>


## One-shot iterator example
A one-shot iterator is the simplest form of iterator, which only supports iterating once through a dataset, with no need for explicit initialization.

In [77]:
# Example to understand iterators: draw batches of 10 from a dataset that
# repeats range(100) indefinitely.
dataset = tf.data.Dataset.range(100)
print(dataset)
iterator = dataset.repeat().batch(10).make_one_shot_iterator()
print(iterator)
next_element = iterator.get_next()

# Open the session in a `with` block so it is closed (and its resources
# released) once the loop finishes, instead of being left open in the kernel.
with tf.Session() as sess:
    for i in range(15):
        # Each run() pulls the next batch from the iterator.
        value = sess.run(next_element)
        # value type: <class 'numpy.ndarray'>

        print(value)

<RangeDataset shapes: (), types: tf.int64>
<tensorflow.python.data.ops.iterator_ops.Iterator object at 0x0000022CF3271CF8>
[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24 25 26 27 28 29]
[30 31 32 33 34 35 36 37 38 39]
[40 41 42 43 44 45 46 47 48 49]
[50 51 52 53 54 55 56 57 58 59]
[60 61 62 63 64 65 66 67 68 69]
[70 71 72 73 74 75 76 77 78 79]
[80 81 82 83 84 85 86 87 88 89]
[90 91 92 93 94 95 96 97 98 99]
[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24 25 26 27 28 29]
[30 31 32 33 34 35 36 37 38 39]
[40 41 42 43 44 45 46 47 48 49]


## Train the estimator

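In the calls below, `lambda` is the Python keyword for an anonymous function (not the Keras `Lambda` layer, which wraps an arbitrary expression as a Layer object). `lambda: train_input_fn(...)` creates a zero-argument callable that the estimator can invoke later to build its input.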

The first argument of the estimator's `train` method, input_fn, is a function that provides input data for training as minibatches. The function should construct and return one of the following:

A 'tf.data.Dataset' object: Outputs of Dataset object must be a tuple (features, labels) with same constraints as below.

A tuple (features, labels): Where features is a Tensor or a dictionary of string feature name to Tensor and labels is a Tensor or a dictionary of string label name to Tensor. Both features and labels are consumed by model_fn. They should satisfy  the expectation of model_fn from inputs.


In [1]:
# The estimator invokes input_fn itself and does not supply features,
# labels or batch_size, so passing train_input_fn directly cannot work.
# Bind those arguments in a zero-argument lambda instead.
classifier.train(input_fn = lambda: train_input_fn(train_feature, train_label, 100), steps = 1000)

NameError: name 'classifier' is not defined

In [82]:
# Train for 1000 steps. The lambda defers the call so the estimator can
# build the input pipeline itself, with batches of 100 examples.
classifier.train(
    input_fn = lambda: train_input_fn(train_feature, train_label, 100),
    steps = 1000,
)

<class 'tensorflow.python.data.ops.dataset_ops.TensorSliceDataset'>
<TensorSliceDataset shapes: ({SepalWidth: (), SepalLength: (), PetalLength: (), PetalWidth: ()}, ()), types: ({SepalWidth: tf.float64, SepalLength: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>
<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>
<BatchDataset shapes: ({SepalWidth: (?,), SepalLength: (?,), PetalWidth: (?,), PetalLength: (?,)}, (?,)), types: ({SepalWidth: tf.float64, SepalLength: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Siavash\AppData\Local\Temp\tmpoudvk1l0\model.ckpt-4000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 4001 into C:\Users\Siavash\AppData\Local\T

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x22adb77d0f0>

## Evaluate the estimator

In [121]:
def eval_input_fn(features, labels = None, batch_size = None):
    """Build the batched input for evaluation or prediction.

    Args:
        features: Object convertible with dict() into a mapping of
            feature name -> values.
        labels: Labels aligned with `features`, or None when only
            predictions are wanted.
        batch_size: Number of examples per batch; must not be None.

    Returns:
        The get_next() result of a one-shot iterator over the batched dataset.

    Raises:
        AssertionError: If `batch_size` is None.
    """
    feature_map = dict(features)
    # Without labels the dataset carries features only; with labels each
    # element is a (features, label) pair.
    inputs = feature_map if labels is None else (feature_map, labels)

    # Turn the inputs into a tf.data.Dataset, one element per example.
    unbatched = tf.data.Dataset.from_tensor_slices(inputs)

    # batch_size defaults to None only so that labels can be skipped by
    # keyword; a real value is still required.
    assert batch_size is not None, "batch_size must not be None"
    batched = unbatched.batch(batch_size)

    # Hand back the read end of the pipeline.
    return batched.make_one_shot_iterator().get_next()

## ??? inputs IS A TUPLE PAIRING A DICTIONARY BUILT FROM A PANDAS DATAFRAME WITH A PANDAS SERIES
## ??? WHAT SHOULD THE TYPE OF inputs BE FOR tf.data.Dataset.from_tensor_slices(inputs)?
## ??? WHAT IS lambda IN classifier.evaluate/train(input_fn = lambda: ... )?

In [122]:
# Call the evaluation input function once directly, with batches of 4.
a = eval_input_fn(test_feature, test_label, 4)

In [123]:
# Score the trained model on the test split. The lambda defers the call
# so the estimator builds the evaluation input itself.
eval_result = classifier.evaluate(
    input_fn = lambda: eval_input_fn(test_feature, test_label, 100)
)

# eval_result is a dict of metrics; report its 'accuracy' entry.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-08-19:37:59
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Siavash\AppData\Local\Temp\tmpoudvk1l0\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-08-19:37:59
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.96666664, average_loss = 0.071171835, global_step = 5000, loss = 2.135155

Test set accuracy: 0.967



## Predict with the estimator

In [136]:
# Species expected for the three hand-written samples below, in order.
expected = ['Setosa', 'Versicolor', 'Virginica']

# One list per feature; position i across the four lists is sample i.
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth':  [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth':  [0.5, 1.5, 2.1],
}

# Confirm the container types: a dict of features and a list of labels.
print(type(predict_x), type(expected), sep='\n')

<class 'dict'>
<class 'list'>


In [137]:
# No labels are passed, so eval_input_fn builds a features-only dataset.
# predict() returns an iterable of per-example prediction dicts.
predictions = classifier.predict(
    input_fn = lambda: eval_input_fn(predict_x, labels = None, batch_size = 100)
)

The predict method returns a python iterable, yielding a dictionary of prediction results for each example. This dictionary contains several keys. The probabilities key holds a list of three floating-point values, each representing the probability that the input example is a particular Iris species.

The class_ids key holds a one-element array that identifies the most probable species.

The probabilities key holds a list of three floating-point values, each representing the probability that the input example is a particular Iris species.

In [138]:
# Class id -> species name, used to turn predicted ids into readable labels.
SPECIES = ['Setosa', 'Versicolor', 'Virginica']
template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'

# Pair each prediction with the species expected for that sample.
for prediction, true_species in zip(predictions, expected):
    # 'class_ids' holds a one-element array with the most probable class;
    # 'probabilities' holds one probability per class.
    top_class = prediction['class_ids'][0]
    confidence_pct = 100 * prediction['probabilities'][top_class]
    print(template.format(SPECIES[top_class], confidence_pct, true_species))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Siavash\AppData\Local\Temp\tmpoudvk1l0\model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.

Prediction is "Setosa" (99.8%), expected "Setosa"

Prediction is "Versicolor" (100.0%), expected "Versicolor"

Prediction is "Virginica" (99.9%), expected "Virginica"
