In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [2]:
import os
import itertools
import collections

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [4]:
from six.moves import  urllib

In [5]:

IRIS_TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

In [6]:
DOWNLOAD_FILE_NAME_IRIS_TRAIN = "iris_training.csv"
DOWNLOAD_FILE_NAME_IRIS_TEST = "iris_test.csv"

In [7]:
def load_data(url_path,file_name):
    if not os.path.exists(file_name):
        filename,_=urllib.request.urlretrieve(url_path,file_name)

In [10]:
load_data(IRIS_TRAIN_URL,DOWNLOAD_FILE_NAME_IRIS_TRAIN)
load_data(IRIS_TEST_URL,DOWNLOAD_FILE_NAME_IRIS_TEST)

In [11]:
feature_names = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
]

In [14]:
# parsing one single line from csv
def parse_csv(line):
    
    parsed_line = tf.decode_csv(line , [[0.],[0.],[0.],[0.],[0]])
    
    label = parsed_line[-1:]
    
    del parsed_line[-1]
    
    features = dict(zip(feature_names,parsed_line))
    
    return features , label

In [15]:
def get_features_labels(filename , shuffle=False , repeat_count =1):
    
    dataset = tf.data.TextLineDataset(filename).skip(1).map(parse_csv)
    
    if shuffle:
        dataset = dataset.shuffle(buffer_size=256)
    
    dataset = dataset.repeat(repeat_count)
    
    dataset = dataset.batch(32)
    
    iterator = dataset.make_one_shot_iterator()
    
    batched_features , batched_labels = iterator.get_next()
    
    return batched_features, batched_labels
    

In [16]:
batched_features, batched_labels = get_features_labels(DOWNLOAD_FILE_NAME_IRIS_TRAIN)

In [18]:
batched_features

{'SepalLengthCm': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=float32>,
 'SepalWidthCm': <tf.Tensor 'IteratorGetNext:3' shape=(?,) dtype=float32>,
 'PetalLengthCm': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float32>,
 'PetalWidthCm': <tf.Tensor 'IteratorGetNext:1' shape=(?,) dtype=float32>}

In [19]:
batched_labels

<tf.Tensor 'IteratorGetNext:4' shape=(?, 1) dtype=int32>

In [21]:
feature_columns = [tf.feature_column.numeric_column(k) for k in feature_names]

In [22]:
feature_columns

[_NumericColumn(key='SepalLengthCm', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='SepalWidthCm', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='PetalLengthCm', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='PetalWidthCm', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]

In [65]:
classifier_model = tf.estimator.DNNClassifier(feature_columns=feature_columns, hidden_units=[34,36,34], n_classes=3)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/nr/yyxxzlcj1gqfwgbp7mn9l610q3vvqq/T/tmp6via0zws', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x11a004cc0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [66]:
classifier_model.train(input_fn=lambda: get_features_labels(DOWNLOAD_FILE_NAME_IRIS_TRAIN,shuffle=True,repeat_count=20))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/nr/yyxxzlcj1gqfwgbp7mn9l610q3vvqq/T/tmp6via0zws/model.ckpt.
INFO:tensorflow:loss = 35.011345, step = 1
INFO:tensorflow:Saving checkpoints for 75 into /var/folders/nr/yyxxzlcj1gqfwgbp7mn9l610q3vvqq/T/tmp6via0zws/model.ckpt.
INFO:tensorflow:Loss for final step: 6.7572346.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x118043898>

In [67]:
evaluate_results = classifier_model.evaluate(input_fn=lambda: get_features_labels(DOWNLOAD_FILE_NAME_IRIS_TEST,shuffle=True,repeat_count=4))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-08-13-09:47:16
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/nr/yyxxzlcj1gqfwgbp7mn9l610q3vvqq/T/tmp6via0zws/model.ckpt-75
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-08-13-09:47:17
INFO:tensorflow:Saving dict for global step 75: accuracy = 0.96666664, average_loss = 0.16223498, global_step = 75, loss = 4.867049
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 75: /var/folders/nr/yyxxzlcj1gqfwgbp7mn9l610q3vvqq/T/tmp6via0zws/model.ckpt-75


In [68]:
for result in evaluate_results:
    print("%s : %s "% (result , evaluate_results[result]))

accuracy : 0.96666664 
average_loss : 0.16223498 
loss : 4.867049 
global_step : 75 


In [69]:
predicted_results = classifier_model.predict(input_fn=lambda:get_features_labels(DOWNLOAD_FILE_NAME_IRIS_TEST,shuffle=True,repeat_count=1))

In [70]:
predicted_labels = [prediction['class_ids'][0] for prediction in predicted_results]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/nr/yyxxzlcj1gqfwgbp7mn9l610q3vvqq/T/tmp6via0zws/model.ckpt-75
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [71]:
predicted_labels

[2,
 2,
 1,
 2,
 1,
 0,
 2,
 2,
 1,
 0,
 1,
 0,
 1,
 2,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 2,
 1,
 1,
 1,
 0,
 1,
 1,
 1]

In [72]:
df = pd.read_csv(DOWNLOAD_FILE_NAME_IRIS_TEST,names=feature_names+['labels'],skiprows=1)

In [73]:
df['predicted_labels'] = predicted_labels

In [74]:
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,labels,predicted_labels
0,5.9,3.0,4.2,1.5,1,2
1,6.9,3.1,5.4,2.1,2,2
2,5.1,3.3,1.7,0.5,0,1
3,6.0,3.4,4.5,1.6,1,2
4,5.5,2.5,4.0,1.3,1,1
5,6.2,2.9,4.3,1.3,1,0
6,5.5,4.2,1.4,0.2,0,2
7,6.3,2.8,5.1,1.5,2,2
8,5.6,3.0,4.1,1.3,1,1
9,6.7,2.5,5.8,1.8,2,0
