In [None]:
# Load libraries and pick the urlopen import that matches the running Python version
import tensorflow as tf
import os
import sys

if sys.version_info < (3, 0, 0):
    from urllib import urlopen
else:
    from urllib.request import urlopen

In [None]:
# Check that we have a recent enough TensorFlow version installed.
# The comparison is done on integers: as plain strings "1.10.0" sorts before
# "1.3", so a lexicographic check would wrongly reject TensorFlow 1.10+.
tf_version = tf.__version__
print("TensorFlow version: {}".format(tf_version))
# Only major and minor matter here; any patch/suffix part is ignored.
tf_major_minor = tuple(int(part) for part in tf_version.split(".")[:2])
assert tf_major_minor >= (1, 3), "TensorFlow r1.3 or later is needed"

In [None]:
# Windows users: only PATH needs changing, everything else is platform independent
PATH = "../../../practise/tf_dataset_and_estimator_spi"

# Remote locations of the Iris training and test CSV files
URL_TRAIN = "http://download.tensorflow.org/data/iris_training.csv"
URL_TEST = "http://download.tensorflow.org/data/iris_test.csv"

# Local copies of those files live in a "dataset" folder below PATH
PATH_DATASET = os.path.join(PATH, "dataset")
FILE_TRAIN = os.path.join(PATH_DATASET, "iris_training.csv")
FILE_TEST = os.path.join(PATH_DATASET, "iris_test.csv")

In [None]:
# Download the dataset
def downloadDataset(url, path):
    """Download ``url`` to ``path`` unless a file already exists at ``path``.

    Args:
        url: Address handed to ``urlopen``.
        path: Destination file. Missing parent directories are created.
    """
    if os.path.exists(path):
        return  # Already downloaded; leave the existing file untouched

    # Create the folder the file actually lives in instead of assuming that
    # every download targets the global dataset directory.
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    response = urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()  # Release the connection even if reading fails

    # The with block closes the file, so no explicit close() is needed
    with open(path, "wb") as f:
        f.write(data)

# Fetch the training file first, then the test file (each is skipped when already present)
for dataset_url, dataset_file in ((URL_TRAIN, FILE_TRAIN), (URL_TEST, FILE_TEST)):
    downloadDataset(dataset_url, dataset_file)

In [None]:
# Set the threshold for which log messages are recorded (INFO level here)
tf.logging.set_verbosity(tf.logging.INFO)

In [None]:
# Names of the feature columns in the training & test CSV files, in file order
feature_names = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']

In [None]:
# Create an input function reading a file using the Dataset API
# Then provide the results to the Estimator API
def my_input_fn(file_path, perform_shuffle=False, repeat_count=1,
                batch_size=32, shuffle_buffer_size=256):
    """Build tensors that stream (features, label) batches from a CSV file.

    Args:
        file_path: CSV file to read. The first line is skipped as a header;
            every other line is parsed as four float columns followed by
            one integer label column.
        perform_shuffle: When true, shuffle the examples through a buffer of
            ``shuffle_buffer_size`` elements.
        repeat_count: Value passed to ``Dataset.repeat``.
        batch_size: Number of examples per batch (32 by default).
        shuffle_buffer_size: Size of the in-memory shuffle window
            (256 by default).

    Returns:
        A ``(batch_features, batch_labels)`` pair taken from a one-shot
        iterator: a dict keyed by ``feature_names`` and the label tensor.
    """
    def decode_csv(line):
        # The record defaults fix both column count and dtypes:
        # four float features followed by an int label.
        parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        # Everything but the last element is a feature; the last is the label
        features, label = parsed_line[:-1], parsed_line[-1]
        return dict(zip(feature_names, features)), label

    dataset = (tf.data.TextLineDataset(file_path)  # Read text file
               .skip(1)  # Skip header row
               .map(decode_csv))  # Transform each elem by applying decode_csv fn
    if perform_shuffle:
        # Randomizes input using a window of elements read into memory
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.repeat(repeat_count)  # Repeats dataset this # times
    dataset = dataset.batch(batch_size)  # Batch size to use
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels

# Build the input tensors for the training file with shuffling enabled and a
# single pass over the data; my_input_fn returns a (features, labels) pair of
# tensors produced from batches of 32 examples
next_batch = my_input_fn(FILE_TRAIN, perform_shuffle=True, repeat_count=1)

In [None]:
# Describe the model inputs: every feature is numeric, so each name in
# feature_names is turned into its own numeric_column
feature_columns = list(map(tf.feature_column.numeric_column, feature_names))

In [None]:
# Create a deep neural network classifier
# Use the DNNClassifier pre-made estimator
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns, # The input features to our model
    hidden_units=[10, 10], # Two layers, each with 10 neurons
    n_classes=3, # Three possible output classes
    model_dir=PATH) # Path to where checkpoints etc. are stored

In [None]:
# Train our model, using the previously defined function my_input_fn
# Input to training is a file with training examples
# Shuffling is enabled and the training data is repeated 8 times (epochs)
classifier.train(input_fn=lambda: my_input_fn(FILE_TRAIN, True, 8))

In [None]:
# Evaluate the model on the examples stored in FILE_TEST (no shuffling, one pass).
# The returned mapping holds evaluation metrics such as loss & average_loss.
evaluate_result = classifier.evaluate(input_fn=lambda: my_input_fn(FILE_TEST, False, 1))
print("Evaluation results: ")
for metric_name, metric_value in evaluate_result.items():
    print("  {}, was: {}".format(metric_name, metric_value))

In [None]:
# Predict the type of some Iris flowers.
# Let's predict the examples in FILE_TEST, repeating only once
predict_results = classifier.predict(
    input_fn=lambda: my_input_fn(FILE_TEST, False, 1))
print("Predictions on test file: ")
for prediction in predict_results:
    # Will print the predicted class, i.e: 0, 1, or 2 if the prediction
    # is Iris Setosa, Versicolor, Virginica, respectively
    print(prediction["class_ids"][0])
    

In [None]:
# In-memory examples to predict on, one row of four feature values per flower
# (taken from the first 3 examples in FILE_TEST)
prediction_input = [
    [5.9, 3.0, 4.2, 1.5],  # -> 1, Iris Versicolor
    [6.9, 3.1, 5.4, 2.1],  # -> 2, Iris Virginica
    [5.1, 3.3, 1.7, 0.5],  # -> 0, Iris Setosa
]

In [None]:
def new_input_fn():
    """Input function that feeds the in-memory ``prediction_input`` rows.

    Returns:
        A ``(features, None)`` pair: a dict keyed by ``feature_names`` taken
        from a one-shot iterator, and ``None`` because prediction has no labels.
    """
    def split_into_features(row):
        # Each row carries 4 values; split them so every feature gets its own tensor
        columns = tf.split(row, 4)
        return dict(zip(feature_names, columns))

    rows = tf.data.Dataset.from_tensor_slices(prediction_input)
    feature_iterator = rows.map(split_into_features).make_one_shot_iterator()
    return feature_iterator.get_next(), None

# Predict every row of prediction_input, using new_input_fn to feed the
# in-memory examples to the trained classifier
predict_results = classifier.predict(input_fn=new_input_fn)

In [None]:
# Print results
# Species names indexed by class id (the classifier is built with n_classes=3).
# A lookup replaces the if/elif chain and avoids binding the name `type`,
# which would shadow the builtin for every cell run afterwards.
species_names = ("Iris Setosa", "Iris Versicolor", "Iris Virginica")
print("Predictions on memory")
# Pair each input row with its prediction instead of indexing by position
for features, prediction in zip(prediction_input, predict_results):
    class_id = prediction["class_ids"][0]  # Get the predicted class (index)
    print("I think: {}, is {}".format(features, species_names[class_id]))

In [None]:
!tensorboard --logdir=../../../practise/tf_dataset_and_estimator_spi