## General steps:
    1. Write dataset importing function(s).
    2. Define the feature columns.
    3. Instantiate the Estimator.
    4. Call a training, evaluation, or inference method.

In [None]:
import tensorflow as tf

import pandas as pd

In [None]:
# Bucketize the numeric 'Year' column at the 1960 / 1980 / 2000 boundaries.
numeric_feature_column = tf.feature_column.numeric_column(key='Year')
bucketized_feature_column = tf.feature_column.bucketized_column(
    source_column=numeric_feature_column,
    boundaries=[1960, 1980, 2000],
)

In [None]:
# Categorical identity column (one-hot code) for the integer feature
# 'my_feature', declared with 4 buckets.
identity_feature_column = tf.feature_column.categorical_column_with_identity(
    key='my_feature',
    num_buckets=4,
)

In [None]:
# Vocabulary columns for string features.
# Variant 1: the vocabulary is given inline as a list.
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_list(
    key='feature_name',
    vocabulary_list=['apple', 'cake', 'dog'],
)

# Variant 2: the vocabulary is read from a file. This rebinds the same name,
# so the list-based column created just above is no longer referenced.
vocabulary_feature_column = tf.feature_column.categorical_column_with_vocabulary_file(
    key='feature_name',
    vocabulary_file='file_name',
)


In [None]:
# Hashed column: hash_bucket_size is the number of categories.
hashed_feature_column = tf.feature_column.categorical_column_with_hash_bucket(
    key='some_name',
    hash_bucket_size=100,
)

In [None]:
# Crossed column (interaction term of two categorical variables).
# crossed_column() requires both the keys to cross and a hash_bucket_size;
# calling it with no arguments raises a TypeError. The keys below are the
# placeholder string-feature names used elsewhere in this notebook, and the
# bucket count is an illustrative value.
crossed_feature_column = tf.feature_column.crossed_column(
    keys=['feature_name', 'some_name'],
    hash_bucket_size=1000,
)

In [None]:
# Indicator column: wraps a categorical column. indicator_column() requires
# that categorical column as its argument; calling it with no arguments
# raises a TypeError. The wrapped column is built inline so this cell does
# not depend on any other cell's variables.
indicator_feature_column = tf.feature_column.indicator_column(
    categorical_column=tf.feature_column.categorical_column_with_vocabulary_list(
        key='feature_name',
        vocabulary_list=['apple', 'cake', 'dog'],
    ),
)

# Dataset for Estimator

In [None]:
import sys

# NOTE(review): hard-coded absolute path on the author's machine; it has to be
# adjusted before this cell can run anywhere else.
sys.path.append('/home/chsu/workspace/tensorflow/models/samples/core/get_started')

# Imported after the sys.path change above; presumably iris_data lives in the
# directory that was just appended -- confirm.
import iris_data

In [16]:
# load_data() yields a (train, test) pair; the train part is itself unpacked
# into the features and their labels.
train, test = iris_data.load_data()
features, labels = train

In [18]:
# Slice the feature columns (converted to a plain dict) into a dataset.
dataset = tf.data.Dataset.from_tensor_slices(tensors=dict(features))
print(dataset)

<TensorSliceDataset shapes: {SepalLength: (), SepalWidth: (), PetalLength: (), PetalWidth: ()}, types: {SepalLength: tf.float64, SepalWidth: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}>


In [20]:
# Same slicing as before, but each element is now a (features dict, label) pair.
dataset = tf.data.Dataset.from_tensor_slices(
    tensors=(dict(features), labels),
)
print(dataset)

<TensorSliceDataset shapes: ({SepalLength: (), SepalWidth: (), PetalLength: (), PetalWidth: ()}, ()), types: ({SepalLength: tf.float64, SepalWidth: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>


In [21]:
# Shuffle with a buffer of 1000, repeat, then group into batches of 32.
# Note: this rebinds `dataset`, so re-running the cell batches the already
# batched dataset again.
dataset = (
    dataset
    .shuffle(1000)
    .repeat()
    .batch(32)
)
print(dataset)

<BatchDataset shapes: ({SepalLength: (?,), SepalWidth: (?,), PetalLength: (?,), PetalWidth: (?,)}, (?,)), types: ({SepalLength: tf.float64, SepalWidth: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>


In [22]:
# Build a dataset by reading a csv file
# maybe_download() yields the paths of the training and test CSV files
# (presumably downloading them first if they are not cached -- confirm).
train_path, test_path = iris_data.maybe_download()
print(train_path)
print(test_path)

/home/chsu/.keras/datasets/iris_training.csv
/home/chsu/.keras/datasets/iris_test.csv


In [27]:
# Read the training CSV one line at a time, skipping the first line
# (presumably the header row -- confirm against the file).
ds = (
    tf.data.TextLineDataset(train_path)
    .skip(1)
)

In [None]:
# tf.decode_csv() requires `records` and `record_defaults`; calling it with no
# arguments raises a TypeError. It is wrapped in a per-line parser here and
# mapped over the text-line dataset instead.
# NOTE(review): assumes each CSV line holds the four float features in this
# order followed by an integer label -- confirm against the CSV header.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'label']
CSV_FIELD_DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0]]


def parse_csv_line(line):
    """Decode one CSV line into a (features dict, label) pair."""
    fields = tf.decode_csv(line, record_defaults=CSV_FIELD_DEFAULTS)
    row = dict(zip(CSV_COLUMN_NAMES, fields))
    label = row.pop('label')
    return row, label


parsed_ds = ds.map(parse_csv_line)

# Custom Estimator

In [29]:
def my_model(features, labels, mode, params):
    """Model function for a custom Estimator: a stack of dense layers ending in class logits.

    Args:
        features: Features passed to `tf.feature_column.input_layer`.
        labels: Class labels used for the loss and the accuracy metric.
            Not read in PREDICT mode.
        mode: One of the `tf.estimator.ModeKeys` values (PREDICT, EVAL, TRAIN).
        params: Dict with the keys
            'feature_columns' - feature columns for the input layer,
            'hidden_units'    - iterable of hidden-layer sizes,
            'n_classes'       - number of output logits,
            'learning_rate'   - optional Adagrad learning rate (default 0.1).

    Returns:
        The `tf.estimator.EstimatorSpec` for the requested mode.

    Raises:
        AssertionError: if `mode` is not PREDICT, EVAL or TRAIN.
    """
    # 1. Define the input layer
    net = tf.feature_column.input_layer(features, params['feature_columns'])

    # 2. Make hidden layers
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)

    # 3. Make the output layer (logits, one per class)
    logits = tf.layers.dense(net, units=params['n_classes'], activation=None)

    # Prediction: returns before anything touches `labels`.
    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Compute loss
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    # Evaluation
    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=predicted_classes, name='acc_op')
    metrics = {'accuracy': accuracy}
    # accuracy is indexable; element 1 is the one logged as the summary scalar.
    tf.summary.scalar('accuracy', accuracy[1])

    if mode == tf.estimator.ModeKeys.EVAL:
        # `loss` must be passed by keyword: the previous `loss==loss` evaluated
        # to a boolean that was handed over positionally as `predictions`,
        # leaving the spec without a loss.
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    # Training
    assert mode == tf.estimator.ModeKeys.TRAIN

    optimizer = tf.train.AdagradOptimizer(
        learning_rate=params.get('learning_rate', 0.1))
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    
    
    