[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/workflow_with_estimator.ipynb)

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder

In [23]:
df = pd.read_csv("https://dl.dropbox.com/s/84wg1c7055cl5ee/data.csv", sep=",")
df = df[pd.notnull(df["Name"])]
df = df[pd.notnull(df["Width"])]
df = df[pd.notnull(df["Height"])]
df.head()

Unnamed: 0,Name,Width,Height,Length,Dense
0,a small guy,10,23,12,6
1,a beautiful stage,11,24,11,7
2,the a length,4,56,15,1
3,zeros to,5,57,8,2
4,description,6,66,9,8


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name):
  """This method will prepare dataset, labels for train, test and classes count.
     Args:
      df: DataFrame format of datasource.
      label_name: The name of column in datasource which will be as target for train.
     Return: 
       Tuple of (ds_train, ds_test, y_train, y_test, class_num)
      
  """
  encoder = LabelEncoder()
  labels = encoder.fit_transform(df[label_name])
  class_num = np.max(labels) + 1
   
  random_seed = None
  np.random.seed(random_seed)
  
  ds_train = df.sample(frac=0.5, random_state=random_seed)
  lines = len(ds_train)
  label_train = labels[:lines]
  
  ds_test = df.drop(ds_train.index)
  label_test = labels[lines:]
   
  return ds_train, ds_test, label_train, label_test, class_num

In [0]:
ds_train, ds_test, y_train, y_test, class_num = make_dataset_and_labels_and_class_num(df, "Name")

In [26]:
print("class: {}".format(class_num))

class: 7


In [27]:
ds_train.head()

Unnamed: 0,Name,Width,Height,Length,Dense
6,performs better,8,88,7,8
5,can learn,7,77,14,10
2,the a length,4,56,15,1
1,a beautiful stage,11,24,11,7


In [28]:
ds_test.head()

Unnamed: 0,Name,Width,Height,Length,Dense
0,a small guy,10,23,12,6
3,zeros to,5,57,8,2
4,description,6,66,9,8


In [0]:
x_train = ds_train[["Width", "Height"]]
x_test = ds_test[["Width", "Height"]]

In [30]:
x_train.head()

Unnamed: 0,Width,Height
6,8,88
5,7,77
2,4,56
1,11,24


In [31]:
x_test.head()

Unnamed: 0,Width,Height
0,10,23
3,5,57
4,6,66


In [32]:
y_train

array([1, 0, 5, 6])

In [33]:
type(y_train)

numpy.ndarray

In [34]:
y_test

array([3, 2, 4])

In [35]:
type(y_test)

numpy.ndarray

In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Trains a linear regression model of one feature.
  
    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
 
    # Construct a dataset, and configure batching/repeating
    features = {key:np.array(value) for key,value in dict(features).items()} 
  
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    
    # Shuffle the data, if specified
    if shuffle:
      ds = ds.shuffle(buffer_size=10000)
    
    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [0]:
train_input_fn = lambda: input_fn(x_train, y_train)

In [0]:
test_input_fn =lambda: input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
STEPS = 29000  # Steps of train loop.

In [0]:
feature_cols = [
  tf.feature_column.numeric_column("Width"),
  tf.feature_column.numeric_column("Height")
]

In [41]:
# Set up a linear classifier.
model = tf.estimator.LinearClassifier(
    feature_columns = feature_cols,
    n_classes = class_num
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp4njobtu9', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2a4820db00>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [42]:
model.train(input_fn=train_input_fn, steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp4njobtu9/model.ckpt.
INFO:tensorflow:loss = 1.9459102, step = 1
INFO:tensorflow:global_step/sec: 889.1
INFO:tensorflow:loss = 0.04207887, step = 101 (0.114 sec)
INFO:tensorflow:global_step/sec: 1426.53
INFO:tensorflow:loss = 4.3899727, step = 201 (0.071 sec)
INFO:tensorflow:global_step/sec: 1467.8
INFO:tensorflow:loss = 1.7418668, step = 301 (0.068 sec)
INFO:tensorflow:global_step/sec: 1397.86
INFO:tensorflow:loss = 2.2818735, step = 401 (0.076 sec)
INFO:tensorflow:global_step/sec: 1314.72
INFO:tensorflow:loss = 0.59222823, step = 501 (0.076 sec)
INFO:tensorflow:global_step/sec: 1305.25
INFO:tensorflow:loss = 0.08641404, step = 601 (0.073 sec)
INFO:tensorflow:global_step/sec: 1330.99
INFO:tensorflow:

INFO:tensorflow:loss = 0.0054137525, step = 2801 (0.070 sec)
INFO:tensorflow:global_step/sec: 1420.92
INFO:tensorflow:loss = 0.003969291, step = 2901 (0.066 sec)
INFO:tensorflow:global_step/sec: 1399.64
INFO:tensorflow:loss = 0.21196239, step = 3001 (0.072 sec)
INFO:tensorflow:global_step/sec: 1454.4
INFO:tensorflow:loss = 0.38517135, step = 3101 (0.073 sec)
INFO:tensorflow:global_step/sec: 1367.64
INFO:tensorflow:loss = 0.004364845, step = 3201 (0.071 sec)
INFO:tensorflow:global_step/sec: 1322.81
INFO:tensorflow:loss = 0.004354875, step = 3301 (0.074 sec)
INFO:tensorflow:global_step/sec: 1416.42
INFO:tensorflow:loss = 0.36971578, step = 3401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1430.32
INFO:tensorflow:loss = 0.26231122, step = 3501 (0.070 sec)
INFO:tensorflow:global_step/sec: 1374.42
INFO:tensorflow:loss = 0.9139075, step = 3601 (0.072 sec)
INFO:tensorflow:global_step/sec: 1379.23
INFO:tensorflow:loss = 0.8252488, step = 3701 (0.075 sec)
INFO:tensorflow:global_step/sec: 1419.6

INFO:tensorflow:loss = 0.20587523, step = 5901 (0.068 sec)
INFO:tensorflow:global_step/sec: 1436.3
INFO:tensorflow:loss = 0.00212277, step = 6001 (0.072 sec)
INFO:tensorflow:global_step/sec: 1283.01
INFO:tensorflow:loss = 1.107688, step = 6101 (0.079 sec)
INFO:tensorflow:global_step/sec: 1395.12
INFO:tensorflow:loss = 2.0369956, step = 6201 (0.068 sec)
INFO:tensorflow:global_step/sec: 1398.49
INFO:tensorflow:loss = 0.4399044, step = 6301 (0.072 sec)
INFO:tensorflow:global_step/sec: 1452.56
INFO:tensorflow:loss = 0.4448483, step = 6401 (0.072 sec)
INFO:tensorflow:global_step/sec: 1361.67
INFO:tensorflow:loss = 0.0019035093, step = 6501 (0.069 sec)
INFO:tensorflow:global_step/sec: 1386.73
INFO:tensorflow:loss = 1.28102, step = 6601 (0.074 sec)
INFO:tensorflow:global_step/sec: 1407.6
INFO:tensorflow:loss = 0.15948406, step = 6701 (0.070 sec)
INFO:tensorflow:global_step/sec: 1403.36
INFO:tensorflow:loss = 0.21511513, step = 6801 (0.071 sec)
INFO:tensorflow:global_step/sec: 1422.74
INFO:ten

INFO:tensorflow:loss = 0.64655924, step = 9001 (0.069 sec)
INFO:tensorflow:global_step/sec: 1384.78
INFO:tensorflow:loss = 0.49456277, step = 9101 (0.072 sec)
INFO:tensorflow:global_step/sec: 1416.37
INFO:tensorflow:loss = 0.35537645, step = 9201 (0.071 sec)
INFO:tensorflow:global_step/sec: 1443.03
INFO:tensorflow:loss = 0.13777465, step = 9301 (0.073 sec)
INFO:tensorflow:global_step/sec: 1385.85
INFO:tensorflow:loss = 0.17232567, step = 9401 (0.071 sec)
INFO:tensorflow:global_step/sec: 1331.25
INFO:tensorflow:loss = 0.12065931, step = 9501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1485.7
INFO:tensorflow:loss = 0.13585554, step = 9601 (0.067 sec)
INFO:tensorflow:global_step/sec: 1449.31
INFO:tensorflow:loss = 0.49039876, step = 9701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1403.87
INFO:tensorflow:loss = 1.0306761, step = 9801 (0.075 sec)
INFO:tensorflow:global_step/sec: 1315.62
INFO:tensorflow:loss = 0.0016563518, step = 9901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1403.2
I

INFO:tensorflow:loss = 1.0490785, step = 12101 (0.072 sec)
INFO:tensorflow:global_step/sec: 1391.04
INFO:tensorflow:loss = 0.6593343, step = 12201 (0.070 sec)
INFO:tensorflow:global_step/sec: 1432.4
INFO:tensorflow:loss = 0.34381515, step = 12301 (0.072 sec)
INFO:tensorflow:global_step/sec: 1355.08
INFO:tensorflow:loss = 1.4760182, step = 12401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1439.97
INFO:tensorflow:loss = 0.17057714, step = 12501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1417.73
INFO:tensorflow:loss = 0.54470116, step = 12601 (0.068 sec)
INFO:tensorflow:global_step/sec: 1443.46
INFO:tensorflow:loss = 0.5733899, step = 12701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1379.91
INFO:tensorflow:loss = 0.31003684, step = 12801 (0.073 sec)
INFO:tensorflow:global_step/sec: 1413.78
INFO:tensorflow:loss = 0.5803157, step = 12901 (0.074 sec)
INFO:tensorflow:global_step/sec: 1410.5
INFO:tensorflow:loss = 0.00134973, step = 13001 (0.068 sec)
INFO:tensorflow:global_step/sec: 1396.

INFO:tensorflow:loss = 0.00092940044, step = 15201 (0.073 sec)
INFO:tensorflow:global_step/sec: 1438.8
INFO:tensorflow:loss = 0.5503292, step = 15301 (0.070 sec)
INFO:tensorflow:global_step/sec: 1408.3
INFO:tensorflow:loss = 0.09970639, step = 15401 (0.071 sec)
INFO:tensorflow:global_step/sec: 1463.42
INFO:tensorflow:loss = 1.2614028, step = 15501 (0.068 sec)
INFO:tensorflow:global_step/sec: 1443.82
INFO:tensorflow:loss = 0.09788214, step = 15601 (0.070 sec)
INFO:tensorflow:global_step/sec: 1420.08
INFO:tensorflow:loss = 0.7472572, step = 15701 (0.070 sec)
INFO:tensorflow:global_step/sec: 1485.2
INFO:tensorflow:loss = 0.0011626873, step = 15801 (0.068 sec)
INFO:tensorflow:global_step/sec: 1404.2
INFO:tensorflow:loss = 0.7182027, step = 15901 (0.071 sec)
INFO:tensorflow:global_step/sec: 1422.74
INFO:tensorflow:loss = 0.001107675, step = 16001 (0.069 sec)
INFO:tensorflow:global_step/sec: 1439.61
INFO:tensorflow:loss = 0.0010106224, step = 16101 (0.070 sec)
INFO:tensorflow:global_step/sec

INFO:tensorflow:loss = 0.19825222, step = 18301 (0.072 sec)
INFO:tensorflow:global_step/sec: 1437.12
INFO:tensorflow:loss = 0.41012508, step = 18401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1432.7
INFO:tensorflow:loss = 0.59428847, step = 18501 (0.070 sec)
INFO:tensorflow:global_step/sec: 1410.43
INFO:tensorflow:loss = 0.0009423821, step = 18601 (0.071 sec)
INFO:tensorflow:global_step/sec: 1403.03
INFO:tensorflow:loss = 0.0009924016, step = 18701 (0.071 sec)
INFO:tensorflow:global_step/sec: 1395.13
INFO:tensorflow:loss = 0.58220863, step = 18801 (0.071 sec)
INFO:tensorflow:global_step/sec: 1380.62
INFO:tensorflow:loss = 0.09800557, step = 18901 (0.072 sec)
INFO:tensorflow:global_step/sec: 1426.11
INFO:tensorflow:loss = 0.0010537315, step = 19001 (0.070 sec)
INFO:tensorflow:global_step/sec: 1405.42
INFO:tensorflow:loss = 0.0008721124, step = 19101 (0.071 sec)
INFO:tensorflow:global_step/sec: 1416.69
INFO:tensorflow:loss = 0.092276715, step = 19201 (0.070 sec)
INFO:tensorflow:global_

INFO:tensorflow:loss = 0.83300793, step = 21401 (0.075 sec)
INFO:tensorflow:global_step/sec: 1413.92
INFO:tensorflow:loss = 0.078834444, step = 21501 (0.071 sec)
INFO:tensorflow:global_step/sec: 1453.28
INFO:tensorflow:loss = 0.8091438, step = 21601 (0.069 sec)
INFO:tensorflow:global_step/sec: 1283.2
INFO:tensorflow:loss = 0.67621416, step = 21701 (0.081 sec)
INFO:tensorflow:global_step/sec: 1362.59
INFO:tensorflow:loss = 0.16096471, step = 21801 (0.071 sec)
INFO:tensorflow:global_step/sec: 1327.5
INFO:tensorflow:loss = 1.0974065, step = 21901 (0.077 sec)
INFO:tensorflow:global_step/sec: 1395.96
INFO:tensorflow:loss = 0.4521209, step = 22001 (0.069 sec)
INFO:tensorflow:global_step/sec: 1465.53
INFO:tensorflow:loss = 0.076141074, step = 22101 (0.071 sec)
INFO:tensorflow:global_step/sec: 1332.14
INFO:tensorflow:loss = 0.0009837078, step = 22201 (0.072 sec)
INFO:tensorflow:global_step/sec: 1411.71
INFO:tensorflow:loss = 1.3467596, step = 22301 (0.071 sec)
INFO:tensorflow:global_step/sec: 

INFO:tensorflow:loss = 0.00072262395, step = 24501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1254.95
INFO:tensorflow:loss = 0.06720382, step = 24601 (0.080 sec)
INFO:tensorflow:global_step/sec: 1296.82
INFO:tensorflow:loss = 0.29872674, step = 24701 (0.079 sec)
INFO:tensorflow:global_step/sec: 1331.6
INFO:tensorflow:loss = 0.51530164, step = 24801 (0.073 sec)
INFO:tensorflow:global_step/sec: 1336.1
INFO:tensorflow:loss = 0.50854117, step = 24901 (0.075 sec)
INFO:tensorflow:global_step/sec: 1318.79
INFO:tensorflow:loss = 0.6529239, step = 25001 (0.076 sec)
INFO:tensorflow:global_step/sec: 1379.86
INFO:tensorflow:loss = 0.31278843, step = 25101 (0.072 sec)
INFO:tensorflow:global_step/sec: 1400.17
INFO:tensorflow:loss = 0.61988264, step = 25201 (0.072 sec)
INFO:tensorflow:global_step/sec: 1363.42
INFO:tensorflow:loss = 0.06508961, step = 25301 (0.073 sec)
INFO:tensorflow:global_step/sec: 1306.48
INFO:tensorflow:loss = 0.00074025407, step = 25401 (0.079 sec)
INFO:tensorflow:global_step/

INFO:tensorflow:loss = 0.8875049, step = 27601 (0.071 sec)
INFO:tensorflow:global_step/sec: 1438.6
INFO:tensorflow:loss = 0.09224899, step = 27701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1376.21
INFO:tensorflow:loss = 0.5841892, step = 27801 (0.073 sec)
INFO:tensorflow:global_step/sec: 1365.71
INFO:tensorflow:loss = 0.9635234, step = 27901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1389.7
INFO:tensorflow:loss = 0.065546796, step = 28001 (0.076 sec)
INFO:tensorflow:global_step/sec: 1366.63
INFO:tensorflow:loss = 0.00073036697, step = 28101 (0.069 sec)
INFO:tensorflow:global_step/sec: 1343.97
INFO:tensorflow:loss = 0.47682518, step = 28201 (0.075 sec)
INFO:tensorflow:global_step/sec: 1384.34
INFO:tensorflow:loss = 0.060680047, step = 28301 (0.074 sec)
INFO:tensorflow:global_step/sec: 1474.25
INFO:tensorflow:loss = 0.06758372, step = 28401 (0.066 sec)
INFO:tensorflow:global_step/sec: 1363.87
INFO:tensorflow:loss = 0.535323, step = 28501 (0.073 sec)
INFO:tensorflow:global_step/sec: 

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7f2a4820d7b8>

In [43]:
# Use it to predict.
test = model.evaluate(steps=STEPS, input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-26-07:45:41
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp4njobtu9/model.ckpt-29000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-26-07:45:42
INFO:tensorflow:Saving dict for global step 29000: accuracy = 0.0, average_loss = 15.7104645, global_step = 29000, loss = 15.7104645
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 29000: /tmp/tmp4njobtu9/model.ckpt-29000
