[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/workflow_with_estimator.ipynb)

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder

In [67]:
df = pd.read_csv("https://dl.dropbox.com/s/84wg1c7055cl5ee/data.csv", sep=",")
df = df[pd.notnull(df["Name"])]
df = df[pd.notnull(df["Width"])]
df = df[pd.notnull(df["Height"])]
df.head()

Unnamed: 0,Name,Width,Height,Length,Dense
0,a small guy,10,23,12,6
1,a beautiful stage,11,24,11,7
2,the a length,4,56,15,1
3,zeros to,5,57,8,2
4,description,6,66,9,8


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name):
  """This method will prepare dataset, labels for train, test and classes count.
     Args:
      df: DataFrame format of datasource.
      label_name: The name of column in datasource which will be as target for train.
     Return: 
       Tuple of (ds_train, ds_test, y_train, y_test, class_num)
      
  """
  encoder = LabelEncoder()
  labels = encoder.fit_transform(df[label_name])
  class_num = np.max(labels) + 1
   
  random_seed = None
  np.random.seed(random_seed)
  
  ds_train = df.sample(frac=0.5, random_state=random_seed)
  lines = len(ds_train)
  label_train = labels[:lines]
  
  ds_test = df.drop(ds_train.index)
  label_test = labels[lines:]
   
  return ds_train, ds_test, label_train, label_test, class_num

In [0]:
ds_train, ds_test, y_train, y_test, class_num = make_dataset_and_labels_and_class_num(df, "Name")

In [70]:
print("class: {}".format(class_num))

class: 7


In [71]:
ds_train.head()

Unnamed: 0,Name,Width,Height,Length,Dense
4,description,6,66,9,8
5,can learn,7,77,14,10
2,the a length,4,56,15,1
1,a beautiful stage,11,24,11,7


In [72]:
ds_test.head()

Unnamed: 0,Name,Width,Height,Length,Dense
0,a small guy,10,23,12,6
3,zeros to,5,57,8,2
6,performs better,8,88,7,8


In [0]:
x_train = ds_train[["Width", "Height"]]
x_test = ds_test[["Width", "Height"]]

In [74]:
x_train.head()

Unnamed: 0,Width,Height
4,6,66
5,7,77
2,4,56
1,11,24


In [75]:
x_test.head()

Unnamed: 0,Width,Height
0,10,23
3,5,57
6,8,88


In [76]:
y_train

array([1, 0, 5, 6])

In [77]:
type(y_train)

numpy.ndarray

In [78]:
y_test

array([3, 2, 4])

In [79]:
type(y_test)

numpy.ndarray

In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Trains a linear regression model of one feature.
  
    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
 
    # Construct a dataset, and configure batching/repeating
    features = {key:np.array(value) for key,value in dict(features).items()} 
  
    ds = Dataset.from_tensor_slices((features, targets))
    ds = ds.batch(batch_size).repeat(num_epochs)
    
    # Shuffle the data, if specified
    if shuffle:
      ds = ds.shuffle(buffer_size=10000)
    
    # Return the next batch of data
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels

In [0]:
train_input_fn = lambda: input_fn(x_train, y_train)

In [0]:
test_input_fn =lambda: input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
STEPS = 29000  # Steps of train loop.

In [0]:
feature_cols = [
  tf.feature_column.numeric_column("Width"),
  tf.feature_column.numeric_column("Height")
]

In [85]:
# Set up a linear classifier.
model = tf.estimator.LinearClassifier(
    feature_columns = feature_cols,
    n_classes = class_num
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp1drs42rs', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2a44663940>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [86]:
model.train(input_fn=train_input_fn, steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp1drs42rs/model.ckpt.
INFO:tensorflow:loss = 1.9459102, step = 1
INFO:tensorflow:global_step/sec: 922.364
INFO:tensorflow:loss = 0.030335533, step = 101 (0.113 sec)
INFO:tensorflow:global_step/sec: 1469.07
INFO:tensorflow:loss = 1.5158117, step = 201 (0.068 sec)
INFO:tensorflow:global_step/sec: 1461.44
INFO:tensorflow:loss = 0.07971874, step = 301 (0.066 sec)
INFO:tensorflow:global_step/sec: 1407.37
INFO:tensorflow:loss = 1.0155276, step = 401 (0.074 sec)
INFO:tensorflow:global_step/sec: 1391.75
INFO:tensorflow:loss = 0.014244661, step = 501 (0.069 sec)
INFO:tensorflow:global_step/sec: 1453.58
INFO:tensorflow:loss = 1.1547837, step = 601 (0.071 sec)
INFO:tensorflow:global_step/sec: 1433.53
INFO:tensor

INFO:tensorflow:loss = 0.29349995, step = 2801 (0.068 sec)
INFO:tensorflow:global_step/sec: 1460.53
INFO:tensorflow:loss = 0.2570638, step = 2901 (0.068 sec)
INFO:tensorflow:global_step/sec: 1410.33
INFO:tensorflow:loss = 0.005593719, step = 3001 (0.071 sec)
INFO:tensorflow:global_step/sec: 1451.04
INFO:tensorflow:loss = 0.004634117, step = 3101 (0.069 sec)
INFO:tensorflow:global_step/sec: 1471.73
INFO:tensorflow:loss = 1.7460806, step = 3201 (0.068 sec)
INFO:tensorflow:global_step/sec: 1433.23
INFO:tensorflow:loss = 0.56374353, step = 3301 (0.070 sec)
INFO:tensorflow:global_step/sec: 1480.37
INFO:tensorflow:loss = 0.35819095, step = 3401 (0.069 sec)
INFO:tensorflow:global_step/sec: 1415.23
INFO:tensorflow:loss = 0.5168103, step = 3501 (0.068 sec)
INFO:tensorflow:global_step/sec: 1387.62
INFO:tensorflow:loss = 0.34869426, step = 3601 (0.072 sec)
INFO:tensorflow:global_step/sec: 1423.36
INFO:tensorflow:loss = 0.0037037602, step = 3701 (0.071 sec)
INFO:tensorflow:global_step/sec: 1422.25

INFO:tensorflow:loss = 0.40848458, step = 5901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1418.21
INFO:tensorflow:loss = 0.5153492, step = 6001 (0.069 sec)
INFO:tensorflow:global_step/sec: 1500.23
INFO:tensorflow:loss = 0.820462, step = 6101 (0.066 sec)
INFO:tensorflow:global_step/sec: 1403.68
INFO:tensorflow:loss = 0.68466175, step = 6201 (0.071 sec)
INFO:tensorflow:global_step/sec: 1441.83
INFO:tensorflow:loss = 0.27541628, step = 6301 (0.070 sec)
INFO:tensorflow:global_step/sec: 1449.6
INFO:tensorflow:loss = 0.33649784, step = 6401 (0.069 sec)
INFO:tensorflow:global_step/sec: 1392.69
INFO:tensorflow:loss = 0.00232783, step = 6501 (0.074 sec)
INFO:tensorflow:global_step/sec: 1405.02
INFO:tensorflow:loss = 0.6441235, step = 6601 (0.070 sec)
INFO:tensorflow:global_step/sec: 1484.72
INFO:tensorflow:loss = 0.26416495, step = 6701 (0.067 sec)
INFO:tensorflow:global_step/sec: 1333.92
INFO:tensorflow:loss = 0.0020932686, step = 6801 (0.078 sec)
INFO:tensorflow:global_step/sec: 1468.1
INFO

INFO:tensorflow:loss = 0.0019783466, step = 9001 (0.071 sec)
INFO:tensorflow:global_step/sec: 1384.33
INFO:tensorflow:loss = 0.79214835, step = 9101 (0.071 sec)
INFO:tensorflow:global_step/sec: 1318.41
INFO:tensorflow:loss = 1.0824136, step = 9201 (0.076 sec)
INFO:tensorflow:global_step/sec: 1427.24
INFO:tensorflow:loss = 0.0016358816, step = 9301 (0.071 sec)
INFO:tensorflow:global_step/sec: 1317.05
INFO:tensorflow:loss = 0.0021277664, step = 9401 (0.076 sec)
INFO:tensorflow:global_step/sec: 1436.06
INFO:tensorflow:loss = 0.5934705, step = 9501 (0.069 sec)
INFO:tensorflow:global_step/sec: 1399.81
INFO:tensorflow:loss = 0.0015191216, step = 9601 (0.071 sec)
INFO:tensorflow:global_step/sec: 1395.2
INFO:tensorflow:loss = 0.861228, step = 9701 (0.072 sec)
INFO:tensorflow:global_step/sec: 1448.1
INFO:tensorflow:loss = 0.4629522, step = 9801 (0.072 sec)
INFO:tensorflow:global_step/sec: 1420.15
INFO:tensorflow:loss = 0.20992263, step = 9901 (0.067 sec)
INFO:tensorflow:global_step/sec: 1438.12

INFO:tensorflow:loss = 0.7002422, step = 12101 (0.071 sec)
INFO:tensorflow:global_step/sec: 1392.12
INFO:tensorflow:loss = 0.8630097, step = 12201 (0.075 sec)
INFO:tensorflow:global_step/sec: 1392.1
INFO:tensorflow:loss = 0.8144072, step = 12301 (0.071 sec)
INFO:tensorflow:global_step/sec: 1443.04
INFO:tensorflow:loss = 0.001457463, step = 12401 (0.067 sec)
INFO:tensorflow:global_step/sec: 1392.93
INFO:tensorflow:loss = 0.0011600677, step = 12501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1443.52
INFO:tensorflow:loss = 0.0012891324, step = 12601 (0.069 sec)
INFO:tensorflow:global_step/sec: 1410.33
INFO:tensorflow:loss = 0.001765837, step = 12701 (0.071 sec)
INFO:tensorflow:global_step/sec: 1415.98
INFO:tensorflow:loss = 1.0641587, step = 12801 (0.071 sec)
INFO:tensorflow:global_step/sec: 1417.78
INFO:tensorflow:loss = 0.12559493, step = 12901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1369.82
INFO:tensorflow:loss = 0.12547591, step = 13001 (0.073 sec)
INFO:tensorflow:global_step/se

INFO:tensorflow:loss = 0.00091094, step = 15201 (0.069 sec)
INFO:tensorflow:global_step/sec: 1423.5
INFO:tensorflow:loss = 0.0011890016, step = 15301 (0.069 sec)
INFO:tensorflow:global_step/sec: 1424.99
INFO:tensorflow:loss = 0.35031292, step = 15401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1442.54
INFO:tensorflow:loss = 0.0009889479, step = 15501 (0.069 sec)
INFO:tensorflow:global_step/sec: 1417.41
INFO:tensorflow:loss = 0.53164744, step = 15601 (0.071 sec)
INFO:tensorflow:global_step/sec: 1484.62
INFO:tensorflow:loss = 0.6357352, step = 15701 (0.068 sec)
INFO:tensorflow:global_step/sec: 1446.4
INFO:tensorflow:loss = 0.338771, step = 15801 (0.072 sec)
INFO:tensorflow:global_step/sec: 1348.34
INFO:tensorflow:loss = 0.8417874, step = 15901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1437.72
INFO:tensorflow:loss = 0.0009234455, step = 16001 (0.067 sec)
INFO:tensorflow:global_step/sec: 1481.74
INFO:tensorflow:loss = 0.5354677, step = 16101 (0.067 sec)
INFO:tensorflow:global_step/sec:

INFO:tensorflow:loss = 0.90596116, step = 18301 (0.069 sec)
INFO:tensorflow:global_step/sec: 1444.81
INFO:tensorflow:loss = 0.921328, step = 18401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1410.46
INFO:tensorflow:loss = 0.40690532, step = 18501 (0.070 sec)
INFO:tensorflow:global_step/sec: 1391.49
INFO:tensorflow:loss = 0.6945695, step = 18601 (0.073 sec)
INFO:tensorflow:global_step/sec: 1222.89
INFO:tensorflow:loss = 0.0008512687, step = 18701 (0.087 sec)
INFO:tensorflow:global_step/sec: 1127.37
INFO:tensorflow:loss = 0.14243805, step = 18801 (0.083 sec)
INFO:tensorflow:global_step/sec: 1412.72
INFO:tensorflow:loss = 0.7407271, step = 18901 (0.070 sec)
INFO:tensorflow:global_step/sec: 1446.94
INFO:tensorflow:loss = 0.6717281, step = 19001 (0.069 sec)
INFO:tensorflow:global_step/sec: 1387.72
INFO:tensorflow:loss = 0.10512149, step = 19101 (0.072 sec)
INFO:tensorflow:global_step/sec: 1434.72
INFO:tensorflow:loss = 0.48168677, step = 19201 (0.070 sec)
INFO:tensorflow:global_step/sec: 1

INFO:tensorflow:loss = 0.0007092821, step = 21401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1397.44
INFO:tensorflow:loss = 0.8050558, step = 21501 (0.069 sec)
INFO:tensorflow:global_step/sec: 1444.62
INFO:tensorflow:loss = 0.14194076, step = 21601 (0.070 sec)
INFO:tensorflow:global_step/sec: 1440.89
INFO:tensorflow:loss = 0.0008261367, step = 21701 (0.071 sec)
INFO:tensorflow:global_step/sec: 1357.14
INFO:tensorflow:loss = 0.81721807, step = 21801 (0.072 sec)
INFO:tensorflow:global_step/sec: 1429.71
INFO:tensorflow:loss = 0.75381756, step = 21901 (0.069 sec)
INFO:tensorflow:global_step/sec: 1412.55
INFO:tensorflow:loss = 0.7077416, step = 22001 (0.071 sec)
INFO:tensorflow:global_step/sec: 1391.26
INFO:tensorflow:loss = 0.104470745, step = 22101 (0.072 sec)
INFO:tensorflow:global_step/sec: 1450.21
INFO:tensorflow:loss = 0.0007727736, step = 22201 (0.068 sec)
INFO:tensorflow:global_step/sec: 1443.04
INFO:tensorflow:loss = 0.5114203, step = 22301 (0.071 sec)
INFO:tensorflow:global_step

INFO:tensorflow:global_step/sec: 1463.4
INFO:tensorflow:loss = 0.09286918, step = 24501 (0.069 sec)
INFO:tensorflow:global_step/sec: 1412.65
INFO:tensorflow:loss = 0.78122425, step = 24601 (0.070 sec)
INFO:tensorflow:global_step/sec: 1413.33
INFO:tensorflow:loss = 0.085641034, step = 24701 (0.074 sec)
INFO:tensorflow:global_step/sec: 1400.8
INFO:tensorflow:loss = 0.07337917, step = 24801 (0.068 sec)
INFO:tensorflow:global_step/sec: 1434.67
INFO:tensorflow:loss = 0.7693222, step = 24901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1397.47
INFO:tensorflow:loss = 0.0007248873, step = 25001 (0.068 sec)
INFO:tensorflow:global_step/sec: 1415.15
INFO:tensorflow:loss = 0.10282223, step = 25101 (0.070 sec)
INFO:tensorflow:global_step/sec: 1435.91
INFO:tensorflow:loss = 0.6396071, step = 25201 (0.073 sec)
INFO:tensorflow:global_step/sec: 1430.21
INFO:tensorflow:loss = 0.3994486, step = 25301 (0.068 sec)
INFO:tensorflow:global_step/sec: 1407.34
INFO:tensorflow:loss = 0.31259412, step = 25401 (0.0

INFO:tensorflow:global_step/sec: 1452.29
INFO:tensorflow:loss = 0.70510906, step = 27601 (0.068 sec)
INFO:tensorflow:global_step/sec: 1450.26
INFO:tensorflow:loss = 0.0007522852, step = 27701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1423.19
INFO:tensorflow:loss = 0.6112323, step = 27801 (0.070 sec)
INFO:tensorflow:global_step/sec: 1323.47
INFO:tensorflow:loss = 0.00070570834, step = 27901 (0.078 sec)
INFO:tensorflow:global_step/sec: 1430.04
INFO:tensorflow:loss = 0.90009654, step = 28001 (0.067 sec)
INFO:tensorflow:global_step/sec: 1326.53
INFO:tensorflow:loss = 0.40136304, step = 28101 (0.075 sec)
INFO:tensorflow:global_step/sec: 1428.31
INFO:tensorflow:loss = 0.00061040366, step = 28201 (0.070 sec)
INFO:tensorflow:global_step/sec: 1448.06
INFO:tensorflow:loss = 0.09757067, step = 28301 (0.069 sec)
INFO:tensorflow:global_step/sec: 1397.71
INFO:tensorflow:loss = 0.90998656, step = 28401 (0.071 sec)
INFO:tensorflow:global_step/sec: 1380.4
INFO:tensorflow:loss = 0.51440185, step = 28

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7f2a44663668>

In [87]:
test = model.evaluate(steps=STEPS, input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-26-09:18:21
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp1drs42rs/model.ckpt-29000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-26-09:18:21
INFO:tensorflow:Saving dict for global step 29000: accuracy = 0.0, average_loss = 17.444305, global_step = 29000, loss = 17.444305
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 29000: /tmp/tmp1drs42rs/model.ckpt-29000
