[View in Colaboratory](https://colab.research.google.com/github/XinyueZ/tf/blob/master/ipynb/workflow_with_estimator.ipynb)

In [0]:
import tensorflow as tf
from tensorflow.python.data import Dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder

In [89]:
# Load the sample table, then discard every row that is missing a value in any
# of the columns the rest of the notebook relies on.
df = pd.read_csv("https://dl.dropbox.com/s/84wg1c7055cl5ee/data.csv", sep=",")
df = df.dropna(subset=["Name", "Width", "Height"])
df.head()

Unnamed: 0,Name,Width,Height,Length,Dense
0,a small guy,10,23,12,6
1,a beautiful stage,11,24,11,7
2,the a length,4,56,15,1
3,zeros to,5,57,8,2
4,description,6,66,9,8


In [0]:
def make_dataset_and_labels_and_class_num(df, label_name, train_frac=0.5, random_seed=None):
  """Encode a label column and split the rows into train and test partitions.

  The values of ``df[label_name]`` are mapped to integer class ids with
  ``LabelEncoder`` and stored in a new column named "label", inserted at
  column position 2. A random sample of the rows becomes the train partition
  and all remaining rows become the test partition.

  The caller's DataFrame is not modified: the "label" column is inserted into
  a copy. This keeps the calling cell re-runnable, because ``DataFrame.insert``
  raises ``ValueError`` when the column already exists.

  Args:
    df: DataFrame format of datasource.
    label_name: The name of column in datasource which will be as target for train.
    train_frac: Fraction of rows sampled into the train partition. Defaults to
      0.5, the value that was previously hard-coded.
    random_seed: Seed forwarded to ``DataFrame.sample``. ``None`` (the default)
      draws from NumPy's global random state, so the split differs between
      calls unless that state was seeded; pass an int for a reproducible split.

  Returns:
    Tuple of (ds_train, ds_test, y_train, y_test, class_num) where the two
    ``ds_*`` frames contain every original column plus "label", the two
    ``y_*`` values are the "label" columns of those frames, and ``class_num``
    is the number of distinct values found in ``df[label_name]``.
  """
  target_label_col = "label"  # New column name in the labelled copy of the table.

  encoder = LabelEncoder()
  label = encoder.fit_transform(df[label_name])

  # Work on a copy so the caller's frame keeps its original columns.
  labelled = df.copy()
  labelled.insert(2, target_label_col, label)

  # Count classes from the encoder rather than np.max(label) + 1, which fails
  # on an empty input.
  class_num = len(encoder.classes_)

  # The seed goes straight to sample(); the global NumPy RNG is left alone.
  ds_train = labelled.sample(frac=train_frac, random_state=random_seed)
  label_train = ds_train[target_label_col]

  # Everything that was not sampled for training is used for testing.
  ds_test = labelled.drop(ds_train.index)
  label_test = ds_test[target_label_col]

  return ds_train, ds_test, label_train, label_test, class_num

In [0]:
# Encode the "Name" column as integer class ids and split the rows into
# train/test partitions.
# NOTE(review): the recorded outputs show 7 classes for 7 rows (4 train + 3 test),
# i.e. every row is its own class - confirm that "Name" is the intended target.
ds_train, ds_test, y_train, y_test, class_num = make_dataset_and_labels_and_class_num(df, "Name")

In [92]:
# Report how many distinct classes the label encoding produced.
print("class: {}".format(class_num))

class: 7


In [93]:
# Preview the train partition, including the encoded "label" column.
ds_train.head()

Unnamed: 0,Name,Width,label,Height,Length,Dense
5,can learn,7,2,77,14,10
4,description,6,3,66,9,8
1,a beautiful stage,11,0,24,11,7
0,a small guy,10,1,23,12,6


In [94]:
# Preview the test partition, including the encoded "label" column.
ds_test.head()

Unnamed: 0,Name,Width,label,Height,Length,Dense
2,the a length,4,5,56,15,1
3,zeros to,5,6,57,8,2
6,performs better,8,4,88,7,8


In [0]:
# Keep only the two numeric columns that are fed to the model as features.
x_train = ds_train.loc[:, ["Width", "Height"]]
x_test = ds_test.loc[:, ["Width", "Height"]]

In [96]:
# Preview the train features (Width and Height only).
x_train.head()

Unnamed: 0,Width,Height
5,7,77
4,6,66
1,11,24
0,10,23


In [97]:
# Preview the test features (Width and Height only).
x_test.head()

Unnamed: 0,Width,Height
2,4,56
3,5,57
6,8,88


In [98]:
# Show the encoded class ids of the train rows.
y_train

5    2
4    3
1    0
0    1
Name: label, dtype: int64

In [99]:
# Check which container holds the train labels (a pandas Series in the recorded run).
type(y_train)

pandas.core.series.Series

In [100]:
# Show the encoded class ids of the test rows.
y_test

2    5
3    6
6    4
Name: label, dtype: int64

In [101]:
# Check which container holds the test labels (a pandas Series in the recorded run).
type(y_test)

pandas.core.series.Series

In [0]:
def input_fn(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Build a tf.data input pipeline and return the tensors of its next batch.

    Intended to be wrapped in a zero-argument callable and passed as the
    ``input_fn`` of an Estimator's ``train`` / ``evaluate`` call.

    Args:
      features: pandas DataFrame of features
      targets: pandas DataFrame of targets
      batch_size: Size of batches to be passed to the model
      shuffle: True or False. Whether to shuffle the data.
      num_epochs: Number of epochs for which data should be repeated. None = repeat indefinitely
    Returns:
      Tuple of (features, labels) for next data batch
    """
    # Turn every feature column into a NumPy array keyed by its column name.
    feature_arrays = {key: np.array(value) for key, value in dict(features).items()}

    # Use the public tf.data entry point to slice the data into single examples.
    ds = tf.data.Dataset.from_tensor_slices((feature_arrays, targets))

    # Shuffle individual examples BEFORE batching and repeating. Shuffling
    # afterwards would permute whole batches, mix examples across epoch
    # boundaries and fill the buffer from the repeated stream.
    if shuffle:
        ds = ds.shuffle(buffer_size=10000)

    # Group the examples into batches and repeat for the requested epochs.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data
    batch_features, batch_labels = ds.make_one_shot_iterator().get_next()
    return batch_features, batch_labels

In [0]:
def train_input_fn():
    """Training input: x_train / y_train fed through input_fn with its default settings."""
    return input_fn(x_train, y_train)

In [0]:
def test_input_fn():
    """Evaluation input: one unshuffled pass over x_test / y_test."""
    return input_fn(x_test, y_test, num_epochs=1, shuffle=False)

In [0]:
# Step count shared by the notebook: passed as `steps` to model.train and,
# further down, also to model.evaluate.
STEPS = 29000  # Steps of train loop.

In [0]:
# One numeric feature column per model input; the names match the DataFrame
# columns selected for x_train / x_test.
feature_cols = [
  tf.feature_column.numeric_column(column_name)
  for column_name in ("Width", "Height")
]

In [107]:
# Set up a linear classifier over the Width/Height feature columns, with one
# output class per encoded label value (class_num).
# No model_dir is passed; the recorded log shows checkpoints going to a
# temporary directory (/tmp/...).
model = tf.estimator.LinearClassifier(
    feature_columns = feature_cols,
    n_classes = class_num
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmppe9s28eg', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2a445844a8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [108]:
# Train for STEPS steps on the train partition. The call's return value is the
# last expression of the cell, which is why the recorded output ends with the
# LinearClassifier repr after the training log.
model.train(input_fn=train_input_fn, steps=STEPS)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmppe9s28eg/model.ckpt.
INFO:tensorflow:loss = 1.9459102, step = 1
INFO:tensorflow:global_step/sec: 899.835
INFO:tensorflow:loss = 0.08014849, step = 101 (0.113 sec)
INFO:tensorflow:global_step/sec: 1453.08
INFO:tensorflow:loss = 0.3903152, step = 201 (0.069 sec)
INFO:tensorflow:global_step/sec: 1460.96
INFO:tensorflow:loss = 0.07100366, step = 301 (0.071 sec)
INFO:tensorflow:global_step/sec: 1424.42
INFO:tensorflow:loss = 0.24039735, step = 401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1316.31
INFO:tensorflow:loss = 0.8584721, step = 501 (0.075 sec)
INFO:tensorflow:global_step/sec: 1433.91
INFO:tensorflow:loss = 0.2188447, step = 601 (0.069 sec)
INFO:tensorflow:global_step/sec: 1418.22
INFO:tensorfl

INFO:tensorflow:global_step/sec: 1376.4
INFO:tensorflow:loss = 0.42736065, step = 2901 (0.072 sec)
INFO:tensorflow:global_step/sec: 1426.39
INFO:tensorflow:loss = 0.4595944, step = 3001 (0.070 sec)
INFO:tensorflow:global_step/sec: 1404.43
INFO:tensorflow:loss = 0.59993196, step = 3101 (0.071 sec)
INFO:tensorflow:global_step/sec: 1336.71
INFO:tensorflow:loss = 0.61884177, step = 3201 (0.078 sec)
INFO:tensorflow:global_step/sec: 1412.32
INFO:tensorflow:loss = 1.2438779, step = 3301 (0.068 sec)
INFO:tensorflow:global_step/sec: 1417.05
INFO:tensorflow:loss = 0.49434763, step = 3401 (0.071 sec)
INFO:tensorflow:global_step/sec: 1383.29
INFO:tensorflow:loss = 0.81339926, step = 3501 (0.073 sec)
INFO:tensorflow:global_step/sec: 1427.67
INFO:tensorflow:loss = 0.5151096, step = 3601 (0.069 sec)
INFO:tensorflow:global_step/sec: 1450.07
INFO:tensorflow:loss = 0.4865616, step = 3701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1393.23
INFO:tensorflow:loss = 0.4047092, step = 3801 (0.072 sec)
INFO:t

INFO:tensorflow:loss = 0.28783882, step = 6001 (0.072 sec)
INFO:tensorflow:global_step/sec: 1400.95
INFO:tensorflow:loss = 0.480092, step = 6101 (0.071 sec)
INFO:tensorflow:global_step/sec: 1395.6
INFO:tensorflow:loss = 0.6362454, step = 6201 (0.072 sec)
INFO:tensorflow:global_step/sec: 1359.73
INFO:tensorflow:loss = 0.50444245, step = 6301 (0.074 sec)
INFO:tensorflow:global_step/sec: 1401.85
INFO:tensorflow:loss = 0.38606128, step = 6401 (0.071 sec)
INFO:tensorflow:global_step/sec: 1328.3
INFO:tensorflow:loss = 1.234074, step = 6501 (0.076 sec)
INFO:tensorflow:global_step/sec: 1360.08
INFO:tensorflow:loss = 0.55455375, step = 6601 (0.073 sec)
INFO:tensorflow:global_step/sec: 1439.83
INFO:tensorflow:loss = 0.46605936, step = 6701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1406.5
INFO:tensorflow:loss = 0.8043955, step = 6801 (0.071 sec)
INFO:tensorflow:global_step/sec: 1372.67
INFO:tensorflow:loss = 0.27071846, step = 6901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1414.57
INFO:tens

INFO:tensorflow:loss = 0.34117097, step = 9101 (0.073 sec)
INFO:tensorflow:global_step/sec: 1436.25
INFO:tensorflow:loss = 0.64079404, step = 9201 (0.069 sec)
INFO:tensorflow:global_step/sec: 1461.66
INFO:tensorflow:loss = 0.5422018, step = 9301 (0.068 sec)
INFO:tensorflow:global_step/sec: 1419.48
INFO:tensorflow:loss = 0.464427, step = 9401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1430.57
INFO:tensorflow:loss = 0.7013693, step = 9501 (0.070 sec)
INFO:tensorflow:global_step/sec: 1457.03
INFO:tensorflow:loss = 0.39235023, step = 9601 (0.069 sec)
INFO:tensorflow:global_step/sec: 1452.95
INFO:tensorflow:loss = 0.6591876, step = 9701 (0.071 sec)
INFO:tensorflow:global_step/sec: 1343.26
INFO:tensorflow:loss = 1.0430827, step = 9801 (0.072 sec)
INFO:tensorflow:global_step/sec: 1448.95
INFO:tensorflow:loss = 0.6376288, step = 9901 (0.069 sec)
INFO:tensorflow:global_step/sec: 1424.27
INFO:tensorflow:loss = 0.3506963, step = 10001 (0.070 sec)
INFO:tensorflow:global_step/sec: 1436.6
INFO:ten

INFO:tensorflow:loss = 0.40425128, step = 12201 (0.069 sec)
INFO:tensorflow:global_step/sec: 1475.38
INFO:tensorflow:loss = 0.6558098, step = 12301 (0.067 sec)
INFO:tensorflow:global_step/sec: 1456.65
INFO:tensorflow:loss = 1.3388847, step = 12401 (0.069 sec)
INFO:tensorflow:global_step/sec: 1458.95
INFO:tensorflow:loss = 0.39745018, step = 12501 (0.068 sec)
INFO:tensorflow:global_step/sec: 1089.44
INFO:tensorflow:loss = 0.5255535, step = 12601 (0.095 sec)
INFO:tensorflow:global_step/sec: 1361.28
INFO:tensorflow:loss = 0.7466273, step = 12701 (0.071 sec)
INFO:tensorflow:global_step/sec: 1390.93
INFO:tensorflow:loss = 0.6142076, step = 12801 (0.073 sec)
INFO:tensorflow:global_step/sec: 1439.44
INFO:tensorflow:loss = 0.37329793, step = 12901 (0.071 sec)
INFO:tensorflow:global_step/sec: 1379.4
INFO:tensorflow:loss = 0.5472013, step = 13001 (0.071 sec)
INFO:tensorflow:global_step/sec: 1375.15
INFO:tensorflow:loss = 0.7164017, step = 13101 (0.076 sec)
INFO:tensorflow:global_step/sec: 1402.0

INFO:tensorflow:loss = 0.4427195, step = 15301 (0.069 sec)
INFO:tensorflow:global_step/sec: 1401.72
INFO:tensorflow:loss = 0.6454693, step = 15401 (0.071 sec)
INFO:tensorflow:global_step/sec: 1445.54
INFO:tensorflow:loss = 0.64625275, step = 15501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1396.27
INFO:tensorflow:loss = 0.5473891, step = 15601 (0.070 sec)
INFO:tensorflow:global_step/sec: 1414.18
INFO:tensorflow:loss = 0.47959816, step = 15701 (0.070 sec)
INFO:tensorflow:global_step/sec: 1462.42
INFO:tensorflow:loss = 0.45991665, step = 15801 (0.069 sec)
INFO:tensorflow:global_step/sec: 1464.28
INFO:tensorflow:loss = 0.53690225, step = 15901 (0.068 sec)
INFO:tensorflow:global_step/sec: 1384.35
INFO:tensorflow:loss = 0.3779774, step = 16001 (0.075 sec)
INFO:tensorflow:global_step/sec: 1396.02
INFO:tensorflow:loss = 0.47103316, step = 16101 (0.069 sec)
INFO:tensorflow:global_step/sec: 1471.65
INFO:tensorflow:loss = 0.50996315, step = 16201 (0.071 sec)
INFO:tensorflow:global_step/sec: 13

INFO:tensorflow:loss = 0.4869527, step = 18401 (0.074 sec)
INFO:tensorflow:global_step/sec: 1372.85
INFO:tensorflow:loss = 0.57471544, step = 18501 (0.070 sec)
INFO:tensorflow:global_step/sec: 1405.32
INFO:tensorflow:loss = 0.4826975, step = 18601 (0.072 sec)
INFO:tensorflow:global_step/sec: 1444.07
INFO:tensorflow:loss = 0.4399361, step = 18701 (0.069 sec)
INFO:tensorflow:global_step/sec: 1379.11
INFO:tensorflow:loss = 0.48429114, step = 18801 (0.072 sec)
INFO:tensorflow:global_step/sec: 1436.3
INFO:tensorflow:loss = 0.6268315, step = 18901 (0.070 sec)
INFO:tensorflow:global_step/sec: 1410.52
INFO:tensorflow:loss = 0.31114438, step = 19001 (0.071 sec)
INFO:tensorflow:global_step/sec: 1397.58
INFO:tensorflow:loss = 0.79174525, step = 19101 (0.072 sec)
INFO:tensorflow:global_step/sec: 1466.07
INFO:tensorflow:loss = 1.5833603, step = 19201 (0.068 sec)
INFO:tensorflow:global_step/sec: 1466.84
INFO:tensorflow:loss = 0.5683524, step = 19301 (0.068 sec)
INFO:tensorflow:global_step/sec: 1408.

INFO:tensorflow:loss = 1.0087732, step = 21501 (0.074 sec)
INFO:tensorflow:global_step/sec: 1371.72
INFO:tensorflow:loss = 0.30618027, step = 21601 (0.068 sec)
INFO:tensorflow:global_step/sec: 1430.11
INFO:tensorflow:loss = 0.29055598, step = 21701 (0.070 sec)
INFO:tensorflow:global_step/sec: 1451.06
INFO:tensorflow:loss = 0.6898117, step = 21801 (0.069 sec)
INFO:tensorflow:global_step/sec: 1370.73
INFO:tensorflow:loss = 0.6802828, step = 21901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1422.5
INFO:tensorflow:loss = 0.56775594, step = 22001 (0.070 sec)
INFO:tensorflow:global_step/sec: 1421.5
INFO:tensorflow:loss = 0.2379531, step = 22101 (0.070 sec)
INFO:tensorflow:global_step/sec: 1381.02
INFO:tensorflow:loss = 0.68643594, step = 22201 (0.073 sec)
INFO:tensorflow:global_step/sec: 1418.27
INFO:tensorflow:loss = 0.48556077, step = 22301 (0.070 sec)
INFO:tensorflow:global_step/sec: 1437.99
INFO:tensorflow:loss = 0.368789, step = 22401 (0.070 sec)
INFO:tensorflow:global_step/sec: 1456.4

INFO:tensorflow:loss = 0.5507034, step = 24601 (0.071 sec)
INFO:tensorflow:global_step/sec: 1438.36
INFO:tensorflow:loss = 0.64445436, step = 24701 (0.068 sec)
INFO:tensorflow:global_step/sec: 1365.59
INFO:tensorflow:loss = 0.42357156, step = 24801 (0.073 sec)
INFO:tensorflow:global_step/sec: 1432.39
INFO:tensorflow:loss = 0.65945995, step = 24901 (0.073 sec)
INFO:tensorflow:global_step/sec: 1403.1
INFO:tensorflow:loss = 0.4804027, step = 25001 (0.071 sec)
INFO:tensorflow:global_step/sec: 1367.53
INFO:tensorflow:loss = 0.5156088, step = 25101 (0.073 sec)
INFO:tensorflow:global_step/sec: 1401.68
INFO:tensorflow:loss = 0.46415776, step = 25201 (0.070 sec)
INFO:tensorflow:global_step/sec: 1434.9
INFO:tensorflow:loss = 0.73511076, step = 25301 (0.069 sec)
INFO:tensorflow:global_step/sec: 1311.75
INFO:tensorflow:loss = 0.5212866, step = 25401 (0.077 sec)
INFO:tensorflow:global_step/sec: 1381.99
INFO:tensorflow:loss = 0.71503115, step = 25501 (0.072 sec)
INFO:tensorflow:global_step/sec: 1439

INFO:tensorflow:loss = 0.53500515, step = 27701 (0.075 sec)
INFO:tensorflow:global_step/sec: 1419.6
INFO:tensorflow:loss = 0.58351874, step = 27801 (0.066 sec)
INFO:tensorflow:global_step/sec: 1383.32
INFO:tensorflow:loss = 0.44041926, step = 27901 (0.072 sec)
INFO:tensorflow:global_step/sec: 1316.78
INFO:tensorflow:loss = 0.5267982, step = 28001 (0.077 sec)
INFO:tensorflow:global_step/sec: 1358.8
INFO:tensorflow:loss = 0.636976, step = 28101 (0.073 sec)
INFO:tensorflow:global_step/sec: 1365.13
INFO:tensorflow:loss = 0.32016107, step = 28201 (0.073 sec)
INFO:tensorflow:global_step/sec: 1441.1
INFO:tensorflow:loss = 0.45424318, step = 28301 (0.070 sec)
INFO:tensorflow:global_step/sec: 1399.86
INFO:tensorflow:loss = 0.3891828, step = 28401 (0.074 sec)
INFO:tensorflow:global_step/sec: 1403.4
INFO:tensorflow:loss = 0.6217691, step = 28501 (0.070 sec)
INFO:tensorflow:global_step/sec: 1424.67
INFO:tensorflow:loss = 0.6512524, step = 28601 (0.069 sec)
INFO:tensorflow:global_step/sec: 1403.2
I

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x7f2a44584630>

In [109]:
# Evaluate on the held-out rows and keep the returned metrics in `test`.
# test_input_fn requests a single pass (num_epochs=1), so steps=STEPS is
# presumably only an upper bound that is never reached - TODO confirm.
# NOTE(review): the recorded run reports accuracy = 0.0. The displayed y_train
# holds labels {0, 1, 2, 3} while y_test holds {4, 5, 6}, so none of the test
# classes was seen during training. Verify that "Name" is the intended target
# and that the split leaves every class represented in the train partition.
test = model.evaluate(steps=STEPS, input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-07-26-10:03:20
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmppe9s28eg/model.ckpt-29000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-07-26-10:03:20
INFO:tensorflow:Saving dict for global step 29000: accuracy = 0.0, average_loss = 24.396887, global_step = 29000, loss = 24.396887
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 29000: /tmp/tmppe9s28eg/model.ckpt-29000
