# [Effective TensorFlow for Non-Experts (Google I/O '17)](https://www.youtube.com/watch?v=5DknTFbcGVM)
c:\Users\hcche\Documents\ML YouTube and Moocs\Machine Learning\Misc of ML and AI\Inception slim models API\Effective TensorFlow for Non-Experts (Google I_O '17)-5DknTFbcGVM.mp4                      
# [Estimator demo using Automobile dataset](https://gist.github.com/martinwicke/6838c23abdc53e6bcda36ed9f40cff39)
[Jupyter Notebook martinwicke/automobile.ipynb](https://goo.gl/0OgXiL)




In [None]:
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

In [None]:
# First thing to do: Download https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data

In [None]:
# We're using pandas to read the CSV file. This is easy for small datasets, but for large and complex datasets,
# tensorflow parsing and processing functions are more powerful.
import pandas as pd
import numpy as np

In [None]:
# The CSV file has no header row, so every column is declared here, in file
# order, together with the dtype pandas should parse it as.
_column_spec = [
    ('symboling', np.int32),
    ('normalized-losses', np.float32),
    ('make', str),
    ('fuel-type', str),
    ('aspiration', str),
    ('num-of-doors', str),
    ('body-style', str),
    ('drive-wheels', str),
    ('engine-location', str),
    ('wheel-base', np.float32),
    ('length', np.float32),
    ('width', np.float32),
    ('height', np.float32),
    ('curb-weight', np.float32),
    ('engine-type', str),
    ('num-of-cylinders', str),
    ('engine-size', np.float32),
    ('fuel-system', str),
    ('bore', np.float32),
    ('stroke', np.float32),
    ('compression-ratio', np.float32),
    ('horsepower', np.float32),
    ('peak-rpm', np.float32),
    ('city-mpg', np.float32),
    ('highway-mpg', np.float32),
    ('price', np.float32),
]

# Column names, in the order they appear in the file.
names = [column for column, _ in _column_spec]

# Column name -> dtype mapping.
dtypes = dict(_column_spec)

In [None]:
# Load the CSV with the declared column names and dtypes; cells containing '?'
# are treated as missing values (NaN).
df = pd.read_csv(
    'imports-85.data',
    names=names,
    dtype=dtypes,
    na_values='?',
)

In [6]:
# Rows without a price cannot be used, so drop them. dropna() works row-wise
# with how='any' by default, which is what the single-column subset needs.
df = df.dropna(subset=['price'])

In [7]:
# Group the columns by their declared dtype.
float_columns = [column for column, kind in dtypes.items() if kind == np.float32]
string_columns = [column for column, kind in dtypes.items() if kind == str]

# Missing values in continuous (float32) columns become zeros instead of NaN.
df[float_columns] = df[float_columns].fillna(value=0., axis='columns')
# Missing values in string columns become '' instead of NaN
# (NaN mixed with strings is very bad for us).
df[string_columns] = df[string_columns].fillna(value='', axis='columns')

In [8]:
# The first 160 rows are used for training; everything after them is held out
# for evaluation.
training_data, eval_data = df.iloc[:160], df.iloc[160:]

# pop() takes the 'price' column out of each feature frame and returns it, so
# the labels end up separate from the input features.
training_label, eval_label = training_data.pop('price'), eval_data.pop('price')

In [9]:
# Now we can start using some TensorFlow.
import tensorflow as tf
print('please make sure that version >= 1.2:')
print(tf.__version__)

please make sure that version >= 1.2:
1.4.0


In [10]:
# Input function for training:
#   shuffle=True     -> randomize the order of the input data
#   num_epochs=None  -> cycle through the input data forever
training_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=training_data,
    y=training_label,
    batch_size=64,
    shuffle=True,
    num_epochs=None,
)

# Input function for evaluation:
#   shuffle=False -> keep the input data in its original order
eval_input_fn = tf.estimator.inputs.pandas_input_fn(
    x=eval_data,
    y=eval_label,
    batch_size=64,
    shuffle=False,
)

In [11]:
# Tell the model how to interpret each input. The key of every feature column
# must match the name of the corresponding series in the dataframe.
#
# Numeric inputs use numeric_column. String inputs use a categorical column:
# 'make' is hashed into 50 buckets, the others list their vocabulary.

symboling = tf.feature_column.numeric_column(key='symboling')
normalized_losses = tf.feature_column.numeric_column(key='normalized-losses')
make = tf.feature_column.categorical_column_with_hash_bucket(
    key='make', hash_bucket_size=50)
fuel_type = tf.feature_column.categorical_column_with_vocabulary_list(
    key='fuel-type', vocabulary_list=['diesel', 'gas'])
aspiration = tf.feature_column.categorical_column_with_vocabulary_list(
    key='aspiration', vocabulary_list=['std', 'turbo'])
num_of_doors = tf.feature_column.categorical_column_with_vocabulary_list(
    key='num-of-doors', vocabulary_list=['two', 'four'])
body_style = tf.feature_column.categorical_column_with_vocabulary_list(
    key='body-style',
    vocabulary_list=['hardtop', 'wagon', 'sedan', 'hatchback', 'convertible'])
drive_wheels = tf.feature_column.categorical_column_with_vocabulary_list(
    key='drive-wheels', vocabulary_list=['4wd', 'rwd', 'fwd'])
engine_location = tf.feature_column.categorical_column_with_vocabulary_list(
    key='engine-location', vocabulary_list=['front', 'rear'])
wheel_base = tf.feature_column.numeric_column(key='wheel-base')
length = tf.feature_column.numeric_column(key='length')
width = tf.feature_column.numeric_column(key='width')
height = tf.feature_column.numeric_column(key='height')
curb_weight = tf.feature_column.numeric_column(key='curb-weight')
engine_type = tf.feature_column.categorical_column_with_vocabulary_list(
    key='engine-type',
    vocabulary_list=['dohc', 'dohcv', 'l', 'ohc', 'ohcf', 'ohcv', 'rotor'])
num_of_cylinders = tf.feature_column.categorical_column_with_vocabulary_list(
    key='num-of-cylinders',
    vocabulary_list=['eight', 'five', 'four', 'six', 'three', 'twelve', 'two'])
engine_size = tf.feature_column.numeric_column(key='engine-size')
fuel_system = tf.feature_column.categorical_column_with_vocabulary_list(
    key='fuel-system',
    vocabulary_list=['1bbl', '2bbl', '4bbl', 'idi', 'mfi', 'mpfi', 'spdi', 'spfi'])
bore = tf.feature_column.numeric_column(key='bore')
stroke = tf.feature_column.numeric_column(key='stroke')
compression_ratio = tf.feature_column.numeric_column(key='compression-ratio')
horsepower = tf.feature_column.numeric_column(key='horsepower')
peak_rpm = tf.feature_column.numeric_column(key='peak-rpm')
city_mpg = tf.feature_column.numeric_column(key='city-mpg')
highway_mpg = tf.feature_column.numeric_column(key='highway-mpg')

In [12]:
# Feature columns handed to the linear model: every numeric and categorical
# column, in the same order as the columns of the data file.
linear_features = [
    symboling,
    normalized_losses,
    make,
    fuel_type,
    aspiration,
    num_of_doors,
    body_style,
    drive_wheels,
    engine_location,
    wheel_base,
    length,
    width,
    height,
    curb_weight,
    engine_type,
    num_of_cylinders,
    engine_size,
    fuel_system,
    bore,
    stroke,
    compression_ratio,
    horsepower,
    peak_rpm,
    city_mpg,
    highway_mpg,
]

In [13]:
# Linear regression model over the columns in linear_features. No config is
# passed, so the estimator falls back to its default config.
regressor = tf.contrib.learn.LinearRegressor(feature_columns=linear_features)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002573CDEAEB8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'C:\\Users\\hcche\\AppData\\Local\\Temp\\tmpwpz9a13i'}


In [14]:
# Train the linear model for 10000 steps on batches from training_input_fn.
regressor.fit(input_fn=training_input_fn, steps=10000)

Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\hcche\AppData\Local\Temp\tmpwpz9a13i\model.ckpt.
INFO:tensorflow:loss = 2.67863e+08, step = 1
INFO:tensorflow:global_step/sec: 137.81
INFO:tensorflow:loss = 2.73126e+07, step = 101 (0.731 sec)
INFO:tensorflow:global_step/sec: 218.649
INFO:tensorflow:loss = 6.96823e+07, step = 201 (0.458 sec)
INFO:tensorflow:global_step/sec: 220.688
INFO:tensorflow:loss = 3.98176e+07, step = 301 (0.453 sec)
INFO:tensorflow:global_step/sec: 213.08
INFO:tensorflow:loss = 6.56762e+07, step = 401 (0.468 sec)
INFO:tensorflow:global_step/sec: 224.878
INFO:tensorflow:loss = 5.09723e+07, step = 501 (0.445 sec)
INFO:tensorflow:global_step/sec: 225.197
INFO:tensorflow:loss = 5.01094e+07, step = 601 (0.444 sec)
INFO:tensorflow:global_step/sec: 221.707
INFO:tensorflow:loss = 3.85424e+07, step = 701 (0.450 sec)
INFO:tensorflow:global_step/sec: 224.341

INFO:tensorflow:global_step/sec: 204.109
INFO:tensorflow:loss = 1.3092e+07, step = 7701 (0.490 sec)
INFO:tensorflow:global_step/sec: 214.101
INFO:tensorflow:loss = 2.43536e+07, step = 7801 (0.467 sec)
INFO:tensorflow:global_step/sec: 215.903
INFO:tensorflow:loss = 2.13401e+07, step = 7901 (0.464 sec)
INFO:tensorflow:global_step/sec: 212.247
INFO:tensorflow:loss = 1.60149e+07, step = 8001 (0.471 sec)
INFO:tensorflow:global_step/sec: 199.277
INFO:tensorflow:loss = 1.56256e+07, step = 8101 (0.501 sec)
INFO:tensorflow:global_step/sec: 173.625
INFO:tensorflow:loss = 2.8568e+07, step = 8201 (0.577 sec)
INFO:tensorflow:global_step/sec: 204.863
INFO:tensorflow:loss = 1.49496e+07, step = 8301 (0.487 sec)
INFO:tensorflow:global_step/sec: 207.439
INFO:tensorflow:loss = 2.3783e+07, step = 8401 (0.482 sec)
INFO:tensorflow:global_step/sec: 215.083
INFO:tensorflow:loss = 2.42016e+07, step = 8501 (0.465 sec)
INFO:tensorflow:global_step/sec: 215.96
INFO:tensorflow:loss = 2.35592e+07, step = 8601 (0.465

LinearRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x000002573C07FF60>, 'feature_columns': [_NumericColumn(key='symboling', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='normalized-losses', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _HashedCategoricalColumn(key='make', hash_bucket_size=50, dtype=tf.string), _VocabularyListCategoricalColumn(key='fuel-type', vocabulary_list=('diesel', 'gas'), dtype=tf.string, default_value=-1, num_oov_buckets=0), _VocabularyListCategoricalColumn(key='aspiration', vocabulary_list=('std', 'turbo'), dtype=tf.string, default_value=-1, num_oov_buckets=0), _VocabularyListCategoricalColumn(key='num-of-doors', vocabulary_list=('two', 'four'), dtype=tf.string, default_value=-1, num_oov_buckets=0), _VocabularyListCategoricalColumn(key='body-style', vocabulary_list=('hardtop', 'wagon', 'sedan', 'hatchback', 'convertible'), dtype=tf.

In [15]:
# Evaluate the linear model on the data served by eval_input_fn; the returned
# metrics are shown as the cell output.
regressor.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Starting evaluation at 2018-01-03-09:52:08
INFO:tensorflow:Restoring parameters from C:\Users\hcche\AppData\Local\Temp\tmpwpz9a13i\model.ckpt-10000
INFO:tensorflow:Finished evaluation at 2018-01-03-09:52:10
INFO:tensorflow:Saving dict for global step 10000: global_step = 10000, loss = 7.99516e+06


{'global_step': 10000, 'loss': 7995160.0}

In [16]:
# Feature columns for the DNN: the numeric columns are used directly, and each
# categorical column is densified by wrapping it in an indicator column.
dnn_features = [
    symboling, normalized_losses, wheel_base, length, width, height,
    curb_weight, engine_size, bore, stroke, compression_ratio, horsepower,
    peak_rpm, city_mpg, highway_mpg,
] + [
    tf.feature_column.indicator_column(categorical)
    for categorical in (
        make, fuel_type, aspiration, num_of_doors, body_style, drive_wheels,
        engine_location, engine_type, num_of_cylinders, fuel_system,
    )
]

In [17]:
# DNN regression model over dnn_features with three hidden layers of
# 50, 30 and 10 units.
dnnregressor = tf.contrib.learn.DNNRegressor(feature_columns=dnn_features, hidden_units=[50, 30, 10])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000025740C7F5F8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'C:\\Users\\hcche\\AppData\\Local\\Temp\\tmp8aym0ig1'}


In [18]:
# Train the DNN for 10000 steps on batches from training_input_fn.
dnnregressor.fit(input_fn=training_input_fn, steps=10000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\hcche\AppData\Local\Temp\tmp8aym0ig1\model.ckpt.
INFO:tensorflow:loss = 2.26867e+08, step = 1
INFO:tensorflow:global_step/sec: 72.4049
INFO:tensorflow:loss = 3.27852e+07, step = 101 (1.394 sec)
INFO:tensorflow:global_step/sec: 84.5759
INFO:tensorflow:loss = 1.55734e+07, step = 201 (1.181 sec)
INFO:tensorflow:global_step/sec: 128.622
INFO:tensorflow:loss = 1.37408e+07, step = 301 (0.768 sec)
INFO:tensorflow:global_step/sec: 218.544
INFO:tensorflow:loss = 1.29803e+07, step = 401 (0.459 sec)
INFO:tensorflow:global_step/sec: 195.831
INFO:tensorflow:loss = 1.53082e+07, step = 501 (0.511 sec)
INFO:tensorflow:global_step/sec: 215.922
INFO:tensorflow:loss = 8.32073e+06, step = 601 (0.462 sec)
INFO:tensorflow:global_step/sec: 214.789
INFO:tensorflow:loss = 8.17437e+06, step = 701 (0.466 sec)
INFO:tensorflow:global_step/sec: 218.347
INFO:tensorflow:loss = 1.0526e+07, step = 801 (0.458 sec)
INFO:ten

INFO:tensorflow:global_step/sec: 186.068
INFO:tensorflow:loss = 5.12467e+06, step = 8101 (0.517 sec)
INFO:tensorflow:global_step/sec: 227.756
INFO:tensorflow:loss = 2.80731e+06, step = 8201 (0.438 sec)
INFO:tensorflow:global_step/sec: 160.688
INFO:tensorflow:loss = 2.1825e+06, step = 8301 (0.628 sec)
INFO:tensorflow:global_step/sec: 159.366
INFO:tensorflow:loss = 3.56868e+06, step = 8401 (0.624 sec)
INFO:tensorflow:global_step/sec: 222.05
INFO:tensorflow:loss = 4.54819e+06, step = 8501 (0.451 sec)
INFO:tensorflow:global_step/sec: 107.002
INFO:tensorflow:loss = 1.89633e+06, step = 8601 (0.947 sec)
INFO:tensorflow:global_step/sec: 119.193
INFO:tensorflow:loss = 1.32381e+06, step = 8701 (0.830 sec)
INFO:tensorflow:global_step/sec: 131.769
INFO:tensorflow:loss = 2.06012e+06, step = 8801 (0.761 sec)
INFO:tensorflow:global_step/sec: 137.812
INFO:tensorflow:loss = 3.0996e+06, step = 8901 (0.720 sec)
INFO:tensorflow:global_step/sec: 211.186
INFO:tensorflow:loss = 1.95128e+06, step = 9001 (0.47

DNNRegressor(params={'head': <tensorflow.contrib.learn.python.learn.estimators.head._RegressionHead object at 0x0000025740C7FAC8>, 'hidden_units': [50, 30, 10], 'feature_columns': (_NumericColumn(key='symboling', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='normalized-losses', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='wheel-base', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='height', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='curb-weight', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='engine-size', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn

In [19]:
# Evaluate the DNN on the data served by eval_input_fn; the returned metrics
# are shown as the cell output.
dnnregressor.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Starting evaluation at 2018-01-03-09:53:17
INFO:tensorflow:Restoring parameters from C:\Users\hcche\AppData\Local\Temp\tmp8aym0ig1\model.ckpt-10000
INFO:tensorflow:Finished evaluation at 2018-01-03-09:53:18
INFO:tensorflow:Saving dict for global step 10000: global_step = 10000, loss = 1.31596e+07


{'global_step': 10000, 'loss': 13159593.0}

In [20]:
def experiment_fn(run_config, params):
    """Build an Experiment: an Estimator plus its training and eval inputs.

    Args:
      run_config: configuration passed to the LinearRegressor as `config`.
      params: not used in this function. Together with run_config it is the
        place to customize the Estimator depending on the cluster, or to plug
        in hyperparameter tuning.

    Returns:
      A tf.contrib.learn.Experiment that trains a LinearRegressor over
      linear_features for 10000 steps using training_input_fn and evaluates
      it using eval_input_fn.
    """
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=linear_features,
        config=run_config,
    )
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=training_input_fn,
        train_steps=10000,
        eval_input_fn=eval_input_fn,
    )

In [21]:
import shutil

# Remove any existing model directory before the run (presumably so that
# checkpoints from an earlier run are not picked up). ignore_errors=True keeps
# this from failing when the directory does not exist.
shutil.rmtree("/tmp/output_dir", ignore_errors=True)

# Run the Experiment built by experiment_fn, with /tmp/output_dir as model_dir.
tf.contrib.learn.learn_runner.run(
    experiment_fn,
    run_config=tf.contrib.learn.RunConfig(model_dir="/tmp/output_dir"),
)

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000257438A67B8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/output_dir'}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/output_dir\model.ckpt.
INFO:tensorflow:Starting evaluation at 2018-01-03-09:53:34
INFO:tensorflow:Restoring parameters from /tmp/output_dir\model.ckpt-1
INFO:tensorflow:Evalua

INFO:tensorflow:loss = 1.82861e+07, step = 6301 (0.802 sec)
INFO:tensorflow:global_step/sec: 107.784
INFO:tensorflow:loss = 3.71072e+07, step = 6401 (0.933 sec)
INFO:tensorflow:global_step/sec: 88.2642
INFO:tensorflow:loss = 2.53867e+07, step = 6501 (1.130 sec)
INFO:tensorflow:global_step/sec: 117.389
INFO:tensorflow:loss = 1.76848e+07, step = 6601 (0.850 sec)
INFO:tensorflow:global_step/sec: 114.962
INFO:tensorflow:loss = 1.50568e+07, step = 6701 (0.870 sec)
INFO:tensorflow:global_step/sec: 125.914
INFO:tensorflow:loss = 1.88645e+07, step = 6801 (0.794 sec)
INFO:tensorflow:global_step/sec: 85.9959
INFO:tensorflow:loss = 2.74392e+07, step = 6901 (1.163 sec)
INFO:tensorflow:global_step/sec: 125.333
INFO:tensorflow:loss = 2.708e+07, step = 7001 (0.797 sec)
INFO:tensorflow:global_step/sec: 118.692
INFO:tensorflow:loss = 3.52273e+07, step = 7101 (0.844 sec)
INFO:tensorflow:global_step/sec: 126.532
INFO:tensorflow:loss = 1.81042e+07, step = 7201 (0.790 sec)
INFO:tensorflow:global_step/sec: 

({'global_step': 10000, 'loss': 8145233.0}, [])