# [Effective TensorFlow for Non-Experts (Google IO '17)](http://youtu.be/?v=5DknTFbcGVM)
c:\Users\hcche\Documents\ML YouTube and Moocs\Machine Learning\Misc of ML and AI\Inception slim models API\Effective TensorFlow for Non-Experts (Google I_O '17)-5DknTFbcGVM.mp4                      
# [Estimator demo using Automobile dataset](https://gist.github.com/martinwicke/6838c23abdc53e6bcda36ed9f40cff39)
[Jupyter Notebook martinwicke/automobile.ipynb](https://goo.gl/0OgXiL)




In [50]:
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

In [51]:
# First thing to do: Download https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data

In [52]:
# We're using pandas to read the CSV file. This is easy for small datasets, but for large and complex datasets,
# tensorflow parsing and processing functions are more powerful.
import pandas as pd
import numpy as np

In [53]:
# The CSV file has no header row, so every column is declared here exactly
# once, in file order, together with the dtype it should be parsed as.
_COLUMN_SPECS = [
    ('symboling', np.int32),
    ('normalized-losses', np.float32),
    ('make', str),
    ('fuel-type', str),
    ('aspiration', str),
    ('num-of-doors', str),
    ('body-style', str),
    ('drive-wheels', str),
    ('engine-location', str),
    ('wheel-base', np.float32),
    ('length', np.float32),
    ('width', np.float32),
    ('height', np.float32),
    ('curb-weight', np.float32),
    ('engine-type', str),
    ('num-of-cylinders', str),
    ('engine-size', np.float32),
    ('fuel-system', str),
    ('bore', np.float32),
    ('stroke', np.float32),
    ('compression-ratio', np.float32),
    ('horsepower', np.float32),
    ('peak-rpm', np.float32),
    ('city-mpg', np.float32),
    ('highway-mpg', np.float32),
    ('price', np.float32),
]

# Column names, in file order, used in place of the missing header.
names = [column for column, _ in _COLUMN_SPECS]

# Column name -> dtype mapping handed to the CSV reader.
dtypes = dict(_COLUMN_SPECS)

In [54]:
# Load the data file using the column names and dtypes declared for it;
# '?' entries are treated as missing values (NaN).
df = pd.read_csv(
    'imports-85.data',
    names=names,
    dtype=dtypes,
    na_values='?',
)

In [55]:
# A row with no price has no label to train or evaluate against, so it is
# discarded. (Dropping rows on any missing value in the subset is the
# dropna default.)
df = df.dropna(subset=['price'])

In [56]:
# Continuous (float32) columns: replace missing values with 0.0 instead of NaN.
float_columns = [column for column, kind in dtypes.items() if kind == np.float32]
df[float_columns] = df[float_columns].fillna(0.)
# String columns: replace missing values with '' instead of NaN
# (NaN mixed with strings is very bad for us).
string_columns = [column for column, kind in dtypes.items() if kind == str]
df[string_columns] = df[string_columns].fillna('')

In [57]:
# The first 160 rows (by position) are used for training and the remaining
# rows are held out for evaluation.
# NOTE(review): the split follows row order with no shuffling -- confirm the
# rows are not ordered in a way that skews the evaluation set.
training_data = df.iloc[:160]
eval_data = df.iloc[160:]

# Detach the 'price' label from each feature frame; pop() removes the column
# from the frame and returns it.
training_label = training_data.pop('price')
eval_label = eval_data.pop('price')

In [58]:
# Now we can start using some TensorFlow.
import tensorflow as tf
print('please make sure that version >= 1.2:')
print(tf.__version__)

please make sure that version >= 1.2:
1.4.0


In [59]:
# Both input functions are built from pandas objects by the same factory.
_make_input_fn = tf.estimator.inputs.pandas_input_fn

# Input function for training:
#   num_epochs=None -> keep cycling through the input data forever
#   shuffle=True    -> randomize the order of the input data
training_input_fn = _make_input_fn(
    x=training_data,
    y=training_label,
    batch_size=64,
    num_epochs=None,
    shuffle=True,
)

# Input function for evaluation:
#   shuffle=False -> do not randomize the input data
eval_input_fn = _make_input_fn(
    x=eval_data,
    y=eval_label,
    batch_size=64,
    shuffle=False,
)

In [60]:
# Describe how the model should interpret the inputs.
# The key given to each feature column has to match the name of the
# corresponding series in the dataframe.

# Short aliases for the three kinds of feature column used below.
_numeric = tf.feature_column.numeric_column
_hashed = tf.feature_column.categorical_column_with_hash_bucket
_vocab = tf.feature_column.categorical_column_with_vocabulary_list

# Numeric inputs.
symboling = _numeric('symboling')
normalized_losses = _numeric('normalized-losses')
wheel_base = _numeric('wheel-base')
length = _numeric('length')
width = _numeric('width')
height = _numeric('height')
curb_weight = _numeric('curb-weight')
engine_size = _numeric('engine-size')
bore = _numeric('bore')
stroke = _numeric('stroke')
compression_ratio = _numeric('compression-ratio')
horsepower = _numeric('horsepower')
peak_rpm = _numeric('peak-rpm')
city_mpg = _numeric('city-mpg')
highway_mpg = _numeric('highway-mpg')

# Categorical inputs. 'make' is hashed into 50 buckets; every other column
# lists its vocabulary explicitly.
make = _hashed('make', 50)
fuel_type = _vocab('fuel-type', ['diesel', 'gas'])
aspiration = _vocab('aspiration', ['std', 'turbo'])
num_of_doors = _vocab('num-of-doors', ['two', 'four'])
body_style = _vocab(
    'body-style', ['hardtop', 'wagon', 'sedan', 'hatchback', 'convertible'])
drive_wheels = _vocab('drive-wheels', ['4wd', 'rwd', 'fwd'])
engine_location = _vocab('engine-location', ['front', 'rear'])
engine_type = _vocab(
    'engine-type', ['dohc', 'dohcv', 'l', 'ohc', 'ohcf', 'ohcv', 'rotor'])
num_of_cylinders = _vocab(
    'num-of-cylinders',
    ['eight', 'five', 'four', 'six', 'three', 'twelve', 'two'])
fuel_system = _vocab(
    'fuel-system',
    ['1bbl', '2bbl', '4bbl', 'idi', 'mfi', 'mpfi', 'spdi', 'spfi'])

In [61]:
# Every feature column defined for the dataset, in dataframe column order.
linear_features = [
    symboling,
    normalized_losses,
    make,
    fuel_type,
    aspiration,
    num_of_doors,
    body_style,
    drive_wheels,
    engine_location,
    wheel_base,
    length,
    width,
    height,
    curb_weight,
    engine_type,
    num_of_cylinders,
    engine_size,
    fuel_system,
    bore,
    stroke,
    compression_ratio,
    horsepower,
    peak_rpm,
    city_mpg,
    highway_mpg,
]

In [62]:
regressor = tf.contrib.learn.LinearRegressor(feature_columns=linear_features)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001635EEBC0B8>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': 'C:\\Users\\hcche\\AppData\\Local\\Temp\\tmp3_5drczi'}


\ 下面查出一堆 classifier, Estimator, Regressor 都不知怎麼區分的？ Stackoverflow 上有人問了也沒人回答 <br>
\ https://stackoverflow.com/questions/43401721/difference-between-dnnlinearcombinedestimator-regressor-classifier-tensorflo<br>
\ 還太新了？<br>
\__main\__ :> tf.contrib.learn dir . cr 
\['BaseEstimator', 'DNNClassifier', 'DNNEstimator', 'DNNLinearCombinedClassifier', 'DNNLinearCombinedEstimator', 'DNNLinearCombinedRegressor', __'DNNRegressor'__, 'DynamicRnnEstimator', 'Estimator', 'Evaluable', 'Experiment', 'ExportStrategy', 'Head', 'InputFnOps', 'KMeansClustering', 'LinearClassifier', 'LinearEstimator', __'LinearRegressor'__, 'LogisticRegressor', 'MetricSpec', 'ModeKeys', 'ModelFnOps', 'NanLossDuringTrainingError', 'NotFittedError', 'PredictionKey', 'ProblemType', 'RunConfig', 'SKCompat', 'SVM', 'TaskType', 'Trainable', ...snip ..., 'binary_svm_head', 'build_parsing_serving_input_fn', 'datasets', 'evaluate', 'extract_dask_data', 'extract_dask_labels', 'extract_pandas_data', 'extract_pandas_labels', 'extract_pandas_matrix', 'graph_actions', 'head', 'infer', 'infer_real_valued_columns_from_input', 'infer_real_valued_columns_from_input_fn', 'io', __'learn_runner'__, 'make_export_strategy', 'models', 'monitors', 'multi_class_head', 'multi_head', 'multi_label_head', 'no_op_train_fn', 'ops', 'poisson_regression_head', 'preprocessing', 'read_batch_examples', 'read_batch_features', 'read_batch_record_features', 'read_keyed_batch_examples', 'read_keyed_batch_examples_shared_queue', 'read_keyed_batch_features', 'read_keyed_batch_features_shared_queue', 'regression_head', 'run_feeds', 'run_n', 'train', 'utils']
OK exit
OK 


In [63]:
def experiment_fn(run_config, params):
    """Build the Experiment handed to learn_runner.

    The Experiment bundles an Estimator with the inputs used to train and
    evaluate it.

    Args:
        run_config: Run configuration, forwarded to the estimator as
            ``config``.
        params: Not used here; available for customizing the Estimator,
            e.g. for hyperparameter tuning.

    Returns:
        A ``tf.contrib.learn.Experiment`` around a ``LinearRegressor`` over
        ``linear_features`` that trains for 10000 steps on
        ``training_input_fn`` and evaluates on ``eval_input_fn``.
    """
    estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=linear_features,
        config=run_config,
    )
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=training_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=10000,
    )
# shutil reference: http://www.cnblogs.com/CLTANG/archive/2011/11/15/2249257.html
import shutil

# Delete the temporary output directory (if it exists) before the run, then
# use the same path as the model directory.
model_dir = "/tmp/output_dir"
shutil.rmtree(model_dir, ignore_errors=True)
tf.contrib.learn.learn_runner.run(
    experiment_fn,
    run_config=tf.contrib.learn.RunConfig(model_dir=model_dir),
)

INFO:tensorflow:Using config: {'_task_type': None, '_task_id': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001635573D860>, '_master': '', '_num_ps_replicas': 0, '_num_worker_replicas': 0, '_environment': 'local', '_is_chief': True, '_evaluation_master': '', '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_session_config': None, '_save_checkpoints_steps': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_model_dir': '/tmp/output_dir'}
Instructions for updating:
Monitors are deprecated. Please use tf.train.SessionRunHook.
Instructions for updating:
Please switch to tf.train.get_global_step
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/output_dir\model.ckpt.
INFO:tensorflow:Starting evaluation at 2018-01-05-01:34:16
INFO:tensorflow:Restorin

INFO:tensorflow:global_step/sec: 33.6781
INFO:tensorflow:loss = 1.82941e+07, step = 6001 (2.972 sec)
INFO:tensorflow:global_step/sec: 34.0922
INFO:tensorflow:loss = 2.16285e+07, step = 6101 (2.933 sec)
INFO:tensorflow:global_step/sec: 32.5035
INFO:tensorflow:loss = 1.7377e+07, step = 6201 (3.073 sec)
INFO:tensorflow:global_step/sec: 32.7818
INFO:tensorflow:loss = 2.22345e+07, step = 6301 (3.056 sec)
INFO:tensorflow:global_step/sec: 26.2252
INFO:tensorflow:loss = 1.99859e+07, step = 6401 (3.820 sec)
INFO:tensorflow:global_step/sec: 17.8684
INFO:tensorflow:loss = 7.55601e+06, step = 6501 (5.620 sec)
INFO:tensorflow:global_step/sec: 22.0052
INFO:tensorflow:loss = 2.33266e+07, step = 6601 (4.520 sec)
INFO:tensorflow:global_step/sec: 33.7754
INFO:tensorflow:loss = 2.87699e+07, step = 6701 (2.942 sec)
INFO:tensorflow:global_step/sec: 113.416
INFO:tensorflow:loss = 2.73927e+07, step = 6801 (0.876 sec)
INFO:tensorflow:global_step/sec: 130.657
INFO:tensorflow:loss = 3.03666e+07, step = 6901 (0.

({'global_step': 10000, 'loss': 8224656.5}, [])

In [None]:
regressor.fit(input_fn=training_input_fn, steps=10000)

In [None]:
regressor.evaluate(input_fn=eval_input_fn)

In [None]:
# Numeric features are used as they are.
_dnn_numeric = [
    symboling, normalized_losses, wheel_base, length, width, height,
    curb_weight, engine_size, bore, stroke, compression_ratio, horsepower,
    peak_rpm, city_mpg, highway_mpg,
]

# Categorical features are densified by wrapping each in an indicator column.
_dnn_categorical = [
    make, fuel_type, aspiration, num_of_doors, body_style, drive_wheels,
    engine_location, engine_type, num_of_cylinders, fuel_system,
]

dnn_features = _dnn_numeric + [
    tf.feature_column.indicator_column(column) for column in _dnn_categorical
]

In [None]:
dnnregressor = tf.contrib.learn.DNNRegressor(feature_columns=dnn_features, hidden_units=[50, 30, 10])

In [None]:
dnnregressor.fit(input_fn=training_input_fn, steps=10000)

In [None]:
dnnregressor.evaluate(input_fn=eval_input_fn)

In [None]:
def experiment_fn(run_config, params):
    """Create the Experiment: an Estimator plus its training and eval inputs.

    Args:
        run_config: Run configuration passed to the estimator as ``config``.
        params: Unused in this implementation; could be used to customize
            the Estimator or for hyperparameter tuning.

    Returns:
        A ``tf.contrib.learn.Experiment`` whose estimator is a
        ``LinearRegressor`` built from ``linear_features``, with 10000
        training steps.
    """
    linear_estimator = tf.contrib.learn.LinearRegressor(
        feature_columns=linear_features,
        config=run_config,
    )
    experiment = tf.contrib.learn.Experiment(
        estimator=linear_estimator,
        train_input_fn=training_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=10000,
    )
    return experiment

In [None]:
# shutil reference: http://www.cnblogs.com/CLTANG/archive/2011/11/15/2249257.html
import shutil

# Remove the temporary output directory (errors are ignored, so a missing
# directory is fine) and run the experiment with that path as model_dir.
model_dir = "/tmp/output_dir"
shutil.rmtree(model_dir, ignore_errors=True)
run_config = tf.contrib.learn.RunConfig(model_dir=model_dir)
tf.contrib.learn.learn_runner.run(experiment_fn, run_config=run_config)

In [None]:
# Presumably opens the peforth interactive prompt (a pasted console session
# ending in "OK exit" appears earlier in this notebook) -- TODO confirm.
# NOTE(review): no `import peforth` is visible in the cells shown here;
# confirm it is loaded before this cell runs.
peforth.ok()

In [None]:
scores = dnnregressor.predict(x=dict(eval_data[0:10]))

In [None]:
[i for i in scores]

In [None]:
eval_data[0:10]

In [None]:
_ = dnnregressor.predict(x=dict(eval_data[20:21]))
print(eval_data[20:21])
[i for i in _]

In [None]:
_ = dnnregressor.predict(x=dict(eval_data[27:28]))
print(eval_data[27:28])
[i for i in _]

In [None]:
# Scratch check of dict iteration: looping over a dict directly yields only
# its keys, so an attempt such as `for k, v in dtypes` cannot unpack pairs --
# .items() is needed for that.
mydict = {'aa': 11, 'bb': 22}
list(mydict)

In [None]:
[mydict[i] for i in mydict]

In [None]:
[(n,k) for k,n in mydict.items()]

In [None]:
mydict.items()

In [None]:
regressor = tf.contrib.learn.LinearRegressor(feature_columns=linear_features)

In [None]:
# Presumably opens the peforth interactive prompt for inspecting the session
# -- TODO confirm.
# NOTE(review): no `import peforth` is visible in the cells shown here;
# confirm it is loaded before this cell runs.
peforth.ok()