## Set up

import libs

In [2]:
import math
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf

# Show INFO-level TensorFlow log messages and keep pandas output compact
# (at most 10 rows, floats rendered with one decimal place).
tf.logging.set_verbosity(tf.logging.INFO)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

# Call form of print: valid on both Python 2 and Python 3 for a single argument.
print(tf.__version__)

1.12.0


## Data
load and examine the data

In [37]:
# Location of the raw data and the column names to assign to it
# (the names are supplied here via `names`).
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"
column_names = [
    "sex",
    "length",
    "diameter",
    "height",
    "whole_weight",
    "shucked_weight",
    "viscera_weight",
    "shell_weight",
    "rings",
]

df = pd.read_csv(data_url, sep=",", index_col=False, names=column_names)

# Store the label column as float rather than integer.
df["rings"] = df["rings"].astype(float)

# Peek at the first rows.
df.head()

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
0,M,0.5,0.4,0.1,0.5,0.2,0.1,0.1,15.0
1,M,0.3,0.3,0.1,0.2,0.1,0.0,0.1,7.0
2,F,0.5,0.4,0.1,0.7,0.3,0.1,0.2,9.0
3,M,0.4,0.4,0.1,0.5,0.2,0.1,0.2,10.0
4,I,0.3,0.3,0.1,0.2,0.1,0.0,0.1,7.0


In [31]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,0.5,0.4,0.1,0.8,0.4,0.2,0.2,9.9
std,0.1,0.1,0.0,0.5,0.2,0.1,0.1,3.2
min,0.1,0.1,0.0,0.0,0.0,0.0,0.0,1.0
25%,0.5,0.3,0.1,0.4,0.2,0.1,0.1,8.0
50%,0.5,0.4,0.1,0.8,0.3,0.2,0.2,9.0
75%,0.6,0.5,0.2,1.2,0.5,0.3,0.3,11.0
max,0.8,0.7,1.1,2.8,1.5,0.8,1.0,29.0


In [39]:
# Split into train (~60%), eval (~20%) and test (~20%) sets
np.random.seed(seed=1) #makes split reproducible
randoms = np.random.rand(len(df))  # one uniform draw per row

# Half-open intervals [0, 0.6), [0.6, 0.8), [0.8, 1): every row lands in exactly
# one split. Strict '>' on the lower bounds would silently drop a row whose
# draw is exactly 0.6 or 0.8.
msk_train = randoms < 0.6
msk_eval = np.logical_and(randoms >= 0.6, randoms < 0.8)
msk_test = randoms >= 0.8

train_df = df[msk_train]
eval_df = df[msk_eval]
test_df = df[msk_test]

# The three splits must partition the full data set.
assert len(train_df) + len(eval_df) + len(test_df) == len(df)

## Linear regression model

In [92]:
OUTDIR = './abalones_trained'
def train_and_evaluate(output_dir, num_train_steps):
  """Train a linear regressor on train_df while periodically evaluating on eval_df.

  Reads the module-level `train_df` and `eval_df` frames; the label is the
  "rings" column of each.

  Args:
    output_dir: directory passed to the estimator as its model_dir.
    num_train_steps: passed to the TrainSpec as max_steps.
  """
  # The model only uses these three numeric columns.
  feature_columns = [
      tf.feature_column.numeric_column(name)
      for name in ('length', 'diameter', 'height')
  ]
  regressor = tf.estimator.LinearRegressor(
      model_dir = output_dir,
      feature_columns = feature_columns)

  # Add rmse evaluation metric
  def rmse(labels, predictions):
    """Return an 'rmse' metric computed on predictions cast to float64."""
    as_float64 = tf.cast(predictions['predictions'], tf.float64)
    return {'rmse': tf.metrics.root_mean_squared_error(labels, as_float64)}
  regressor = tf.contrib.estimator.add_metrics(regressor, rmse)

  # Training input: shuffled, no epoch limit (max_steps bounds the training).
  train_input_fn = tf.estimator.inputs.pandas_input_fn(
      x = train_df,
      y = train_df["rings"],
      num_epochs = None,
      shuffle = True)
  # Evaluation input: a single ordered pass over the eval split.
  eval_input_fn = tf.estimator.inputs.pandas_input_fn(
      x = eval_df,
      y = eval_df["rings"],
      num_epochs = 1,
      shuffle = False)

  train_spec = tf.estimator.TrainSpec(
      input_fn = train_input_fn,
      max_steps = num_train_steps)
  eval_spec = tf.estimator.EvalSpec(
      input_fn = eval_input_fn,
      steps = None,
      start_delay_secs = 1, # start evaluating after N seconds
      throttle_secs = 10,  # evaluate every N seconds
  )
  tf.estimator.train_and_evaluate(regressor, train_spec, eval_spec)

# Remove any previously trained model directory (missing directory is not an error)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

## Train the linear regression model
Training for 10000 steps reaches a loss slightly above 600.

In [100]:
# Train with max_steps=10000, using OUTDIR as the estimator's model directory
train_and_evaluate(OUTDIR, num_train_steps = 10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd66a6aa590>, '_model_dir': './abalones_trained', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_device_fn': None, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_evaluation_master': '', '_eval_distribute': None, '_train_distribute': None, '_master': ''}
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_g

INFO:tensorflow:loss = 449.39224, step = 6801 (0.293 sec)
INFO:tensorflow:global_step/sec: 393.045
INFO:tensorflow:loss = 1285.5582, step = 6901 (0.254 sec)
INFO:tensorflow:global_step/sec: 397.253
INFO:tensorflow:loss = 731.4885, step = 7001 (0.254 sec)
INFO:tensorflow:global_step/sec: 409.448
INFO:tensorflow:loss = 1104.115, step = 7101 (0.241 sec)
INFO:tensorflow:global_step/sec: 373.583
INFO:tensorflow:loss = 622.682, step = 7201 (0.268 sec)
INFO:tensorflow:global_step/sec: 360.54
INFO:tensorflow:loss = 1527.3694, step = 7301 (0.277 sec)
INFO:tensorflow:global_step/sec: 388.452
INFO:tensorflow:loss = 1077.9023, step = 7401 (0.259 sec)
INFO:tensorflow:global_step/sec: 367.457
INFO:tensorflow:loss = 794.8511, step = 7501 (0.272 sec)
INFO:tensorflow:global_step/sec: 373.274
INFO:tensorflow:loss = 1191.868, step = 7601 (0.269 sec)
INFO:tensorflow:global_step/sec: 371.384
INFO:tensorflow:loss = 495.5207, step = 7701 (0.270 sec)
INFO:tensorflow:global_step/sec: 364.896
INFO:tensorflow:lo

## Predict
Use the trained model to make some predictions.

In [53]:
def make_test_input_fn(df):
    """Build a label-free, unshuffled input function over `df` for prediction.

    Args:
        df: pandas DataFrame passed to pandas_input_fn as the features `x`.

    Returns:
        The input function created by tf.estimator.inputs.pandas_input_fn.
    """
    input_fn = tf.estimator.inputs.pandas_input_fn(
        x = df,
        y = None,            # no labels at prediction time
        batch_size = 128,
        shuffle = False,     # keep rows in their original order
        queue_capacity = 1000,
    )
    return input_fn
# Re-create the estimator over the same three numeric features, pointing at
# OUTDIR so that it uses the model directory written during training.
model = tf.estimator.LinearRegressor(
    feature_columns = list(map(
        tf.feature_column.numeric_column,
        ('length', 'diameter', 'height'),
    )),
    model_dir = OUTDIR,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_global_id_in_cluster': 0, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd6683ae6d0>, '_model_dir': './abalones_trained', '_protocol': None, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_device_fn': None, '_experimental_distribute': None, '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_evaluation_master': '', '_eval_distribute': None, '_train_distribute': None, '_master': ''}


In [101]:
# Run the model over the test split and materialise the per-row prediction dicts.
predictions = list(model.predict(input_fn = make_test_input_fn(test_df)))

# One prediction is expected per test row; fail loudly instead of mis-pairing.
assert len(predictions) == len(test_df)

# Pair each label with its prediction directly (no per-row iloc lookup) and use
# the call form of print with a single formatted string, which behaves the same
# on Python 2 and Python 3 and emits the same text as before.
for actual, predicted in zip(test_df["rings"], predictions):
    print("actual:  %s , predicted: %s" % (actual, predicted['predictions'][0]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./abalones_trained/model.ckpt-10000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
actual:  10.0 , predicted: 10.084757
actual:  11.0 , predicted: 7.910466
actual:  10.0 , predicted: 8.059849
actual:  10.0 , predicted: 11.15539
actual:  11.0 , predicted: 10.409239
actual:  11.0 , predicted: 10.402767
actual:  18.0 , predicted: 11.722149
actual:  8.0 , predicted: 8.994904
actual:  9.0 , predicted: 8.871043
actual:  9.0 , predicted: 9.311956
actual:  10.0 , predicted: 9.464624
actual:  13.0 , predicted: 10.214918
actual:  11.0 , predicted: 10.96874
actual:  9.0 , predicted: 10.827647
actual:  9.0 , predicted: 10.526627
actual:  12.0 , predicted: 10.912155
actual:  10.0 , predicted: 10.554478
actual:  12.0 , predicted: 10.453051
actual:  9.0 , predicted: 10.306856
actual:  15.0 , predicted: 10.350666
act

actual:  11.0 , predicted: 11.361528
actual:  9.0 , predicted: 11.028343
actual:  12.0 , predicted: 11.324091
actual:  11.0 , predicted: 11.274935
actual:  10.0 , predicted: 11.145731
actual:  12.0 , predicted: 11.078457
actual:  9.0 , predicted: 11.135042
actual:  11.0 , predicted: 11.340052
actual:  10.0 , predicted: 11.272778
actual:  13.0 , predicted: 11.39448
actual:  10.0 , predicted: 11.405169
actual:  10.0 , predicted: 11.454252
actual:  9.0 , predicted: 11.506523
actual:  9.0 , predicted: 11.462711
actual:  12.0 , predicted: 11.722221
actual:  14.0 , predicted: 11.600447
actual:  11.0 , predicted: 11.561981
actual:  12.0 , predicted: 11.7638035
actual:  12.0 , predicted: 11.590788
actual:  13.0 , predicted: 11.901539
actual:  10.0 , predicted: 11.904726
actual:  10.0 , predicted: 12.244141
actual:  15.0 , predicted: 12.0583515
actual:  11.0 , predicted: 12.654161
actual:  7.0 , predicted: 12.147961
actual:  12.0 , predicted: 12.9358635
actual:  7.0 , predicted: 9.18491
actual:

actual:  13.0 , predicted: 10.922771
actual:  11.0 , predicted: 10.907768
actual:  11.0 , predicted: 10.546976
actual:  12.0 , predicted: 10.826761
actual:  12.0 , predicted: 8.062006
actual:  16.0 , predicted: 11.524786
actual:  13.0 , predicted: 10.930273
actual:  10.0 , predicted: 8.455994
actual:  11.0 , predicted: 9.792341
actual:  8.0 , predicted: 8.308598
actual:  14.0 , predicted: 8.652229
actual:  13.0 , predicted: 9.6994705
actual:  11.0 , predicted: 8.540186
actual:  13.0 , predicted: 9.483772
actual:  11.0 , predicted: 9.085712
actual:  13.0 , predicted: 9.676966
actual:  10.0 , predicted: 8.851894
actual:  11.0 , predicted: 7.477055
actual:  11.0 , predicted: 8.731222
actual:  6.0 , predicted: 7.3362045
actual:  5.0 , predicted: 6.595399
actual:  8.0 , predicted: 7.4951735
actual:  13.0 , predicted: 9.180595
actual:  13.0 , predicted: 11.147888
actual:  13.0 , predicted: 10.911985
actual:  18.0 , predicted: 11.0305
actual:  10.0 , predicted: 10.138155
actual:  13.0 , predi