In [1]:
import tensorflow as tf
from tensorflow.feature_column import embedding_column, numeric_column, categorical_column_with_identity
import numpy as np
import os

In [2]:
tf.enable_eager_execution()

In [2]:
def build_feature_columns():
    pickup_longitude = tf.feature_column.numeric_column('pickup_longitude')
    pickup_latitude = tf.feature_column.numeric_column('pickup_latitude')
    dropoff_longitude = tf.feature_column.numeric_column('dropoff_longitude')
    dropoff_latitude = tf.feature_column.numeric_column('dropoff_latitude')
    
    base_columns = [

    ]
    
    weekday = tf.feature_column.categorical_column_with_identity('weekday', 7)
    month = tf.feature_column.categorical_column_with_identity('month', 12)
    hour = tf.feature_column.categorical_column_with_identity('hour', 24)
    week_number = tf.feature_column.categorical_column_with_identity('week_number', 53)
    
    pickup_x = tf.feature_column.bucketized_column(pickup_longitude, list(np.linspace(0.0,1.0, 126)))
    pickup_y = tf.feature_column.bucketized_column(pickup_latitude, list(np.linspace(0.0,1.0, 124)))
    dropoff_x = tf.feature_column.bucketized_column(pickup_latitude, list(np.linspace(0.0,1.0, 126)))
    dropoff_y = tf.feature_column.bucketized_column(pickup_latitude, list(np.linspace(0.0,1.0, 124)))
    
    cross_columns = [
        tf.feature_column.crossed_column([pickup_x, pickup_y], 126*124),
        tf.feature_column.crossed_column([dropoff_x, dropoff_y], 126*124),
        tf.feature_column.crossed_column([month, weekday, hour], 7*12*24)
    ]
    
    wide_columns = base_columns + cross_columns
    
    distance = tf.feature_column.numeric_column('distance')
    weekday_embed = embedding_column(weekday, dimension=8)
    hour_embed = embedding_column(hour, dimension=8)
    week_number_embed = embedding_column(week_number, dimension=16)
    
    deep_columns = [
        distance,
        weekday_embed,
        hour_embed,
        week_number_embed
    ]
    
    return wide_columns, deep_columns

In [3]:
wide_columns, deep_columns = build_feature_columns()

In [4]:
def build_estimator(fc_fn):
    wide_columns, deep_columns = fc_fn()
    hidden_units = [100, 75, 50, 25]
    return tf.estimator.DNNLinearCombinedRegressor(
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=hidden_units)
model = build_estimator(build_feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/wt/dv0qlxcn5j9b0h9vd8tr23bh0000gn/T/tmpwic07upe', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0xb262001d0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [5]:
def parse_function(example_proto):
    column_description = {
         'fare_amount': tf.FixedLenFeature([], tf.float32),
            'pickup_longitude': tf.FixedLenFeature([], tf.float32),
            'pickup_latitude': tf.FixedLenFeature([], tf.float32),
            'dropoff_longitude': tf.FixedLenFeature([], tf.float32),
            'dropoff_latitude': tf.FixedLenFeature([], tf.float32),
            'distance': tf.FixedLenFeature([], tf.float32),
            'hour': tf.FixedLenFeature([], tf.int64),
            'month': tf.FixedLenFeature([], tf.int64),
            'week_number': tf.FixedLenFeature([], tf.int64),
            'weekday': tf.FixedLenFeature([], tf.int64)
    } 
    columns = tf.parse_single_example(example_proto, column_description)
    
    label = columns.pop('fare_amount')
    features = columns
    
    
    return features, label

In [18]:
dataset_dir = os.path.join('dataset')

train_data_file_name = os.path.join('train', 'train_dataset')
train_dataset_full_path = os.path.join(dataset_dir, train_data_file_name)

eval_data_file_name = os.path.join('eval', 'eval_dataset')
eval_dataset_full_path = os.path.join(dataset_dir, eval_data_file_name)

In [19]:
train_raw_dataset = tf.data.TFRecordDataset(train_dataset_full_path)
eval_raw_dataset = tf.data.TFRecordDataset(eval_dataset_full_path)

In [20]:
train_parsed_dataset = train_raw_dataset.map(parse_function)
eval_parsed_dataset = eval_raw_dataset.map(parse_function)

In [24]:
def input_fn(parsed_dataset):
    dataset = parsed_dataset.shuffle(1000)
    dataset = dataset.batch(512)
    dataset = dataset.repeat(100)
    
    iterator = dataset.make_one_shot_iterator()
    features, label = iterator.get_next()
    return (features, label)


In [25]:
train_spec = tf.estimator.TrainSpec(input_fn=lambda : input_fn(train_parsed_dataset))
eval_spec = tf.estimator.EvalSpec(input_fn=lambda : input_fn(eval_parsed_dataset))

In [26]:
tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/wt/dv0qlxcn5j9b0h9vd8tr23bh0000gn/T/tmpwic07upe/model.ckpt.


InvalidArgumentError: assertion failed: [4 7 2...] [12]
	 [[node linear/linear_model/linear_model/linear_model/hour_X_month_X_weekday/assert_less_than_num_buckets/Assert/AssertGuard/Assert (defined at /Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/canned/linear.py:345) ]]

Caused by op 'linear/linear_model/linear_model/linear_model/hour_X_month_X_weekday/assert_less_than_num_buckets/Assert/AssertGuard/Assert', defined at:
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/asyncio/base_events.py", line 539, in run_forever
    self._run_once()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/asyncio/base_events.py", line 1775, in _run_once
    handle._run()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3220, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-26-d630bd28d0b8>", line 1, in <module>
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/training.py", line 471, in train_and_evaluate
    return executor.run()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/training.py", line 611, in run
    return self.run_local()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/training.py", line 712, in run_local
    saving_listeners=saving_listeners)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 358, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1124, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1154, in _train_model_default
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1112, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/canned/dnn_linear_combined.py", line 1002, in _model_fn
    linear_sparse_combiner=linear_sparse_combiner)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/canned/dnn_linear_combined.py", line 208, in _dnn_linear_combined_model_fn
    linear_logits = logit_fn(features=features)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/canned/linear.py", line 345, in linear_logit_fn
    logits = linear_model(features)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 554, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 650, in call
    return self.layer(features)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 554, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 510, in call
    weight_var=weight_var)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 2315, in _create_weighted_sum
    weight_var=weight_var)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 2402, in _create_categorical_column_weighted_sum
    state_manager)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 4143, in get_sparse_tensors
    transformation_cache.get(self, state_manager), None)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 2518, in get
    transformed = column.transform_feature(self, state_manager)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 4092, in transform_feature
    state_manager)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 3883, in get_sparse_tensors
    transformation_cache.get(self, state_manager), None)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 2518, in get
    transformed = column.transform_feature(self, state_manager)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 3861, in transform_feature
    return self._transform_input_tensor(input_tensor)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/feature_column/feature_column_v2.py", line 3835, in _transform_input_tensor
    name='assert_less_than_num_buckets')
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/ops/check_ops.py", line 792, in assert_less
    return control_flow_ops.Assert(condition, data, summarize=summarize)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/util/tf_should_use.py", line 193, in wrapped
    return _add_should_use_warning(fn(*args, **kwargs))
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 168, in Assert
    guarded_assert = cond(condition, no_op, true_assert, name="AssertGuard")
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2108, in cond
    orig_res_f, res_f = context_f.BuildCondBranch(false_fn)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 1941, in BuildCondBranch
    original_result = fn()
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 166, in true_assert
    condition, data, summarize, name="Assert")
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 72, in _assert
    name=name)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): assertion failed: [4 7 2...] [12]
	 [[node linear/linear_model/linear_model/linear_model/hour_X_month_X_weekday/assert_less_than_num_buckets/Assert/AssertGuard/Assert (defined at /Users/bryanwu/anaconda/envs/Tensorflow/lib/python3.7/site-packages/tensorflow_estimator/python/estimator/canned/linear.py:345) ]]
