In [94]:
import tensorflow as tf
from tensorflow.feature_column import embedding_column, numeric_column, categorical_column_with_identity
import numpy as np
import os

In [2]:
tf.enable_eager_execution()

In [95]:
def build_feature_columns():
    pickup_longitude = tf.feature_column.numeric_column('pickup_longitude')
    pickup_latitude = tf.feature_column.numeric_column('pickup_latitude')
    dropoff_longitude = tf.feature_column.numeric_column('dropoff_longitude')
    dropoff_latitude = tf.feature_column.numeric_column('dropoff_latitude')
    
    
    weekday = tf.feature_column.categorical_column_with_identity('weekday', 7)
    month = tf.feature_column.categorical_column_with_identity('month', 12)
    hour = tf.feature_column.categorical_column_with_identity('hour', 24)
    week_number = tf.feature_column.categorical_column_with_identity('week_number', 53)
    
    base_columns = [
        weekday,
        month,
        hour,
        week_number
    ]
    
    pickup_x = tf.feature_column.bucketized_column(pickup_longitude, list(np.linspace(0.0,1.0, 126)))
    pickup_y = tf.feature_column.bucketized_column(pickup_latitude, list(np.linspace(0.0,1.0, 124)))
    dropoff_x = tf.feature_column.bucketized_column(pickup_latitude, list(np.linspace(0.0,1.0, 126)))
    dropoff_y = tf.feature_column.bucketized_column(pickup_latitude, list(np.linspace(0.0,1.0, 124)))
    
    cross_columns = [
        tf.feature_column.crossed_column([pickup_x, pickup_y], 126*124*20),
        tf.feature_column.crossed_column([dropoff_x, dropoff_y], 126*124*20),
        #tf.feature_column.crossed_column([month, weekday, hour], 7*12*24)
    ]
    
    wide_columns = base_columns + cross_columns
    
    distance = tf.feature_column.numeric_column('distance')
    weekday_embed = embedding_column(weekday, dimension=8)
    hour_embed = embedding_column(hour, dimension=8)
    week_number_embed = embedding_column(week_number, dimension=16)
    
    deep_columns = [
        distance,
        weekday_embed,
        hour_embed,
        week_number_embed
    ]
    
    return wide_columns, deep_columns

In [96]:
wide_columns, deep_columns = build_feature_columns()

In [97]:
def build_estimator(fc_fn):
    wide_columns, deep_columns = fc_fn()
    hidden_units = [1000, 500, 250, 50]
    return tf.estimator.DNNLinearCombinedRegressor(
        linear_feature_columns=wide_columns,
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=hidden_units,
        dnn_optimizer=tf.train.ProximalAdagradOptimizer(
        learning_rate=0.1,
        l1_regularization_strength=0.001,
        l2_regularization_strength=0.001))
model = build_estimator(build_feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/wt/dv0qlxcn5j9b0h9vd8tr23bh0000gn/T/tmpjjfia10n', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0xb3823a128>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [98]:
dataset_dir = os.path.join('dataset')

train_data_file_name = os.path.join('train', 'train')
train_dataset_full_path = os.path.join(dataset_dir, train_data_file_name)

eval_data_file_name = os.path.join('eval', 'eval')
eval_dataset_full_path = os.path.join(dataset_dir, eval_data_file_name)

In [99]:
# train_raw_dataset = tf.data.TFRecordDataset(train_dataset_full_path)
# eval_raw_dataset = tf.data.TFRecordDataset(eval_dataset_full_path)

In [100]:
# train_parsed_dataset = train_raw_dataset.map(parse_function)
# eval_parsed_dataset = eval_raw_dataset.map(parse_function)

In [101]:
def input_fn(dataset_path):
    raw_dataset = tf.data.TFRecordDataset(dataset_path)
    
    def parse_function(example_proto):
        column_description = {
             'fare_amount': tf.FixedLenFeature([], tf.float32),
                'pickup_longitude': tf.FixedLenFeature([], tf.float32),
                'pickup_latitude': tf.FixedLenFeature([], tf.float32),
                'dropoff_longitude': tf.FixedLenFeature([], tf.float32),
                'dropoff_latitude': tf.FixedLenFeature([], tf.float32),
                'distance': tf.FixedLenFeature([], tf.float32),
                'hour': tf.FixedLenFeature([], tf.int64),
                'month': tf.FixedLenFeature([], tf.int64),
                'week_number': tf.FixedLenFeature([], tf.int64),
                'weekday': tf.FixedLenFeature([], tf.int64)
        } 
        
        columns = tf.parse_single_example(example_proto, column_description)
        label = columns.pop('fare_amount')
        features = columns
        
        return features, label

    parsed_dataset = raw_dataset.map(parse_function)
    
    dataset = parsed_dataset.shuffle(1000)
    dataset = dataset.batch(512)
    dataset = dataset.repeat(10000)
    
    iterator = dataset.make_one_shot_iterator()
    features, label = iterator.get_next()
    return (features, label)


In [102]:
train_spec = tf.estimator.TrainSpec(input_fn=lambda : input_fn(train_dataset_full_path))
eval_spec = tf.estimator.EvalSpec(input_fn=lambda : input_fn(eval_dataset_full_path))

In [103]:
tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/wt/dv0qlxcn5j9b0h9vd8tr23bh0000gn/T/tmpjjfia10n/model.ckpt.
INFO:tensorflow:loss = 111370.21, step = 1
INFO:tensorflow:global_step/sec: 14.372
INFO:tensorflow:loss = 24656.361, step = 101 (6.959 sec)
INFO:tensorflow:global_step/sec: 20.2918
INFO:tensorflow:loss = 15516.762, step = 201 (4.928 sec)
INFO:tensorflow:global_step/sec: 20.2504
INFO:tensorflow:loss 

INFO:tensorflow:global_step/sec: 19.3814
INFO:tensorflow:loss = 332.38806, step = 7701 (5.159 sec)
INFO:tensorflow:global_step/sec: 23.394
INFO:tensorflow:loss = 205.88266, step = 7801 (4.275 sec)
INFO:tensorflow:global_step/sec: 23.6656
INFO:tensorflow:loss = 210.62592, step = 7901 (4.225 sec)
INFO:tensorflow:global_step/sec: 23.2517
INFO:tensorflow:loss = 235.90369, step = 8001 (4.301 sec)
INFO:tensorflow:global_step/sec: 20.0459
INFO:tensorflow:loss = 159.53732, step = 8101 (4.988 sec)
INFO:tensorflow:global_step/sec: 23.2073
INFO:tensorflow:loss = 100.21457, step = 8201 (4.309 sec)
INFO:tensorflow:global_step/sec: 23.3333
INFO:tensorflow:loss = 353.21417, step = 8301 (4.286 sec)
INFO:tensorflow:global_step/sec: 20.3599
INFO:tensorflow:loss = 260.75244, step = 8401 (4.912 sec)
INFO:tensorflow:global_step/sec: 23.742
INFO:tensorflow:loss = 300.10352, step = 8501 (4.211 sec)
INFO:tensorflow:global_step/sec: 22.1887
INFO:tensorflow:loss = 258.52936, step = 8601 (4.507 sec)
INFO:tensorf

INFO:tensorflow:loss = 166.65659, step = 14701 (4.361 sec)
INFO:tensorflow:global_step/sec: 23.2674
INFO:tensorflow:loss = 294.71405, step = 14801 (4.298 sec)
INFO:tensorflow:global_step/sec: 22.5659
INFO:tensorflow:loss = 231.59616, step = 14901 (4.432 sec)
INFO:tensorflow:global_step/sec: 20.592
INFO:tensorflow:loss = 301.82263, step = 15001 (4.856 sec)
INFO:tensorflow:global_step/sec: 23.5505
INFO:tensorflow:loss = 188.9114, step = 15101 (4.246 sec)
INFO:tensorflow:global_step/sec: 23.2351
INFO:tensorflow:loss = 258.75092, step = 15201 (4.304 sec)
INFO:tensorflow:global_step/sec: 20.4451
INFO:tensorflow:loss = 228.2013, step = 15301 (4.892 sec)
INFO:tensorflow:global_step/sec: 21.3245
INFO:tensorflow:loss = 340.9226, step = 15401 (4.689 sec)
INFO:tensorflow:global_step/sec: 22.5781
INFO:tensorflow:loss = 177.73167, step = 15501 (4.429 sec)
INFO:tensorflow:global_step/sec: 22.2832
INFO:tensorflow:loss = 207.60486, step = 15601 (4.488 sec)
INFO:tensorflow:global_step/sec: 20.0495
INFO

({'average_loss': 35.286324,
  'label/mean': 12.0174885,
  'loss': 2822.906,
  'prediction/mean': 11.227833,
  'global_step': 20000},
 [])