In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline
# Default size (width, height) applied to every matplotlib figure created below.
plt.rcParams['figure.figsize'] = (16,10)

In [2]:
def read_csv(TRAIN_PATH, chunksize=5_000_000):
    """Load the cleaned training CSV into a single DataFrame with compact dtypes.

    The file is parsed in chunks of ``chunksize`` rows and the chunks are
    concatenated exactly once after the last chunk has been read, so earlier
    chunks are not copied again for every new chunk.

    Parameters
    ----------
    TRAIN_PATH : str or path-like
        Location of the CSV file. Only the columns named in ``traintypes``
        are read; any other column in the file is ignored.
    chunksize : int, optional
        Number of rows parsed per chunk. Defaults to 5,000,000.

    Returns
    -------
    pandas.DataFrame
        The selected columns cast to the dtypes in ``traintypes``. The row
        index is whatever the chunks carry (it is not reset).
    """
    # Narrow dtypes keep the in-memory footprint of the frame small.
    traintypes = {
        'fare': 'float32',
        'from': 'str',
        'to': 'str',
        'p_count': 'uint8',
        'hour': 'uint8',
        'weekday': 'uint8',
        'week': 'uint8',
        'year': 'int16',
        'dist_e': 'float32',
        'dist_t': 'float32',
        'x0': 'float32',
        'x1': 'float32',
        'y0': 'float32',
        'y1': 'float32',
    }
    cols = list(traintypes.keys())

    # Collect every chunk first ...
    df_list = list(
        pd.read_csv(TRAIN_PATH, usecols=cols, dtype=traintypes, chunksize=chunksize)
    )

    # Defensive: pd.concat rejects an empty list, so hand back an empty,
    # correctly typed frame if the reader produced no chunks at all.
    if not df_list:
        return pd.DataFrame(
            {name: pd.Series(dtype=kind) for name, kind in traintypes.items()}
        )

    # ... then concatenate a single time.
    return pd.concat(df_list)

In [3]:
# Load the cleaned training set through the chunked reader defined above.
train = read_csv('../input/train_cleaned_2.csv')

In [4]:
# Peek at the first rows to confirm the expected columns were loaded.
train.head()

Unnamed: 0,fare,from,to,hour,weekday,week,year,p_count,dist_e,dist_t,x0,x1,y0,y1
0,4.5,Queens,Queens,17,0,25,2009,1,0.638322,0.759079,-73.844315,-73.841614,40.721317,40.712276
1,16.9,Manhattan,Manhattan,16,1,1,2010,1,5.213395,6.717119,-74.016045,-73.979271,40.711304,40.782005
2,5.7,Manhattan,Manhattan,0,3,33,2011,2,0.85261,1.16424,-73.982735,-73.991241,40.761269,40.750561
3,7.7,Manhattan,Manhattan,4,5,16,2012,1,1.7356,1.943343,-73.987129,-73.99157,40.733143,40.758091
4,5.3,Manhattan,Manhattan,7,1,10,2010,1,1.228196,1.658726,-73.968094,-73.956657,40.768009,40.783764


In [5]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the seed fixes the split.
EVAL_FRACTION = 0.25
SPLIT_SEED = 379582

# Separate the regression target from the input features, then drop the
# reference to the combined frame.
y = train['fare']
X = train.drop(columns=['fare'])
del train

X_train, X_eval, y_train, y_eval = train_test_split(
    X,
    y,
    test_size=EVAL_FRACTION,
    random_state=SPLIT_SEED,
)

# Only the split pieces are needed from here on.
del X, y

In [6]:
import tensorflow as tf

In [7]:
# Vocabulary for the 'from' / 'to' region features.
# NOTE(review): these strings are runtime values, presumably spelled exactly
# as they appear in the data -- verify against the CSV before "correcting" any.
locations = [
    'Manhattan', 'JFK', 'LaGardia', 'Newark', 'Teterboro', 'Brooklin',
    'Bronx', 'Staten', 'Queens', 'Westchester', 'Nassau', 'Jersey',
    'Bayonne', 'WStaton', 'West', 'WNewYork', 'North',
]

fc = tf.feature_column

# ---- numeric inputs --------------------------------------------------------
p_count = fc.numeric_column('p_count')
taxicab = fc.numeric_column('dist_t')
euclidean = fc.numeric_column('dist_e')

x0, x1, y0, y1 = (fc.numeric_column(key) for key in ('x0', 'x1', 'y0', 'y1'))

# ---- coordinate grid -------------------------------------------------------
# Evenly spaced boundaries over a fixed x range and a fixed y range.
# NOTE(review): per the TF docs, N boundaries yield N + 1 buckets, so each axis
# has numbuckets + 1 buckets while the crosses below hash into numbuckets**2
# slots. Crossed columns hash anyway; the sizes are left as-is because they
# determine variable shapes in already-saved checkpoints.
numbuckets = 30
dx = np.linspace(-74.019108, -73.928684, numbuckets).tolist()
dy = np.linspace(40.700316, 40.877544, numbuckets).tolist()

x0_B, x1_B = (fc.bucketized_column(col, dx) for col in (x0, x1))
y0_B, y1_B = (fc.bucketized_column(col, dy) for col in (y0, y1))

# One cross per (x, y) point, then a cross of the two points.
grid_cells = numbuckets * numbuckets
xy_0 = fc.crossed_column([x0_B, y0_B], grid_cells)
xy_1 = fc.crossed_column([x1_B, y1_B], grid_cells)
xy_pair = fc.crossed_column([xy_0, xy_1], numbuckets**4)

# ---- categorical columns ---------------------------------------------------
# Identity columns accept integer ids in the half-open range [0, num_buckets).
n_hours, n_weekdays, n_weeks = 24, 7, 54
years = [2009, 2010, 2011, 2012, 2013, 2014, 2015]

hour = fc.categorical_column_with_identity('hour', num_buckets=n_hours)
weekday = fc.categorical_column_with_identity('weekday', num_buckets=n_weekdays)
week = fc.categorical_column_with_identity('week', num_buckets=n_weeks)
year = fc.categorical_column_with_vocabulary_list('year', years)

loc_from = fc.categorical_column_with_vocabulary_list('from', locations)
loc_to = fc.categorical_column_with_vocabulary_list('to', locations)

# ---- feature crosses -------------------------------------------------------
# Each hash bucket size is the product of the sizes of the crossed inputs.
n_routes = len(locations) ** 2
n_years = len(years)

from_to = fc.crossed_column([loc_from, loc_to], n_routes)
from_to_year = fc.crossed_column([from_to, year], n_routes * n_years)
from_to_year_week = fc.crossed_column(
    [from_to_year, week], n_routes * n_years * n_weeks)

weekday_year = fc.crossed_column([weekday, year], n_weekdays * n_years)
hour_weekday_year = fc.crossed_column(
    [hour, weekday, year], n_hours * n_weekdays * n_years)

In [8]:
# Crossed (hashed) columns are fed to the network as 50-dimensional embeddings.
EMBEDDING_DIM = 50
embedded_crosses = [
    from_to,
    from_to_year,
    from_to_year_week,
    weekday_year,
    hour_weekday_year,
    xy_pair,
]

# Categorical columns that are fed to the network as indicator columns.
indicator_inputs = [loc_from, loc_to, hour, weekday, week, year, xy_0, xy_1]

# Same members and same order as before: embeddings, indicators, numerics.
deep_columns = (
    [tf.feature_column.embedding_column(col, EMBEDDING_DIM) for col in embedded_crosses]
    + [tf.feature_column.indicator_column(col) for col in indicator_inputs]
    + [taxicab, euclidean, p_count]
)

In [9]:
def train_input_fn(features, labels, batch_size):
    """Build the training input function: one shuffled pass over the data.

    Args:
        features: passed to ``pandas_input_fn`` as ``x``.
        labels: passed to ``pandas_input_fn`` as ``y``.
        batch_size: number of rows per batch.

    Returns:
        Whatever ``tf.estimator.inputs.pandas_input_fn`` returns for these
        arguments with ``num_epochs=1`` and ``shuffle=True``.
    """
    options = dict(num_epochs=1, shuffle=True, batch_size=batch_size)
    return tf.estimator.inputs.pandas_input_fn(x=features, y=labels, **options)

def eval_input_fn(features, labels, batch_size=128):
    """Build the evaluation input function: one ordered pass over the data.

    Evaluation input is deliberately NOT shuffled. With shuffling, an
    evaluation that stops after a fixed number of batches scores a different
    random subset every time, so metrics from successive checkpoints cannot
    be compared with each other.

    Args:
        features: passed to ``pandas_input_fn`` as ``x``.
        labels: passed to ``pandas_input_fn`` as ``y``.
        batch_size: number of rows per batch. The default of 128 matches the
            ``pandas_input_fn`` default that was used implicitly before.

    Returns:
        Whatever ``tf.estimator.inputs.pandas_input_fn`` returns for these
        arguments with ``num_epochs=1`` and ``shuffle=False``.
    """
    return tf.estimator.inputs.pandas_input_fn(
        x = features,
        y = labels,
        batch_size = batch_size,
        num_epochs = 1,
        shuffle = False)

def pred_input_fn(features):
    """Build the prediction input function: one ordered pass, no labels.

    Args:
        features: passed to ``pandas_input_fn`` as ``x``.

    Returns:
        Whatever ``tf.estimator.inputs.pandas_input_fn`` returns for these
        arguments with ``num_epochs=1`` and ``shuffle=False``.
    """
    build_input_fn = tf.estimator.inputs.pandas_input_fn
    return build_input_fn(x=features, shuffle=False, num_epochs=1)

In [10]:
# Directory where the estimator keeps its checkpoints and summaries.
model_dir = './DNN_Regressor_regions_coords_overfit'

# NOTE(review): this writer is not referenced again in the visible cells;
# kept in case a later cell relies on it.
file_writer = tf.summary.FileWriter(model_dir)

# Four hidden layers of 128 units each, Adam optimizer, batch norm enabled.
estimator = tf.estimator.DNNRegressor(
    hidden_units=[128] * 4,
    feature_columns=deep_columns,
    model_dir=model_dir,
    optimizer=tf.train.AdamOptimizer(learning_rate=0.0001),
    batch_norm=True,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './DNN_Regressor_regions_coords_overfit', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000022C032DE6A0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [11]:
# Training input: batches of 2500 rows from the training split.
train_spec = tf.estimator.TrainSpec(
    input_fn=train_input_fn(X_train, y_train, batch_size=2500),
)

# NOTE(review): EvalSpec is left at its defaults. The run log in this notebook
# shows "Evaluation [100/100]", i.e. 100 batches per evaluation, so only part
# of X_eval is scored each time. Passing steps=None would score the whole
# split, at the cost of a much longer evaluation.
eval_spec = tf.estimator.EvalSpec(
    input_fn=eval_input_fn(X_eval, y_eval),
)

In [17]:
# train_input_fn is built with num_epochs=1 and train_spec sets no max_steps,
# so each call presumably trains until that single pass is exhausted; repeating
# the call gives several passes, resuming from the checkpoint in model_dir.
N_PASSES = 5
for pass_idx in range(N_PASSES):
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_Regressor_regions_coords_overfit\model.ckpt-96648
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 96648 into ./DNN_Regressor_regions_coords_overfit\model.ckpt.
INFO:tensorflow:loss = 30851.666, step = 96648
INFO:tensorflow:global_step/sec: 7.36959
INFO:tensorflow:loss = 41372.09, step = 96748 (13.569 sec)
INFO:tensorflow:global_step/sec: 7.89392
INFO:tensorflow:loss = 20651.791, step = 96848 (12.668 sec)
INFO:tensorflow:global

INFO:tensorflow:global_step/sec: 7.8619
INFO:tensorflow:loss = 21524.66, step = 102948 (12.720 sec)
INFO:tensorflow:global_step/sec: 7.87942
INFO:tensorflow:loss = 29223.8, step = 103048 (12.691 sec)
INFO:tensorflow:global_step/sec: 7.88126
INFO:tensorflow:loss = 19670.098, step = 103148 (12.688 sec)
INFO:tensorflow:global_step/sec: 7.8719
INFO:tensorflow:loss = 35378.5, step = 103248 (12.703 sec)
INFO:tensorflow:global_step/sec: 7.85244
INFO:tensorflow:loss = 22203.633, step = 103348 (12.735 sec)
INFO:tensorflow:global_step/sec: 7.87434
INFO:tensorflow:loss = 16456.045, step = 103448 (12.715 sec)
INFO:tensorflow:global_step/sec: 7.8911
INFO:tensorflow:loss = 20446.057, step = 103548 (12.673 sec)
INFO:tensorflow:global_step/sec: 7.56817
INFO:tensorflow:loss = 24395.25, step = 103648 (13.213 sec)
INFO:tensorflow:global_step/sec: 7.83982
INFO:tensorflow:loss = 28399.172, step = 103748 (12.740 sec)
INFO:tensorflow:global_step/sec: 7.86222
INFO:tensorflow:loss = 24710.242, step = 103848 (1

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-04-14:25:06
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_Regressor_regions_coords_overfit\model.ckpt-110745
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-09-04-14:25:09
INFO:tensorflow:Saving dict for global step 110745: average_loss = 26.076693, global_step = 110745, label/mean = 11.269103, loss = 3337.8167, prediction/mean = 11.038037
INFO:tensorflow:Saving 'checkpoint_path' summary for global

INFO:tensorflow:global_step/sec: 7.85406
INFO:tensorflow:loss = 26314.254, step = 115756 (12.732 sec)
INFO:tensorflow:global_step/sec: 7.84488
INFO:tensorflow:loss = 28391.465, step = 115856 (12.749 sec)
INFO:tensorflow:global_step/sec: 7.86983
INFO:tensorflow:loss = 17431.166, step = 115956 (12.705 sec)
INFO:tensorflow:global_step/sec: 7.87292
INFO:tensorflow:loss = 32899.848, step = 116056 (12.717 sec)
INFO:tensorflow:global_step/sec: 7.84789
INFO:tensorflow:loss = 19751.656, step = 116156 (12.727 sec)
INFO:tensorflow:global_step/sec: 7.86343
INFO:tensorflow:loss = 20988.03, step = 116256 (12.717 sec)
INFO:tensorflow:global_step/sec: 7.8348
INFO:tensorflow:loss = 26150.871, step = 116356 (12.764 sec)
INFO:tensorflow:global_step/sec: 7.9034
INFO:tensorflow:loss = 23421.871, step = 116456 (12.653 sec)
INFO:tensorflow:global_step/sec: 7.89334
INFO:tensorflow:loss = 22242.756, step = 116556 (12.669 sec)
INFO:tensorflow:global_step/sec: 7.86593
INFO:tensorflow:loss = 28324.164, step = 116

INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-09-04-14:49:45
INFO:tensorflow:Saving dict for global step 122128: average_loss = 14.432596, global_step = 122128, label/mean = 11.197051, loss = 1847.3723, prediction/mean = 10.247175
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 122128: ./DNN_Regressor_regions_coords_overfit\model.ckpt-122128
INFO:tensorflow:global_step/sec: 4.98062
INFO:tensorflow:loss = 18511.672, step = 122156 (20.093 sec)
INFO:tensorflow:global_step/sec: 7.82648
INFO:tensorflow:loss = 14936.315, step = 122256 (12.762 sec)
INFO:tensorflow:global_step/sec: 7.86948
INFO:tensorflow:loss = 27775.77, step = 122356 (12.707 sec)
INFO:tensorflow:global_step/sec: 7.87273
INFO:t

INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-09-04-15:04:13
INFO:tensorflow:Saving dict for global step 128864: average_loss = 12.554024, global_step = 128864, label/mean = 11.300085, loss = 1606.915, prediction/mean = 10.811912
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 128864: ./DNN_Regressor_regions_coords_overfit\model.ckpt-128864
INFO:tensorflow:Loss for final step: 4873.404.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or sav

INFO:tensorflow:loss = 19319.709, step = 134364 (12.656 sec)
INFO:tensorflow:global_step/sec: 7.90476
INFO:tensorflow:loss = 21141.26, step = 134464 (12.651 sec)
INFO:tensorflow:global_step/sec: 7.90598
INFO:tensorflow:loss = 24200.457, step = 134564 (12.664 sec)
INFO:tensorflow:global_step/sec: 7.86447
INFO:tensorflow:loss = 21786.396, step = 134664 (12.700 sec)
INFO:tensorflow:global_step/sec: 7.85616
INFO:tensorflow:loss = 27438.668, step = 134764 (12.729 sec)
INFO:tensorflow:global_step/sec: 7.88876
INFO:tensorflow:loss = 24035.21, step = 134864 (12.676 sec)
INFO:tensorflow:global_step/sec: 7.85434
INFO:tensorflow:loss = 25546.906, step = 134964 (12.747 sec)
INFO:tensorflow:global_step/sec: 7.8819
INFO:tensorflow:loss = 17037.174, step = 135064 (12.687 sec)
INFO:tensorflow:global_step/sec: 7.87135
INFO:tensorflow:loss = 18394.281, step = 135164 (12.689 sec)
INFO:tensorflow:global_step/sec: 7.89361
INFO:tensorflow:loss = 25812.014, step = 135264 (12.668 sec)
INFO:tensorflow:global_s

INFO:tensorflow:global_step/sec: 7.86664
INFO:tensorflow:loss = 24577.883, step = 141364 (12.714 sec)
INFO:tensorflow:global_step/sec: 7.88552
INFO:tensorflow:loss = 32884.99, step = 141464 (12.679 sec)
INFO:tensorflow:global_step/sec: 7.86455
INFO:tensorflow:loss = 18073.682, step = 141564 (12.715 sec)
INFO:tensorflow:global_step/sec: 7.90144
INFO:tensorflow:loss = 15523.871, step = 141664 (12.656 sec)
INFO:tensorflow:global_step/sec: 7.86418
INFO:tensorflow:loss = 22530.773, step = 141764 (12.728 sec)
INFO:tensorflow:global_step/sec: 7.91704
INFO:tensorflow:loss = 26500.129, step = 141864 (12.619 sec)
INFO:tensorflow:global_step/sec: 7.92039
INFO:tensorflow:loss = 20474.57, step = 141964 (12.626 sec)
INFO:tensorflow:global_step/sec: 7.91767
INFO:tensorflow:loss = 22705.434, step = 142064 (12.630 sec)
INFO:tensorflow:global_step/sec: 7.88135
INFO:tensorflow:loss = 20002.104, step = 142164 (12.688 sec)
INFO:tensorflow:global_step/sec: 7.89371
INFO:tensorflow:loss = 22172.037, step = 14

INFO:tensorflow:loss = 19616.34, step = 147272 (12.720 sec)
INFO:tensorflow:global_step/sec: 7.88912
INFO:tensorflow:loss = 16419.592, step = 147372 (12.676 sec)
INFO:tensorflow:global_step/sec: 7.89946
INFO:tensorflow:loss = 79779.74, step = 147472 (12.659 sec)
INFO:tensorflow:global_step/sec: 7.8923
INFO:tensorflow:loss = 27018.418, step = 147572 (12.686 sec)
INFO:tensorflow:global_step/sec: 7.8869
INFO:tensorflow:loss = 23066.62, step = 147672 (12.664 sec)
INFO:tensorflow:global_step/sec: 7.90761
INFO:tensorflow:loss = 19002.027, step = 147772 (12.646 sec)
INFO:tensorflow:global_step/sec: 7.67714
INFO:tensorflow:loss = 25298.564, step = 147872 (13.026 sec)
INFO:tensorflow:global_step/sec: 7.90121
INFO:tensorflow:loss = 23866.025, step = 147972 (12.656 sec)
INFO:tensorflow:global_step/sec: 7.90822
INFO:tensorflow:loss = 31780.85, step = 148072 (12.645 sec)
INFO:tensorflow:global_step/sec: 7.89873
INFO:tensorflow:loss = 17408.553, step = 148172 (12.660 sec)
INFO:tensorflow:global_step

INFO:tensorflow:loss = 27087.164, step = 154172 (12.631 sec)
INFO:tensorflow:global_step/sec: 7.8676
INFO:tensorflow:loss = 26094.047, step = 154272 (12.710 sec)
INFO:tensorflow:global_step/sec: 7.93153
INFO:tensorflow:loss = 17729.094, step = 154372 (12.608 sec)
INFO:tensorflow:Saving checkpoints for 154376 into ./DNN_Regressor_regions_coords_overfit\model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:global_step/sec: 6.16709
INFO:tensorflow:loss = 26947.37, step = 154472 (16.231 sec)
INFO:tensorflow:global_step/sec: 7.87881
INFO:tensorflow:loss = 24055.352, step = 154572 (12.677 sec)
INFO:tensorflow:global_step/sec: 7.9084
INFO:tensorflow:loss = 29941.926, step = 154672 (12.645 sec)
INFO:tensorflow:global_step/sec: 7.86451
INFO:tensorflow:loss = 18418.371, step = 154772 (12.715 sec)
INFO:tensorflow:global_step/sec: 7.90454
INFO:tensorflow:loss = 18246.615, step = 154872 (12.651 sec)
INFO:tensorflow:global_step/sec: 7.89831
INF

INFO:tensorflow:global_step/sec: 7.89089
INFO:tensorflow:loss = 32653.14, step = 160972 (12.673 sec)
INFO:tensorflow:global_step/sec: 7.90143
INFO:tensorflow:loss = 22701.2, step = 161072 (12.656 sec)
INFO:tensorflow:Saving checkpoints for 161080 into ./DNN_Regressor_regions_coords_overfit\model.ckpt.
INFO:tensorflow:Skip the current checkpoint eval due to throttle secs (600 secs).
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-04-16:13:24
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_Regressor_regions_coords_overfit\model.ckpt-161080
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorf

INFO:tensorflow:global_step/sec: 5.06422
INFO:tensorflow:loss = 25519.25, step = 165880 (19.746 sec)
INFO:tensorflow:global_step/sec: 7.86298
INFO:tensorflow:loss = 41180.418, step = 165980 (12.733 sec)
INFO:tensorflow:global_step/sec: 7.85542
INFO:tensorflow:loss = 28850.87, step = 166080 (12.714 sec)
INFO:tensorflow:global_step/sec: 7.82461
INFO:tensorflow:loss = 26990.598, step = 166180 (12.780 sec)
INFO:tensorflow:global_step/sec: 7.85588
INFO:tensorflow:loss = 19923.02, step = 166280 (12.731 sec)
INFO:tensorflow:global_step/sec: 7.86747
INFO:tensorflow:loss = 24791.168, step = 166380 (12.709 sec)
INFO:tensorflow:global_step/sec: 7.89194
INFO:tensorflow:loss = 27020.965, step = 166480 (12.671 sec)
INFO:tensorflow:global_step/sec: 7.8609
INFO:tensorflow:loss = 20688.254, step = 166580 (12.721 sec)
INFO:tensorflow:global_step/sec: 7.87354
INFO:tensorflow:loss = 20551.64, step = 166680 (12.716 sec)
INFO:tensorflow:global_step/sec: 7.83066
INFO:tensorflow:loss = 21468.652, step = 16678

INFO:tensorflow:loss = 23627.582, step = 172780 (12.737 sec)
INFO:tensorflow:global_step/sec: 7.8381
INFO:tensorflow:loss = 19179.17, step = 172880 (12.758 sec)
INFO:tensorflow:global_step/sec: 7.84949
INFO:tensorflow:loss = 29252.48, step = 172980 (12.740 sec)
INFO:tensorflow:global_step/sec: 7.86574
INFO:tensorflow:loss = 21446.006, step = 173080 (12.729 sec)
INFO:tensorflow:global_step/sec: 7.84331
INFO:tensorflow:loss = 16686.959, step = 173180 (12.734 sec)
INFO:tensorflow:global_step/sec: 7.87282
INFO:tensorflow:loss = 14780.836, step = 173280 (12.702 sec)
INFO:tensorflow:global_step/sec: 7.8694
INFO:tensorflow:loss = 22383.3, step = 173380 (12.707 sec)
INFO:tensorflow:global_step/sec: 7.8655
INFO:tensorflow:loss = 23731.053, step = 173480 (12.729 sec)
INFO:tensorflow:global_step/sec: 7.86927
INFO:tensorflow:loss = 18260.541, step = 173580 (12.692 sec)
INFO:tensorflow:global_step/sec: 7.88336
INFO:tensorflow:loss = 21689.844, step = 173680 (12.685 sec)
INFO:tensorflow:global_step/

In [13]:
# Score the processed test set and assemble the submission frame.
infer = pd.read_csv('../input/test_processed.csv', index_col='key')

# pred_input_fn makes one unshuffled pass over `infer`, so the generator is
# finite and yields predictions in row order; drain it in one go.
generator = estimator.predict(input_fn=pred_input_fn(infer))
predictions = list(generator)
if len(predictions) != len(infer):
    # Fail loudly rather than writing fares against the wrong keys.
    raise ValueError(
        'expected %d predictions, got %d' % (len(infer), len(predictions)))

# Each prediction dict holds a length-1 array under 'predictions'.
values = [float(val['predictions'][0]) for val in predictions]

# Build the frame in one step: fares indexed by the test-set key.
ids = infer.index
submission = pd.DataFrame({'fare_amount': values}, index=ids.rename('key'))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./DNN_Regressor_regions_coords_overfit\model.ckpt-16108
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [14]:
# Sanity-check the first predicted fares before writing the file.
submission.head()

Unnamed: 0_level_0,fare_amount
key,Unnamed: 1_level_1
2015-01-27 13:08:24.0000002,10.670807
2015-01-27 13:08:24.0000003,11.082397
2011-10-08 11:53:44.0000002,4.456362
2012-12-01 21:12:12.0000002,8.422596
2012-12-01 21:12:12.0000003,16.601488


In [15]:
# Write the submission; the 'key' index is written out as the first column.
submission.to_csv('../output/12.dnn_regressor_rmse_3.01.csv')