# Prediction of Taxi Fares Using Deep Neural Networks in TensorFlow

### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

### File paths

In [10]:
# Relative paths to the CSV files for the three data splits.
train_data_file, valid_data_file, test_data_file = (
    "../Data/taxi-train.csv",
    "../Data/taxi-valid.csv",
    "../Data/taxi-test.csv",
)

# Directory handed to the estimator as its model_dir.
output_dir = "../DNN_output/"

In [3]:
# Column names, in file order, applied to the CSVs when they are loaded.
data_cols = [
    'fare_amount',
    'pickuplon', 'pickuplat',
    'dropofflon', 'dropofflat',
    'passengers',
    'key',
]
# The label is the first column; the features are every column between
# the label and the trailing 'key' column.
response = data_cols[0]
predictors = data_cols[1:-1]

### Training dataset

In [4]:
# The file is read without a header row; column names come from data_cols.
train_df = pd.read_csv(
    train_data_file,
    names=data_cols,
    header=None,
)

### Validation dataset

In [None]:
# The file is read without a header row; column names come from data_cols.
valid_df = pd.read_csv(
    valid_data_file,
    names=data_cols,
    header=None,
)

### Training Input Function from Pandas

In [6]:
def make_train_input_fn(df, epochs):
    """Build a shuffled, batched input function over a labelled DataFrame.

    Args:
        df: DataFrame passed whole as the features; its `response` column
            is also passed as the label.
        epochs: Value forwarded as `num_epochs`.

    Returns:
        Whatever `tf.estimator.inputs.pandas_input_fn` returns for these
        settings.
    """
    input_fn_kwargs = {
        'x': df,
        'y': df[response],
        'batch_size': 128,
        'num_epochs': epochs,
        'shuffle': True,
        'queue_capacity': 1000,
        'num_threads': 1,
    }
    return tf.estimator.inputs.pandas_input_fn(**input_fn_kwargs)


### Prediction Input Function from Pandas

In [7]:
def make_prediction_input_fn(df, epochs):
    """Build an unlabelled, batched input function for prediction.

    Args:
        df: DataFrame passed whole as the features; no label is supplied.
        epochs: Number of passes over `df`, forwarded as `num_epochs`.
            Use 1 to get exactly one prediction per row.

    Returns:
        Whatever `tf.estimator.inputs.pandas_input_fn` returns for these
        settings.
    """
    return tf.estimator.inputs.pandas_input_fn(
        x=df,
        y=None,
        # Honour the caller's epoch count. It was previously hard-coded to
        # 128, which ignored `epochs` and repeated every row 128 times.
        num_epochs=epochs,
        batch_size=128,
        queue_capacity=1000,
        # Keep rows in their original order so each prediction can be
        # matched back to its input row; a single reader thread keeps the
        # order deterministic.
        shuffle=False,
        num_threads=1,
    )

### Declaring feature types

In [8]:
def make_features():
    """Return one numeric feature column per name in `predictors`."""
    return [tf.feature_column.numeric_column(name) for name in predictors]

### Training Model

In [11]:
# DNN regressor with hidden layers of 32, 8 and 2 units; output_dir is
# passed as the estimator's model_dir.
model = tf.estimator.DNNRegressor(
    hidden_units=[32, 8, 2],
    feature_columns=make_features(),
    model_dir=output_dir,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd79f60af50>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '../DNN_output/', '_save_summary_steps': 100}


In [13]:
# Train on the training frame with an input function configured for 100 epochs.
model.train(input_fn=make_train_input_fn(df=train_df, epochs=100))

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from ../DNN_output/model.ckpt-608
INFO:tensorflow:Saving checkpoints for 609 into ../DNN_output/model.ckpt.
INFO:tensorflow:loss = 12173.8, step = 609
INFO:tensorflow:global_step/sec: 447.301
INFO:tensorflow:loss = 7838.37, step = 709 (0.225 sec)
INFO:tensorflow:global_step/sec: 498.716
INFO:tensorflow:loss = 9793.85, step = 809 (0.203 sec)
INFO:tensorflow:global_step/sec: 496.571
INFO:tensorflow:loss = 9389.92, step = 909 (0.201 sec)
INFO:tensorflow:global_step/sec: 500.716
INFO:tensorflow:loss = 10946.2, step = 1009 (0.200 sec)
INFO:tensorflow:global_step/sec: 462.116
INFO:tensorflow:loss = 9935.36, step = 1109 (0.214 sec)
INFO:tensorflow:global_step/sec: 463.45
INFO:tensorflow:loss = 9455.61, step = 1209 (0.216 sec)
INFO:tensorflow:global_step/sec: 502.467
INFO:tensorflow:loss = 11522.0, step = 1309 (0.199 sec)
INFO:tensorflow:global_step/sec: 488.074
INFO:tensorflow:loss = 11533.9, step = 1409 (0.206 s

<tensorflow.python.estimator.canned.dnn.DNNRegressor at 0x7fd79f60aed0>

### Function for Computing the Root Mean Squared Error

In [17]:
def print_rmse(model, name, df):
    """Evaluate `model` on `df` and print the root mean squared error.

    Args:
        model: Object exposing `evaluate(input_fn=...)` whose result
            contains an 'average_loss' entry.
        name: Label for the dataset, used only in the printed message.
        df: DataFrame passed to `make_train_input_fn` for a single epoch.
    """
    metrics = model.evaluate(input_fn=make_train_input_fn(df, 1))
    rmse = np.sqrt(metrics['average_loss'])
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print('RMSE on {} dataset {}'.format(name, rmse))

In [18]:
# Report the model's RMSE on the validation split.
print_rmse(model=model, name='validation', df=valid_df)

INFO:tensorflow:Starting evaluation at 2018-06-17-16:02:53
INFO:tensorflow:Restoring parameters from ../DNN_output/model.ckpt-6679
INFO:tensorflow:Finished evaluation at 2018-06-17-16:02:53
INFO:tensorflow:Saving dict for global step 6679: average_loss = 109.371, global_step = 6679, loss = 13007.3
RMSE on validation dataset 10.4580440521
