# Prediction of Taxi Fares using Linear Regression in Tensorflow

### Import Libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

### File paths

In [26]:
train_data_file = "../Data/taxi-train.csv"
valid_data_file = "../Data/taxi-valid.csv"
test_data_file = "../Data/taxi-test.csv"

output_dir = "../Linear_output/"

In [3]:
data_cols = ['fare_amount', 'pickuplon','pickuplat','dropofflon','dropofflat','passengers', 'key']
predictors = data_cols[1:len(data_cols)-1]
response = data_cols[0]

### Training dataset

In [11]:
train_df = pd.read_csv(train_data_file, header= None, names=data_cols)

### Validation Dataset


In [12]:
valid_df = pd.read_csv(valid_data_file, header = None, names = data_cols)

### Training Input Function from Pandas

In [13]:
def make_train_input_fn(df, epochs):
    return tf.estimator.inputs.pandas_input_fn(x=df,
                                               y=df[response],
                                               batch_size=128,
                                               num_epochs=epochs,
                                               queue_capacity=1000,
                                               shuffle=True,
                                               num_threads = 1)


### Validation Input Function from Pandas

In [24]:
def make_prediction_input_fn(df, epochs):
    return tf.estimator.inputs.pandas_input_fn(x = df,
                                               y=None,
                                               num_epochs=128,
                                               num_threads=1,
                                               batch_size=128,
                                               queue_capacity=1000,
                                               shuffle=True)

### Declaring feature types

In [17]:
def make_features():
    input_cols = [tf.feature_column.numeric_column(X) for X in predictors]
    return input_cols

### Training Model

In [18]:
model = tf.estimator.LinearRegressor(feature_columns=make_features(), model_dir=output_dir)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f33c37f9590>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '../Output/', '_save_summary_steps': 100}


In [20]:
model.train(input_fn=make_train_input_fn(train_df, 10))

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ../Output/model.ckpt.
INFO:tensorflow:loss = 28491.9, step = 1
INFO:tensorflow:global_step/sec: 480.906
INFO:tensorflow:loss = 13264.0, step = 101 (0.210 sec)
INFO:tensorflow:global_step/sec: 551.643
INFO:tensorflow:loss = 14127.4, step = 201 (0.181 sec)
INFO:tensorflow:global_step/sec: 541.82
INFO:tensorflow:loss = 8193.33, step = 301 (0.185 sec)
INFO:tensorflow:global_step/sec: 548.147
INFO:tensorflow:loss = 12835.9, step = 401 (0.184 sec)
INFO:tensorflow:global_step/sec: 523.82
INFO:tensorflow:loss = 6815.94, step = 501 (0.189 sec)
INFO:tensorflow:global_step/sec: 541.659
INFO:tensorflow:loss = 15158.9, step = 601 (0.185 sec)
INFO:tensorflow:Saving checkpoints for 608 into ../Output/model.ckpt.
INFO:tensorflow:Loss for final step: 33.7485.


<tensorflow.python.estimator.canned.linear.LinearRegressor at 0x7f3405b15250>

### Function for finding Root Mean Square Error

In [22]:
def print_rmse(model, name, data):
    metrics = model.evaluate(input_fn=make_train_input_fn(train_df, 1))
    print 'RMSE on {} dataset {}'.format(name, np.sqrt(metrics['average_loss']))
print_rmse(model, 'validation', valid_df)

INFO:tensorflow:Starting evaluation at 2018-06-17-15:53:42
INFO:tensorflow:Restoring parameters from ../Output/model.ckpt-608
INFO:tensorflow:Finished evaluation at 2018-06-17-15:53:42
INFO:tensorflow:Saving dict for global step 608: average_loss = 82.4404, global_step = 608, loss = 10501.0
RMSE on validation dataset 9.07966899872


### Predictions on Validation dataset

In [25]:
predictions = model.predict(input_fn=make_prediction_input_fn(valid_df, 1))
for i in xrange(5):
    print(predictions.next())

INFO:tensorflow:Restoring parameters from ../Output/model.ckpt-608
{'predictions': array([ 10.8259964], dtype=float32)}
{'predictions': array([ 10.82671261], dtype=float32)}
{'predictions': array([ 10.82732868], dtype=float32)}
{'predictions': array([ 10.82853127], dtype=float32)}
{'predictions': array([ 10.82573891], dtype=float32)}
