In [5]:
import os

import googlemaps
import numpy as np
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegressionModel


In [None]:
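# Prerequisite: the `googlemaps` client library must be installed in the
# Python environment of the Docker container that runs this notebook, e.g.:
#   pip install googlemaps
# How to install a library inside a Docker-hosted Jupyter environment:
# https://stackoverflow.com/questions/45039381/how-to-install-a-library-to-use-on-a-virtual-platform-like-docker-jupyter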

In [2]:
# Create (or reuse) the Spark session used by the rest of the notebook.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName('Test')
    .config('spark.master', 'local[4]')  # run Spark locally with 4 threads
    .getOrCreate()
)

# Keep a handle on the underlying SparkContext as well.
sc = spark.sparkContext


In [25]:
def _distance_text_to_km(distance_text):
    """Convert a Distance Matrix distance label to kilometres.

    Accepts labels of the form "<number> km" or "<number> m", e.g. "1.2 km",
    "1,234 km" or "350 m". Metre values are converted to kilometres so the
    result is always in the same unit.

    Raises
    ------
    ValueError
        If the label is not a number followed by "km" or "m".
    """
    amount, _, unit = distance_text.strip().rpartition(" ")
    # Factor that converts the labelled unit to kilometres.
    to_km = {"km": 1.0, "m": 0.001}
    if unit not in to_km:
        raise ValueError(
            "Unsupported distance text from Google Maps: {!r}".format(distance_text)
        )
    # Large distances may carry thousands separators ("1,234 km").
    return float(amount.replace(",", "")) * to_km[unit]


def taxi(pick_x, pick_y, drop_x, drop_y, mon, week, day, hour, api_key=None):
    """Predict the duration and the fare of a single taxi trip.

    The driving distance between pickup and drop-off is looked up through the
    Google Maps Distance Matrix API, combined with the other inputs into a
    one-row Spark DataFrame, and scored with the two saved linear regression
    models ("modelA.model" for duration, "modelB.model" for fare).

    Parameters
    ----------
    pick_x, pick_y : number or numeric string
        Pickup longitude and latitude.
    drop_x, drop_y : number or numeric string
        Drop-off longitude and latitude.
    mon, week, day, hour : number or numeric string
        Values for the 'month', 'week', 'day' and 'hour' feature columns.
    api_key : str, optional
        Google Maps API key. When omitted, the key is read from the
        ``GOOGLE_MAPS_API_KEY`` environment variable so that no credential
        has to be stored in the notebook.

    Returns
    -------
    (Duration, Fare) : tuple of pyspark.sql.DataFrame
        Two one-column Spark DataFrames holding the predictions, with the
        columns named "Duration" and "Fare" respectively.

    Raises
    ------
    RuntimeError
        If no API key is supplied and the environment variable is not set.
    ValueError
        If Google Maps returns no distance for the pair of points, if the
        distance text cannot be parsed, or if the distance is not positive
        (its logarithm would be undefined).
    """
    # Coerce every input to float so the Spark schema is uniformly numeric.
    pick_x, pick_y, drop_x, drop_y, mon, week, day, hour = (
        float(value)
        for value in (pick_x, pick_y, drop_x, drop_y, mon, week, day, hour)
    )

    # Resolve the API key without embedding a secret in the notebook.
    if api_key is None:
        api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
    if not api_key:
        raise RuntimeError(
            "No Google Maps API key available: pass api_key=... or set the "
            "GOOGLE_MAPS_API_KEY environment variable."
        )
    gmaps = googlemaps.Client(key=api_key)

    # Query the driving distance; locations are sent as "<lat> <lng>" strings.
    origin = str(pick_y) + " " + str(pick_x)
    destination = str(drop_y) + " " + str(drop_x)
    response = gmaps.distance_matrix([origin], [destination])
    element = response['rows'][0]['elements'][0]
    if 'distance' not in element:
        # Elements without a route carry a status but no distance entry.
        raise ValueError(
            "Google Maps returned no distance for {!r} -> {!r} (status: {})".format(
                origin, destination, element.get('status')
            )
        )

    Trip_distance = _distance_text_to_km(element['distance']['text'])
    if Trip_distance <= 0:
        raise ValueError(
            "Trip distance must be positive to take its logarithm, got {}".format(
                Trip_distance
            )
        )

    # Natural log of the distance, rounded to 2 decimals, as a plain float.
    log_Trip_distance = float(np.round(np.log(Trip_distance), 2))

    # Build the single-row record and its column names.
    my_record = [(pick_x, pick_y, drop_x, drop_y, mon, week, day, hour,
                  Trip_distance, log_Trip_distance)]
    columns = ['Pickup_longitude', 'Pickup_latitude', 'Dropoff_longitude',
               'Dropoff_latitude', 'month', 'week', 'day', 'hour',
               'Trip_distance', 'log_Trip_distance']

    # Convert the record to a Spark DataFrame.
    record = spark.createDataFrame(my_record, columns)

    # Load the saved Spark ML linear regression models.
    Trip_lr_model = LinearRegressionModel.load("modelA.model")
    Fare_lr_model = LinearRegressionModel.load("modelB.model")

    # Assemble all columns into the single vector column fed to the models.
    va = VectorAssembler().setInputCols(columns).setOutputCol("featureVector")
    rec = va.transform(record)

    # Predict the trip duration and rename the default "prediction" column.
    Duration = (
        Trip_lr_model.transform(rec)
        .select("prediction")
        .withColumnRenamed("prediction", "Duration")
    )

    # Predict the trip fare and rename the default "prediction" column.
    Fare = (
        Fare_lr_model.transform(rec)
        .select("prediction")
        .withColumnRenamed("prediction", "Fare")
    )

    return Duration, Fare


In [26]:
# Score one example trip; keyword arguments make the coordinate order explicit.
Trip_Duration, Trip_Fare = taxi(
    pick_x=-75.231,
    pick_y=39.928,
    drop_x=-75.227,
    drop_y=39.93,
    mon=6.0,
    week=23.0,
    day=8.0,
    hour=20.0,
)


In [27]:
# Trip duration prediction: convert the one-row Spark DataFrame returned by
# taxi() to pandas so the notebook renders it as a table.
Trip_Duration.toPandas()


Unnamed: 0,Duration
0,4.894277


In [28]:
# Fare prediction: convert the one-row Spark DataFrame returned by taxi()
# to pandas so the notebook renders it as a table.
Trip_Fare.toPandas()


Unnamed: 0,Fare
0,5.393852
