# Create Prediction Model

## Import Modules


In [52]:
import keras

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import seaborn as sns

from typing import Tuple


### Load the data

In [53]:
# Absolute, machine-specific location of the cleaned taxi fare CSV; adjust when running elsewhere.
PROCESSED_DATA_PATH = "/Users/lancehester/Documents/ml_predict_taxi_fares/data/clean_taxi_data.csv"

In [54]:
# Load the cleaned taxi fare data as a pandas dataframe for ease of use
fare_df = pd.read_csv(PROCESSED_DATA_PATH)

---

## Training a Linear Regression Model

Regression is a supervised machine learning process. It is similar to classification, but rather than predicting a label, you try to predict a continuous value. Linear regression defines the relationship between a target variable (y) and a set of predictive features (x). Simply stated, if you need to predict a number, use regression.


Here is where I build, train, and evaluate my regression model.

---
## Build Functions to View Regression Model Information

Here I want to build helper functions (methods) to visualize what is happening during each training run. I generate two plots:

* A loss curve
* A scatter plot of the `features` vs `the label (target - fare price)` with a line showing the output of the trained model


---

In [55]:
# Method to make the plots

def make_plots(
    df: pd.DataFrame,
    feature_names: list[str],
    label_name: str,
    model_output: Tuple[np.ndarray, np.ndarray, list, pd.Series],
    sample_size: int = 200
) -> None:
    """
    Helper method that draws the two training figures: loss curve and model plot.

    The left subplot shows RMSE per epoch. The right subplot shows a random
    sample of the data together with the fitted model: a 2D scatter plus line
    for one feature, otherwise a 3D scatter plus surface.

    Args:
        df (pd.DataFrame): Pandas dataframe of taxi fare data
        feature_names (list[str]): Names of the feature columns the model was trained on
        label_name (str): The name of the target variable for the regression model
        model_output (tuple): ``(weights, bias, epochs, rmse)`` as returned by ``train_model``
        sample_size (int): Maximum number of rows to sample for plotting.
            Capped at ``len(df)`` so small frames do not raise.

    Returns:
        None: The figure is displayed with ``fig.show()``.

    """
    # DataFrame.sample raises when n exceeds the number of rows, so cap it.
    n_rows = min(sample_size, len(df))

    # sample() returns a new frame; reset the index so rows are numbered 0..n-1.
    # plot_model adds a column to this frame, so it must not alias the caller's df.
    random_sample = df.sample(n=n_rows).reset_index(drop=True)

    weights, bias, epochs, rmse = model_output

    # One feature -> 2D cartesian subplot; otherwise a 3D scene is required.
    is_2d_plot = len(feature_names) == 1
    model_plot_type = "scatter" if is_2d_plot else "surface"

    fig = make_subplots(
        rows=1,
        cols=2,
        subplot_titles=["Loss Curve", "Model Plot"],
        specs=[[{"type": "scatter"}, {"type": model_plot_type}]],
    )

    plot_data(random_sample, feature_names, label_name, fig)
    plot_model(random_sample, feature_names, weights, bias, fig)
    plot_loss_curve(epochs, rmse, fig)

    fig.show()

In [56]:
def plot_loss_curve(
    epochs: np.ndarray,
    rmse: np.ndarray,
    fig: go.Figure
) -> None:
    """
    Helper method that adds the model loss curve to the left subplot.

    Args:
        epochs (array-like): Epoch indices, one per training epoch
        rmse (array-like): Root mean squared error recorded at each epoch
        fig (go.Figure): The plotly subplot figure; modified in place

    Returns:
        None: ``fig`` is updated in place.

    """
    curve = px.line(x=epochs, y=rmse)
    curve.update_traces(line_color='#ff0000', line_width=3)

    # add_trace with row/col replaces the deprecated append_trace.
    fig.add_trace(curve.data[0], row=1, col=1)
    fig.update_xaxes(title_text="Epoch", row=1, col=1)

    # np.min/np.max accept lists, arrays and pandas Series alike.
    # The lower bound leaves 20% headroom below the smallest error.
    fig.update_yaxes(
        title_text="Root Mean Squared Error",
        row=1,
        col=1,
        range=[np.min(rmse) * 0.8, np.max(rmse)],
    )

In [57]:
def plot_data(
    df: pd.DataFrame,
    feature: list[str],
    label: str,
    fig: go.Figure
) -> None:
    """
    Helper method that adds the observed data points to the right subplot.

    Args:
        df (pd.DataFrame): Pandas dataframe of taxi fare data
        feature (list[str]): Feature column names. One name gives a 2D
            scatter; otherwise the first two names are used for a 3D scatter.
        label (str): The name of the target variable for the regression model
        fig (go.Figure): Plotly subplot figure; modified in place

    Returns:
        None: ``fig`` is updated in place.

    """
    # Use the ``feature`` argument itself. The previous body read a name
    # ``features`` that is not a parameter, so it only worked when a global
    # with that name happened to exist.
    if len(feature) == 1:
        scatter = px.scatter(df, x=feature[0], y=label)
        fig.add_trace(scatter.data[0], row=1, col=2)
        fig.update_xaxes(title_text=feature[0], row=1, col=2)
        fig.update_yaxes(title_text=label, row=1, col=2)
    else:
        scatter = px.scatter_3d(df, x=feature[0], y=feature[1], z=label)
        fig.add_trace(scatter.data[0], row=1, col=2)
        # 3D subplots are configured through the scene, not the x/y axes.
        fig.update_layout(
            scene1=dict(xaxis_title=feature[0], yaxis_title=feature[1], zaxis_title=label)
        )

In [58]:
def plot_model(
    df: pd.DataFrame,
    features: list[str],
    weights: np.ndarray,
    bias: np.ndarray,
    fig: go.Figure
) -> None:
    """
    Helper method that adds the fitted model to the right subplot.

    Computes a "fare_predicted" column from the weights and bias, then draws
    it as a line (one feature) or as a surface (more than one feature).

    Note: a "fare_predicted" column is added to ``df`` in place.

    Args:
        df (pd.DataFrame): Pandas dataframe of taxi fare data; mutated in place
        features (list[str]): Feature column names the model was trained on
        weights (np.ndarray): Model weights, indexed as ``weights[i][0]`` per feature
        bias (np.ndarray): Model bias, read as ``bias[0]``
        fig (go.Figure): Plotly subplot figure; modified in place

    Returns:
        None: ``fig`` is updated in place.

    """
    # Start every row's prediction at the bias term.
    df["fare_predicted"] = bias[0]

    # Add each feature's weighted contribution: prediction = bias + sum(w_i * x_i).
    for index, feature in enumerate(features):
        df["fare_predicted"] = df["fare_predicted"] + weights[index][0] * df[feature]

    if len(features) == 1:
        # Single feature: the model is a straight line over that feature.
        model = px.line(df, x=features[0], y="fare_predicted")
        model.update_traces(line_color='#ff0000', line_width=3)
    else:
        # Multiple features: build a small surface from three sample values of
        # the prediction (z) and of the second feature (y).
        z_name, y_name = "fare_predicted", features[1]
        # NOTE(review): the middle value is half the range, (max - min) / 2,
        # not the midpoint (max + min) / 2 -- confirm this is intended.
        z = [df[z_name].min(), (df[z_name].max() - df[z_name].min()) / 2, df[z_name].max()]
        y = [df[y_name].min(), (df[y_name].max() - df[y_name].min()) / 2, df[y_name].max()]
        x = []
        # Solve z = w0 * x + w1 * y + bias for x at each (y, z) pair.
        for i in range(len(y)):
            x.append((z[i] - weights[1][0] * y[i] - bias[0]) / weights[0][0])

        # The 'z' column holds the same three-value list in every row.
        # NOTE(review): presumably this is the 3x3 height grid Surface expects -- verify.
        plane=pd.DataFrame({'x':x, 'y':y, 'z':[z] * 3})

        light_yellow = [[0, '#89CFF0'], [1, '#FFDB58']]
        model = go.Figure(data=go.Surface(x=plane['x'], y=plane['y'], z=plane['z'],
                                            colorscale=light_yellow))

    # Only the first trace of the temporary figure is copied into the subplot.
    fig.add_trace(model.data[0], row=1, col=2)

    return

In [59]:
def model_info(
    feature_names: list[str],
    label_name: str,
    model_output: Tuple[np.ndarray, np.ndarray, list, pd.Series],
) -> str:
    """
    Build a printable summary of the trained model.

    The summary has a banner, one weight line per feature, the bias, and the
    fitted equation written out in full.

    Args:
        feature_names (list[str]): Feature names, in the same order as the weights
        label_name (str): The target/label name
        model_output (tuple): Training output whose first two items are the
            weights (indexed ``weights[i][0]``) and the bias (indexed ``bias[0]``)

    Returns:
        (str): Multi-line model description

    """
    weights, bias = model_output[0], model_output[1]

    rule = "-" * 80
    banner = f"{rule}\n|{'MODEL INFO'.center(78)}|\n{rule}"

    # Collect the per-feature pieces once and join them, instead of growing
    # two strings in lock-step.
    weight_lines = []
    equation_terms = []
    for index, feature in enumerate(feature_names):
        weight = weights[index][0]
        weight_lines.append(f"Weight for feature[{feature}]: {weight:.3f}\n")
        equation_terms.append(f"{weight:.3f} * {feature} + ")

    info = "".join(weight_lines) + f"Bias: {bias[0]:.3f}\n"
    equation = f"{label_name} = " + "".join(equation_terms) + f"{bias[0]:.3f}\n"

    return f"{banner}\n{info}\n{equation}"

---
## Build Functions to Create the Prediction Model
Here I want to build functions (methods) to build the Keras Linear Regression Model.


---

In [60]:
def build_model(my_learning_rate: float, num_features: int) -> keras.Model:
    """
    Create and compile a simple linear regression model.

    Args:
        my_learning_rate (float): Learning rate handed to the RMSprop optimizer
        num_features (int): Width of the input vector (number of features)

    Returns:
        (keras.Model): The compiled, untrained prediction model
    """
    # Topology: the input vector feeds one dense unit, i.e. a single
    # weighted sum plus bias -- a plain linear regression.
    feature_input = keras.Input(shape=(num_features,))
    prediction = keras.layers.Dense(units=1)(feature_input)
    linear_model = keras.Model(inputs=feature_input, outputs=prediction)

    # Training minimizes mean squared error; RMSE is tracked as a metric so
    # the reported error is in the same units as the label.
    optimizer = keras.optimizers.RMSprop(learning_rate=my_learning_rate)
    rmse_metric = keras.metrics.RootMeanSquaredError()
    linear_model.compile(
        optimizer=optimizer,
        loss="mean_squared_error",
        metrics=[rmse_metric],
    )

    return linear_model

In [61]:
def train_model(
    model: keras.Model,
    df: pd.DataFrame,
    features: np.ndarray,
    label: np.ndarray,
    epochs: int,
    batch_size: int
) -> Tuple[np.ndarray, np.ndarray, list[int], pd.Series]:
    """
    Train the model by feeding it data.

    Args:
        model (keras.Model): The compiled model to fit
        df (pd.DataFrame): Unused; kept so existing positional callers keep working
        features (np.ndarray): Feature values passed to ``model.fit`` as ``x``
        label (np.ndarray): Target values passed to ``model.fit`` as ``y``
        epochs (int): Number of epochs to train for
        batch_size (int): Number of examples per training batch

    Returns:
        (Tuple[np.ndarray, np.ndarray, list[int], pd.Series]): The trained
        weights, the trained bias, the list of epoch indices, and the RMSE
        recorded at each epoch.
    """
    # The model trains for the specified number of epochs.
    history = model.fit(x=features,
                        y=label,
                        batch_size=batch_size,
                        epochs=epochs)

    # Fetch the weights once; the first two entries are read as weight and bias.
    trained_weight, trained_bias = model.get_weights()[:2]

    # The list of epochs is stored separately from the rest of history.
    epoch_indices = history.epoch

    # Snapshot of the root mean squared error at each epoch, used to track
    # how training progressed.
    rmse = pd.DataFrame(history.history)["root_mean_squared_error"]

    return trained_weight, trained_bias, epoch_indices, rmse

In [62]:
def run_experiment(
    df: pd.DataFrame,
    feature_names: list[str],
    label_name: str,
    learning_rate: float,
    epochs: int,
    batch_size: int
) -> keras.Model:
    """
    Build, train, report on, and plot one linear regression experiment.

    Args:
        df (pd.DataFrame): The taxi fare data frame of features and values
        feature_names (list[str]): Names of the feature columns used to build the model
        label_name (str): Name of the label (target) column
        learning_rate (float): The learning rate for the keras model
        epochs (int): Number of epochs to train for
        batch_size (int): Number of examples per training batch

    Returns:
        (keras.Model): The trained prediction model
    """
    print(f"INFO: starting training experiment with features={feature_names} and label={label_name}\n")

    num_features = len(feature_names)

    # Pull the raw arrays out of the frame; train_model hands them to fit().
    features = df.loc[:, feature_names].values
    label = df[label_name].values

    model = build_model(learning_rate, num_features)
    model_output = train_model(model, df, features, label, epochs, batch_size)

    print("\nSUCCESS: training experiment complete\n")

    print(model_info(feature_names, label_name, model_output))

    make_plots(df, feature_names, label_name, model_output)

    return model

---
## Train a model with one feature - Building the model a feature at a time.

First, I train the model to predict the cost of the fare using a **single feature**. 

Earlier, I saw that `trip_miles` (distance) correlates most strongly with the ``fare``, so let's start with `trip_miles` as the feature for your first training run.


* How many epochs did it take to converge on the final model?
* How well does the model fit the sample data?

The root mean square error (RMSE) in the output has units in (dollars) same as the label.

So, I can use the RMSE to determine how far off, on average, the predicted fares are in dollars from the observed values.

In [63]:
# First experiment: a single feature with the baseline hyperparameters.
# The following variables are the hyperparameters.
learning_rate = 0.001
epochs = 20
batch_size = 50

# Specify the feature and the label.
features = ["trip_miles"]
label = "fare"

# Builds, trains, reports on, and plots the model; the trained model is kept as model_1.
model_1 = run_experiment(fare_df, features, label, learning_rate, epochs, batch_size)

INFO: starting training experiment with features=['trip_miles'] and label=fare

Epoch 1/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256us/step - loss: 642.8674 - root_mean_squared_error: 25.3366
Epoch 2/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257us/step - loss: 333.7891 - root_mean_squared_error: 18.2427
Epoch 3/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 245us/step - loss: 128.5249 - root_mean_squared_error: 11.3033
Epoch 4/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246us/step - loss: 29.5891 - root_mean_squared_error: 5.4196
Epoch 5/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241us/step - loss: 16.9769 - root_mean_squared_error: 4.1149
Epoch 6/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240us/step - loss: 15.5384 - root_mean_squared_error: 3.9403
Epoch 7/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239us/ste

## Results of the first trial with only `trip_miles`

* With this set of hyperparameters
    * Learning Rate = 0.001
    * Epochs = 20
    * batch size Equal to 50 

The loss appears to level off at about 5 epochs for the training data, so we can say it starts to converge to the final model at that point.

In the scatter plot of fare vs trip_miles the derived model appears to fit the data fairly well. 


# Despite these results it is worth checking to see if I can tweak the hyperparameters a bit more and improve performance.


# Second Experiment - Adjusting the Hyperparameters - Increasing the Learning Rate

* With this set of hyperparameters
    * Learning Rate = 1.0
    * Epochs = 20
    * batch size Equal to 50 

In [64]:
# Second experiment: learning rate set to 1.0; epochs and batch size stay at 20 and 50.
# The following variables are the hyperparameters.
learning_rate = 1.0
epochs = 20
batch_size = 50

# Specify the feature and the label.
features = ["trip_miles"]
label = "fare"

# NOTE: this rebinds model_1, so any model previously stored under that name is replaced.
model_1 = run_experiment(fare_df, features, label, learning_rate, epochs, batch_size)

INFO: starting training experiment with features=['trip_miles'] and label=fare

Epoch 1/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 251us/step - loss: 52.1202 - root_mean_squared_error: 7.1719
Epoch 2/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step - loss: 48.6708 - root_mean_squared_error: 6.9656
Epoch 3/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233us/step - loss: 49.0503 - root_mean_squared_error: 6.9969
Epoch 4/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232us/step - loss: 48.4130 - root_mean_squared_error: 6.9513
Epoch 5/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235us/step - loss: 48.5981 - root_mean_squared_error: 6.9674
Epoch 6/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233us/step - loss: 48.4646 - root_mean_squared_error: 6.9579
Epoch 7/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253us/step - lo

## Results of the Second trial with only `trip_miles` - Increasing learning rate to 1.0

* With this set of hyperparameters
    * Learning Rate = 1.0
    * Epochs = 20
    * batch size Equal to 50 

In neural networks, the learning rate is a multiplier that controls the degree to which each backward pass increases or decreases each weight. A large learning rate will increase or decrease each weight more than a small learning rate.

The issue with a large learning rate is that it can slow down model training because it can cause large fluctuations in weight values which in turn increase the amount of time for the model to converge. 

In this example, I see that the loss rate curve bounces around and does not appear to converge with each iteration. Additionally, the eventual predicted model does not fit the data very well. I am not getting good results. I will not use this model.

# Third Experiment - Adjusting the Hyperparameters - Decreasing the learning rate

* With this set of hyperparameters
    * Learning Rate = 0.0001
    * Epochs = 20
    * batch size Equal to 50 

In [65]:
# Third experiment: learning rate set to 0.0001; epochs and batch size stay at 20 and 50.
# The following variables are the hyperparameters.
learning_rate = 0.0001
epochs = 20
batch_size = 50

# Specify the feature and the label.
features = ["trip_miles"]
label = "fare"

# NOTE: this rebinds model_1, so any model previously stored under that name is replaced.
model_1 = run_experiment(fare_df, features, label, learning_rate, epochs, batch_size)

INFO: starting training experiment with features=['trip_miles'] and label=fare

Epoch 1/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253us/step - loss: 187.1078 - root_mean_squared_error: 13.6774
Epoch 2/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241us/step - loss: 170.3092 - root_mean_squared_error: 13.0480
Epoch 3/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240us/step - loss: 149.0627 - root_mean_squared_error: 12.2086
Epoch 4/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235us/step - loss: 135.3034 - root_mean_squared_error: 11.6305
Epoch 5/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step - loss: 119.3922 - root_mean_squared_error: 10.9260
Epoch 6/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step - loss: 107.5349 - root_mean_squared_error: 10.3667
Epoch 7/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 255

## Results of the Third Experiment with Only `trip_miles` - Decreasing the Learning rate to 0.0001

* With this set of hyperparameters
    * Learning Rate = 0.0001
    * Epochs = 20
    * batch size Equal to 50 

In neural networks, the learning rate is a multiplier that controls the degree to which each backward pass increases or decreases each weight. A small learning rate will increase or decrease each weight less than a large learning rate.

The issue with a small learning rate is that it can slow down model training: each update makes only a small adjustment to the weight values, which increases the amount of time the model needs to converge.

In this example, I see that the loss curve decreases slowly, but does not show a dramatic drop or leveling off. In fact, the final loss is greater than when I used a learning rate of 0.001.

Additionally, the eventual predicted model does not fit the data very well. I am not getting good results. I will not use this model.

### Note
With a small learning rate, I could increase the number of epochs so that model eventually converges, but it will take more time. 

# Fourth Experiment - Adjusting the Hyperparameters - Increasing the Batch Size to 500 and Using the Original Learning Rate - 0.001

* With this set of hyperparameters
    * Learning Rate = 0.001
    * Epochs = 20
    * batch size Equal to 500

In [66]:
# Fourth experiment: batch size set to 500 with a learning rate of 0.001 and 20 epochs.
# The following variables are the hyperparameters.
learning_rate = 0.001
epochs = 20
batch_size = 500

# Specify the feature and the label.
features = ["trip_miles"]
label = "fare"

# NOTE: this rebinds model_1, so any model previously stored under that name is replaced.
model_1 = run_experiment(fare_df, features, label, learning_rate, epochs, batch_size)

INFO: starting training experiment with features=['trip_miles'] and label=fare

Epoch 1/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 422us/step - loss: 250.5904 - root_mean_squared_error: 15.8296
Epoch 2/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 370us/step - loss: 229.7834 - root_mean_squared_error: 15.1575
Epoch 3/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 338us/step - loss: 208.2584 - root_mean_squared_error: 14.4306
Epoch 4/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 341us/step - loss: 185.0431 - root_mean_squared_error: 13.6027
Epoch 5/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 332us/step - loss: 167.1097 - root_mean_squared_error: 12.9268
Epoch 6/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 342us/step - loss: 150.3741 - root_mean_squared_error: 12.2624
Epoch 7/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355us/step - loss

## Results of the Fourth Experiment with Only `trip_miles` - Increasing the Batch Size to 500 and Using the Original Learning Rate - 0.001

* With this set of hyperparameters
    * Learning Rate = 0.001
    * Epochs = 20
    * batch size Equal to 500 

In neural networks, increasing the batch size makes each epoch run faster, but as with the smaller learning rate, the model does not converge with just 20 epochs.

The issue with increasing the batch size is that, for the same number of epochs, the model makes fewer weight updates: each epoch here runs 64 steps instead of the 634 steps taken with a batch size of 50. With fewer updates at the same learning rate, the model needs more epochs to converge.

In this example, I see that the loss curve decreases slowly, but does not show a dramatic drop or leveling off. In fact, the final loss is greater than when I used a learning rate of 0.001 and a batch size of 50.

Additionally, the eventual predicted model does not fit the data very well. I am not getting good results. I will not use this model.


----

## Now I can add more features

The cross correlation implied that trip_seconds had a strong positive correlation with fare prices, so I will consider this feature.


The guidance in the class is to convert trip_seconds into minutes. 

When training a model with more than one feature, it is important that all numeric values are roughly on the same scale. In this case, TRIP_SECONDS and
TRIP_MILES do not meet this criteria. The mean value for TRIP_MILES is 8.3 and the mean for TRIP_SECONDS is 1320; that is two orders of magnitude difference.

Converting the trip duration to minutes helps during training because it puts the values for both features on a more comparable scale. Of course, this is not the only way to scale values before training.

So, I will use this recommendation to create the new feature and compare 

* trip_minutes
* trip_miles 

with fare prices



----

# Experiment: Multiple features - Trip_Miles and Trip_Minutes




In [67]:
# Hyperparameters for the two-feature experiment.
learning_rate = 0.001
epochs = 20
batch_size = 50

# Derive trip duration in minutes from trip_seconds and store it as a new column on fare_df.
fare_df.loc[:, "trip_minutes"] = fare_df["trip_seconds"]/60

# Train on both distance and duration.
features = ["trip_miles", "trip_minutes"]
label = "fare"

# The two-feature model is kept under its own name, model_2.
model_2 = run_experiment(fare_df, features, label, learning_rate, epochs, batch_size)

INFO: starting training experiment with features=['trip_miles', 'trip_minutes'] and label=fare

Epoch 1/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 251us/step - loss: 1544.7292 - root_mean_squared_error: 39.1679
Epoch 2/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240us/step - loss: 316.5458 - root_mean_squared_error: 17.6715
Epoch 3/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step - loss: 82.4185 - root_mean_squared_error: 9.0688
Epoch 4/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 241us/step - loss: 53.0376 - root_mean_squared_error: 7.2749
Epoch 5/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 239us/step - loss: 31.8917 - root_mean_squared_error: 5.6425
Epoch 6/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238us/step - loss: 19.9056 - root_mean_squared_error: 4.4476
Epoch 7/20
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

# Results: Experiment: Multiple features - Trip_Miles and Trip_Minutes

This new model performs better!

The loss appears to level off at about 3 epochs for the training data, so we can say it starts to converge to the final model at that point.

In the scatter plot of fare vs trip_miles vs trip_minutes, the derived model appears to fit the data fairly well. 



## Final results
The model comes close to the documented ground-truth formula for Chicago taxi cab fares.


```FARE = 2.25 * trip_miles + 0.12 * trip_minutes + 3.25```


`My model` has the following form:

```fare = 2.03 * trip_miles + 0.15 * trip_minutes + 3.84```



---

# Last Part - Validating The Model


---


### Making predictions using my derived model and the following methods


"""
    Method to "Train the model by feeding it data

    Args:
      df (pd.DataFrame): The taxi fare data frame of feature and values
      feature_names (list[str]): The features used to build the model
      label (str): The label or target variables
      learning_rate (float): The learning rate for the keras model
      epochs (np.ndarray): Array representing epochs
      batch_size (int): Integer defining the number of batches used for training models

    Returns:
        (keras.Model): The prediction model
  """

In [68]:
def format_currency(x: float) -> str:
    """
    Render a number as a dollar amount with two decimal places.

    Args:
        x (float): The amount to format

    Returns:
        (str): The amount prefixed with "$", e.g. "$7.29"
    """
    return f"${x:.2f}"

def build_batch(df: pd.DataFrame, batch_size: int) -> pd.DataFrame:
    """
    Draw a random batch of rows from the data frame.

    Args:
        df (pd.DataFrame): The taxi fare data frame of features and values
        batch_size (int): Number of rows to sample (without replacement)

    Returns:
        (pd.DataFrame): A new frame of ``batch_size`` sampled rows, indexed
        0..batch_size-1. The input frame is not modified.

    Raises:
        ValueError: Raised by pandas when ``batch_size`` exceeds ``len(df)``.
    """
    # sample() already returns a new object; renumber the rows so callers can
    # address them positionally with .at[i, column].
    return df.sample(n=batch_size).reset_index(drop=True)

def predict_fare(model: keras.Model, df: pd.DataFrame, features: list[str], label: str, batch_size: int = 50) -> pd.DataFrame:
    """
    Predict fares for a random batch and tabulate them against the observed fares.

    Args:
        model (keras.Model): The trained keras model
        df (pd.DataFrame): The taxi fare data frame of features and values
        features (list[str]): The feature columns the model was trained on (one or more)
        label (str): The label (target) column
        batch_size (int): Number of rows to sample and predict

    Returns:
        (pd.DataFrame): One row per sampled example with the columns
        ``predicted_fare``, ``observed_fare`` and ``L1_loss`` (all formatted as
        dollar strings), followed by one column per feature. The first
        feature keeps its raw value; any further features are rendered with
        two decimals, matching the original two-feature output.
    """
    batch = build_batch(df, batch_size)
    predicted_values = model.predict_on_batch(x=batch.loc[:, features].values)

    # The observed label is stored under "observed_fare"; it was previously
    # mislabeled "observed_loss" even though it holds the fare itself.
    data = {"predicted_fare": [], "observed_fare": [], "L1_loss": []}
    # One output column per feature, so single-feature models work as well.
    data.update({name: [] for name in features})

    for i in range(batch_size):
        predicted = predicted_values[i][0]
        observed = batch.at[i, label]
        data["predicted_fare"].append(format_currency(predicted))
        data["observed_fare"].append(format_currency(observed))
        data["L1_loss"].append(format_currency(abs(observed - predicted)))
        for position, name in enumerate(features):
            value = batch.at[i, name]
            data[name].append(value if position == 0 else "{:.2f}".format(value))

    return pd.DataFrame(data)

def show_predictions(output: pd.DataFrame) -> None:
    """
    Print a banner followed by the predictions table.

    Args:
        output (pd.DataFrame): Frame of predictions to print, as produced by ``predict_fare``

    Returns:
        None: The banner and table are written to standard output.
    """
    rule = "-" * 80
    banner = f"{rule}\n|{'PREDICTIONS'.center(78)}|\n{rule}"
    print(banner)
    print(output)

### Make Predictions

In [69]:
# Use the same two features and label that model_2 was trained with.
features = ["trip_miles", "trip_minutes"]
label = "fare"

# Predict on a random batch (predict_fare's default batch size of 50) and print the table.
output = predict_fare(model_2, fare_df, features, label)
show_predictions(output)

--------------------------------------------------------------------------------
|                                 PREDICTIONS                                  |
--------------------------------------------------------------------------------
   predicted_fare observed_loss L1_loss  trip_miles trip_minutes
0           $7.29         $6.50   $0.79        1.20         7.00
1           $8.09         $8.25   $0.16        1.34        10.53
2          $24.30        $25.50   $1.20        7.20        39.85
3           $7.68         $7.25   $0.43        1.34         7.75
4          $32.30        $32.25   $0.05       12.51        21.43
5          $14.07        $15.00   $0.93        3.71        18.52
6          $46.77        $44.50   $2.27       17.61        49.42
7          $45.36        $44.00   $1.36       17.37        43.22
8           $7.29         $7.00   $0.29        1.20         7.00
9           $6.26         $5.50   $0.76        0.86         4.68
10          $7.38         $7.00   $0.38   