In [None]:
!pip install bayesian-optimization

In [2]:
import numpy as np
import pandas as pd
from google.colab import drive

In [3]:
# Read the CSV file from My Google Drive
drive.mount('/content/drive')
df = pd.read_csv('/content/drive/My Drive/Transportation_Network_Providers_-_Trips_2.csv')

Mounted at /content/drive


  df = pd.read_csv('/content/drive/My Drive/Transportation_Network_Providers_-_Trips_2.csv')


In [None]:
df.head()

In [5]:
df.shape

(280657, 21)

In [6]:
df.dtypes

Trip ID                        object
Trip Start Timestamp           object
Trip End Timestamp             object
Trip Seconds                  float64
Trip Miles                    float64
Pickup Census Tract           float64
Dropoff Census Tract          float64
Pickup Community Area         float64
Dropoff Community Area        float64
Fare                          float64
Tip                           float64
Additional Charges            float64
Trip Total                    float64
Shared Trip Authorized         object
Trips Pooled                  float64
Pickup Centroid Latitude      float64
Pickup Centroid Longitude     float64
Pickup Centroid Location       object
Dropoff Centroid Latitude     float64
Dropoff Centroid Longitude    float64
Dropoff Centroid Location      object
dtype: object

In [7]:
df.drop_duplicates(inplace=True)

In [8]:
df.shape

(280657, 21)

In [9]:
df.dropna(inplace=True)

In [10]:
df.shape

(198766, 21)

In [11]:
shared_trips = df['Shared Trip Authorized'].value_counts()
print(shared_trips)

False    167648
True      31118
Name: Shared Trip Authorized, dtype: int64


In [12]:
# compare the trip duration and distance of shared trips versus non-shared trips
shared_trips_duration = df.loc[df['Shared Trip Authorized'] == True, 'Trip Seconds'].mean()
print(f"The average duration of shared trips is:{shared_trips_duration: .2f} seconds.")
non_shared_trips_duration = df.loc[df['Shared Trip Authorized'] == False, 'Trip Seconds'].mean()
print(f"The average duration of non_shared trips is:{non_shared_trips_duration: .2f} seconds")

shared_trips_distance = df.loc[df['Shared Trip Authorized'] == True, 'Trip Miles'].mean()
print(f"The average distance of shared trips is:{shared_trips_distance: .2f} Miles")
non_shared_trips_distance = df.loc[df['Shared Trip Authorized'] == False, 'Trip Miles'].mean()
print(f"The average distance of non_shared trips is:{non_shared_trips_distance: .2f} Miles")

The average duration of shared trips is: 1156.53 seconds.
The average duration of non_shared trips is: 928.87 seconds
The average distance of shared trips is: 4.88 Miles
The average distance of non_shared trips is: 4.37 Miles


In [13]:
# Modify the Dataset

# Assume that the "Trip Start Timestamp" column contains date and time strings
df["Start Timestamp"] = pd.to_datetime(df["Trip Start Timestamp"])

# Print the modified dataframe
print(df.dtypes)

Trip ID                               object
Trip Start Timestamp                  object
Trip End Timestamp                    object
Trip Seconds                         float64
Trip Miles                           float64
Pickup Census Tract                  float64
Dropoff Census Tract                 float64
Pickup Community Area                float64
Dropoff Community Area               float64
Fare                                 float64
Tip                                  float64
Additional Charges                   float64
Trip Total                           float64
Shared Trip Authorized                object
Trips Pooled                         float64
Pickup Centroid Latitude             float64
Pickup Centroid Longitude            float64
Pickup Centroid Location              object
Dropoff Centroid Latitude            float64
Dropoff Centroid Longitude           float64
Dropoff Centroid Location             object
Start Timestamp               datetime64[ns]
dtype: obj

In [14]:
# Assume that the "Trip End Timestamp" column contains date and time strings
df["End Timestamp"] = pd.to_datetime(df["Trip End Timestamp"])

# Drop the original "Trip End Timestamp" column
df.drop("Trip End Timestamp", axis=1, inplace=True)

# Print the modified dataframe
print(df.head())

                                    Trip ID    Trip Start Timestamp  \
2  09e6dc28f00a710926385c004c9ef0099671443b  09/17/2019 10:45:00 PM   
3  2c632a97287a94d8d9093e6082f83307435d80a6  11/29/2018 03:00:00 PM   
4  09e6dc7bcc037a2553310b6f6dbc64ab32cddf65  08/06/2019 12:00:00 PM   
5  2c632f3d748c49e9a6aa226fb97f492c7ed93d3a  12/13/2018 06:15:00 PM   
7  2c632f7236325d19bbe558b59148ed5aaf034357  12/14/2018 01:15:00 AM   

   Trip Seconds  Trip Miles  Pickup Census Tract  Dropoff Census Tract  \
2         268.0         1.1         1.703124e+10          1.703124e+10   
3         596.0         2.5         1.703128e+10          1.703108e+10   
4        1266.0         3.7         1.703124e+10          1.703132e+10   
5        1147.0         4.3         1.703184e+10          1.703124e+10   
7         346.0         1.4         1.703107e+10          1.703107e+10   

   Pickup Community Area  Dropoff Community Area  Fare  Tip  ...  \
2                   24.0                    24.0   5.0  0.0 

In [15]:
# extract the hour from the "Start Timestamp" column and convert it to an integer
df["Start hour"] = df["Start Timestamp"].dt.hour.astype(int)

# extract the hour from the "End Timestamp" column and convert it to an integer
df["End hour"] = df["End Timestamp"].dt.hour.astype(int)

In [18]:
# Modify the data to select the required columns and delete the others

# Drop "Trip ID" column
df.drop("Trip ID", axis=1, inplace=True)

# Drop "Pickup Census Tract" column
df.drop("Pickup Census Tract", axis=1, inplace=True)

# Drop "Dropoff Census Tract" column
df.drop("Dropoff Census Tract", axis=1, inplace=True)

# Drop "Pickup Centroid Latitude" column
df.drop("Pickup Centroid Latitude", axis=1, inplace=True)

# Drop "Pickup Centroid Longitude" column
df.drop("Pickup Centroid Longitude", axis=1, inplace=True)

# Drop "Pickup Centroid Location" column
df.drop("Pickup Centroid Location", axis=1, inplace=True)

# Drop "Dropoff Centroid Latitude" column
df.drop("Dropoff Centroid Latitude", axis=1, inplace=True)

# Drop "Dropoff Centroid Longitude" column
df.drop("Dropoff Centroid Longitude", axis=1, inplace=True)

# Drop "Dropoff Centroid Location" column
df.drop("Dropoff Centroid Location", axis=1, inplace=True)


In [19]:
# Drop "Start Timestamp" column
df.drop("Start Timestamp", axis=1, inplace=True)

# Drop "End Timestamp" column
df.drop("End Timestamp", axis=1, inplace=True)

In [20]:
df.dtypes


Trip Seconds              float64
Trip Miles                float64
Pickup Community Area     float64
Dropoff Community Area    float64
Fare                      float64
Tip                       float64
Additional Charges        float64
Trip Total                float64
Shared Trip Authorized     object
Trips Pooled              float64
Start hour                  int64
End hour                    int64
dtype: object

In [21]:
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from keras.models import Sequential
from keras.layers import Dense

In [40]:
# Define the dataset
df['Shared Trip Authorized'] = df['Shared Trip Authorized'].astype(int)


In [41]:
# get the indices of True and False values separately
true_indices = df[df["Shared Trip Authorized"] == 1].index
false_indices = df[df["Shared Trip Authorized"] == 0].index

# randomly sample an equal number of True and False values
num_samples = min(len(true_indices), len(false_indices))
true_samples = df.loc[true_indices].sample(n=num_samples)
false_samples = df.loc[false_indices].sample(n=num_samples)

# concatenate the sampled True and False values into a single dataframe
selected_data = pd.concat([true_samples, false_samples])

In [42]:
# Select the required data for model
X = selected_data.loc[:, ['Trip Seconds', 'Trip Miles', 'Pickup Community Area', 'Dropoff Community Area', 'Fare','Start hour']]
y = selected_data.loc[:, 'Shared Trip Authorized']

print(X.shape)
print(X.dtypes)
print(y.shape)
print(y.dtypes)

(62236, 6)
Trip Seconds              float64
Trip Miles                float64
Pickup Community Area     float64
Dropoff Community Area    float64
Fare                      float64
Start hour                  int64
dtype: object
(62236,)
int64


In [43]:
# Split the data into training, testing, and cross validation sets
X_train, X_test_val, y_train, y_test_val = train_test_split(X, y, test_size=0.2, random_state=42)

X_test, X_val, y_test, y_val = train_test_split(X_test_val, y_test_val, test_size=0.5, random_state=42)

print(X_train.shape)
print(X_val.shape)
print(X_test.shape)


(49788, 6)
(6224, 6)
(6224, 6)


In [172]:
# Define the model architecture 1
model_1 = Sequential()
model_1.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model_1.add(Dense(32, activation='relu'))
model_1.add(Dense(1, activation='sigmoid'))

In [173]:
# Define the model architecture 2
model_2 = Sequential()
model_2.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model_2.add(Dense(64, activation='relu'))
model_2.add(Dense(32, activation='relu'))
model_2.add(Dense(1, activation='sigmoid'))

In [174]:
# Define the model architecture 3
model_3 = Sequential()
model_3.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model_3.add(Dense(64, activation='relu'))
model_3.add(Dense(64, activation='relu'))
model_3.add(Dense(32, activation='relu'))
model_3.add(Dense(1, activation='sigmoid'))

In [175]:
# Define the model architecture 4
model_4 = Sequential()
model_4.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model_4.add(Dense(64, activation='relu'))
model_4.add(Dense(32, activation='relu'))
model_4.add(Dense(32, activation='relu'))
model_4.add(Dense(16, activation='relu'))
model_4.add(Dense(1, activation='sigmoid'))

In [176]:
# Define the model architecture 5
hidden_units = 100


model_5 = Sequential()
model_5.add(Dense(hidden_units, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=keras.regularizers.l2(0)))
model_5.add(Dense(hidden_units, activation='relu', kernel_regularizer=keras.regularizers.l2(0.0)))
model_5.add(Dense(hidden_units, activation='relu', kernel_regularizer=keras.regularizers.l2(0.0)))
model_5.add(Dense(hidden_units, activation='relu', kernel_regularizer=keras.regularizers.l2(0.0)))
model_5.add(Dense(hidden_units, activation='relu', kernel_regularizer=keras.regularizers.l2(0.0)))
model_5.add(Dense(hidden_units, activation='relu', kernel_regularizer=keras.regularizers.l2(0.0)))
model_5.add(Dense(1, activation='sigmoid', kernel_regularizer=keras.regularizers.l2(0.0)))

In [177]:
# Define the model architecture 6
model_6 = Sequential()
model_6.add(Dense(hidden_units, input_dim=X_train.shape[1], activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(hidden_units, activation='relu'))
model_6.add(Dense(1, activation='sigmoid'))

In [178]:
# Compile the models

model_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
model_2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
model_3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
model_4.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
model_5.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])
model_6.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_accuracy'])

In [179]:
# Train the model_1
history = model_1.fit(X_val, y_val, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [180]:
# Evaluate the model_1
score_1 = model_1.evaluate(X_val, y_val, verbose=0)
print(f'Test loss: {score_1[0]} / Test accuracy: {score_1[1]}')

Test loss: 0.377288281917572 / Test accuracy: 0.8439909815788269


In [181]:
# Train the model_2
history = model_2.fit(X_val, y_val, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [182]:
# Evaluate the model_2
score_2 = model_2.evaluate(X_val, y_val, verbose=0)
print(f'Test loss: {score_2[0]} / Test accuracy: {score_2[1]}')

Test loss: 0.3544981777667999 / Test accuracy: 0.85893315076828


In [183]:
# Train the model_3
history = model_3.fit(X_val, y_val, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [184]:
# Evaluate the model_3
score_3 = model_3.evaluate(X_val, y_val, verbose=0)
print(f'Test loss: {score_3[0]} / Test accuracy: {score_3[1]}')

Test loss: 0.39037713408470154 / Test accuracy: 0.8288881778717041


In [185]:
# Train the model_4
history = model_4.fit(X_val, y_val, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [186]:
# Evaluate the model_4
score_4 = model_4.evaluate(X_val, y_val, verbose=0)
print(f'Test loss: {score_4[0]} / Test accuracy: {score_4[1]}')

Test loss: 0.3482539653778076 / Test accuracy: 0.8584511280059814


In [187]:
# Train the model_5
history = model_5.fit(X_val, y_val, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [188]:
# Evaluate the model_5
score_5 = model_5.evaluate(X_val, y_val, verbose=0)
print(f'Test loss: {score_5[0]} / Test accuracy: {score_5[1]}')

Test loss: 0.3329090476036072 / Test accuracy: 0.8621465563774109


In [189]:
# Train the model_6
history = model_6.fit(X_val, y_val, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [190]:
# Evaluate the model_6
score_6 = model_6.evaluate(X_val, y_val, verbose=0)
print(f'Test loss: {score_6[0]} / Test accuracy: {score_6[1]}')

Test loss: 0.33835989236831665 / Test accuracy: 0.8573265075683594


In [191]:
# Choose The Final Model
model_loss = [score_1[0], score_2[0], score_3[0], score_4[0], score_5[0], score_6[0]]
model_accuracy = [score_1[1], score_2[1], score_3[1], score_4[1], score_5[1], score_6[1]]
                  
# Find the index of the model with the minimum loss
min_loss_index = model_loss.index(min(model_loss))

# Find the index of the model with the maximum accuracy
max_accuracy_index = model_accuracy.index(max(model_accuracy))

# Determine the best model based on the minimum loss and maximum accuracy
if min_loss_index == max_accuracy_index:
    print("The best model is Model ", min_loss_index+1, " with a loss of ", model_loss[min_loss_index], " and an accuracy of ", model_accuracy[max_accuracy_index])
elif min_loss_index < max_accuracy_index:
    print("The best model based on the minimum loss is Model ", min_loss_index+1, " with a loss of ", model_loss[min_loss_index])
else:
    print("The best model based on the maximum accuracy is Model ", max_accuracy_index+1, " with an accuracy of ", model_accuracy[max_accuracy_index])

The best model is Model  5  with a loss of  0.3329090476036072  and an accuracy of  0.8621465563774109


In [192]:
# Train the Final Model

model = model_5
history = model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [193]:
# Evaluate the Final Model
score = model.evaluate(X_test, y_test, verbose=0)
print(f'Model loss: {score[0]} \nModel accuracy: {score[1]}')

Model loss: 0.28178656101226807 
Model accuracy: 0.888978123664856


In [198]:
# Use the model for prediction

# Choose a sample from dataset for prediction
sample = 568
X_new = X_val.iloc[sample].values.reshape(1, -1)

y_correct = y_val.iloc[sample]

# Predict
y_pred = model.predict(X_new)

if y_pred > 0.5:
  y_predict = 1
  print("Predict if this trip is going to be shared?   YES")
else:
  y_predict = 0
  print("Predict if this trip is going to be shared?   NO")

print("===================================================================\n")
print(f"The prediction is: {y_pred}   /   The true answer is: {y_correct}")

print("===================================================================\n")

if y_predict == y_correct:
  print("Prediction: CORRECT!")
else:
  print("Prediction: WRONG!")

Predict if this trip is going to be shared?   YES

The prediction is: [[0.99999726]]   /   The true answer is: 1

Prediction: CORRECT!
