# Here we'll do a baseline test on 6 Neural Network model variations on the cleaned data

In [1]:
import os
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sqlalchemy import create_engine

In [2]:
# Download the "merged_no_cal" table from Postgres into a DataFrame.

# SECURITY: credentials must never be committed with the notebook. The
# connection URL is read from the DATABASE_URL environment variable instead.
# The password that used to be hard-coded in this cell should be rotated,
# since it has already been exposed.
conn_string = os.environ.get("DATABASE_URL")
if not conn_string:
    raise RuntimeError(
        "Set the DATABASE_URL environment variable to the Postgres connection URL "
        "before running this notebook."
    )

# SQLAlchemy 1.4+ rejects the legacy "postgres://" scheme; normalise it to the
# supported "postgresql://" spelling so either form of URL works.
if conn_string.startswith("postgres://"):
    conn_string = "postgresql://" + conn_string[len("postgres://"):]

db = create_engine(conn_string)

# The context manager closes the connection even if the query raises.
with db.connect() as conn:
    start_time = time.time()
    df = pd.read_sql_query('select * from "merged_no_cal"', con=conn)
    print("PostGres Download Duration: {} seconds".format(time.time() - start_time))

PostGres Download Duration: 1.9971599578857422 seconds


In [3]:
# Sanity-check the download: print the (rows, columns) shape, then let the
# notebook render the first five rows as the cell's output.
print(df.shape)
df.head(n=5)

(5242, 257)


Unnamed: 0,host_is_superhost,host_listings_count,host_identity_verified,accommodates,bathrooms,bedrooms,price,security_deposit,cleaning_fee,guests_included,...,bed_type_Futon,bed_type_Pull-out Sofa,bed_type_Real Bed,cancellation_policy_flexible,cancellation_policy_moderate,cancellation_policy_strict,cancellation_policy_strict_14_with_grace_period,cancellation_policy_super_strict_30,cancellation_policy_super_strict_60,baths_logs
0,1,0.0,1,2,1.0,2,40.0,0.0,0.0,1,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,1,0.693147,0,2,1.0,1,65.0,0.0,25.0,1,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,1,0.0,1,2,1.0,1,93.0,0.0,0.0,1,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0,0.0,0,2,1.0,1,105.0,200.0,0.0,1,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,1,0.693147,1,2,1.0,1,55.0,0.0,0.0,1,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Split the preprocessed data into the target vector (price) and the feature matrix.
y = df["price"].values
# Use the explicit `columns=` keyword: passing the axis positionally
# (`df.drop("price", 1)`) was deprecated in pandas 1.3 and removed in pandas 2.0.
X = df.drop(columns="price").values

# Split into training and testing sets. No test_size is given, so
# train_test_split uses its default hold-out fraction; random_state fixes the
# shuffle so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [5]:
# Display the column labels of the downloaded frame (includes the "price" target).
df.columns

Index(['host_is_superhost', 'host_listings_count', 'host_identity_verified',
       'accommodates', 'bathrooms', 'bedrooms', 'price', 'security_deposit',
       'cleaning_fee', 'guests_included',
       ...
       'bed_type_Futon', 'bed_type_Pull-out Sofa', 'bed_type_Real Bed',
       'cancellation_policy_flexible', 'cancellation_policy_moderate',
       'cancellation_policy_strict',
       'cancellation_policy_strict_14_with_grace_period',
       'cancellation_policy_super_strict_30',
       'cancellation_policy_super_strict_60', 'baths_logs'],
      dtype='object', length=257)

In [6]:
# Scale the features with RobustScaler (imported in the notebook's import cell).
scaler = RobustScaler()

# Fit on the training split only, so no statistics from the test split leak
# into the scaling parameters.
X_scaler = scaler.fit(X_train)

# Apply the same fitted transformation to both splits.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


# Test 1: Use two hidden layers (80 and 30 neurons) and a single linear output

In [7]:
# Test 1 architecture: two ReLU hidden layers (80 and 30 units) feeding a
# single linear output unit.
number_in_features = X_train_scaled.shape[1]  # one input per scaled feature column
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

# The layer stack is handed to the constructor instead of being added one by one.
nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        kernel_initializer='normal',
        input_dim=number_in_features,
        activation="relu",
    ),
    # Second hidden layer
    tf.keras.layers.Dense(
        units=hidden_nodes_layer2,
        kernel_initializer='normal',
        activation="relu",
    ),
    # Output layer: one linear unit for the regression target
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Print the layer-by-layer structure and parameter counts
nn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 80)                20560     
_________________________________________________________________
dense_1 (Dense)              (None, 30)                2430      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 31        
Total params: 23,021
Trainable params: 23,021
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Configure training: Adam optimizer, mean absolute error as the loss, and the
# same quantity reported as a metric.
nn.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=['mean_absolute_error'],
)

In [9]:
# Fit on the scaled training split for 50 epochs, keeping the object returned by fit()
fit_model = nn.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [10]:
# Score the trained network on the held-out test split.
# evaluate() returns the loss followed by the compiled metric; both are mean
# absolute error here, so `model_accuracy` holds an MAE value, not an accuracy.
model_loss, model_accuracy = nn.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss}")
print(f"Mean Absolute Error: {model_accuracy}")

41/41 - 0s - loss: 62.9081 - mean_absolute_error: 62.9081
Loss: 62.908138275146484
Mean Absolute Error: 62.908138275146484


# Test 2: Add more neurons to each layer

In [11]:
# Test 2 architecture: two ReLU hidden layers (100 and 50 units) feeding a
# single linear output unit.
number_in_features = X_train_scaled.shape[1]  # one input per scaled feature column
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 50

nn2 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        kernel_initializer='normal',
        input_dim=number_in_features,
        activation="relu",
    ),
    # Second hidden layer
    tf.keras.layers.Dense(
        units=hidden_nodes_layer2,
        kernel_initializer='normal',
        activation="relu",
    ),
    # Output layer: one linear unit for the regression target
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Print the layer-by-layer structure and parameter counts
nn2.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 100)               25700     
_________________________________________________________________
dense_4 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 51        
Total params: 30,801
Trainable params: 30,801
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Configure training: Adam optimizer, MAE as both the loss and the reported metric
nn2.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=['mean_absolute_error'],
)

# Fit on the scaled training split for 50 epochs
fit_model2 = nn2.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [13]:
# Score the trained network on the held-out test split.
# Both returned values are mean absolute error (the loss and the metric), so
# `model_accuracy2` holds an MAE value, not an accuracy.
model_loss2, model_accuracy2 = nn2.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss2}")
print(f"Mean Absolute Error: {model_accuracy2}")

41/41 - 0s - loss: 62.9004 - mean_absolute_error: 62.9004
Loss: 62.9003791809082
Mean Absolute Error: 62.9003791809082


# Test 3:  Add an additional layer with 20 neurons

In [14]:
# Test 3 architecture: three ReLU hidden layers (100, 50 and 20 units) feeding
# a single linear output unit.
number_in_features = X_train_scaled.shape[1]  # one input per scaled feature column
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 50
hidden_nodes_layer3 = 20

nn3 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        kernel_initializer='normal',
        input_dim=number_in_features,
        activation="relu",
    ),
    # Second and third hidden layers share the same configuration apart from width
    *[
        tf.keras.layers.Dense(units=width, kernel_initializer='normal', activation="relu")
        for width in (hidden_nodes_layer2, hidden_nodes_layer3)
    ],
    # Output layer: one linear unit for the regression target
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Print the layer-by-layer structure and parameter counts
nn3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               25700     
_________________________________________________________________
dense_7 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_8 (Dense)              (None, 20)                1020      
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 21        
Total params: 31,791
Trainable params: 31,791
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Configure training: Adam optimizer, MAE as both the loss and the reported metric
nn3.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=['mean_absolute_error'],
)

# Fit on the scaled training split for 50 epochs
fit_model3 = nn3.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [16]:
# Score the trained network on the held-out test split.
# Both returned values are mean absolute error (the loss and the metric), so
# `model_accuracy3` holds an MAE value, not an accuracy.
model_loss3, model_accuracy3 = nn3.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss3}")
print(f"Mean Absolute Error: {model_accuracy3}")

41/41 - 0s - loss: 62.9132 - mean_absolute_error: 62.9132
Loss: 62.913211822509766
Mean Absolute Error: 62.913211822509766


# Test 4:  Add 50 neurons to the first layer and a 4th layer with 10 neurons

In [17]:
# Test 4 architecture: four ReLU hidden layers (150, 50, 20 and 10 units)
# feeding a single linear output unit.
number_in_features = X_train_scaled.shape[1]  # one input per scaled feature column
hidden_nodes_layer1 = 150
hidden_nodes_layer2 = 50
hidden_nodes_layer3 = 20
hidden_nodes_layer4 = 10

nn4 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        kernel_initializer='normal',
        input_dim=number_in_features,
        activation="relu",
    ),
    # Second through fourth hidden layers differ only in width
    *[
        tf.keras.layers.Dense(units=width, kernel_initializer='normal', activation="relu")
        for width in (hidden_nodes_layer2, hidden_nodes_layer3, hidden_nodes_layer4)
    ],
    # Output layer: one linear unit for the regression target
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Print the layer-by-layer structure and parameter counts
nn4.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 150)               38550     
_________________________________________________________________
dense_11 (Dense)             (None, 50)                7550      
_________________________________________________________________
dense_12 (Dense)             (None, 20)                1020      
_________________________________________________________________
dense_13 (Dense)             (None, 10)                210       
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 11        
Total params: 47,341
Trainable params: 47,341
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Configure training: Adam optimizer, MAE as both the loss and the reported metric
nn4.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=['mean_absolute_error'],
)

# Fit on the scaled training split for 50 epochs
fit_model4 = nn4.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
# Score the trained network on the held-out test split.
# Both returned values are mean absolute error (the loss and the metric), so
# `model_accuracy4` holds an MAE value, not an accuracy.
model_loss4, model_accuracy4 = nn4.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss4}")
print(f"Mean Absolute Error: {model_accuracy4}")

41/41 - 0s - loss: 65.0896 - mean_absolute_error: 65.0896
Loss: 65.08956909179688
Mean Absolute Error: 65.08956909179688


# Test 5:  Add 50 more neurons to the first layer and add a 5th layer with just 5 neurons

In [20]:
# Test 5 architecture: five ReLU hidden layers (200, 50, 20, 10 and 5 units)
# feeding a single linear output unit.
number_in_features = X_train_scaled.shape[1]  # one input per scaled feature column
hidden_nodes_layer1 = 200
hidden_nodes_layer2 = 50
hidden_nodes_layer3 = 20
hidden_nodes_layer4 = 10
hidden_nodes_layer5 = 5

nn5 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        kernel_initializer='normal',
        input_dim=number_in_features,
        activation="relu",
    ),
    # Second through fifth hidden layers differ only in width
    *[
        tf.keras.layers.Dense(units=width, kernel_initializer='normal', activation="relu")
        for width in (
            hidden_nodes_layer2,
            hidden_nodes_layer3,
            hidden_nodes_layer4,
            hidden_nodes_layer5,
        )
    ],
    # Output layer: one linear unit for the regression target
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Print the layer-by-layer structure and parameter counts
nn5.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 200)               51400     
_________________________________________________________________
dense_16 (Dense)             (None, 50)                10050     
_________________________________________________________________
dense_17 (Dense)             (None, 20)                1020      
_________________________________________________________________
dense_18 (Dense)             (None, 10)                210       
_________________________________________________________________
dense_19 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_20 (Dense)             (None, 1)                 6         
Total params: 62,741
Trainable params: 62,741
Non-trainable params: 0
__________________________________________________

In [21]:
# Configure training: Adam optimizer, MAE as both the loss and the reported metric
nn5.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=['mean_absolute_error'],
)

# Fit on the scaled training split for 50 epochs
fit_model5 = nn5.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [22]:
# Score the trained network on the held-out test split.
# Both returned values are mean absolute error (the loss and the metric), so
# `model_accuracy5` holds an MAE value, not an accuracy.
model_loss5, model_accuracy5 = nn5.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss5}")
print(f"Mean Absolute Error: {model_accuracy5}")

41/41 - 0s - loss: 63.0521 - mean_absolute_error: 63.0521
Loss: 63.05207824707031
Mean Absolute Error: 63.05207824707031


# Test 6:  Add 150 neurons to first layer and change second activation function to tanh

In [23]:
# Test 6 architecture: five hidden layers (350, 50, 20, 10 and 5 units) feeding
# a single linear output unit. Every hidden layer uses ReLU except the second,
# which uses tanh.
number_in_features = X_train_scaled.shape[1]  # one input per scaled feature column
hidden_nodes_layer1 = 350
hidden_nodes_layer2 = 50
hidden_nodes_layer3 = 20
hidden_nodes_layer4 = 10
hidden_nodes_layer5 = 5

nn6 = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        kernel_initializer='normal',
        input_dim=number_in_features,
        activation="relu",
    ),
    # Second through fifth hidden layers, given as (width, activation) pairs
    *[
        tf.keras.layers.Dense(units=width, kernel_initializer='normal', activation=act)
        for width, act in (
            (hidden_nodes_layer2, "tanh"),
            (hidden_nodes_layer3, "relu"),
            (hidden_nodes_layer4, "relu"),
            (hidden_nodes_layer5, "relu"),
        )
    ],
    # Output layer: one linear unit for the regression target
    tf.keras.layers.Dense(units=1, activation="linear"),
])

# Print the layer-by-layer structure and parameter counts
nn6.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             (None, 350)               89950     
_________________________________________________________________
dense_22 (Dense)             (None, 50)                17550     
_________________________________________________________________
dense_23 (Dense)             (None, 20)                1020      
_________________________________________________________________
dense_24 (Dense)             (None, 10)                210       
_________________________________________________________________
dense_25 (Dense)             (None, 5)                 55        
_________________________________________________________________
dense_26 (Dense)             (None, 1)                 6         
Total params: 108,791
Trainable params: 108,791
Non-trainable params: 0
________________________________________________

In [24]:
# Configure training: Adam optimizer, MAE as both the loss and the reported metric
nn6.compile(
    optimizer="adam",
    loss="mean_absolute_error",
    metrics=['mean_absolute_error'],
)

# Fit on the scaled training split for 50 epochs
fit_model6 = nn6.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [26]:
# Evaluate the model on the TRAINING data (not the test data), so the training
# error can be compared against the test error to gauge overfitting.
# The results are stored under train_* names so they never overwrite the
# test-set metrics held in model_loss6 / model_accuracy6.
train_loss6, train_mae6 = nn6.evaluate(X_train_scaled,y_train,verbose=2)
print(f"Loss: {train_loss6}")
print(f"Mean Absolute Error: {train_mae6}")

123/123 - 0s - loss: 12.6459 - mean_absolute_error: 12.6459
Loss: 12.645925521850586
Mean Absolute Error: 12.645925521850586


In [25]:
# Score the trained network on the held-out test split.
# Both returned values are mean absolute error (the loss and the metric), so
# `model_accuracy6` holds an MAE value, not an accuracy.
model_loss6, model_accuracy6 = nn6.evaluate(x=X_test_scaled, y=y_test, verbose=2)
print(f"Loss: {model_loss6}")
print(f"Mean Absolute Error: {model_accuracy6}")

41/41 - 0s - loss: 59.2770 - mean_absolute_error: 59.2770
Loss: 59.27701187133789
Mean Absolute Error: 59.27701187133789


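# After lots of tests, our best model (Test 6) has a test-set Mean Absolute Error of about 59.28, i.e. its price predictions are off by about $59.28 on average. Its training MAE is only about $12.65, so the model is overfitting the training data.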