# Neural Network: version 2

In [None]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# `tf` is an alias for the Keras namespace. An alias cannot be used as a module
# path in a `from ... import`, so the submodules are imported by their full name.
import tensorflow.keras as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
# Read the Kaggle competition CSVs into DataFrames (test file first, then train).
# The files on disk are spelled "keggle", so the paths keep that spelling.
test_df, train_df = (
    pd.read_csv(csv_path)
    for csv_path in ('data/keggle_test_data.csv', 'data/keggle_train_data.csv')
)

# Show the test frame as this cell's output.
display(test_df)

In [None]:
# Separate each frame into the target column ('SalePrice') and the inputs
# (every remaining column).
target_train, target_test = train_df['SalePrice'], test_df['SalePrice']
input_train, input_test = (
    train_df.drop(columns='SalePrice'),
    test_df.drop(columns='SalePrice'),
)

# Render both input frames, train first.
display(input_train, input_test)

In [None]:
# Unwrap the pandas objects into NumPy ndarrays: inputs first, then targets.
X_train, X_test = input_train.to_numpy(), input_test.to_numpy()
y_train, y_test = target_train.to_numpy(), target_test.to_numpy()

# Number of features = number of columns of the training inputs.
n_features = X_train.shape[1]

In [None]:
# define metrics
# NOTE: in this notebook `tf` is the alias for `tensorflow.keras`.
# Root Mean Square Error metric from Keras Metrics.
# source: https://www.tensorflow.org/api_docs/python/tf/keras/metrics/RootMeanSquaredError
m1 = tf.metrics.RootMeanSquaredError()
# Mean Absolute Percentage Error. The evaluation cell at the end of the notebook
# reads the 'mean_absolute_percentage_error' / 'val_mean_absolute_percentage_error'
# history columns, so the metric has to be tracked during training.
m2 = tf.metrics.MeanAbsolutePercentageError()

# create layers
# The input shape is declared first, via Input, so the model knows its input
# size as soon as it is constructed. It used to be passed to the output layer,
# where Sequential does not use it to define the model input.
# NOTE(review): the first Dense layer has no activation (i.e. it is linear) --
# confirm this is intended.
model = tf.Sequential([
    tf.Input(shape=(n_features,)),
    Dense(160),
    Dense(224, activation='tanh'),
    Dense(112, activation='tanh'),
    Dense(240, activation='tanh'),
    Dense(1),  # single regression output
])

# compile model
# use Adam as optimizer because of fast and accurate converging
# source: https://medium.com/mdr-inc/from-sgd-to-adam-c9fce513c4bb
# The loss is given by its standard string identifier rather than being
# borrowed from the metrics namespace.
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=[m1, m2])

In [None]:
# Train the network for 200 epochs.
# A mini-batch size of 32 was kept after trying 32, 64 and 128.
# source: https://datascience.stackexchange.com/questions/18414/are-there-any-rules-for-choosing-the-size-of-a-mini-batch
# (X_test, y_test) is supplied as the validation data for the fit.
model.fit(
    x=X_train,
    y=y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_test, y_test),
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Collect the per-epoch training history into a DataFrame (one column per
# recorded quantity); `loss_df` is reused by the evaluation cell below.
loss_df = pd.DataFrame(model.history.history)

# Draw the validation RMSE curve first, then the training RMSE curve.
for rmse_column in ('val_root_mean_squared_error', 'root_mean_squared_error'):
    loss_df[rmse_column].plot(figsize=(12,8))

# Figure decoration: title, logarithmic y axis, x label and legend.
plt.title("Model information")
plt.yscale('log')
plt.xlabel("epochs")
plt.legend()
plt.show()

In [None]:
# Predict on the test inputs and compute the explained variance of the
# predictions with respect to the test targets.
y_pred = model.predict(X_test)
var_score = metrics.explained_variance_score(y_test, y_pred)

# print the metric scores of the model
print('Variance score:', var_score)

# (label, history column) pairs in display order. For each one the last row of
# the history (i.e. the final epoch) is printed.
final_epoch_report = [
    ('RMSE', 'root_mean_squared_error'),
    ('val RMSE', 'val_root_mean_squared_error'),
    ('Train Loss', 'loss'),
    ('Test Loss', 'val_loss'),
    ('MAPE', 'mean_absolute_percentage_error'),
    ('val MAPE', 'val_mean_absolute_percentage_error'),
]
for label, column in final_epoch_report:
    # A column only exists if the matching metric was passed to model.compile;
    # report a missing one explicitly instead of failing with a KeyError.
    if column in loss_df.columns:
        print(f'\n{label}:', loss_df[column].tail(1))
    else:
        print(f'\n{label}: not recorded (metric was not passed to model.compile)')