# Neural Network: version 1
For this model we used some information from the tutorial at https://machinelearningmastery.com/tensorflow-tutorial-deep-learning-with-tf-keras/

In [None]:
# import libraries
import tensorflow.keras as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [None]:
# read the cleaned test and train sets from the local data folder
test_df = pd.read_csv('data/clean_test_data.csv')
train_df = pd.read_csv('data/clean_train_data.csv')

# render both frames: test set first, then train set
display(test_df, train_df)

In [None]:
# split data into input X and target y
# 'SalePrice' is the regression target; every remaining column is a feature
target_train = train_df['SalePrice']
target_test = test_df['SalePrice']
input_train = train_df.drop('SalePrice', axis=1)
input_test = test_df.drop('SalePrice', axis=1)

# sanity checks on the split: the target must be gone from the inputs, and
# train/test must expose the same feature columns in the same order, because
# the column names are lost once the frames are converted to numpy arrays
assert 'SalePrice' not in input_train.columns
assert 'SalePrice' not in input_test.columns
assert list(input_train.columns) == list(input_test.columns), \
    "train and test feature columns differ"

# show the feature frames produced by the split
# (drop() returns new frames, so train_df / test_df themselves are unchanged)
display(input_test)
display(input_train)

In [None]:
# turn the pandas objects into plain numpy ndarrays
X_train, X_test = input_train.to_numpy(), input_test.to_numpy()
y_train, y_test = target_train.to_numpy(), target_test.to_numpy()

# number of input features = number of columns in the feature matrix
n_features = X_train.shape[1]

In [None]:
# metrics reported next to the loss during training
# m1: Keras' stateful Root Mean Squared Error metric
#     https://www.tensorflow.org/api_docs/python/tf/keras/metrics/RootMeanSquaredError
# m2: mean absolute percentage error, referenced by its Keras string name
m1 = tf.metrics.RootMeanSquaredError()
m2 = 'mean_absolute_percentage_error'

# build the network: a single Dense unit fed directly by the n_features inputs
model = Sequential()
model.add(Dense(1, input_shape=(n_features,)))

# compile with the Adam optimizer and a mean squared error loss
# (Adam was picked for its fast and accurate convergence, see
#  https://medium.com/mdr-inc/from-sgd-to-adam-c9fce513c4bb)
model.compile(
    optimizer='Adam',
    loss=tf.metrics.mean_squared_error,
    metrics=[m1, m2],
)

In [None]:
# train the model for 200 epochs
# a batch size of 32 was kept after comparing 32, 64 and 128, see
# https://datascience.stackexchange.com/questions/18414/are-there-any-rules-for-choosing-the-size-of-a-mini-batch
# the test arrays are passed as validation data, so val_* metrics are recorded per epoch
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=200,
    validation_data=(X_test, y_test),
)

In [None]:
# print the layer-by-layer overview of the model (layers, output shapes, parameter counts)
model.summary()

In [None]:
# collect the per-epoch values recorded by fit() (loss and metrics) in a DataFrame
loss_df = pd.DataFrame(model.history.history)

# draw every recorded series against the epoch index on a logarithmic y-axis
fig, ax = plt.subplots(figsize=(12, 8))
loss_df.plot(ax=ax)
ax.set_title("Model information")
ax.set_yscale('log')
ax.set_xlabel("epochs")
plt.show()

In [None]:
# evaluate the fitted regression model
# explained variance is computed from the predictions on the test set
y_pred = model.predict(X_test)
var_score = metrics.explained_variance_score(y_test, y_pred)

# metrics of the last training epoch, taken from the recorded history;
# selecting the last row and indexing by column yields plain scalars, so only
# the numbers are printed (no Series index / Name / dtype lines)
final_epoch = loss_df.iloc[-1]

# columns without the 'val_' prefix are measured on the training data,
# 'val_*' columns on the validation data passed to fit()
print('Variance score:', var_score)
print('\nRMSE:', final_epoch['root_mean_squared_error'])
print('\nLoss:', final_epoch['loss'])
print('\nMAPE:', final_epoch['mean_absolute_percentage_error'])
print('\nval MAPE:', final_epoch['val_mean_absolute_percentage_error'])