# Neural Network using tensorflow
For this model we used some information from "https://machinelearningmastery.com/tensorflow-tutorial-deep-learning-with-tf-keras/".

In [None]:
# import libraries
import tensorflow.keras as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import keras_tuner as kt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Activation, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam

## Load the data


In [None]:
# Read the cleaned test and train CSV files. Each file carries an
# 'Unnamed: 0' column (presumably an index written out by an earlier export),
# which is dropped straight away.
test_df = pd.read_csv('data/clean_test_data.csv').drop(columns='Unnamed: 0')
train_df = pd.read_csv('data/clean_train_data.csv').drop(columns='Unnamed: 0')

# show both frames for a quick visual check
display(test_df)
display(train_df)

In [None]:
# Separate the regression target ('SalePrice') from the feature columns.
# drop() returns a new frame, so train_df / test_df themselves stay unchanged.
input_train = train_df.drop(columns='SalePrice')
target_train = train_df['SalePrice']

input_test = test_df.drop(columns='SalePrice')
target_test = test_df['SalePrice']

# show the (unchanged) source frames again
display(test_df)
display(train_df)

In [None]:
# unwrap the pandas objects into numpy ndarrays for training and evaluation
X_train, y_train = input_train.to_numpy(), target_train.to_numpy()
X_test, y_test = input_test.to_numpy(), target_test.to_numpy()

# number of feature columns in the training input
n_features = len(input_train.columns)

## Build the model
The code below uses a hyperparameter tuner to build and train several candidate neural network models and picks the best one. We continue with that model.

In [None]:
def build_model(hp):
    """
    Build and compile one candidate network from the hyperparameters in `hp`.

    The tuner calls this function once per trial. The following values are
    requested from `hp`:

      - "numlayers":    number of hidden Dense layers (1 to 4)
      - "units{i}":     width of hidden layer i (16 to 256, in steps of 16)
      - "activation":   one activation name, shared by all hidden layers
      - "dropout":      whether a Dropout(0.25) layer follows the hidden layers
      - "bn_after_act": whether a BatchNormalization layer is added before
                        the output layer

    The network ends in a Dense layer with a single unit and is compiled with
    the Adam optimizer, mean squared error as loss, and root mean squared
    error plus mean absolute percentage error as metrics.

    source: "https://keras.io/guides/keras_tuner/getting_started/"
    """
    model = Sequential()

    # hidden layers: both the depth and the width of each layer are tuned
    depth = hp.Int("numlayers", 1, 4)
    for i in range(depth):
        width = hp.Int(f"units{i}", min_value=16, max_value=256, step=16)
        act = hp.Choice("activation", ["relu", "leaky_relu", "elu", "tanh"])
        model.add(Dense(units=width, activation=act))

    # optional dropout, added once after all hidden layers
    use_dropout = hp.Boolean("dropout")
    if use_dropout:
        model.add(Dropout(rate=0.25))

    # optional batch normalization, to check whether it is beneficial
    use_batchnorm = hp.Boolean("bn_after_act")
    if use_batchnorm:
        model.add(BatchNormalization())

    # output layer: a single unit for the predicted value
    model.add(Dense(1))

    # metrics reported for the model next to the loss
    tracked_metrics = [
        tf.metrics.RootMeanSquaredError(),
        'mean_absolute_percentage_error',
    ]
    model.compile(
        optimizer='Adam',
        loss=tf.metrics.mean_squared_error,
        metrics=tracked_metrics,
    )
    return model

# Random search over the search space defined in build_model: up to 100
# sampled configurations, each trained twice (executions_per_trial), ranked
# by their validation loss.
# NOTE: with `overwrite` left disabled, KerasTuner reuses results already
# stored in its project directory; enable overwrite=True (or delete that
# directory) to force a fresh search.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_loss",
    max_trials=100,
    executions_per_trial=2,
    #overwrite=True,
    #directory="data",
    #project_name="Milestone_3"
)

# Train the candidate models. Trials are scored on a validation slice held
# out from the TRAINING data (Keras uses the last 20% of the samples) rather
# than on the test set: selecting hyperparameters on (X_test, y_test) would
# leak the test data into model selection and make the test metrics reported
# later optimistically biased.
tuner.search(X_train, y_train, epochs=5, validation_split=0.2)
best_model = tuner.get_best_models()[0]

In [None]:
# Keep training the selected model for 200 epochs. The test split is passed
# as validation data so that its loss and metrics are recorded every epoch.
best_model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=32,
)

## Visualizing the results
Now that the model is actually built, we can visualize the outcome.

In [None]:
# print the layer-by-layer summary of the selected model
best_model.summary()

In [None]:

"""
import visualkeras
#from sklearn.externals.six import StringIO  
from IPython.display import Image 
from sklearn.tree import export_graphviz
import pydot

tf.utils.model_to_dot(
    best_model,
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    subgraph=False,
    layer_range=None,
    show_layer_activations=False,
)

tf.utils.plot_model(
    best_model,
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    layer_range=None,
    show_layer_activations=False,
)
"""

In [None]:
# print the hyperparameter search space registered with the tuner
tuner.search_space_summary()

In [None]:
# print a summary of the tuner's trial results
tuner.results_summary()

In [None]:
# per-epoch loss and metric values taken from the training history stored
# on best_model
loss_df = pd.DataFrame(best_model.history.history)

# one curve per recorded quantity, drawn on a log-scaled y axis
ax = loss_df.plot(figsize=(12, 8))
ax.set_title("Model information")
ax.set_xlabel("epochs")
ax.set_yscale('log')
plt.show()

In [None]:
from sklearn import metrics

# explained variance of the model's predictions on the test inputs
y_pred = best_model.predict(X_test)
var_score = metrics.explained_variance_score(y_test, y_pred)

# Report the regression metrics of the final training epoch.
# `.iloc[-1]` yields the bare number of the last row; `.tail(1)` would print
# a one-row Series together with its index, name and dtype.
print('Variance score:', var_score)
print('\nRMSE:', loss_df['root_mean_squared_error'].iloc[-1])
print('\nval RMSE:', loss_df['val_root_mean_squared_error'].iloc[-1])
print('\nTrain Loss:', loss_df['loss'].iloc[-1])
print('\nTest Loss:', loss_df['val_loss'].iloc[-1])
print('\nMAPE:', loss_df['mean_absolute_percentage_error'].iloc[-1])
print('\nval MAPE:', loss_df['val_mean_absolute_percentage_error'].iloc[-1])