# Neural Network using tensorflow
For this model we used some information from "https://machinelearningmastery.com/tensorflow-tutorial-deep-learning-with-tf-keras/"

In [None]:
# import libraries
import tensorflow.keras as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import keras_tuner as kt

from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Activation, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

## Load the data


In [None]:
# specify which preprocessed file you want to run (preprocess_data or featuretest_data)
preprocessed_file = 'data/preprocess_data.csv'

In [None]:
# the "clean" frames contain all features, the "preprocess" frames are filtered on features
clean_test_df = pd.read_csv('data/clean_test_data.csv')
clean_train_df = pd.read_csv('data/clean_train_data.csv')
# NOTE(review): both preprocess frames are read from the same file, so the
# preprocessed train and test sets hold identical rows and any validation on
# them is measured on the training data. Confirm whether a separate test file
# or a train/test split was intended.
preprocess_test_df = pd.read_csv(preprocessed_file)
preprocess_train_df = pd.read_csv(preprocessed_file)

# display is not imported in this file; presumably the IPython/Jupyter builtin
display(clean_test_df)
display(clean_train_df)

In [None]:
def prepare_data(train_df, test_df):
    """
    Split a train and a test dataframe into numpy feature and target arrays.

    The 'SalePrice' column is used as the target, every other column is a
    feature. The number of features and the shape of each resulting array
    are printed.

    Returns the tuple (X_train, X_test, y_train, y_test, n_features), where
    n_features is the number of feature columns in train_df.
    """
    target_column = 'SalePrice'

    # pull out the targets first, then build the feature frames without them
    train_target = train_df[target_column]
    test_target = test_df[target_column]
    train_features = train_df.drop(target_column, axis=1)
    test_features = test_df.drop(target_column, axis=1)

    # pandas objects -> numpy ndarrays
    X_train, X_test = train_features.to_numpy(), test_features.to_numpy()
    y_train, y_test = train_target.to_numpy(), test_target.to_numpy()

    # the feature count is taken from the training frame
    n_features = train_features.shape[1]

    print("n_features:", n_features)
    for caption, array in (
        ("X_train size:", X_train),
        ("X_test size:", X_test),
        ("y_train size:", y_train),
        ("y_test size:", y_test),
    ):
        print(caption, array.shape)

    return X_train, X_test, y_train, y_test, n_features

## Build the model
The code below lets a tuner build several candidate neural network models and picks the best one among them. We continue with that model.

In [None]:
def build_model(hp):
    """
    Build and compile one candidate neural network for the tuner.

    The hyperparameter object `hp` decides the number of hidden Dense layers
    (1 to 4), the number of units of each layer (16 to 256 in steps of 16),
    the activation function (one "activation" choice shared by all hidden
    layers), and whether a Dropout layer and/or a BatchNormalization layer
    is placed between the last hidden layer and the output layer.

    The returned model has a single output unit and is compiled with the
    Adam optimizer and mean squared error as loss; root mean squared error
    and mean absolute percentage error are tracked as metrics.

    source: "https://keras.io/guides/keras_tuner/getting_started/"
    """

    # metrics tracked next to the loss
    rmse_metric = tf.metrics.RootMeanSquaredError()
    mape_metric = 'mean_absolute_percentage_error'

    network = Sequential()

    # tunable stack of hidden layers
    hidden_layer_count = hp.Int("numlayers", 1, 4)
    for layer_index in range(hidden_layer_count):
        # every layer gets its own "units<i>" hyperparameter
        layer_units = hp.Int(f"units{layer_index}", min_value=16, max_value=256, step=16)
        layer_activation = hp.Choice("activation", ["relu", "leaky_relu", "elu", "tanh"])
        network.add(Dense(units=layer_units, activation=layer_activation))

    # optional dropout after the hidden layers
    if hp.Boolean("dropout"):
        network.add(Dropout(rate=0.25))

    # optional batch normalization, to check whether it is beneficial
    if hp.Boolean("bn_after_act"):
        network.add(BatchNormalization())

    # single-unit output layer
    network.add(Dense(1))

    network.compile(
        optimizer='Adam',
        loss=tf.metrics.mean_squared_error,
        metrics=[rmse_metric, mape_metric],
    )
    return network

In [None]:
# Random-search tuner over the search space defined in build_model.
# It is configured for at most 100 trials with 2 executions per trial and
# uses "val_loss" as the objective.
# NOTE(review): overwrite/directory/project_name are commented out, so the
# KerasTuner defaults apply; presumably results of an earlier run in the
# default directory are reused. Confirm before relying on a fresh search.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_loss",
    max_trials=100,
    executions_per_trial=2,
    #overwrite=True,
    #directory="data",
    #project_name="Milestone_3"
)

In [None]:
def visualize_results(X_test, y_test):
    """
    Plot the recorded training history of the global `best_model` and print
    its scores.

    The contents of best_model.history.history are plotted against the epochs
    on a logarithmic y axis. After that the explained variance score of the
    model's predictions for X_test against y_test is printed, followed by the
    last recorded value of the loss, RMSE and MAPE columns (training and
    validation). Returns None.
    """
    history_df = pd.DataFrame(best_model.history.history)

    # all recorded curves in one figure
    history_df.plot(figsize=(12, 8))
    plt.title("Model information")
    plt.xlabel("epochs")
    plt.yscale('log')
    plt.show()

    predictions = best_model.predict(X_test)
    explained_variance = metrics.explained_variance_score(y_test, predictions)
    print('Variance score:', explained_variance)

    # label -> history column; the last row of each column is printed
    report_columns = (
        ('\nRMSE:', 'root_mean_squared_error'),
        ('\nval RMSE:', 'val_root_mean_squared_error'),
        ('\nTrain Loss:', 'loss'),
        ('\nTest Loss:', 'val_loss'),
        ('\nMAPE:', 'mean_absolute_percentage_error'),
        ('\nval MAPE:', 'val_mean_absolute_percentage_error'),
    )
    for caption, column in report_columns:
        print(caption, history_df[column].tail(1))

In [None]:
# transform dataframes to numpy arrays
# NOTE: n_features is assigned by both calls, so after this cell it holds the
# feature count of the preprocessed data.
clean_X_train, clean_X_test, clean_y_train, clean_y_test, n_features = prepare_data(clean_train_df, clean_test_df)
preprocess_X_train, preprocess_X_test, preprocess_y_train, preprocess_y_test, n_features = prepare_data(preprocess_train_df, preprocess_test_df)

In [None]:
# run the tuner search on the data with all features (epochs=5 per fit)
# and keep the best model it found
# NOTE(review): the test arrays are passed as validation data, so they also
# drive the model selection. Confirm this is intended.
tuner.search(clean_X_train, clean_y_train, epochs=5, validation_data=(clean_X_test, clean_y_test))
best_model = tuner.get_best_models()[0]

In [None]:
# show the array shapes, then fit the best model on the data with all features
print(
    *(array.shape for array in (clean_X_train, clean_y_train, clean_X_test, clean_y_test)),
    sep="\n",
)

best_model.fit(
    clean_X_train,
    clean_y_train,
    batch_size=32,
    epochs=200,
    validation_data=(clean_X_test, clean_y_test),
)

In [None]:
# tuner results: summary of the selected model, followed by the tuner's
# search space summary and its results summary
best_model.summary()
tuner.search_space_summary()
tuner.results_summary()

In [None]:
# plot the history and print the scores, evaluated on the test data with all features
visualize_results(clean_X_test, clean_y_test)

In [None]:
# fit the best model on top features
# The training arrays are the ones returned by prepare_data for the
# preprocessed frames (preprocess_X_train / preprocess_y_train).
# NOTE(review): best_model was already fitted on the data with all features,
# so this continues training the same model. Presumably that only works when
# the preprocessed data has the same number of feature columns; confirm, or
# rebuild the model before this fit.
best_model.fit(preprocess_X_train, preprocess_y_train,
          batch_size=32, epochs=200,
          validation_data=(preprocess_X_test, preprocess_y_test))

In [None]:
# plot the history and print the scores, evaluated on the preprocessed test data
visualize_results(preprocess_X_test, preprocess_y_test)

## Visualizing the results
Now that the model is actually built, we can visualize the outcome.

In [None]:
# Disabled model-visualisation code: the whole cell is one string literal,
# so none of the imports or calls inside it are executed.
"""
import visualkeras
#from sklearn.externals.six import StringIO  
from IPython.display import Image 
from sklearn.tree import export_graphviz
import pydot

tf.utils.model_to_dot(
    best_model,
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    subgraph=False,
    layer_range=None,
    show_layer_activations=False,
)

tf.utils.plot_model(
    best_model,
    to_file="model.png",
    show_shapes=False,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    layer_range=None,
    show_layer_activations=False,
)
"""