# Neural Network using tensorflow
For this model we used some information from "https://machinelearningmastery.com/tensorflow-tutorial-deep-learning-with-tf-keras/".

In [None]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn import metrics
from sklearn.model_selection import train_test_split

import tensorflow.keras as tf
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, BatchNormalization, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import InputLayer

In [None]:
# Set to True to let Keras Tuner search for a new best model; when False,
# the fixed architecture returned by build_NN is used instead.
search_best_model = False

## Load the data


In [None]:
# Each split is stored in two variants: the "clean" files keep all features,
# the "preprocess" files are filtered on features.
clean_train_df = pd.read_csv('data/clean_train_data.csv')
clean_test_df = pd.read_csv('data/clean_test_data.csv')
preprocess_train_df = pd.read_csv('data/preprocess_train_data.csv')
preprocess_test_df = pd.read_csv('data/preprocess_test_data.csv')

# The column labels of the best-features file are the names of the selected
# features.
best_features_df = pd.read_csv('data/bestfeatures_data.csv')
best_features = list(best_features_df.columns)

# Restrict the preprocessed train and test frames to the best features only.
best_features_train_df = preprocess_train_df.loc[:, best_features]
best_features_test_df = preprocess_test_df.loc[:, best_features]

In [None]:
def prepare_data(train_df, test_df):
    """Split a train and a test frame into NumPy inputs and targets.

    The 'SalePrice' column of each frame is the target; every other column
    is treated as an input feature.

    Args:
        train_df: training DataFrame containing a 'SalePrice' column.
        test_df: test DataFrame containing a 'SalePrice' column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, n_features)`` where the
        first four entries are NumPy arrays and ``n_features`` is the number
        of input columns of the training frame.
    """
    label = 'SalePrice'

    # inputs are all columns except the target column
    features_train = train_df.drop(columns=label)
    features_test = test_df.drop(columns=label)

    # hand NumPy arrays (not DataFrames) to the network
    X_train, y_train = features_train.to_numpy(), train_df[label].to_numpy()
    X_test, y_test = features_test.to_numpy(), test_df[label].to_numpy()

    return X_train, X_test, y_train, y_test, features_train.shape[1]

## Building the Neural Network

In [None]:
def build_model(hp):
    """
    Assemble and compile one candidate network for the Keras Tuner search.

    The hyperparameter object ``hp`` selects:
      * "numlayers": how many hidden Dense layers are stacked (1 to 4),
      * "units<i>": the width of hidden layer i (16 to 256, step 16),
      * "activation": the activation passed to every hidden layer,
      * "dropout": whether a Dropout(0.25) layer follows the hidden layers,
      * "bn_after_act": whether a BatchNormalization layer is added as well.
    A single-unit Dense layer is always appended as output.

    source: "https://keras.io/guides/keras_tuner/getting_started/"
    """

    # metrics reported next to the loss
    rmse = tf.metrics.RootMeanSquaredError()
    mape = 'mean_absolute_percentage_error'

    network = Sequential()

    # hidden layers: depth is tuned, and every layer gets its own width
    n_hidden = hp.Int("numlayers", 1, 4)
    for idx in range(n_hidden):
        width = hp.Int(f"units{idx}", min_value=16, max_value=256, step=16)
        act = hp.Choice("activation", ["relu", "leaky_relu", "elu", "tanh"])
        network.add(Dense(units=width, activation=act))

    # optional layers between the hidden stack and the output
    if hp.Boolean("dropout"):
        network.add(Dropout(rate=0.25))
    if hp.Boolean("bn_after_act"):
        network.add(BatchNormalization())

    # single regression output
    network.add(Dense(1))

    network.compile(optimizer='Adam', loss=tf.metrics.mean_squared_error, metrics=[rmse, mape])

    return network

In [None]:
# Random-search tuner over the search space defined in build_model,
# ranking trials by validation loss.
tuner = kt.RandomSearch(
    hypermodel=build_model,
    objective="val_loss",
    max_trials=300,
    executions_per_trial=3,
    # Only discard stored trials when a new search is requested. With an
    # unconditional overwrite=True, merely constructing the tuner deletes the
    # results of an earlier search even though no new search is run.
    overwrite=search_best_model,
    directory="keras_tuner",
    project_name="tuner_trials",
)

In [None]:
def build_NN(n_features):
    """Recreate the best model for the neural network of Milestone 5.

    Args:
        n_features: number of input features (columns) of the training data.

    Returns:
        A compiled Sequential model with four hidden Dense layers
        (160, 224, 112 and 240 units) and a single-unit output layer. It is
        compiled with the Adam optimizer and mean squared error loss, and
        reports RMSE and mean absolute percentage error as metrics.
    """

    # metrics reported next to the loss
    m1 = tf.metrics.RootMeanSquaredError()
    m2 = 'mean_absolute_percentage_error'

    # build model and add layers
    model = tf.Sequential([
        # The input shape belongs at the front of the model. Previously it was
        # passed to the output layer, where it has no effect, so n_features
        # was effectively unused.
        Input(shape=(n_features,)),
        # NOTE(review): this layer has no activation while the other hidden
        # layers use tanh -- confirm this matches the tuned model.
        Dense(160),
        Dense(224, activation='tanh'),
        Dense(112, activation='tanh'),
        Dense(240, activation='tanh'),
        Dense(1),
        ])

    # compile model
    model.compile(optimizer='Adam', loss=tf.metrics.mean_squared_error, metrics=[m1, m2])

    return model

In [None]:
def visualize_results(X_test, y_test, model):
    """Plot the RMSE learning curves of a fitted model and print its scores.

    Args:
        X_test: input array used for the predictions.
        y_test: target values belonging to ``X_test``.
        model: fitted Keras model; its ``history`` must contain the keys
            'root_mean_squared_error' and 'val_root_mean_squared_error'.

    Returns:
        None. The function only plots and prints.
    """
    history_df = pd.DataFrame(model.history.history)

    # training and validation RMSE per epoch, on a log scale
    for column in ('root_mean_squared_error', 'val_root_mean_squared_error'):
        history_df[column].plot(figsize=(12,8))

    plt.title("Model information")
    plt.xlabel("epochs")
    plt.yscale('log')
    plt.legend()
    plt.show()

    # explained variance of the predictions on the given test data
    predictions = model.predict(X_test)
    explained_variance = metrics.explained_variance_score(y_test, predictions)

    # report the explained variance and the RMSE values of the last epoch
    print('Variance score:', explained_variance)
    print('\nRMSE:', history_df['root_mean_squared_error'].tail(1))
    print('\nval RMSE:', history_df['val_root_mean_squared_error'].tail(1))

In [None]:
# Convert each pair of train/test frames into NumPy inputs and targets,
# plus the number of input features.

# all features
(
    clean_X_train,
    clean_X_test,
    clean_y_train,
    clean_y_test,
    clean_n_features,
) = prepare_data(clean_train_df, clean_test_df)

# filtered features
(
    preprocess_X_train,
    preprocess_X_test,
    preprocess_y_train,
    preprocess_y_test,
    preprocess_n_features,
) = prepare_data(preprocess_train_df, preprocess_test_df)

# best features only
(
    best_features_X_train,
    best_features_X_test,
    best_features_y_train,
    best_features_y_test,
    best_features_n_features,
) = prepare_data(best_features_train_df, best_features_test_df)

#### Build model for clean data
The model is fit and evaluated on the complete dataset (all features).

In [None]:
# Either run a new hyperparameter search or rebuild the stored architecture.
if search_best_model:

    # train the candidate models with all data and keep the best one
    # NOTE(review): the test split is used as validation data here, so the
    # tuner selects the model on the same data it is later evaluated on.
    tuner.search(clean_X_train, clean_y_train, epochs=10, validation_data=(clean_X_test, clean_y_test))
    best_model = tuner.get_best_models()[0]

else:
    # reuse last best model
    best_model = build_NN(clean_n_features)

In [None]:
# Train the selected model on the full-feature data; the test split serves
# as validation data during training.
best_model.fit(
    x=clean_X_train,
    y=clean_y_train,
    validation_data=(clean_X_test, clean_y_test),
    epochs=300,
    batch_size=32,
)

In [None]:
# Print the tuner's search space and the results of its trials.
# NOTE(review): when no search was run (search_best_model is False) there
# may be little or nothing to report here -- confirm.
tuner.search_space_summary()
tuner.results_summary()

In [None]:
# Print the Keras summary of the model trained on the clean data.
best_model.summary()

In [None]:
# Plot the RMSE curves and print the scores of the clean-data model.
visualize_results(clean_X_test, clean_y_test, best_model)

#### Build model for preprocessed data
The model is built with preprocessed data from which the 'weak' features have been filtered out.

In [None]:
# metrics reported next to the loss
m1 = tf.metrics.RootMeanSquaredError()
m2 = 'mean_absolute_percentage_error'

# create model with the same configuration as best model, but with the
# input dimensions of the preprocessed data
model = tf.Sequential()
model.add(Input(shape=(preprocess_n_features,)))

# Rebuild every layer from its config instead of adding best_model's own
# layer objects. Reused layer objects would share their already trained
# weights with best_model, so this model would not start from scratch and
# training it would also modify best_model. Rebuilding the first layer from
# its config also keeps its activation.
for layer in best_model.layers:
    model.add(type(layer).from_config(layer.get_config()))

model.compile(optimizer='Adam', loss=tf.metrics.mean_squared_error, metrics=[m1, m2])

# fit model with preprocessed data (only including a selection of features)
model.fit(preprocess_X_train, preprocess_y_train,
          batch_size=32, epochs=300,
          validation_data=(preprocess_X_test, preprocess_y_test))

In [None]:
# Print the Keras summary of the model trained on the preprocessed data.
model.summary()

In [None]:
# Plot the RMSE curves and print the scores of the preprocessed-data model.
visualize_results(preprocess_X_test, preprocess_y_test, model)

#### Build model for best features data
The model is built with the best_features data, which only contains the top 10 features.

In [None]:
# create model with the same configuration as best model, but with the
# input dimensions of the best-features data
model2 = tf.Sequential()
model2.add(Input(shape=(best_features_n_features,)))

# Rebuild every layer from its config instead of adding best_model's own
# layer objects. Reused layer objects would share their already trained
# weights with best_model (and with any other model built from them), so
# this model would not start from scratch.
for layer in best_model.layers:
    model2.add(type(layer).from_config(layer.get_config()))

# Use a fresh metric object for this model rather than one that is already
# attached to another model.
model2.compile(
    optimizer='Adam',
    loss=tf.metrics.mean_squared_error,
    metrics=[tf.metrics.RootMeanSquaredError(), 'mean_absolute_percentage_error'],
)

# fit model with best-features data (only including the selected features)
model2.fit(best_features_X_train, best_features_y_train,
          batch_size=32, epochs=300,
          validation_data=(best_features_X_test, best_features_y_test))

In [None]:
# Print the Keras summary of the model trained on the best-features data.
model2.summary()

In [None]:
# Plot the RMSE curves and print the scores of the best-features model.
visualize_results(best_features_X_test, best_features_y_test, model2)