## Libraries

In [3]:
import os.path
import numpy as np
import pandas as pd
import tensorflow as tf

ModuleNotFoundError: No module named 'tensorflow'

## Import data

In [19]:
traindata = pd.read_csv('../data/train.csv')
testdata = pd.read_csv('../data/train.csv')

## Data Parameters

In [20]:
features1 = ['TotalBsmtSF']

# features2 --> error after 100k train steps, learn=0.001
# for a (2,20sig,10sig,1) MLP
# = 0.42
features2 = ['TotalBsmtSF', '1stFlrSF']

# features3 --> error after 100k train steps, learn=0.001
# for a (3,20sig,10sig,1) MLP
# = 0.31
features3 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea']

# features4 --> error after 100k train steps, learn=0.001
# for a (4,20sig,10sig,1) MLP
# = 0.24
features4 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual']

# features5 --> error after 100k train steps, learn=0.001
# for a (5,20sig,10sig,1) MLP
# = 0.22
features5 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual', 'GarageArea']

# features6 --> error after 100k train steps
# for a (6,20sig,10sig,1)   MLP = 0.22 (learn=0.001)
# for a (6,20relu,10relu,1) MLP = 0.22 (learn=0.001)
# for a (6,20relu,10relu,1) MLP = 0.26 (learn=0.0001)
# for a (6,40relu,30relu,10relu,1) MLP = 0.34 (learn=0.0001)
# for a (6,20sig,1) MLP = 0.26 (learn=0.001)
# for a (6,20id,10id,1)   MLP = 0.26 (learn=0.001)
features6 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual', 'GarageArea', 'GarageCars']

# set feature vector to use here!
features = features6

# Normalization factor for house sale prices
# This is important, since all the input feature values
# "live" in different intervals
# E.g. SalePrice: 50000-400000
#      TotalBsmtSF: 300-2000
#      OverallQual: 1-10
normalization_factor_per_feature = {"TotalBsmtSF": 0.001,
                                    "1stFlrSF": 0.001,
                                    "GrLivArea": 0.001,
                                    "OverallQual": 0.1,
                                    "GarageArea": 0.001,
                                    "GarageCars": 0.1,
                                    "SalePrice": 0.00001}

## Prepare data

In [21]:
def prepare_data(traindata, testdata):
    train_matrix = traindata["SalePrice"].values
    train_row_nr = len(train_matrix)
    train_matrix = train_matrix.reshape(train_row_nr,1)
    train_matrix = train_matrix * normalization_factor_per_feature["SalePrice"]
    test_matrix = testdata["Id"].values
    test_row_nr = len(test_matrix)
    test_matrix = test_matrix.reshape(test_row_nr,1)
    
    for column_name in features:
        train_column = traindata[column_name].values.reshape(train_row_nr,1)
        test_column = testdata[column_name].values.reshape(test_row_nr,1)
        train_column = train_column * normalization_factor_per_feature[column_name]
        test_column = test_column * normalization_factor_per_feature[column_name]
        train_matrix = np.hstack((train_matrix, train_column))
        test_matrix = np.hstack((test_matrix, test_column))
        missing_data_items_train = np.count_nonzero(np.isnan(train_matrix))
        missing_data_items_test = np.count_nonzero(np.isnan(test_matrix))
        print("train matrix has",missing_data_items_train, "values which are 'nan'!")
        print("test matrix has",missing_data_items_test, "values which are 'nan'!")
        nan_values_train = np.isnan(train_matrix)
        train_matrix[nan_values_train] = 0
        nan_values_test = np.isnan(test_matrix)
        test_matrix[nan_values_test] = 0
        return train_matrix, test_matrix
    
train_matrix, test_matrix = prepare_data(traindata, testdata)

train matrix has 0 values which are 'nan'!
test matrix has 0 values which are 'nan'!


## Parameters 

In [15]:
OUTPUT_FILENAME = '../data/result_al.csv'

NR_NEURONS_HIDDEN1 = 20
NR_NEURONS_HIDDEN2 = 10
NR_NEURONS_OUTPUT  = 1

NR_TRAIN_STEPS = 100000
LEARN_RATE = 0.001

## Setup the perceptron

In [22]:
def setup(inputs):
    input_node = tf.placeholder(tf.float32, shape=(1,inputs), name="input_node")
    teacher_node = tf.placeholder(tf.float32, name="teacher_node")
    
    rnd_mat1 = tf.random_normal([nr_inputs, NR_NEURONS_HIDDEN1])
    rnd_mat2 = tf.random_normal([NR_NEURONS_HIDDEN1, NR_NEURONS_HIDDEN2])
    rnd_mat3 = tf.random_normal([NR_NEURONS_HIDDEN2, NR_NEURONS_OUTPUT])
    
    weights = {
        'h1': tf.Variable(rnd_mat1),
        'h2': tf.Variable(rnd_mat2),
        'out': tf.Variable(rnd_mat3)
    }
    
    biases = {
        'b1': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN1])),
        'b2': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN2])),
        'out': tf.Variable(tf.random_normal([NR_NEURONS_OUTPUT]))
    }
    
    layer_1 = tf.add(tf.matmul(input_node, weights['h1']), biases['b1'])
    layer_1 = tf.nn.leaky_relu(layer_1)
    
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.leaky_relu(layer_2)
    
    output_node = tf.matmul(layer_2, weights['out']) + biases['out']
    output_node = tf.reshape(output_node, [])
    
    create_var_init_op = tf.global_variables_initializer()
    
    loss_node = tf.abs(teacher_node - output_node)
    optimizer_node = tf.train.GradientDescentOptimizer(LEARN_RATE).minimize(loss_node)
    
    return [input_node, teacher_node, create_var_init_op, loss_node, optimizer_node, output_node, weights['h1'],weights['h2'],weights['out']]

## Compute average error

In [None]:
def compute_avg_error(sess, model, train_matrix):
    