## Libraries

In [74]:
import os.path
import numpy as np
import pandas as pd
import tensorflow as tf

## Import data

In [75]:
traindata = pd.read_csv('../data/train.csv')
testdata = pd.read_csv('../data/train.csv')

## Data Parameters

In [76]:
features1 = ['TotalBsmtSF']

# features2 --> error after 100k train steps, learn=0.001
# for a (2,20sig,10sig,1) MLP
# = 0.42
features2 = ['TotalBsmtSF', '1stFlrSF']

# features3 --> error after 100k train steps, learn=0.001
# for a (3,20sig,10sig,1) MLP
# = 0.31
features3 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea']

# features4 --> error after 100k train steps, learn=0.001
# for a (4,20sig,10sig,1) MLP
# = 0.24
features4 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual']

# features5 --> error after 100k train steps, learn=0.001
# for a (5,20sig,10sig,1) MLP
# = 0.22
features5 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual', 'GarageArea']

# features6 --> error after 100k train steps
# for a (6,20sig,10sig,1)   MLP = 0.22 (learn=0.001)
# for a (6,20relu,10relu,1) MLP = 0.22 (learn=0.001)
# for a (6,20relu,10relu,1) MLP = 0.26 (learn=0.0001)
# for a (6,40relu,30relu,10relu,1) MLP = 0.34 (learn=0.0001)
# for a (6,20sig,1) MLP = 0.26 (learn=0.001)
# for a (6,20id,10id,1)   MLP = 0.26 (learn=0.001)
features6 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual', 'GarageArea', 'GarageCars']

# set feature vector to use here!
features = features6

# Normalization factor for house sale prices
# This is important, since all the input feature values
# "live" in different intervals
# E.g. SalePrice: 50000-400000
#      TotalBsmtSF: 300-2000
#      OverallQual: 1-10
normalization_factor_per_feature = {"TotalBsmtSF": 0.001,
                                    "1stFlrSF": 0.001,
                                    "GrLivArea": 0.001,
                                    "OverallQual": 0.1,
                                    "GarageArea": 0.001,
                                    "GarageCars": 0.1,
                                    "SalePrice": 0.00001}

## Prepare data

In [77]:
def prepare_data(traindata, testdata):
    train_matrix = traindata["SalePrice"].values
    train_row_nr = len(train_matrix)
    train_matrix = train_matrix.reshape(train_row_nr,1)
    train_matrix = train_matrix * normalization_factor_per_feature["SalePrice"]
    test_matrix = testdata["Id"].values
    test_row_nr = len(test_matrix)
    test_matrix = test_matrix.reshape(test_row_nr,1)
    
    for column_name in features:
        train_column = traindata[column_name].values.reshape(train_row_nr,1)
        test_column = testdata[column_name].values.reshape(test_row_nr,1)
        train_column = train_column * normalization_factor_per_feature[column_name]
        test_column = test_column * normalization_factor_per_feature[column_name]
        train_matrix = np.hstack((train_matrix, train_column))
        test_matrix = np.hstack((test_matrix, test_column))
        missing_data_items_train = np.count_nonzero(np.isnan(train_matrix))
        missing_data_items_test = np.count_nonzero(np.isnan(test_matrix))
        print("train matrix has",missing_data_items_train, "values which are 'nan'!")
        print("test matrix has",missing_data_items_test, "values which are 'nan'!")
        nan_values_train = np.isnan(train_matrix)
        train_matrix[nan_values_train] = 0
        nan_values_test = np.isnan(test_matrix)
        test_matrix[nan_values_test] = 0
        return train_matrix, test_matrix
    


## Parameters 

In [78]:
OUTPUT_FILENAME = '../data/result_al.csv'

NR_NEURONS_HIDDEN1 = 20
NR_NEURONS_HIDDEN2 = 10
NR_NEURONS_OUTPUT  = 1

NR_TRAIN_STEPS = 100000
LEARN_RATE = 0.001

## Setup the perceptron

In [79]:
def setup(inputs):
    input_node = tf.placeholder(tf.float32, shape=(1,inputs), name="input_node")
    teacher_node = tf.placeholder(tf.float32, name="teacher_node")
    
    rnd_mat1 = tf.random_normal([inputs, NR_NEURONS_HIDDEN1])
    rnd_mat2 = tf.random_normal([NR_NEURONS_HIDDEN1, NR_NEURONS_HIDDEN2])
    rnd_mat3 = tf.random_normal([NR_NEURONS_HIDDEN2, NR_NEURONS_OUTPUT])
    
    weights = {
        'h1': tf.Variable(rnd_mat1),
        'h2': tf.Variable(rnd_mat2),
        'out': tf.Variable(rnd_mat3)
    }
    
    biases = {
        'b1': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN1])),
        'b2': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN2])),
        'out': tf.Variable(tf.random_normal([NR_NEURONS_OUTPUT]))
    }
    
    layer_1 = tf.add(tf.matmul(input_node, weights['h1']), biases['b1'])
    layer_1 = tf.nn.leaky_relu(layer_1)
    #layer_1 = tf.nn.relu(layer_1)
    #layer_1 = tf.nn.sigmoid(layer_1)
    
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.leaky_relu(layer_2)
    #layer_2 = tf.nn.relu(layer_2)
    #layer_2 = tf.nn.sigmoid(layer_2)
    
    output_node = tf.matmul(layer_2, weights['out']) + biases['out']
    output_node = tf.reshape(output_node, [])
    
    create_var_init_op = tf.global_variables_initializer()
    
    loss_node = tf.abs(teacher_node - output_node)
    optimizer_node = tf.train.GradientDescentOptimizer(LEARN_RATE).minimize(loss_node)
    
    return [input_node, teacher_node, create_var_init_op, loss_node, optimizer_node, output_node, weights['h1'],weights['h2'],weights['out']]

## Compute average error

In [80]:
def compute_avg_error(sess, model, train_matrix):
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node, weights_h1, weights_h2, weights_out = model
    
    nr_train_samples = train_matrix.shape[0]
    nr_input_features = train_matrix.shape[1] - 1
    sum_losses = 0.0
    
    for sample_row_nr in range(0, nr_train_samples):
        input_matrix = train_matrix[sample_row_nr, 1:]
        input_matrix = input_matrix.reshape(1, nr_input_features)
        saleprice = train_matrix[sample_row_nr, 0]
        predicted_saleprice, sample_loss = sess.run([output_node, loss_node], feed_dict={input_node: input_matrix, teacher_node: saleprice})
        sum_losses += sample_loss
        
    avg_loss = sum_losses / nr_train_samples
    return avg_loss

## Training the model

In [81]:
def train_model(model, train_matrix, nr_steps_to_train):
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node, weights_h1, weights_h2, weights_out = model
    
    nr_train_samples = train_matrix.shape[0]
    nr_input_features = train_matrix.shape[1] - 1
    sess = tf.Session()
    sess.run(var_init_node)
    
    for train_step in range(1, nr_steps_to_train+1):
        rnd_row = np.random.randint(0, nr_train_samples)
        input_matrix = train_matrix[rnd_row, 1:]
        input_matrix = input_matrix.reshape(1, nr_input_features)
        saleprice = train_matrix[rnd_row, 0]
        actual_output, teacher_value, loss_value, _, w_h1, w_h2, w_out = sess.run([output_node, teacher_node, loss_node, optimizer_node, weights_h1, weights_h2, weights_out], feed_dict={input_node: input_matrix, teacher_node: saleprice})
        if train_step % 1000 == 0:
            avg_error = compute_avg_error(sess, model, train_matrix)
            print("Training step ", train_step,
                    "Average error is", avg_error,
                    "actual = ", actual_output,
                    "teacher value = ", teacher_value,
                    "loss value = ", loss_value)
    return sess

## Predict sale prices for Houses

In [82]:
def predict_sale_prices(sess, model, test_matrix):
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node, weights_h1, weights_h2, weights_out = model
    nr_test_samples = test_matrix.shape[0]
    nr_input_features = test_matrix.shape[1] - 1
    
    prediction_matrix = np.zeros(shape=(nr_test_samples,2))
    for row_nr in range(0, nr_test_samples):
        input_matrix = test_matrix[row_nr, 1:]
        input_matrix = input_matrix.reshape(1, nr_input_features)
        house_id = int(test_matrix[row_nr, 0])
        predicted_saleprice = sess.run(output_node, feed_dict={input_node: input_matrix})
        print("House with id ", house_id,
              "--> predicted sale price is ", predicted_saleprice * (1.0/normalization_factor_per_feature["SalePrice"]))
        prediction_matrix[row_nr][0] = house_id
        prediction_matrix[row_nr][1] = predicted_saleprice * (1.0/normalization_factor_per_feature["SalePrice"])
        
        prediction_dataframe = pd.DataFrame({'Id':prediction_matrix[:,0],'SalePrice':prediction_matrix[:,1]})
        prediction_dataframe = prediction_dataframe.astype({"Id": int})
    print(prediction_dataframe)
        
    prediction_dataframe.to_csv(OUTPUT_FILENAME, sep=',', index=False)

## "Main"

In [83]:
train_matrix, test_matrix = prepare_data(traindata, testdata)
nr_input_features = train_matrix.shape[1] - 1
model = setup(nr_input_features)
sess = train_model(model, train_matrix, NR_TRAIN_STEPS)
predict_sale_prices(sess, model, test_matrix)
sess.close()
tf.reset_default_graph()

train matrix has 0 values which are 'nan'!
test matrix has 0 values which are 'nan'!
Training step  1000 Average error is 0.433840749158 actual =  1.43155 teacher value =  1.2599999904632568 loss value =  0.171551
Training step  2000 Average error is 0.454055912078 actual =  1.3269 teacher value =  1.7799999713897705 loss value =  0.453104
Training step  3000 Average error is 0.431751597826 actual =  3.16849 teacher value =  3.799999952316284 loss value =  0.631508
Training step  4000 Average error is 0.431882494647 actual =  1.56655 teacher value =  1.2999999523162842 loss value =  0.266547
Training step  5000 Average error is 0.433641814083 actual =  2.71543 teacher value =  2.700000047683716 loss value =  0.0154274
Training step  6000 Average error is 0.481220747469 actual =  1.50804 teacher value =  1.5943399667739868 loss value =  0.0862961
Training step  7000 Average error is 0.428734693013 actual =  1.76741 teacher value =  1.2000000476837158 loss value =  0.567407
Training step

Training step  64000 Average error is 0.425940073884 actual =  1.69758 teacher value =  1.7300000190734863 loss value =  0.0324153
Training step  65000 Average error is 0.423886392255 actual =  2.13141 teacher value =  1.75 loss value =  0.381409
Training step  66000 Average error is 0.427311714586 actual =  1.43632 teacher value =  1.1449999809265137 loss value =  0.291324
Training step  67000 Average error is 0.425697560221 actual =  1.51029 teacher value =  1.274999976158142 loss value =  0.23529
Training step  68000 Average error is 0.425958377654 actual =  1.28586 teacher value =  1.3888700008392334 loss value =  0.103006
Training step  69000 Average error is 0.428298650662 actual =  1.41276 teacher value =  1.4199999570846558 loss value =  0.00724077
Training step  70000 Average error is 0.429076978891 actual =  2.48543 teacher value =  3.069999933242798 loss value =  0.584567
Training step  71000 Average error is 0.42836514299 actual =  1.25056 teacher value =  1.269999980926513

House with id  87 --> predicted sale price is  135546.445847
House with id  88 --> predicted sale price is  133488.440514
House with id  89 --> predicted sale price is  143495.845795
House with id  90 --> predicted sale price is  142958.99868
House with id  91 --> predicted sale price is  104857.087135
House with id  92 --> predicted sale price is  169587.850571
House with id  93 --> predicted sale price is  140297.794342
House with id  94 --> predicted sale price is  166754.317284
House with id  95 --> predicted sale price is  139083.957672
House with id  96 --> predicted sale price is  135432.14798
House with id  97 --> predicted sale price is  234708.046913
House with id  98 --> predicted sale price is  142258.71563
House with id  99 --> predicted sale price is  129379.415512
House with id  100 --> predicted sale price is  142025.184631
House with id  101 --> predicted sale price is  239444.971085
House with id  102 --> predicted sale price is  137146.449089
House with id  103 --> p

House with id  436 --> predicted sale price is  138500.380516
House with id  437 --> predicted sale price is  134517.431259
House with id  438 --> predicted sale price is  140484.476089
House with id  439 --> predicted sale price is  129078.817368
House with id  440 --> predicted sale price is  135546.445847
House with id  441 --> predicted sale price is  474150.419235
House with id  442 --> predicted sale price is  202841.377258
House with id  443 --> predicted sale price is  148403.573036
House with id  444 --> predicted sale price is  172691.297531
House with id  445 --> predicted sale price is  141208.171844
House with id  446 --> predicted sale price is  201980.161667
House with id  447 --> predicted sale price is  219420.623779
House with id  448 --> predicted sale price is  141558.361053
House with id  449 --> predicted sale price is  138056.826591
House with id  450 --> predicted sale price is  134546.041489
House with id  451 --> predicted sale price is  138827.109337
House wi

House with id  622 --> predicted sale price is  220497.202873
House with id  623 --> predicted sale price is  142725.563049
House with id  624 --> predicted sale price is  137496.66214
House with id  625 --> predicted sale price is  136918.520927
House with id  626 --> predicted sale price is  153531.002998
House with id  627 --> predicted sale price is  142678.809166
House with id  628 --> predicted sale price is  158928.251266
House with id  629 --> predicted sale price is  144126.152992
House with id  630 --> predicted sale price is  171341.919899
House with id  631 --> predicted sale price is  134174.418449
House with id  632 --> predicted sale price is  227387.309074
House with id  633 --> predicted sale price is  191989.183426
House with id  634 --> predicted sale price is  145435.04715
House with id  635 --> predicted sale price is  145435.04715
House with id  636 --> predicted sale price is  202841.377258
House with id  637 --> predicted sale price is  126416.921616
House with 

House with id  822 --> predicted sale price is  141698.408127
House with id  823 --> predicted sale price is  139620.876312
House with id  824 --> predicted sale price is  138010.191917
House with id  825 --> predicted sale price is  213391.852379
House with id  826 --> predicted sale price is  331355.476379
House with id  827 --> predicted sale price is  138150.191307
House with id  828 --> predicted sale price is  205855.846405
House with id  829 --> predicted sale price is  143495.845795
House with id  830 --> predicted sale price is  133145.427704
House with id  831 --> predicted sale price is  192958.593369
House with id  832 --> predicted sale price is  133145.427704
House with id  833 --> predicted sale price is  141815.114021
House with id  834 --> predicted sale price is  219205.379486
House with id  835 --> predicted sale price is  157309.07917
House with id  836 --> predicted sale price is  146919.369698
House with id  837 --> predicted sale price is  228463.935852
House wit

House with id  1066 --> predicted sale price is  179139.80484
House with id  1067 --> predicted sale price is  138500.380516
House with id  1068 --> predicted sale price is  138056.826591
House with id  1069 --> predicted sale price is  141838.526726
House with id  1070 --> predicted sale price is  139574.193954
House with id  1071 --> predicted sale price is  144355.630875
House with id  1072 --> predicted sale price is  136775.612831
House with id  1073 --> predicted sale price is  138757.133484
House with id  1074 --> predicted sale price is  135717.988014
House with id  1075 --> predicted sale price is  170262.527466
House with id  1076 --> predicted sale price is  138523.697853
House with id  1077 --> predicted sale price is  138430.333138
House with id  1078 --> predicted sale price is  150832.390785
House with id  1079 --> predicted sale price is  139644.265175
House with id  1080 --> predicted sale price is  142958.99868
House with id  1081 --> predicted sale price is  172691.2

House with id  1321 --> predicted sale price is  211023.378372
House with id  1322 --> predicted sale price is  104857.087135
House with id  1323 --> predicted sale price is  137356.591225
House with id  1324 --> predicted sale price is  128744.792938
House with id  1325 --> predicted sale price is  279278.326035
House with id  1326 --> predicted sale price is  138430.333138
House with id  1327 --> predicted sale price is  131544.589996
House with id  1328 --> predicted sale price is  138897.275925
House with id  1329 --> predicted sale price is  217913.484573
House with id  1330 --> predicted sale price is  141675.066948
House with id  1331 --> predicted sale price is  234708.046913
House with id  1332 --> predicted sale price is  141114.759445
House with id  1333 --> predicted sale price is  138897.275925
House with id  1334 --> predicted sale price is  138593.697548
House with id  1335 --> predicted sale price is  137706.73275
House with id  1336 --> predicted sale price is  186172.