## Libraries

In [32]:
import numpy as np
import pandas as pd
import tensorflow as tf

## Import data

In [33]:
# Read the training table first, then the test table; both CSV paths are
# relative to the notebook's working directory.
traindata, testdata = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/train.csv', '../data/test.csv')
)

## Data Parameters

In [34]:
# Candidate input feature sets. Every set extends the previous one by exactly
# one column. The numbers in the comments are the errors that were noted
# after 100k train steps for the given network layout and learning rate.
features1 = ['TotalBsmtSF']

# (2,20sig,10sig,1) MLP, learn=0.001 --> error = 0.42
features2 = features1 + ['1stFlrSF']

# (3,20sig,10sig,1) MLP, learn=0.001 --> error = 0.31
features3 = features2 + ['GrLivArea']

# (4,20sig,10sig,1) MLP, learn=0.001 --> error = 0.24
features4 = features3 + ['OverallQual']

# (5,20sig,10sig,1) MLP, learn=0.001 --> error = 0.22
features5 = features4 + ['GarageArea']

# Errors after 100k train steps with six input features:
#   (6,20sig,10sig,1)             MLP, learn=0.001  --> 0.22
#   (6,20relu,10relu,1)           MLP, learn=0.001  --> 0.22
#   (6,20relu,10relu,1)           MLP, learn=0.0001 --> 0.26
#   (6,40relu,30relu,10relu,1)    MLP, learn=0.0001 --> 0.34
#   (6,20sig,1)                   MLP, learn=0.001  --> 0.26
#   (6,20id,10id,1)               MLP, learn=0.001  --> 0.26
features6 = features5 + ['GarageCars']

# The feature set that the rest of the notebook works with -- switch it here.
features = features6

# Scaling factor applied to each column (the input features and SalePrice).
# Scaling matters because the raw values "live" in very different intervals,
# e.g. SalePrice:   50000-400000
#      TotalBsmtSF: 300-2000
#      OverallQual: 1-10
normalization_factor_per_feature = {
    "TotalBsmtSF": 0.001,
    "1stFlrSF": 0.001,
    "GrLivArea": 0.001,
    "OverallQual": 0.1,
    "GarageArea": 0.001,
    "GarageCars": 0.1,
    "SalePrice": 0.00001,
}

## Prepare data

In [45]:
def prepare_data(traindata, testdata, feature_names=None, normalization=None):
    """Turn the raw train/test tables into scaled numeric matrices.

    Parameters
    ----------
    traindata : DataFrame with a "SalePrice" column and all feature columns.
    testdata : DataFrame with an "Id" column and all feature columns.
    feature_names : optional list of column names to use as inputs.
        Defaults to the notebook-level ``features`` list.
    normalization : optional dict mapping column name -> scaling factor; it
        must contain "SalePrice" and every feature name. Defaults to the
        notebook-level ``normalization_factor_per_feature`` dict.

    Returns
    -------
    (train_matrix, test_matrix)
        train_matrix: column 0 is the scaled SalePrice, followed by one scaled
        column per feature. test_matrix: column 0 is the (unscaled) Id,
        followed by one scaled column per feature. NaN entries are replaced
        by 0 and their count is printed after each feature is appended.
    """
    # Fall back to the notebook-level configuration only when the caller did
    # not pass an explicit one, so existing two-argument calls still work.
    if feature_names is None:
        feature_names = features
    if normalization is None:
        normalization = normalization_factor_per_feature

    train_matrix = traindata["SalePrice"].values
    train_row_nr = len(train_matrix)
    train_matrix = train_matrix.reshape(train_row_nr, 1) * normalization["SalePrice"]

    test_matrix = testdata["Id"].values
    test_row_nr = len(test_matrix)
    test_matrix = test_matrix.reshape(test_row_nr, 1)

    for column_name in feature_names:
        factor = normalization[column_name]
        train_column = traindata[column_name].values.reshape(train_row_nr, 1) * factor
        test_column = testdata[column_name].values.reshape(test_row_nr, 1) * factor
        train_matrix = np.hstack((train_matrix, train_column))
        test_matrix = np.hstack((test_matrix, test_column))

        # Report all NaN counts first, then zero them, so the log lines keep
        # the original "train, test" order for every appended feature.
        nan_masks = []
        for label, matrix in (("train", train_matrix), ("test", test_matrix)):
            nan_mask = np.isnan(matrix)
            print(label + " matrix has", np.count_nonzero(nan_mask), "values which are 'nan'!")
            nan_masks.append((matrix, nan_mask))
        for matrix, nan_mask in nan_masks:
            # hstack returned fresh arrays, so this does not touch the inputs.
            matrix[nan_mask] = 0
    return train_matrix, test_matrix
    


## Model and training parameters

In [36]:
# Path of the CSV file that receives the predicted sale prices.
OUTPUT_FILENAME = '../data/result_al.csv'

# Number of neurons in the first hidden, second hidden and output layer.
NR_NEURONS_HIDDEN1, NR_NEURONS_HIDDEN2, NR_NEURONS_OUTPUT = 20, 10, 1

# Training hyper-parameters: number of train steps and gradient-descent learning rate.
NR_TRAIN_STEPS = 10 ** 5
LEARN_RATE = 1e-3

## Set up the multilayer perceptron

In [53]:
def setup(inputs):
    """Build the TensorFlow graph of a perceptron with two hidden layers.

    The network processes one sample at a time: the input placeholder has the
    fixed shape (1, inputs). Both hidden layers use a leaky ReLU activation,
    the output layer is linear and is reshaped to a scalar. The loss is the
    absolute difference between the teacher value and the network output, and
    it is minimized by plain gradient descent with LEARN_RATE.

    Parameters
    ----------
    inputs : number of input features (width of the input placeholder).

    Returns
    -------
    list
        [input_node, teacher_node, create_var_init_op, loss_node,
         optimizer_node, output_node, weights_h1, weights_h2, weights_out]
    """
    input_node = tf.placeholder(tf.float32, shape=(1,inputs), name="input_node")
    teacher_node = tf.placeholder(tf.float32, name="teacher_node")

    # Weights and biases all start from random normal values.
    rnd_mat1 = tf.random_normal([inputs, NR_NEURONS_HIDDEN1])
    rnd_mat2 = tf.random_normal([NR_NEURONS_HIDDEN1, NR_NEURONS_HIDDEN2])
    rnd_mat3 = tf.random_normal([NR_NEURONS_HIDDEN2, NR_NEURONS_OUTPUT])

    weights = {
        'h1': tf.Variable(rnd_mat1),
        'h2': tf.Variable(rnd_mat2),
        'out': tf.Variable(rnd_mat3)
    }

    biases = {
        'b1': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN1])),
        'b2': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN2])),
        'out': tf.Variable(tf.random_normal([NR_NEURONS_OUTPUT]))
    }

    # Hidden layers. tf.nn.relu and tf.nn.sigmoid were tried as alternative
    # activations; swap them in here to experiment.
    layer_1 = tf.add(tf.matmul(input_node, weights['h1']), biases['b1'])
    layer_1 = tf.nn.leaky_relu(layer_1)

    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.leaky_relu(layer_2)

    # Linear output layer; the (1, NR_NEURONS_OUTPUT) result is reshaped to a scalar.
    output_node = tf.matmul(layer_2, weights['out']) + biases['out']
    output_node = tf.reshape(output_node, [])

    loss_node = tf.abs(teacher_node - output_node)
    optimizer_node = tf.train.GradientDescentOptimizer(LEARN_RATE).minimize(loss_node)

    # Create the initializer last: it only covers the variables that exist at
    # the moment it is built, so it must come after the optimizer in case the
    # optimizer is ever replaced by one that adds its own variables.
    create_var_init_op = tf.global_variables_initializer()

    return [input_node, teacher_node, create_var_init_op, loss_node, optimizer_node, output_node, weights['h1'],weights['h2'],weights['out']]

## Compute average error

In [54]:
def compute_avg_error(sess, model, train_matrix):
    """Return the mean per-sample loss of the model over all rows of train_matrix.

    Parameters
    ----------
    sess : session object whose ``run(fetches, feed_dict=...)`` evaluates the graph.
    model : sequence as returned by ``setup``; only the input node (index 0),
        the teacher node (index 1) and the loss node (index 3) are used.
    train_matrix : 2-D array; column 0 holds the target value, the remaining
        columns hold the input features.

    Returns
    -------
    The sum of the per-sample losses divided by the number of rows.

    Raises
    ------
    ValueError
        If train_matrix has no rows (an average would be undefined).
    """
    input_node, teacher_node, loss_node = model[0], model[1], model[3]

    nr_train_samples = train_matrix.shape[0]
    if nr_train_samples == 0:
        raise ValueError("train_matrix contains no samples; cannot compute an average error")
    nr_input_features = train_matrix.shape[1] - 1

    sum_losses = 0.0
    for sample_row in train_matrix:
        # The graph is fed one sample per run, as a (1, nr_input_features) matrix.
        input_matrix = sample_row[1:].reshape(1, nr_input_features)
        saleprice = sample_row[0]
        # Only the loss is needed here, so the prediction itself is not fetched.
        sum_losses += sess.run(loss_node, feed_dict={input_node: input_matrix, teacher_node: saleprice})

    return sum_losses / nr_train_samples

## Training the model

In [55]:
def train_model(model, train_matrix, nr_steps_to_train):
    """Train the model with single-sample gradient steps and return the session.

    In every step one randomly chosen row of train_matrix is fed to the graph
    and the optimizer node is run once. After every 1000th step the average
    error over the whole train_matrix is computed and printed together with
    the output, teacher value and loss of the current sample.

    Parameters
    ----------
    model : list as returned by ``setup``.
    train_matrix : 2-D array; column 0 holds the target value, the remaining
        columns hold the input features.
    nr_steps_to_train : number of training steps to perform.

    Returns
    -------
    The open tf.Session holding the trained variables. The caller is
    responsible for closing it. If training fails or is interrupted, the
    session is closed here before the exception is re-raised.
    """
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node = model[:6]

    nr_train_samples = train_matrix.shape[0]
    nr_input_features = train_matrix.shape[1] - 1
    sess = tf.Session()
    try:
        sess.run(var_init_node)

        for train_step in range(1, nr_steps_to_train+1):
            rnd_row = np.random.randint(0, nr_train_samples)
            input_matrix = train_matrix[rnd_row, 1:].reshape(1, nr_input_features)
            saleprice = train_matrix[rnd_row, 0]
            # The weight matrices are not fetched: their values were never
            # used and copying them out on every step is wasted work.
            actual_output, teacher_value, loss_value, _ = sess.run(
                [output_node, teacher_node, loss_node, optimizer_node],
                feed_dict={input_node: input_matrix, teacher_node: saleprice})
            if train_step % 1000 == 0:
                avg_error = compute_avg_error(sess, model, train_matrix)
                print("Training step ", train_step,
                        "Average error is", avg_error,
                        "actual = ", actual_output,
                        "teacher value = ", teacher_value,
                        "loss value = ", loss_value)
    except BaseException:
        # Also covers KeyboardInterrupt: the session cannot be returned to
        # the caller any more, so release it instead of leaking it.
        sess.close()
        raise
    return sess

## Predict sale prices for houses

In [56]:
def predict_sale_prices(sess, model, test_matrix, output_filename=None, saleprice_factor=None):
    """Predict a sale price for every row of test_matrix and write them to a CSV file.

    Parameters
    ----------
    sess : session object whose ``run(fetches, feed_dict=...)`` evaluates the graph.
    model : sequence as returned by ``setup``; only the input node (index 0)
        and the output node (index 5) are used.
    test_matrix : 2-D array; column 0 holds the house id, the remaining
        columns hold the input features.
    output_filename : optional path of the CSV file to write. Defaults to the
        notebook-level ``OUTPUT_FILENAME``.
    saleprice_factor : optional scaling factor that was applied to the sale
        prices for training; predictions are multiplied by its reciprocal.
        Defaults to ``normalization_factor_per_feature["SalePrice"]``.

    Side effects
    ------------
    Prints one line per house, prints the resulting table and writes it with
    the columns "Id" and "SalePrice" to output_filename. An empty test_matrix
    yields a CSV file containing only the header.
    """
    # Fall back to the notebook-level configuration only when the caller did
    # not pass explicit values, so existing three-argument calls still work.
    if output_filename is None:
        output_filename = OUTPUT_FILENAME
    if saleprice_factor is None:
        saleprice_factor = normalization_factor_per_feature["SalePrice"]

    input_node, output_node = model[0], model[5]
    nr_test_samples = test_matrix.shape[0]
    nr_input_features = test_matrix.shape[1] - 1
    # Undo the scaling that was applied to the sale prices for training.
    denormalize = 1.0/saleprice_factor

    prediction_matrix = np.zeros(shape=(nr_test_samples,2))
    for row_nr in range(0, nr_test_samples):
        input_matrix = test_matrix[row_nr, 1:].reshape(1, nr_input_features)
        house_id = int(test_matrix[row_nr, 0])
        predicted_saleprice = sess.run(output_node, feed_dict={input_node: input_matrix}) * denormalize
        print("House with id ", house_id,
              "--> predicted sale price is ", predicted_saleprice)
        prediction_matrix[row_nr][0] = house_id
        prediction_matrix[row_nr][1] = predicted_saleprice

    # Build the table once, after the loop. Building it inside the loop was
    # quadratic and left the name undefined for an empty test_matrix.
    prediction_dataframe = pd.DataFrame({'Id':prediction_matrix[:,0],'SalePrice':prediction_matrix[:,1]})
    prediction_dataframe = prediction_dataframe.astype({"Id": int})
    print(prediction_dataframe)

    prediction_dataframe.to_csv(output_filename, sep=',', index=False)

## "Main"

In [57]:
# Full pipeline: prepare the matrices, build the graph, train, predict.
train_matrix, test_matrix = prepare_data(traindata, testdata)
# Column 0 of train_matrix is the target; every other column is an input feature.
nr_input_features = train_matrix.shape[1] - 1
try:
    model = setup(nr_input_features)
    sess = train_model(model, train_matrix, NR_TRAIN_STEPS)
    try:
        predict_sale_prices(sess, model, test_matrix)
    finally:
        # Release the session even if prediction or writing the CSV fails.
        sess.close()
finally:
    # Always drop the graph so that re-running this cell starts from a clean
    # default graph instead of adding a second copy of every node.
    tf.reset_default_graph()

train matrix has 0 values which are 'nan'!
test matrix has 1 values which are 'nan'!
train matrix has 0 values which are 'nan'!
test matrix has 0 values which are 'nan'!
train matrix has 0 values which are 'nan'!
test matrix has 0 values which are 'nan'!
train matrix has 0 values which are 'nan'!
test matrix has 0 values which are 'nan'!
train matrix has 0 values which are 'nan'!
test matrix has 1 values which are 'nan'!
train matrix has 0 values which are 'nan'!
test matrix has 1 values which are 'nan'!
Training step  1000 Average error is 0.3936194235127266 actual =  1.7237622 teacher value =  1.55 loss value =  0.1737622
Training step  2000 Average error is 0.49465228033392394 actual =  1.6024488 teacher value =  1.36 loss value =  0.2424488
Training step  3000 Average error is 0.37631277527711166 actual =  1.6755288 teacher value =  1.51 loss value =  0.16552877
Training step  4000 Average error is 0.3412727196739144 actual =  1.7118322 teacher value =  1.794 loss value =  0.082167

Training step  63000 Average error is 0.23947736759708352 actual =  0.94463325 teacher value =  0.684 loss value =  0.26063323
Training step  64000 Average error is 0.23315389544179996 actual =  1.4713588 teacher value =  1.1 loss value =  0.37135875
Training step  65000 Average error is 0.22985002082096387 actual =  1.3439658 teacher value =  1.4 loss value =  0.056034207
Training step  66000 Average error is 0.2472225361490903 actual =  2.107192 teacher value =  2.025 loss value =  0.082191944
Training step  67000 Average error is 0.2310912479888903 actual =  1.708185 teacher value =  2.15 loss value =  0.44181514
Training step  68000 Average error is 0.25025217694370716 actual =  2.1548533 teacher value =  1.945 loss value =  0.20985329
Training step  69000 Average error is 0.2270421271863049 actual =  0.98176634 teacher value =  0.85 loss value =  0.13176632
Training step  70000 Average error is 0.2268249551727347 actual =  1.2544085 teacher value =  1.299 loss value =  0.044591546

House with id  1526 --> predicted sale price is  125411.88001632689
House with id  1527 --> predicted sale price is  90662.80722618102
House with id  1528 --> predicted sale price is  136401.8082618713
House with id  1529 --> predicted sale price is  148639.86968994138
House with id  1530 --> predicted sale price is  186779.86860275266
House with id  1531 --> predicted sale price is  128865.95726013182
House with id  1532 --> predicted sale price is  108814.13221359251
House with id  1533 --> predicted sale price is  162019.4315910339
House with id  1534 --> predicted sale price is  135857.60593414304
House with id  1535 --> predicted sale price is  146545.6128120422
House with id  1536 --> predicted sale price is  137710.20174026486
House with id  1537 --> predicted sale price is  47118.8724040985
House with id  1538 --> predicted sale price is  186438.94195556638
House with id  1539 --> predicted sale price is  204010.41507720944
House with id  1540 --> predicted sale price is  14707

House with id  1681 --> predicted sale price is  240765.09475708005
House with id  1682 --> predicted sale price is  278280.3297042846
House with id  1683 --> predicted sale price is  203423.1901168823
House with id  1684 --> predicted sale price is  168966.84169769284
House with id  1685 --> predicted sale price is  156004.22620773313
House with id  1686 --> predicted sale price is  179962.682723999
House with id  1687 --> predicted sale price is  140646.68416976926
House with id  1688 --> predicted sale price is  188437.66450881955
House with id  1689 --> predicted sale price is  185424.17287826535
House with id  1690 --> predicted sale price is  173198.85492324826
House with id  1691 --> predicted sale price is  161752.66504287717
House with id  1692 --> predicted sale price is  244994.56882476804
House with id  1693 --> predicted sale price is  140646.68416976926
House with id  1694 --> predicted sale price is  159497.43986129758
House with id  1695 --> predicted sale price is  142

House with id  1820 --> predicted sale price is  76355.27253150938
House with id  1821 --> predicted sale price is  154148.8170623779
House with id  1822 --> predicted sale price is  170718.8248634338
House with id  1823 --> predicted sale price is  66264.17636871336
House with id  1824 --> predicted sale price is  157501.1610984802
House with id  1825 --> predicted sale price is  119001.77001953124
House with id  1826 --> predicted sale price is  107697.60608673094
House with id  1827 --> predicted sale price is  124431.45513534544
House with id  1828 --> predicted sale price is  142708.8618278503
House with id  1829 --> predicted sale price is  171475.875377655
House with id  1830 --> predicted sale price is  207108.7598800659
House with id  1831 --> predicted sale price is  168745.86343765256
House with id  1832 --> predicted sale price is  146169.59095001218
House with id  1833 --> predicted sale price is  165716.4931297302
House with id  1834 --> predicted sale price is  131976.09

House with id  1960 --> predicted sale price is  109067.6426887512
House with id  1961 --> predicted sale price is  126669.77643966673
House with id  1962 --> predicted sale price is  110490.23866653441
House with id  1963 --> predicted sale price is  113214.68353271483
House with id  1964 --> predicted sale price is  111936.7480278015
House with id  1965 --> predicted sale price is  165568.22061538693
House with id  1966 --> predicted sale price is  166291.59450531003
House with id  1967 --> predicted sale price is  281802.3204803466
House with id  1968 --> predicted sale price is  326986.95659637445
House with id  1969 --> predicted sale price is  321313.23814392084
House with id  1970 --> predicted sale price is  381240.15331268305
House with id  1971 --> predicted sale price is  419693.8037872314
House with id  1972 --> predicted sale price is  352321.4101791381
House with id  1973 --> predicted sale price is  257854.70008850095
House with id  1974 --> predicted sale price is  2984

House with id  2147 --> predicted sale price is  149651.03864669797
House with id  2148 --> predicted sale price is  129071.46215438841
House with id  2149 --> predicted sale price is  138207.4952125549
House with id  2150 --> predicted sale price is  218762.37392425534
House with id  2151 --> predicted sale price is  128448.28367233275
House with id  2152 --> predicted sale price is  159981.58454895017
House with id  2153 --> predicted sale price is  163920.70055007932
House with id  2154 --> predicted sale price is  101833.08124542235
House with id  2155 --> predicted sale price is  127912.10412979125
House with id  2156 --> predicted sale price is  258322.95417785642
House with id  2157 --> predicted sale price is  226913.69056701657
House with id  2158 --> predicted sale price is  212177.7534484863
House with id  2159 --> predicted sale price is  191797.48296737668
House with id  2160 --> predicted sale price is  165195.53661346433
House with id  2161 --> predicted sale price is  2

House with id  2274 --> predicted sale price is  176862.84780502316
House with id  2275 --> predicted sale price is  218185.7347488403
House with id  2276 --> predicted sale price is  195197.36766815183
House with id  2277 --> predicted sale price is  207962.29839324948
House with id  2278 --> predicted sale price is  144256.46066665647
House with id  2279 --> predicted sale price is  120710.40868759154
House with id  2280 --> predicted sale price is  106774.53279495238
House with id  2281 --> predicted sale price is  182620.75185775754
House with id  2282 --> predicted sale price is  187242.12646484372
House with id  2283 --> predicted sale price is  113989.22204971312
House with id  2284 --> predicted sale price is  125812.00599670409
House with id  2285 --> predicted sale price is  156958.30583572385
House with id  2286 --> predicted sale price is  127168.95341873167
House with id  2287 --> predicted sale price is  310890.7222747802
House with id  2288 --> predicted sale price is  2

House with id  2396 --> predicted sale price is  161218.8935279846
House with id  2397 --> predicted sale price is  204514.93263244626
House with id  2398 --> predicted sale price is  124016.46375656126
House with id  2399 --> predicted sale price is  56868.93463134765
House with id  2400 --> predicted sale price is  57916.53990745544
House with id  2401 --> predicted sale price is  110058.52222442625
House with id  2402 --> predicted sale price is  137008.5000991821
House with id  2403 --> predicted sale price is  191653.1443595886
House with id  2404 --> predicted sale price is  137372.76792526242
House with id  2405 --> predicted sale price is  172332.7279090881
House with id  2406 --> predicted sale price is  133846.21143341062
House with id  2407 --> predicted sale price is  128002.35748291014
House with id  2408 --> predicted sale price is  142944.7293281555
House with id  2409 --> predicted sale price is  140653.7055969238
House with id  2410 --> predicted sale price is  188511.

House with id  2522 --> predicted sale price is  212716.36486053464
House with id  2523 --> predicted sale price is  124617.36202239989
House with id  2524 --> predicted sale price is  136777.11486816403
House with id  2525 --> predicted sale price is  139526.10492706296
House with id  2526 --> predicted sale price is  143531.1436653137
House with id  2527 --> predicted sale price is  117565.66762924193
House with id  2528 --> predicted sale price is  124041.70036315917
House with id  2529 --> predicted sale price is  126669.77643966673
House with id  2530 --> predicted sale price is  90590.5246734619
House with id  2531 --> predicted sale price is  236130.21373748776
House with id  2532 --> predicted sale price is  231396.0790634155
House with id  2533 --> predicted sale price is  202499.38964843747
House with id  2534 --> predicted sale price is  235589.1942977905
House with id  2535 --> predicted sale price is  308123.11172485346
House with id  2536 --> predicted sale price is  2128

House with id  2729 --> predicted sale price is  139744.74668502805
House with id  2730 --> predicted sale price is  148159.122467041
House with id  2731 --> predicted sale price is  121563.2915496826
House with id  2732 --> predicted sale price is  125670.0038909912
House with id  2733 --> predicted sale price is  157986.1521720886
House with id  2734 --> predicted sale price is  162337.73231506345
House with id  2735 --> predicted sale price is  138716.983795166
House with id  2736 --> predicted sale price is  136041.9988632202
House with id  2737 --> predicted sale price is  120331.33506774901
House with id  2738 --> predicted sale price is  181060.6479644775
House with id  2739 --> predicted sale price is  170946.44308090207
House with id  2740 --> predicted sale price is  149592.11349487302
House with id  2741 --> predicted sale price is  148318.6364173889
House with id  2742 --> predicted sale price is  169978.94048690793
House with id  2743 --> predicted sale price is  145857.13

House with id  2892 --> predicted sale price is  58394.83737945556
House with id  2893 --> predicted sale price is  113269.35291290282
House with id  2894 --> predicted sale price is  72566.74170494078
House with id  2895 --> predicted sale price is  244166.37420654294
House with id  2896 --> predicted sale price is  245090.62767028806
House with id  2897 --> predicted sale price is  207943.01033020017
House with id  2898 --> predicted sale price is  214200.3774642944
House with id  2899 --> predicted sale price is  226367.64049530026
House with id  2900 --> predicted sale price is  165564.6443367004
House with id  2901 --> predicted sale price is  207215.26145935056
House with id  2902 --> predicted sale price is  186230.39722442624
House with id  2903 --> predicted sale price is  283310.6040954589
House with id  2904 --> predicted sale price is  310561.990737915
House with id  2905 --> predicted sale price is  63017.827272415154
House with id  2906 --> predicted sale price is  232476