## Libraries

In [27]:
import os.path
import numpy as np
import pandas as pd
import tensorflow as tf

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

RuntimeError: module compiled against API version 0xc but this version of numpy is 0xb

## Import data

In [28]:
traindata = pd.read_csv('../data/train.csv')
testdata = pd.read_csv('../data/train.csv')

## Data Parameters

In [29]:
features1 = ['TotalBsmtSF']

# features2 --> error after 100k train steps, learn=0.001
# for a (2,20sig,10sig,1) MLP
# = 0.42
features2 = ['TotalBsmtSF', '1stFlrSF']

# features3 --> error after 100k train steps, learn=0.001
# for a (3,20sig,10sig,1) MLP
# = 0.31
features3 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea']

# features4 --> error after 100k train steps, learn=0.001
# for a (4,20sig,10sig,1) MLP
# = 0.24
features4 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual']

# features5 --> error after 100k train steps, learn=0.001
# for a (5,20sig,10sig,1) MLP
# = 0.22
features5 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual', 'GarageArea']

# features6 --> error after 100k train steps
# for a (6,20sig,10sig,1)   MLP = 0.22 (learn=0.001)
# for a (6,20relu,10relu,1) MLP = 0.22 (learn=0.001)
# for a (6,20relu,10relu,1) MLP = 0.26 (learn=0.0001)
# for a (6,40relu,30relu,10relu,1) MLP = 0.34 (learn=0.0001)
# for a (6,20sig,1) MLP = 0.26 (learn=0.001)
# for a (6,20id,10id,1)   MLP = 0.26 (learn=0.001)
features6 = ['TotalBsmtSF', '1stFlrSF', 'GrLivArea', 'OverallQual', 'GarageArea', 'GarageCars']

# set feature vector to use here!
features = features6

# Normalization factor for house sale prices
# This is important, since all the input feature values
# "live" in different intervals
# E.g. SalePrice: 50000-400000
#      TotalBsmtSF: 300-2000
#      OverallQual: 1-10
normalization_factor_per_feature = {"TotalBsmtSF": 0.001,
                                    "1stFlrSF": 0.001,
                                    "GrLivArea": 0.001,
                                    "OverallQual": 0.1,
                                    "GarageArea": 0.001,
                                    "GarageCars": 0.1,
                                    "SalePrice": 0.00001}

## Prepare data

In [30]:
def prepare_data(traindata, testdata):
    train_matrix = traindata["SalePrice"].values
    train_row_nr = len(train_matrix)
    train_matrix = train_matrix.reshape(train_row_nr,1)
    train_matrix = train_matrix * normalization_factor_per_feature["SalePrice"]
    test_matrix = testdata["Id"].values
    test_row_nr = len(test_matrix)
    test_matrix = test_matrix.reshape(test_row_nr,1)
    
    for column_name in features:
        train_column = traindata[column_name].values.reshape(train_row_nr,1)
        test_column = testdata[column_name].values.reshape(test_row_nr,1)
        train_column = train_column * normalization_factor_per_feature[column_name]
        test_column = test_column * normalization_factor_per_feature[column_name]
        train_matrix = np.hstack((train_matrix, train_column))
        test_matrix = np.hstack((test_matrix, test_column))
        missing_data_items_train = np.count_nonzero(np.isnan(train_matrix))
        missing_data_items_test = np.count_nonzero(np.isnan(test_matrix))
        print("train matrix has",missing_data_items_train, "values which are 'nan'!")
        print("test matrix has",missing_data_items_test, "values which are 'nan'!")
        nan_values_train = np.isnan(train_matrix)
        train_matrix[nan_values_train] = 0
        nan_values_test = np.isnan(test_matrix)
        test_matrix[nan_values_test] = 0
        return train_matrix, test_matrix
    


## Parameters 

In [31]:
OUTPUT_FILENAME = '../data/result_al.csv'

NR_NEURONS_HIDDEN1 = 20
NR_NEURONS_HIDDEN2 = 10
NR_NEURONS_OUTPUT  = 1

NR_TRAIN_STEPS = 100000
LEARN_RATE = 0.001

## Setup the perceptron

In [39]:
def setup(inputs):
    input_node = tf.placeholder(tf.float32, shape=(1,inputs), name="input_node")
    teacher_node = tf.placeholder(tf.float32, name="teacher_node")
    
    rnd_mat1 = tf.random_normal([inputs, NR_NEURONS_HIDDEN1])
    rnd_mat2 = tf.random_normal([NR_NEURONS_HIDDEN1, NR_NEURONS_HIDDEN2])
    rnd_mat3 = tf.random_normal([NR_NEURONS_HIDDEN2, NR_NEURONS_OUTPUT])
    
    weights = {
        'h1': tf.Variable(rnd_mat1),
        'h2': tf.Variable(rnd_mat2),
        'out': tf.Variable(rnd_mat3)
    }
    
    biases = {
        'b1': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN1])),
        'b2': tf.Variable(tf.random_normal([NR_NEURONS_HIDDEN2])),
        'out': tf.Variable(tf.random_normal([NR_NEURONS_OUTPUT]))
    }
    
    layer_1 = tf.add(tf.matmul(input_node, weights['h1']), biases['b1'])
    layer_1 = tf.nn.leaky_relu(layer_1)
    
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.leaky_relu(layer_2)
    
    output_node = tf.matmul(layer_2, weights['out']) + biases['out']
    output_node = tf.reshape(output_node, [])
    
    create_var_init_op = tf.global_variables_initializer()
    
    loss_node = tf.abs(teacher_node - output_node)
    optimizer_node = tf.train.GradientDescentOptimizer(LEARN_RATE).minimize(loss_node)
    
    return [input_node, teacher_node, create_var_init_op, loss_node, optimizer_node, output_node, weights['h1'],weights['h2'],weights['out']]

## Compute average error

In [40]:
def compute_avg_error(sess, model, train_matrix):
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node, weights_h1, weights_h2, weights_out = model
    
    nr_train_samples = train_matrix.shape[0]
    nr_input_features = train_matrix.shape[1] - 1
    sum_losses = 0.0
    
    for sample_row_nr in range(0, nr_train_samples):
        input_matrix = train_matrix[sample_row_nr, 1:]
        input_matrix = input_matrix.reshape(1, nr_input_features)
        saleprice = train_matrix[sample_row_nr, 0]
        predicted_saleprice, sample_loss = sess.run([output_node, loss_node], feed_dict={input_node: input_matrix, teacher_node: saleprice})
        sum_losses += sample_loss
        
    avg_loss = sum_losses / nr_train_samples
    return avg_loss

## Training the model

In [41]:
def train_model(model, train_matrix, nr_steps_to_train):
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node, weights_h1, weights_h2, weights_out = model
    
    nr_train_samples = train_matrix.shape[0]
    nr_input_features = train_matrix.shape[1] - 1
    sess = tf.Session()
    sess.run(var_init_node)
    
    for train_step in range(1, nr_steps_to_train+1):
        rnd_row = np.random.randint(0, nr_train_samples)
        input_matrix = train_matrix[rnd_row, 1:]
        input_matrix = input_matrix.reshape(1, nr_input_features)
        saleprice = train_matrix[rnd_row, 0]
        actual_output, tacher_value, loss_value, _, w_h1, w_h2, w_out = sess.run([output_node, teacher_node, loss_node, optimizer_node, weights_h1, weights_h2, weights_out], feed_dict={input_node: input_matrix, teacher_node: saleprice})
        if train_step % 1000 == 0:
            avg_error = compute_avg_error(sess, model, train_matrix)
    return sess

## Predict sale prices for Houses

In [44]:
def predict_sale_prices(sess, model, test_matrix):
    input_node, teacher_node, var_init_node, loss_node, optimizer_node, output_node, weights_h1, weights_h2, weights_out = model
    nr_test_samples = test_matrix.shape[0]
    nr_input_features = test_matrix.shape[1] - 1
    
    prediction_matrix = np.zeros(shape=(nr_test_samples,2))
    for row_nr in range(0, nr_test_samples):
        input_matrix = test_matrix[row_nr, 1:]
        input_matrix = input_matrix.reshape(1, nr_input_features)
        house_id = int(test_matrix[row_nr, 0])
        predicted_saleprice = sess.run(output_node, feed_dict={input_node: input_matrix})
        print("House with id ", house_id,
              "has feature input_matrix = ", input_matrix,
              "--> predicted sale price is ", predicted_saleprice * (1.0/normalization_factor_per_feature["SalePrice"]))
        prediction_matrix[row_nr][0] = house_id
        prediction_matrix[row_nr][1] = predicted_saleprice * (1.0/normalization_factor_per_feature["SalePrice"])
        
        prediction_dataframe = pd.DataFrame({'Id':prediction_matrix[:,0],'SalePrice':prediction_matrix[:,1]})
        prediction_dataframe = prediction_dataframe.astype({"Id": int})
        print(prediction_dataframe)
        
        predection_dataframe.to_csv(OUTPUT_FILENAME, sep=',', index=False)

## "Main"

In [45]:
train_matrix, test_matrix = prepare_data(traindata, testdata)
nr_input_features = train_matrix.shape[1] - 1
model = setup(nr_input_features)
sess = train_model(model, train_matrix, NR_TRAIN_STEPS)
predict_sale_prices(sess, model, test_matrix)
sess.close()
tf.reset_default_graph()

train matrix has 0 values which are 'nan'!
test matrix has 0 values which are 'nan'!


NameError: name 'houseid' is not defined