In [3]:
import numpy as np
import pandas as pd
import os

# load Boston housing dataset
boston_dataset_train_file = './datasets/boston_dataset/boston_train.csv'
boston_dataset_test_file = './datasets/boston_dataset/boston_train.csv'
df_boston_train = pd.read_csv(boston_dataset_train_file)
df_boston_test = pd.read_csv(boston_dataset_test_file)

# extract features and target variable into seperate datasets.
df_boston_train_target = df_boston_train.loc[:,['price']]
df_boston_train_features = df_boston_train.drop(['price'], axis=1)

df_boston_test_target = df_boston_test.loc[:,['price']]
df_boston_test_features = df_boston_test.drop(['price'], axis=1)

In [4]:
# train a linear regression model on a single feature
# the general equation for linear regression is:
# y = w1x1 + w2x2 + ... wnxn + c
#
# in case of a single feature, the equation reduces to:
# y = w1x1 + c

import tensorflow as tf
tf.reset_default_graph()

#
# define a TensorFlow graph
#

# what you will provide:
# x1 =  one-dimensional column array of features
# y_actual = one-dimensional column arrray of expected values/labels
x1 = tf.placeholder(tf.float32, [None, 1], name="x1")
y_actual = tf.placeholder(tf.float32, [None, 1], name="y_actual")

# what you  are interested in:
# w1, c weight and bias term (intercept)
w1 = tf.Variable(tf.zeros([1,1]), name="w1")
c = tf.Variable(tf.zeros([1]), name="c")

# compute y_predicged = w1x1 + c
y_predicted = tf.matmul(x1,w1) + c

# compute cost function
# MSE between predicted and actual values
mse_cost = tf.reduce_mean(tf.square(y_predicted - y_actual))

# create a root node - a gradient descent optimizer, setup to optimise the
# mse_cost function.
learning_rate = 0.0001
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse_cost)

#
# execute the graph
#
with tf.Session() as sess:
    # use a global_variable_initializer node to initialize
    # variables all variables in the graph (w1 and c)
    init = tf.global_variables_initializer()
    sess.run(init)
    
    # extract CRIM feature and targer variable into arrays
    xin =  df_boston_train_features['CRIM'].values
    yin =  df_boston_train_target['price'].values
    

    # for each observaiton, evaluate the node called train_step
    # which will in turn update w1 and c so as to minimise
    # the cost function.
    best_w1 = 0.0
    best_c = 0.0
    lowest_cost = 100000.00
    
    num_elements = df_boston_train_features.shape[0]
    for i in range(num_elements):
        input_dict={x1:[[xin[i]]], y_actual:[[yin[i]]]}
        sess.run(train_step, feed_dict=input_dict)
        
        computed_w1 = sess.run(w1)
        computed_c = sess.run(c)
        computed_cost = sess.run(mse_cost, feed_dict=input_dict)
        
        if computed_cost < lowest_cost:
            lowest_cost = computed_cost
            best_w1 = computed_w1
            best_c = computed_c
            
        print ("End of iteration %d, w1=%f, c=%f, cost=%f" % (i, computed_w1, computed_c, computed_cost))
    print ("End of training, w1=%f, c=%f, lowest_cost=%f" % (best_w1, best_c, lowest_cost))
    

End of iteration 0, w1=0.001062, c=0.004960, cost=614.782654
End of iteration 1, w1=0.001816, c=0.008319, cost=281.946838
End of iteration 2, w1=0.063531, c=0.010929, cost=134.250870
End of iteration 3, w1=0.066140, c=0.015940, cost=627.483948
End of iteration 4, w1=0.066644, c=0.022336, cost=1022.235352
End of iteration 5, w1=0.086234, c=0.026387, cost=406.289185
End of iteration 6, w1=0.094737, c=0.031110, cost=556.881409
End of iteration 7, w1=0.095107, c=0.034862, cost=351.778900
End of iteration 8, w1=0.095447, c=0.044555, cost=2347.605469
End of iteration 9, w1=0.098273, c=0.048211, cost=334.005737
End of iteration 10, w1=0.109659, c=0.050650, cost=147.315781
End of iteration 11, w1=0.109998, c=0.054698, cost=409.498779
End of iteration 12, w1=0.120764, c=0.059093, cost=481.586945
End of iteration 13, w1=0.120893, c=0.065681, cost=1084.514038
End of iteration 14, w1=0.121959, c=0.070102, cost=488.432281
End of iteration 15, w1=0.123165, c=0.073760, cost=334.363831
End of iteratio

End of iteration 242, w1=0.735170, c=0.990379, cost=366.778961
End of iteration 243, w1=0.735719, c=1.000172, cost=2396.945801
End of iteration 244, w1=0.736396, c=1.003462, cost=270.430878
End of iteration 245, w1=0.739230, c=1.003832, cost=3.329815
End of iteration 246, w1=0.742473, c=1.007934, cost=420.463593
End of iteration 247, w1=0.753785, c=1.009058, cost=30.317471
End of iteration 248, w1=0.754373, c=1.012491, cost=294.405701
End of iteration 249, w1=0.768908, c=1.022059, cost=2285.791748
End of iteration 250, w1=0.740958, c=1.020513, cost=52.134834
End of iteration 251, w1=0.741351, c=1.026920, cost=1025.770752
End of iteration 252, w1=0.742778, c=1.033703, cost=1149.890137
End of iteration 253, w1=0.743843, c=1.038343, cost=537.813354
End of iteration 254, w1=0.744611, c=1.043010, cost=544.502747
End of iteration 255, w1=0.744771, c=1.046675, cost=335.648468
End of iteration 256, w1=0.746334, c=1.051275, cost=528.774475
End of iteration 257, w1=0.742066, c=1.050845, cost=4.4

In [14]:
# use best_w1 and best_c to make predictions on all observations in the test set
predictions = df_boston_test_features['CRIM'].values * best_w1 + best_c

# compute MSE on test set
from sklearn.metrics import mean_squared_error
mse_test = mean_squared_error(np.transpose(df_boston_test_target.values), predictions)
print (mse_test)

518.6643030640115
