In [1]:
import boto3
import sagemaker
import io

import pandas as pd
import numpy as np

# load training and validation dataset from Amazon S3
s3_client = boto3.client('s3')
s3_bucket_name='awsml-sagemaker-source'

response = s3_client.get_object(Bucket='awsml-sagemaker-source', Key='boston_train.csv')
response_body = response["Body"].read()
df_boston_train = pd.read_csv(io.BytesIO(response_body), header=0, delimiter=",", low_memory=False)

response = s3_client.get_object(Bucket='awsml-sagemaker-source', Key='boston_test.csv')
response_body = response["Body"].read()
df_boston_test = pd.read_csv(io.BytesIO(response_body), header=0, index_col=False, delimiter=",", low_memory=False)

# extract features and target variable into seperate datasets.
df_boston_train_target = df_boston_train.loc[:,['price']]
df_boston_train_features = df_boston_train.drop(['price'], axis=1)

df_boston_test_target = df_boston_test.loc[:,['price']]
df_boston_test_features = df_boston_test.drop(['price'], axis=1)


In [2]:
df_boston_train.head()

Unnamed: 0,price,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,24.8,0.21409,22.0,5.86,0.0,0.431,6.438,8.9,7.3967,7.0,330.0,19.1,377.07,3.59
1,16.8,0.22438,0.0,9.69,0.0,0.585,6.027,79.7,2.4982,6.0,391.0,19.2,396.9,14.33
2,13.1,23.6482,0.0,18.1,0.0,0.671,6.38,96.2,1.3861,24.0,666.0,20.2,396.9,23.69
3,25.1,0.52058,0.0,6.2,1.0,0.507,6.631,76.5,4.148,8.0,307.0,17.4,388.45,9.54
4,32.0,0.07875,45.0,3.44,0.0,0.437,6.782,41.1,3.7886,5.0,398.0,15.2,393.87,6.68


In [3]:
# train a linear regression model on a single feature
# the general equation for linear regression is:
# y = w1x1 + w2x2 + ... wnxn + c
#
# in case of a single feature, the equation reduces to:
# y = w1x1 + c

import tensorflow as tf
tf.reset_default_graph()

#
# define a TensorFlow graph
#

# what you will provide:
# x1 =  one-dimensional column array of features
# y_actual = one-dimensional column arrray of expected values/labels
x1 = tf.placeholder(tf.float32, [None, 1], name="x1")
y_actual = tf.placeholder(tf.float32, [None, 1], name="y_actual")

# what you  are interested in:
# w1, c weight and bias term (intercept)
w1 = tf.Variable(tf.zeros([1,1]), name="w1")
c = tf.Variable(tf.zeros([1]), name="c")

# compute y_predicged = w1x1 + c
temp = tf.matmul(x1,w1)
y_predicted = temp + c


# compute cost function
# MSE between predicted and actual values
diff = y_predicted - y_actual
square_diff = tf.square(diff)
mse_cost = tf.reduce_mean(square_diff)

# create a root node - a gradient descent optimizer, setup to optimise the
# mse_cost function.
learning_rate = 0.000001
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse_cost)

# extract CRIM feature and target variable into arrays so that 
# these can be fed into the computation graph via the placeholders 
# creared earlier.
xin =  df_boston_train_features['CRIM'].values
yin =  df_boston_train_target['price'].values

    
#
# execute the graph
#
with tf.Session() as sess:
    # use a global_variable_initializer node to initialize
    # variables all variables in the graph (w1 and c)
    init_node = tf.global_variables_initializer()
    sess.run(init_node)
    
    # for each observation, evaluate the node called train_step
    # which will in turn update w1 and c so as to minimize
    # the cost function.
    
    num_elements = df_boston_train_features.shape[0]
    best_w1 = 0.0
    best_c = 0.0
    lowest_cost = 100000.00

    for i in range(num_elements):
        input_dict={x1:[[xin[i]]], y_actual:[[yin[i]]]}
        sess.run(train_step, feed_dict=input_dict)
        
        computed_w1 = sess.run(w1)
        computed_c = sess.run(c)
        computed_cost = sess.run(mse_cost, feed_dict=input_dict)
        
        if computed_cost < lowest_cost:
            lowest_cost = computed_cost
            best_w1 = computed_w1
            best_c = computed_c
            
        print ("End of iteration %d, w1=%f, c=%f, cost=%f" % (i, computed_w1, computed_c, computed_cost))
    print ("End of training, w1=%f, c=%f, lowest_cost=%f" % (best_w1, best_c, lowest_cost))
    

Instructions for updating:
Colocations handled automatically by placer.
End of iteration 0, w1=0.000011, c=0.000050, cost=615.037415
End of iteration 1, w1=0.000018, c=0.000083, cost=282.237030
End of iteration 2, w1=0.000638, c=0.000109, cost=171.212265
End of iteration 3, w1=0.000664, c=0.000160, cost=629.984619
End of iteration 4, w1=0.000669, c=0.000224, cost=1023.982300
End of iteration 5, w1=0.000868, c=0.000265, cost=424.176147
End of iteration 6, w1=0.000954, c=0.000312, cost=566.343384
End of iteration 7, w1=0.000957, c=0.000350, cost=353.423248
End of iteration 8, w1=0.000961, c=0.000447, cost=2352.203369
End of iteration 9, w1=0.000989, c=0.000484, cost=338.514008
End of iteration 10, w1=0.001108, c=0.000509, cost=161.145706
End of iteration 11, w1=0.001111, c=0.000550, cost=412.063873
End of iteration 12, w1=0.001221, c=0.000594, cost=497.130127
End of iteration 13, w1=0.001222, c=0.000660, cost=1088.954956
End of iteration 14, w1=0.001232, c=0.000705, cost=492.795563
End o

End of iteration 131, w1=0.016364, c=0.005712, cost=376.116394
End of iteration 132, w1=0.016393, c=0.005741, cost=209.614105
End of iteration 133, w1=0.016401, c=0.005774, cost=275.243561
End of iteration 134, w1=0.016403, c=0.005821, cost=556.641479
End of iteration 135, w1=0.016410, c=0.005870, cost=594.964844
End of iteration 136, w1=0.016468, c=0.005970, cost=2498.450684
End of iteration 137, w1=0.016470, c=0.006036, cost=1108.443848
End of iteration 138, w1=0.016539, c=0.006065, cost=211.841003
End of iteration 139, w1=0.016540, c=0.006113, cost=575.701599
End of iteration 140, w1=0.016551, c=0.006149, cost=323.584839
End of iteration 141, w1=0.016557, c=0.006195, cost=528.628052
End of iteration 142, w1=0.016725, c=0.006233, cost=361.752930
End of iteration 143, w1=0.016732, c=0.006291, cost=817.476440
End of iteration 144, w1=0.017066, c=0.006299, cost=18.828541
End of iteration 145, w1=0.017149, c=0.006335, cost=315.193542
End of iteration 146, w1=0.017795, c=0.006350, cost=60

End of iteration 266, w1=0.028725, c=0.011881, cost=2005.164307
End of iteration 267, w1=0.028917, c=0.011906, cost=145.370605
End of iteration 268, w1=0.028942, c=0.011934, cost=203.420639
End of iteration 269, w1=0.028946, c=0.011994, cost=893.177979
End of iteration 270, w1=0.029162, c=0.012032, cost=358.041626
End of iteration 271, w1=0.029169, c=0.012062, cost=224.441696
End of iteration 272, w1=0.029170, c=0.012162, cost=2498.743652
End of iteration 273, w1=0.029193, c=0.012193, cost=242.297394
End of iteration 274, w1=0.029206, c=0.012235, cost=444.325470
End of iteration 275, w1=0.029224, c=0.012281, cost=532.500366
End of iteration 276, w1=0.029230, c=0.012356, cost=1390.196533
End of iteration 277, w1=0.029238, c=0.012405, cost=609.247314
End of iteration 278, w1=0.029247, c=0.012451, cost=532.789734
End of iteration 279, w1=0.029513, c=0.012472, cost=108.361664
End of iteration 280, w1=0.029545, c=0.012558, cost=1829.832642
End of iteration 281, w1=0.029551, c=0.012596, cost

In [4]:
# use best_w1 and best_c to make predictions on all observations in the test set
predictions = df_boston_test_features['CRIM'].values * best_w1 + best_c

# compute MSE on test set
from sklearn.metrics import mean_squared_error
mse_test = mean_squared_error(np.transpose(df_boston_test_target.values), predictions)
print (mse_test)

601.8169340697825
