In [3]:
import pandas as pd  # work with data as table
import numpy as np  # use number matrices
import matplotlib.pyplot as plt
import tensorflow as tf

In [7]:
# Step 1: load the data into a DataFrame.
dataframe = pd.read_csv("tensorflow/predicting-housing-prices/data.csv")
# Remove the columns we don't care about.
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1)
# Only use the first 10 rows. Take an explicit copy: new columns are written
# into this frame afterwards, and writing into a bare slice of the full table
# is what triggers pandas' SettingWithCopyWarning.
dataframe = dataframe[0:10].copy()
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [10]:
# Step 2: add labels.
# y1: 1 is a good buy and 0 is a bad buy.
dataframe.loc[:, 'y1'] = [1, 1, 1, 0, 0, 1, 0, 1, 1, 1]
# y2 is the negation of y1, stored directly as 1s and 0s.
# The column is created as integers in a single assignment: first storing the
# boolean comparison and then overwriting it through .loc with ints makes
# newer pandas versions try to write ints into a bool column, which warns or
# fails instead of changing the column's dtype.
dataframe.loc[:, 'y2'] = (dataframe['y1'] == 0).astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [14]:
# Step 3: prepare the data for TensorFlow.
# Tensors are the generic version of vectors and matrices:
#   - a vector is a list of numbers (1D tensor)
#   - a matrix is a list of lists of numbers (2D tensor)
#   - a list of lists of lists of numbers is a 3D tensor
#   - ...

# Convert the features into a 2D NumPy array.
# `.values` replaces DataFrame.as_matrix(), which is deprecated and no longer
# exists in pandas 1.0 and later.
inputX = dataframe.loc[:, ['area', 'bathrooms']].values

# Convert the labels into a 2D NumPy array.
inputY = dataframe.loc[:, ['y1', 'y2']].values


In [15]:
# Display the feature array built from the 'area' and 'bathrooms' columns.
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [16]:
# Display the label array built from the 'y1' and 'y2' columns.
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [25]:
# Step 4: hyperparameters.
training_epochs = 2000  # number of iterations of the training loop
display_step = 50       # log the cost once every this many iterations
learning_rate = 1e-5    # step size given to the optimizer
# NOTE: ndarray.size is the total number of elements in inputY
# (rows * columns), not the number of rows.
n_samples = inputY.size


In [21]:
# Step 5: build the computation graph / neural network.

# Placeholders are the gateways through which data enters the graph.
# The leading None means "any number of examples"; each example has 2 features.
x = tf.placeholder(tf.float32, shape=[None, 2])

# Weights: a 2x2 float matrix, initialised to zeros, that is updated during
# training. TensorFlow variables are in-memory buffers holding tensors.
W = tf.Variable(tf.zeros([2, 2]))

# Biases (the b in y = mx + b), one per output, initialised to zeros.
b = tf.Variable(tf.zeros([2]))

# Linear step: multiply the inputs by the weights and add the biases.
y_value = tf.matmul(x, W) + b

# Activation: softmax turns each row of scores into values that sum to 1.
# (Softmax generalises the sigmoid to several outputs; it is not the same
# function.)
y = tf.nn.softmax(y_value)

# Placeholder through which the label matrix is fed in.
y_ = tf.placeholder(tf.float32, shape=[None, 2])


In [22]:
# Step 6: define what training minimises.
# Cost: squared difference between labels and predictions, summed over every
# element (tf.reduce_sum adds up all entries of the tensor) and divided by
# 2 * n_samples.
squared_error = tf.pow(y_ - y, 2)
cost = tf.reduce_sum(squared_error) / (2 * n_samples)

# Gradient descent: build the op that takes one step towards a lower cost.
gradient_descent = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = gradient_descent.minimize(cost)


In [27]:
# Open a TensorFlow session, then run the op that initialises all variables.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)



In [28]:
# Training loop: the same full batch of features and labels is fed every time.
batch_feed = {x: inputX, y_: inputY}

for i in range(training_epochs):
    # One gradient-descent step.
    sess.run(optimizer, feed_dict=batch_feed)

    # Only log the cost once every `display_step` iterations.
    if i % display_step != 0:
        continue
    cc = sess.run(cost, feed_dict=batch_feed)
    print("Training steps: ", "%04d" % i, " cost=", "{:.9f}".format(cc))

print('Optimization Finished !')

# Report the final cost together with the learned weights and biases.
training_cost = sess.run(cost, feed_dict=batch_feed)
print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b))


Training steps:  0000  cost= 0.134503588
Training steps:  0050  cost= 0.112963296
Training steps:  0100  cost= 0.112963341
Training steps:  0150  cost= 0.112963364
Training steps:  0200  cost= 0.112963364
Training steps:  0250  cost= 0.112963393
Training steps:  0300  cost= 0.112963438
Training steps:  0350  cost= 0.112963483
Training steps:  0400  cost= 0.112963483
Training steps:  0450  cost= 0.112963513
Training steps:  0500  cost= 0.112963557


Training steps:  0550  cost= 0.112963572
Training steps:  0600  cost= 0.112963602
Training steps:  0650  cost= 0.112963632
Training steps:  0700  cost= 0.112963654
Training steps:  0750  cost= 0.112963699
Training steps:  0800  cost= 0.112963714
Training steps:  0850  cost= 0.112963721
Training steps:  0900  cost= 0.112963758
Training steps:  0950  cost= 0.112963796
Training steps:  1000  cost= 0.112963811


Training steps:  1050  cost= 0.112963840
Training steps:  1100  cost= 0.112963870
Training steps:  1150  cost= 0.112963893
Training steps:  1200  cost= 0.112963915
Training steps:  1250  cost= 0.112963937
Training steps:  1300  cost= 0.112963960
Training steps:  1350  cost= 0.112964012
Training steps:  1400  cost= 0.112964019
Training steps:  1450  cost= 0.112964056
Training steps:  1500  cost= 0.112964079
Training steps:  1550  cost= 0.112964109
Training steps:  1600  cost= 0.112964131


Training steps:  1650  cost= 0.112964153
Training steps:  1700  cost= 0.112964176
Training steps:  1750  cost= 0.112964213
Training steps:  1800  cost= 0.112964235
Training steps:  1850  cost= 0.112964250
Training steps:  1900  cost= 0.112964287
Training steps:  1950  cost= 0.112964317
Optimization Finished !
Training cost= 0.110957 W= [[ 0.00014077 -0.00014077]
 [ 0.00045578 -0.00045578]] b= [  9.51521506e-05  -9.51521361e-05]


In [29]:
sess.run(y, feed_dict={x: inputX})


array([[ 0.64457554,  0.3554244 ],
       [ 0.61144364,  0.38855633],
       [ 0.66342938,  0.33657059],
       [ 0.5988487 ,  0.40115133],
       [ 0.70024157,  0.29975843],
       [ 0.63707429,  0.36292574],
       [ 0.60702008,  0.39297998],
       [ 0.59981126,  0.40018877],
       [ 0.59663081,  0.40336913],
       [ 0.60433042,  0.39566955]], dtype=float32)