In [1]:
import pandas as pd #work with data as tables
import numpy as np #use number matrices
import matplotlib.pyplot as plt
import tensorflow as tf



In [7]:
df = pd.read_csv("data.csv")
# remove the features we don't need to build our neural network
df = df.drop(['index', 'price', 'sq_price'], axis=1)
df = df[0:10]

In [8]:
df

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [22]:
# Add Labels
# 1 is good buy, 0 is bad buy
df.loc[:,('y1')] = [1,1,1,0,0,1,0,1,1,1]
# y2 is the negation of y1, the opposite value
df.loc[:,('y2')] = df['y1'] == 0
# now that we have true and false values for y2, convert to 1s and 0s
df.loc[:,('y2')] = df['y2'].astype('int')

In [23]:
df

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [28]:
# Step 3: Prepare data for TensorFlow
# tensors are a generic version of vectors and matrices
# vector is a list of numbers (1D tensor)
# matrix is a list of list of numbers (2D tensor)
# list of list of list of numbers (3D tensor)

# convert features to input tensors
xInput = df.loc[:,['area', 'bathrooms']].as_matrix()
# convert labels to input tensor as well
yInput = df.loc[:,['y1', 'y2']].as_matrix()

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [29]:
xInput

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [30]:
yInput

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [35]:
# Step 4: Write out our hyperparameters
learning_rate = 0.000001
training_epochs = 2000
display_step = 50
n_samples = yInput.size
n_samples


20

In [40]:
# Step 5: Create our computation graph / Neural Network
# for feature input tensor, none means any number of examples
# placeholders are gateways for data into our computation brackets
x = tf.placeholder(tf.float32, [None, 2])

# create weights
# 2 by 2 float matrix, will keep updating through the training process
# variables in TF hold and update parameters
# in memory buffers containing tensors
W = tf.Variable(tf.zeros([2,2]))

# Add biases (biases are like the 'b' in 'y = m * x + b')
b = tf.Variable(tf.zeros([2]))

# multiply our weights by our inputs, first calculation
# weights are how we govern how our data flows in our computational graph
# multiply inputs by weights and add biases
y_values = tf.add(tf.matmul(x, W),b)


# apply softmax to value we just created
y = tf.nn.softmax(y_values)

# feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])


In [41]:
# Step 6: Perform training
# create our cost function - the mean squared error

# reduce_sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [42]:
# initialize variables and tensorflow session

init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [44]:
# training loop
for i in range(training_epochs):
    sess.run(optimizer, feed_dict = {x:xInput, y_: yInput})
    
    # write out logs of trainig
    if (i) % display_step == 0:
        cc = sess.run(cost, feed_dict = {x: xInput, y_: yInput})
        print 'Training step:', '%04d' % (i), "cost=", "{:.9f}".format(cc)
        
print "Optimization finished!"
training_cost = sess.run(cost, feed_dict = {x: xInput, y_: yInput})
print "Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b)

Training step: 0000 cost= 0.114958666
Training step: 0050 cost= 0.109539941
Training step: 0100 cost= 0.109539866
Training step: 0150 cost= 0.109539807
Training step: 0200 cost= 0.109539732
Training step: 0250 cost= 0.109539673
Training step: 0300 cost= 0.109539606
Training step: 0350 cost= 0.109539531
Training step: 0400 cost= 0.109539464
Training step: 0450 cost= 0.109539405
Training step: 0500 cost= 0.109539315
Training step: 0550 cost= 0.109539248
Training step: 0600 cost= 0.109539196
Training step: 0650 cost= 0.109539129
Training step: 0700 cost= 0.109539054
Training step: 0750 cost= 0.109538987
Training step: 0800 cost= 0.109538913
Training step: 0850 cost= 0.109538853
Training step: 0900 cost= 0.109538779
Training step: 0950 cost= 0.109538712
Training step: 1000 cost= 0.109538652
Training step: 1050 cost= 0.109538577
Training step: 1100 cost= 0.109538510
Training step: 1150 cost= 0.109538436
Training step: 1200 cost= 0.109538361
Training step: 1250 cost= 0.109538302
Training ste

In [46]:
sess.run(y, feed_dict={x: xInput})

array([[ 0.71125221,  0.28874779],
       [ 0.66498977,  0.33501023],
       [ 0.73657656,  0.26342347],
       [ 0.64718789,  0.35281211],
       [ 0.78335613,  0.2166439 ],
       [ 0.70069474,  0.29930523],
       [ 0.65866327,  0.34133676],
       [ 0.64828628,  0.35171372],
       [ 0.64368278,  0.35631716],
       [ 0.65480113,  0.3451989 ]], dtype=float32)

In [None]:
# It is saying that all houses are a good buy! 7/10
# how to improve? add a hidden layer