In [4]:
import pandas as pd # work with data as tables
import numpy as np # use number matrices
import matplotlib.pyplot as plt
import tensorflow as tf

In [5]:
# Step 1 load data
dataframe = pd.read_csv('data.csv') # dataframe

# Remove the features we dont care about
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1)

# We only use the first 10 rows
dataframe = dataframe[0:10]
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [7]:
# Step 2 - add labels
# 1 is goog buy and 0 is bad buy
dataframe.loc[:, ('y1')] = [1,1,1,0,0,1,0,1,1,1]
# y2 is a negation of y1, opposite
dataframe.loc[:, ('y2')] = dataframe['y1'] == 0
# Turn to TRUE/FALSE values to 1s and 0s
dataframe.loc[:, ('y2')] = dataframe['y2'].astype(int)
dataframe

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,1,0
6,1534.0,3.0,0,1
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [12]:
# Step 3 - prepare data for tensorflow (tensors)
# tensors are a generic version of vectors and matrices
# vector - is a list of numbers (1D tensor)
# matrix is a list of list of numbers (2D tensor)

# convert features to input tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
# convert labels to input tensors
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()

In [13]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [14]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0]])

In [51]:
# Step 4 - write out hyperparameters
learning_rate = 0.000001
epochs = 2000
display_step = 50
count_samples = inputY.size

In [52]:
# Step 5 - create our computation graph/neural network
# for feature input, none means any numbers of examples
# placeholder are gateways for data into our computation graph
x = tf.placeholder(tf.float32, [None, 2])

# Create weights
W = tf.Variable(tf.zeros([2, 2]))
# Add biases (example is b in y = mx + b)
b = tf.Variable(tf.zeros([2]))

# multiply our weights by our inputs, first calculation
# weights are how we govern how data flows in our computation graph
y_values = tf.add(tf.matmul(x, W), b)

# apply softmax to value we just created
# softmax is our activation function
y = tf.nn.softmax(y_values)

# feed in a matrix of labels
y_ = tf.placeholder(tf.float32, [None, 2])

In [53]:
# Step 6 - perform training
# create our cost function, mean squared error
cost = tf.reduce_sum(tf.pow(y_ - y, 2)) / (2 * count_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [54]:
# Initialize variables and tensorflow session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [55]:
for i in range(epochs):
    sess.run(optimizer, feed_dict={x: inputX, y_:inputY})
    
    if (i % display_step == 0):
        cc = sess.run(cost, feed_dict={x: inputX, y_: inputY})
        print("Training step: ", '%04d' % (i), " cost = ", "{:.9f}".format(cc))

print("Optimizer finished!")

training_cost = sess.run(cost, feed_dict={x: inputX, y_:inputY})
print("Training cost = ", training_cost, "\nW = ", sess.run(W), "\nb = ", sess.run(b))

Training step:  0000  cost =  0.114958666
Training step:  0050  cost =  0.109539941
Training step:  0100  cost =  0.109539866
Training step:  0150  cost =  0.109539807
Training step:  0200  cost =  0.109539732
Training step:  0250  cost =  0.109539673
Training step:  0300  cost =  0.109539606
Training step:  0350  cost =  0.109539531
Training step:  0400  cost =  0.109539464
Training step:  0450  cost =  0.109539405
Training step:  0500  cost =  0.109539315
Training step:  0550  cost =  0.109539248
Training step:  0600  cost =  0.109539196
Training step:  0650  cost =  0.109539129
Training step:  0700  cost =  0.109539054
Training step:  0750  cost =  0.109538987
Training step:  0800  cost =  0.109538913
Training step:  0850  cost =  0.109538853
Training step:  0900  cost =  0.109538779
Training step:  0950  cost =  0.109538712
Training step:  1000  cost =  0.109538652
Training step:  1050  cost =  0.109538577
Training step:  1100  cost =  0.109538510
Training step:  1150  cost =  0.10

In [56]:
sess.run(y, feed_dict={x: inputX})

array([[ 0.71125221,  0.28874779],
       [ 0.66498977,  0.33501023],
       [ 0.73657656,  0.26342347],
       [ 0.64718789,  0.35281211],
       [ 0.78335613,  0.2166439 ],
       [ 0.70069474,  0.29930523],
       [ 0.65866327,  0.34133676],
       [ 0.64828628,  0.35171372],
       [ 0.64368278,  0.35631716],
       [ 0.65480113,  0.3451989 ]], dtype=float32)