In [1]:
### tensor flow supervised practice
### practice following along "How to Use Tensorflow for Classification (LIVE)

import tensorflow as tf
import pandas as pd # work with data as tables
import numpy as np # number matrices
import matplotlib.pyplot as plt

In [2]:
# step 1 load data

dataframe = pd.read_csv('data.csv') # dataframe
dataframe = dataframe.drop(['index', 'price', 'sq_price'], axis=1) # drop unused rows
dataframe = dataframe[0:10]
dataframe

Unnamed: 0,area,bathrooms
0,2104.0,3.0
1,1600.0,3.0
2,2400.0,3.0
3,1416.0,2.0
4,3000.0,4.0
5,1985.0,4.0
6,1534.0,3.0
7,1427.0,3.0
8,1380.0,3.0
9,1494.0,3.0


In [3]:
# step 2 - add label 

# 1 is good buy and 0 is bad buy
dataframe.loc[:, ('y1')] = [1,1,1,0,0,0,1,1,1,1] 
#y2 is opposite of y1 // a negation
dataframe.loc[:, ('y2')] = dataframe['y1'] == 0
# turn true/false values to 1 or 0
dataframe.loc[:, ('y2')] = dataframe['y2'].astype(int)
dataframe

# thinking point: what does .loc do for you here? why is making y2 important?

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,1,0
3,1416.0,2.0,0,1
4,3000.0,4.0,0,1
5,1985.0,4.0,0,1
6,1534.0,3.0,1,0
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,1,0


In [4]:
# step 3 - prepare data for tensorflow (tensors)
# tensors are generic vector/matrix
# vector - 1D list of numbers
# matrix - 2D list of list of numbers
# list of list of list is 3D, etc.

# convert features to input tensor
inputX = dataframe.loc[:, ['area', 'bathrooms']].as_matrix()
#convert labes to input tensors
inputY = dataframe.loc[:, ['y1', 'y2']].as_matrix()


In [5]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [6]:
inputY

array([[1, 0],
       [1, 0],
       [1, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0]])

In [7]:
# Step 4 - write out our hyperparameters
learning_rate = 0.000001 # how quickly to learn
training_epochs = 2000 # number of times to train? look up epoch definition
display_steps = 50 # display at each step n
n_samples = inputY.size

In [8]:
# Step 5 - Create our computation graph/neural network

# placeholders are "gateways for data into our computational graph"
x = tf.placeholder(tf.float32, [None,2]) # what EXACTLY does this mean

# create weights
# 2x2 float matrix
# variables in tf hold and update parameters as in memory buffers containing tensors
W = tf.Variable(tf.zeros([2,2]))

# add biasis. want two biasis for two input
# example b in y = mx + b - b is bias
b = tf.Variable(tf.zeros([2]))

# multiply our weights by our inputs, first calculation
# weights are how we govern how data flows in our computation graph
# multiply input by weights and add biases
y_values = tf.add(tf.matmul(x, W), b)

# softmax <=> sigmoid
# apply softmax to value we just created
# softmax is our activation function
# softmax normalizes aka converts to probability to feed to output
y = tf.nn.softmax(y_values)

# feed in matrix of placeholders
y_ = tf.placeholder(tf.float32, [None,2])

In [9]:
# Step 6 - perform training set
# create our cost function, mean squared error
# reduce sum computes the sum of elements across dimensions of a tensor
cost = tf.reduce_sum(tf.pow(y_ - y, 2))/(2*n_samples)
# Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [10]:
# initialize variables and tensorflow session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [11]:
# training loop
for i in range(training_epochs):
    sess.run(optimizer, feed_dict={x:inputX, y_: inputY})
    
    #write out logs of training
    if (i) % display_steps == 0:
        cc = sess.run(cost, feed_dict={x:inputX, y_:inputY})
        print("Training steps:", '%04d' % (i), "cost=", "{:.9f}".format(cc))
        
print("Finished")
training_cost = sess.run(cost, feed_dict={x:inputX, y_:inputY})
print("Training Cost", training_cost, "W=", sess.run(W),"b=", sess.run(b))

Training steps: 0000 cost= 0.117558062
Training steps: 0050 cost= 0.113512419
Training steps: 0100 cost= 0.113512360
Training steps: 0150 cost= 0.113512300
Training steps: 0200 cost= 0.113512233
Training steps: 0250 cost= 0.113512173
Training steps: 0300 cost= 0.113512114
Training steps: 0350 cost= 0.113512039
Training steps: 0400 cost= 0.113511994
Training steps: 0450 cost= 0.113511920
Training steps: 0500 cost= 0.113511860
Training steps: 0550 cost= 0.113511801
Training steps: 0600 cost= 0.113511726
Training steps: 0650 cost= 0.113511667
Training steps: 0700 cost= 0.113511607
Training steps: 0750 cost= 0.113511562
Training steps: 0800 cost= 0.113511488
Training steps: 0850 cost= 0.113511421
Training steps: 0900 cost= 0.113511361
Training steps: 0950 cost= 0.113511287
Training steps: 1000 cost= 0.113511227
Training steps: 1050 cost= 0.113511160
Training steps: 1100 cost= 0.113511108
Training steps: 1150 cost= 0.113511041
Training steps: 1200 cost= 0.113510989
Training steps: 1250 cost

In [12]:
sess.run(y, feed_dict={x:inputX})

array([[ 0.6835745 ,  0.31642553],
       [ 0.64240432,  0.35759568],
       [ 0.70652485,  0.29347512],
       [ 0.62677217,  0.37322792],
       [ 0.74992591,  0.25007415],
       [ 0.67410034,  0.32589966],
       [ 0.63683718,  0.36316276],
       [ 0.62773395,  0.37226608],
       [ 0.62370604,  0.37629399],
       [ 0.63344514,  0.3665548 ]], dtype=float32)

In [13]:
# improve by adding hidden layers