In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [12]:
#loading data(reading as a fataframe object)
df = pd.read_csv('/root/Desktop/data.csv')
df.head()

Unnamed: 0,index,area,bathrooms,price,sq_price
0,0,2104.0,3.0,399900.0,190.06654
1,1,1600.0,3.0,329900.0,206.1875
2,2,2400.0,3.0,369000.0,153.75
3,3,1416.0,2.0,232000.0,163.841808
4,4,3000.0,4.0,539900.0,179.966667


In [13]:
#removing the features we don't care about
df = df.drop(['index', 'price', 'sq_price'], axis=1)
#using only the first 10 rows
df = df[0:10]
print df

     area  bathrooms
0  2104.0        3.0
1  1600.0        3.0
2  2400.0        3.0
3  1416.0        2.0
4  3000.0        4.0
5  1985.0        4.0
6  1534.0        3.0
7  1427.0        3.0
8  1380.0        3.0
9  1494.0        3.0


In [15]:
#Now, we add labels to the dataset
#1 is good buy and 0 is bad buy(introducing labels by
#ourself)
df.loc[:, ('y1')] = [1,1,0,1,0,0,1,1,1,0]
#y2 is a negation of y1
df.loc[:, ('y2')] = df['y1'] == 0
#turn TRUE/FALSE  values to 1s and os for negation
df.loc[:, ('y2')] = df['y2'].astype(int)
df

Unnamed: 0,area,bathrooms,y1,y2
0,2104.0,3.0,1,0
1,1600.0,3.0,1,0
2,2400.0,3.0,0,1
3,1416.0,2.0,1,0
4,3000.0,4.0,0,1
5,1985.0,4.0,0,1
6,1534.0,3.0,1,0
7,1427.0,3.0,1,0
8,1380.0,3.0,1,0
9,1494.0,3.0,0,1


In [17]:
#preparing the data for tensorflow(tensors)
#tensors are generic versions of vectors and matrices
#a vector is a 1D tensor
#converting features to input tensors
inputX = df.loc[:, ['area','bathrooms']].as_matrix()
#convert labels to tensors
inputY = df.loc[:, ['y1','y2']].as_matrix()

In [18]:
inputX

array([[  2.10400000e+03,   3.00000000e+00],
       [  1.60000000e+03,   3.00000000e+00],
       [  2.40000000e+03,   3.00000000e+00],
       [  1.41600000e+03,   2.00000000e+00],
       [  3.00000000e+03,   4.00000000e+00],
       [  1.98500000e+03,   4.00000000e+00],
       [  1.53400000e+03,   3.00000000e+00],
       [  1.42700000e+03,   3.00000000e+00],
       [  1.38000000e+03,   3.00000000e+00],
       [  1.49400000e+03,   3.00000000e+00]])

In [19]:
inputY

array([[1, 0],
       [1, 0],
       [0, 1],
       [1, 0],
       [0, 1],
       [0, 1],
       [1, 0],
       [1, 0],
       [1, 0],
       [0, 1]])

In [20]:
#Write out the hyperparameters
#learning rate determines how fast the model converges
learning_rate = 0.00001
#number of iterations for which the model will train
training_epochs = 2000
display_step = 50
n_samples = inputY.size

In [26]:
#Creation of the neural network using tensorflow
#for feature input tensors, none means any number of 
#examples and 2 implies that 2 features are there
#placeholders are gateways for data into the computation
#graph
x = tf.placeholder(tf.float32,[None,2])
#create weights
#2*2 float matrix, that keeps on updating while the 
#training process
#variables in tf hold and update parameters in memory
w = tf.Variable(tf.zeros([2,2]))
#add biases, they help in training the model better
b = tf.Variable(tf.zeros([2]))
#multiply the weights by inputs
#multiply the input by weights and add biases
y_values = tf.add(tf.matmul(x, w),b)
#apply softmax to the value which we just created, it is
#the activation function or normalises the values
y = tf.nn.softmax(y_values)
#feed in a matrix of labels
y2 = tf.placeholder(tf.float32,[None,2])

In [27]:
#perform the training work
#create the cost function of mean squared error
#reduce sum computes the sum of elements across the 
#dimensions of tensor
cost = tf.reduce_sum(tf.pow(y2-y,2))/(2*n_samples)
#Gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

In [32]:
#initialize variables and tensorflow session
init = tf.initialize_all_variables()
session = tf.Session()
session.run(init)

Instructions for updating:
Use `tf.global_variables_initializer` instead.


In [34]:
#training in loops
for i in range(training_epochs):
    session.run(optimizer, feed_dict={x:inputX, y2:inputY})
    #writing out logs of training
    if(i) % display_step == 0:
        cc = session.run(cost,feed_dict={x:inputX, y2 : inputY})
        print "Training Step:",'%04d'%(i), "cost=","{:.9f}".format(cc)
#training_cost = session.run(cost, feed_dict={x:inputX, y2:inputY})
print "Training Cost=" ,cc,'\n'
print "W=",session.run(w),'\n'
print "b=",session.run(b)                                                                  

Training Step: 0000 cost= 0.185581103
Training Step: 0050 cost= 0.131538585
Training Step: 0100 cost= 0.185581207
Training Step: 0150 cost= 0.131537542
Training Step: 0200 cost= 0.185581341
Training Step: 0250 cost= 0.131536424
Training Step: 0300 cost= 0.185581446
Training Step: 0350 cost= 0.131535366
Training Step: 0400 cost= 0.185581565
Training Step: 0450 cost= 0.131534293
Training Step: 0500 cost= 0.185581684
Training Step: 0550 cost= 0.131533265
Training Step: 0600 cost= 0.185581774
Training Step: 0650 cost= 0.131532177
Training Step: 0700 cost= 0.185581893
Training Step: 0750 cost= 0.131531075
Training Step: 0800 cost= 0.185582012
Training Step: 0850 cost= 0.131529972
Training Step: 0900 cost= 0.185582116
Training Step: 0950 cost= 0.131528929
Training Step: 1000 cost= 0.185582235
Training Step: 1050 cost= 0.131527811
Training Step: 1100 cost= 0.185582355
Training Step: 1150 cost= 0.131526783
Training Step: 1200 cost= 0.185582474
Training Step: 1250 cost= 0.131525710
Training Ste

In [35]:
session.run(y,feed_dict = {x:inputX})

array([[ 0.21877393,  0.78122598],
       [ 0.27544969,  0.72455031],
       [ 0.18963996,  0.81036001],
       [ 0.29809868,  0.70190132],
       [ 0.13998245,  0.8600176 ],
       [ 0.23150511,  0.76849484],
       [ 0.28351003,  0.71648997],
       [ 0.29687563,  0.7031244 ],
       [ 0.3028602 ,  0.6971398 ],
       [ 0.28846386,  0.71153617]], dtype=float32)

In [None]:
#model can be improved by adding more hidden layers