## TensorFlow basics: Lab 4
### Multivariable linear regression

Instead of a single-variable hypothesis $H(x) = wx+b$, we can use several input variables: $H(x_1, \dots, x_n) = w_1 x_1 + w_2 x_2 + \dots + w_n x_n + b$. For the implementation, this can be written as a matrix multiplication:
\begin{align}
(x_1 \,\,\, x_2 \,\,\, x_3) \cdot (w_1 \,\,\, w_2 \,\,\, w_3)^T = (x_1 w_1 + x_2 w_2 + x_3 w_3) \Rightarrow H(X) = X W 
\end{align}
For multiple instances (one row of $X$ per instance):
\begin{align}
\begin{pmatrix}
x_{11} & x_{12} \\
x_{21} & x_{22}
\end{pmatrix}
\cdot
\begin{pmatrix}
w_1\\
w_2
\end{pmatrix}
=
\begin{pmatrix}
x_{11} w_1 + x_{12} w_2 \\
x_{21} w_1 + x_{22} w_2
\end{pmatrix}
\end{align}

Again, this has the form $H(X) = XW$. Note that the shapes must satisfy $[n,m] \cdot [m,l] = [n,l]$, i.e. the number of columns of $X$ must equal the number of rows of $W$.

In [10]:
import tensorflow as tf

In [11]:
# Training data: three input features (x1, x2, x3) and one target (y),
# five instances each.
x1_data = [73., 93., 89., 96., 73.]
x2_data = [80., 88., 91., 98., 66.]
x3_data = [75., 93., 90., 100., 70.]
y_data = [152., 185., 180., 196., 142.]

# Placeholders for tensors that are always fed at run time
x1 = tf.placeholder(tf.float32)
x2 = tf.placeholder(tf.float32)
x3 = tf.placeholder(tf.float32)

Y = tf.placeholder(tf.float32)

# One trainable weight per feature plus a bias, all drawn from a normal
# distribution at initialization.
w1 = tf.Variable(tf.random_normal([1]), name='weight1')
w2 = tf.Variable(tf.random_normal([1]), name='weight2')
w3 = tf.Variable(tf.random_normal([1]), name='weight3')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis H = x1*w1 + x2*w2 + x3*w3 + b, evaluated for every fed instance
hypo = x1 * w1 + x2 * w2 + x3 * w3 + b

# Cost: mean squared error between prediction and target
cost = tf.reduce_mean(tf.square(hypo - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Run the session as a context manager so that it is closed (and its
# resources released) when training ends, even if a step raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        # Fetching `train` performs one gradient-descent update; the cost and
        # predictions are fetched in the same run call.
        cost_val, hy_val, _ = sess.run(
            [cost, hypo, train],
            feed_dict={x1: x1_data, x2: x2_data, x3: x3_data, Y: y_data})
        if step % 100 == 0:
            print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)

step: 0 cost: 3373.59 
Prediction:
 [ 104.33953857  119.79135132  120.9046936   131.21409607   90.4752121 ]
step: 100 cost: 9.58387 
Prediction:
 [ 155.67913818  181.72425842  181.8105011   197.55088806  137.76094055]
step: 200 cost: 9.09063 
Prediction:
 [ 155.56242371  181.80444336  181.77494812  197.52372742  137.86738586]
step: 300 cost: 8.62332 
Prediction:
 [ 155.44882202  181.88244629  181.74032593  197.4972229   137.97102356]
step: 400 cost: 8.18063 
Prediction:
 [ 155.33828735  181.95838928  181.70663452  197.47140503  138.07189941]
step: 500 cost: 7.76132 
Prediction:
 [ 155.23069763  182.03224182  181.67384338  197.44621277  138.17007446]
step: 600 cost: 7.36409 
Prediction:
 [ 155.1260376   182.10417175  181.64196777  197.42170715  138.26567078]
step: 700 cost: 6.98779 
Prediction:
 [ 155.02416992  182.17414856  181.6109314   197.39781189  138.35871887]
step: 800 cost: 6.63136 
Prediction:
 [ 154.92506409  182.24224854  181.58076477  197.37454224  138.4493103 ]
step: 900 co

Now feed the input in matrix form.

In [1]:
%reset -f
import tensorflow as tf

In [2]:
# Each inner list of x_data is one instance (three features); the inner list
# at the same position in y_data is its target.
x_data = [[73., 80., 75.],
          [93., 88., 93.],
          [89., 91., 90.],
          [96., 98., 100.],
          [73., 66., 70.]]
y_data = [[152.], [185.], [180.], [196.], [142.]]

# Placeholders; None in the shape allows any number of rows (instances)
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# W is [3, 1] so that [n, 3] x [3, 1] -> [n, 1], matching the shape of Y
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis in XW + b form, using matrix multiplication
hypo = tf.matmul(X, W) + b

# Cost: mean squared error between prediction and target
cost = tf.reduce_mean(tf.square(hypo - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Run the session as a context manager so that it is closed (and its
# resources released) when training ends, even if a step raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        # Fetching `train` performs one gradient-descent update; the cost and
        # predictions are fetched in the same run call.
        cost_val, hy_val, _ = sess.run(
            [cost, hypo, train], feed_dict={X: x_data, Y: y_data})
        if step % 100 == 0:
            print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)

step: 0 cost: 4786.4 
Prediction:
 [[  81.2086792 ]
 [ 117.38716888]
 [ 105.37864685]
 [ 115.69789886]
 [  93.7061615 ]]
step: 100 cost: 36.9226 
Prediction:
 [[ 142.58474731]
 [ 190.70448303]
 [ 177.8547821 ]
 [ 194.59439087]
 [ 149.53970337]]
step: 200 cost: 34.9871 
Prediction:
 [[ 142.81799316]
 [ 190.54473877]
 [ 177.92642212]
 [ 194.64492798]
 [ 149.3311615 ]]
step: 300 cost: 33.1536 
Prediction:
 [[ 143.04499817]
 [ 190.38923645]
 [ 177.99613953]
 [ 194.69407654]
 [ 149.12820435]]
step: 400 cost: 31.4169 
Prediction:
 [[ 143.26594543]
 [ 190.23788452]
 [ 178.0639801 ]
 [ 194.74186707]
 [ 148.93069458]]
step: 500 cost: 29.7719 
Prediction:
 [[ 143.48098755]
 [ 190.09057617]
 [ 178.1300354 ]
 [ 194.7883606 ]
 [ 148.73846436]]
step: 600 cost: 28.2134 
Prediction:
 [[ 143.69035339]
 [ 189.94721985]
 [ 178.1943512 ]
 [ 194.83360291]
 [ 148.55142212]]
step: 700 cost: 26.7374 
Prediction:
 [[ 143.89407349]
 [ 189.80767822]
 [ 178.25694275]
 [ 194.87757874]
 [ 148.36936951]]
step: 800 c

Now consider how to load the data from a file.

In [5]:
%reset -f
import tensorflow as tf
import numpy as np

In [6]:
# Read the whole comma-separated file into a single float32 matrix.
xy = np.loadtxt(
    '../data/data-01-test-score.csv',
    dtype=np.float32,
    delimiter=',',
)
# All columns but the last are the input features ...
x_data = xy[:, :-1]
# ... and the last column is the target; the -1: slice keeps it two-dimensional.
y_data = xy[:, -1:]

# Show the shapes and contents of the data set
print(x_data.shape, x_data, len(x_data))
print(y_data.shape, y_data)

(6, 3) [[  73.   80.   75.]
 [  93.   88.   93.]
 [  89.   91.   90.]
 [  96.   98.  100.]
 [  73.   66.   70.]
 [  53.   46.   55.]] 6
(6, 1) [[ 152.]
 [ 185.]
 [ 180.]
 [ 196.]
 [ 142.]
 [ 101.]]


In [7]:
# Inputs and targets are fed at run time; a None dimension accepts any
# number of rows.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, randomly initialized.
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear hypothesis XW + b via matrix multiplication.
hypo = tf.matmul(X, W) + b

# Mean squared error between predictions and targets.
cost = tf.reduce_mean(tf.square(hypo - Y))

# Plain gradient descent on the cost.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# The session is left open on purpose: `sess`, `hypo` and `X` are used again
# below to query predictions.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for step in range(5001):
    # One optimization step; cost and predictions come from the same run call.
    cost_val, hy_val, _ = sess.run(
        [cost, hypo, train],
        feed_dict={X: x_data, Y: y_data},
    )
    # Report progress every 1000 steps.
    if step % 1000 == 0:
        print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)

step: 0 cost: 51.5387 
Prediction:
 [[ 163.62705994]
 [ 188.26223755]
 [ 189.99159241]
 [ 203.5710144 ]
 [ 144.4967041 ]
 [ 100.87438965]]
step: 1000 cost: 7.46297 
Prediction:
 [[ 156.36949158]
 [ 182.03544617]
 [ 182.54788208]
 [ 195.72898865]
 [ 140.15042114]
 [  98.3711853 ]]
step: 2000 cost: 4.28506 
Prediction:
 [[ 155.30825806]
 [ 182.55172729]
 [ 182.11721802]
 [ 195.45347595]
 [ 140.83016968]
 [  99.38075256]]
step: 3000 cost: 2.6482 
Prediction:
 [[ 154.54663086]
 [ 182.9214325 ]
 [ 181.80751038]
 [ 195.25880432]
 [ 141.31396484]
 [ 100.1076355 ]]
step: 4000 cost: 1.80419 
Prediction:
 [[ 153.99989319]
 [ 183.18600464]
 [ 181.58457947]
 [ 195.12193298]
 [ 141.65736389]
 [ 100.63166809]]
step: 5000 cost: 1.36808 
Prediction:
 [[ 153.60728455]
 [ 183.37527466]
 [ 181.42393494]
 [ 195.026474  ]
 [ 141.90022278]
 [ 101.01010132]]


In [8]:
# Query the trained model: each inner list is one instance with three features.
my_input = [[100, 70, 101]]
other_inputs = [[60, 70, 110], [90, 100, 80]]

print("Your score will be", sess.run(hypo, feed_dict={X: my_input}))
print("Other scores will be", sess.run(hypo, feed_dict={X: other_inputs}))

Your score will be [[ 175.03944397]]
Other scores will be [[ 123.0182724 ]
 [ 192.18850708]]


We can use a filename queue to read data from large or multiple files.

In [10]:
%reset -f
import tensorflow as tf
import numpy as np

# Queue of input file names; the reader below pulls file names from it.
filename_queue = tf.train.string_input_producer(
    ['../data/data-01-test-score.csv'], shuffle=False, name='filename_queue')
# Multiple files can be listed above: ['file1', 'file2', ...]

# Read the files one text line at a time.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, used in case of empty columns. They also specify the type
# of the decoded result.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect decoded rows into batches of 10: all columns but the last are the
# inputs, the last column is the target.
train_x_batch, train_y_batch = \
    tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# Placeholders; None in the shape allows any number of rows
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis in XW + b form
hypo = tf.matmul(X, W) + b

# Cost: mean squared error between prediction and target
cost = tf.reduce_mean(tf.square(hypo - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Start the threads that populate the filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for step in range(2001):
        # Pull the next batch out of the queue, then feed it to the model.
        x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
        cost_val, hy_val, _ = sess.run(
            [cost, hypo, train], feed_dict={X: x_batch, Y: y_batch})
        if step % 100 == 0:
            print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)
finally:
    # Always stop and join the queue-runner threads, even if a training step
    # raised; otherwise they would be left running in the background.
    coord.request_stop()
    coord.join(threads)



    

step: 0 cost: 14717.4 
Prediction:
 [[ 42.69482422]
 [ 52.86798859]
 [ 51.45066071]
 [ 54.50455475]
 [ 41.89380264]
 [ 28.56013107]
 [ 42.69482422]
 [ 52.86798859]
 [ 51.45066071]
 [ 54.50455475]]
step: 100 cost: 0.626312 
Prediction:
 [[ 142.04830933]
 [ 102.38562012]
 [ 151.93344116]
 [ 184.17515564]
 [ 180.82421875]
 [ 195.39202881]
 [ 142.04830933]
 [ 102.38562012]
 [ 151.93344116]
 [ 184.17515564]]
step: 200 cost: 0.658256 
Prediction:
 [[ 180.82696533]
 [ 195.39242554]
 [ 142.04164124]
 [ 102.37463379]
 [ 151.94216919]
 [ 184.16905212]
 [ 180.82696533]
 [ 195.39242554]
 [ 142.04164124]
 [ 102.37463379]]
step: 300 cost: 0.546823 
Prediction:
 [[ 151.90800476]
 [ 184.11190796]
 [ 180.7791748 ]
 [ 195.33781433]
 [ 141.99610901]
 [ 102.33516693]
 [ 151.90800476]
 [ 184.11190796]
 [ 180.7791748 ]
 [ 195.33781433]]
step: 400 cost: 0.6151 
Prediction:
 [[ 142.03155518]
 [ 102.35559082]
 [ 151.96134949]
 [ 184.16073608]
 [ 180.83540344]
 [ 195.39656067]
 [ 142.03155518]
 [ 102.35559082]
