## TensorFlow Basics: Lab 4
### Multivariable linear regression

Instead of a single-variable hypothesis $H(x) = wx+b$, we can use several input variables: $H(x_1, \dots, x_n) = w_1 x_1 + w_2 x_2 + ... + w_n x_n + b$. For the implementation, this is conveniently written as a matrix multiplication:
\begin{align}
(x_1 \,\,\, x_2 \,\,\, x_3) \cdot (w_1 \,\,\, w_2 \,\,\, w_3)^T = (x_1 w_1 + x_2 w_2 + x_3 w_3) \Rightarrow H(X) = X W 
\end{align}
For multiple instances (one instance per row of $X$), the same product handles all of them at once:
\begin{align}
\begin{pmatrix}
x_{11} & x_{12} \\
x_{21} & x_{22}
\end{pmatrix}
\cdot
\begin{pmatrix}
w_1\\
w_2
\end{pmatrix}
=
\begin{pmatrix}
x_{11} w_1 + x_{12} w_2 \\
x_{21} w_1 + x_{22} w_2
\end{pmatrix}
\end{align}

Again, this has the form $H(X) = X W$. Note that the shapes must satisfy $[n,m] \cdot [m,l] = [n,l]$, i.e. the number of columns of $X$ must equal the number of rows of $W$.

In [10]:
import tensorflow as tf

In [11]:
# Training data: three input features (x1, x2, x3) and one target (y),
# five samples each.
x1_data = [73., 93., 89., 96., 73.]
x2_data = [80., 88., 91., 98., 66.]
x3_data = [75., 93., 90., 100., 70.]
y_data = [152., 185., 180., 196., 142.]

# Placeholders for tensors that will always be fed at run time
x1 = tf.placeholder(tf.float32)
x2 = tf.placeholder(tf.float32)
x3 = tf.placeholder(tf.float32)

Y = tf.placeholder(tf.float32)

# One trainable weight per feature plus a bias, randomly initialised
w1 = tf.Variable(tf.random_normal([1]), name='weight1')
w2 = tf.Variable(tf.random_normal([1]), name='weight2')
w3 = tf.Variable(tf.random_normal([1]), name='weight3')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis: H = x1*w1 + x2*w2 + x3*w3 + b
hypo = x1*w1 + x2*w2 + x3*w3 + b

# Cost: mean squared error between the hypothesis and the targets
cost = tf.reduce_mean(tf.square(hypo - Y))

# Plain gradient descent on the cost
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# The same data is fed on every step, so build the feed dict once.
feed = {x1: x1_data, x2: x2_data, x3: x3_data, Y: y_data}

# Use the session as a context manager so its resources are released when
# training ends (or fails) instead of leaking an open session.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        # Fetch the cost and predictions and apply one update in a single run
        cost_val, hy_val, _ = sess.run([cost, hypo, train], feed_dict=feed)
        if step % 100 == 0:
            print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)

step: 0 cost: 3373.59 
Prediction:
 [ 104.33953857  119.79135132  120.9046936   131.21409607   90.4752121 ]
step: 100 cost: 9.58387 
Prediction:
 [ 155.67913818  181.72425842  181.8105011   197.55088806  137.76094055]
step: 200 cost: 9.09063 
Prediction:
 [ 155.56242371  181.80444336  181.77494812  197.52372742  137.86738586]
step: 300 cost: 8.62332 
Prediction:
 [ 155.44882202  181.88244629  181.74032593  197.4972229   137.97102356]
step: 400 cost: 8.18063 
Prediction:
 [ 155.33828735  181.95838928  181.70663452  197.47140503  138.07189941]
step: 500 cost: 7.76132 
Prediction:
 [ 155.23069763  182.03224182  181.67384338  197.44621277  138.17007446]
step: 600 cost: 7.36409 
Prediction:
 [ 155.1260376   182.10417175  181.64196777  197.42170715  138.26567078]
step: 700 cost: 6.98779 
Prediction:
 [ 155.02416992  182.17414856  181.6109314   197.39781189  138.35871887]
step: 800 cost: 6.63136 
Prediction:
 [ 154.92506409  182.24224854  181.58076477  197.37454224  138.4493103 ]
step: 900 co

Now feed the input in matrix form.

In [12]:
%clear -f
import tensorflow as tf

[H[2J

In [18]:
# Each inner list of x_data is one instance (three features);
# each inner list of y_data is the target for the matching x_data row.
x_data = [[73.,80.,75.],[93.,88.,93.],[89.,91.,90.],[96.,98.,100.],[73.,66.,70.]]
y_data = [[152.],[185.],[180.],[196.],[142.]]

# Placeholders: None leaves the number of rows unspecified
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Weights are [3, 1] so that [n, 3] x [3, 1] -> [n, 1]
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis in XW + b form, using matrix multiplication
hypo = tf.matmul(X, W) + b

# Cost: mean squared error between the hypothesis and the targets
cost = tf.reduce_mean(tf.square(hypo - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# The same data is fed on every step, so build the feed dict once.
feed = {X: x_data, Y: y_data}

# Use the session as a context manager so its resources are released when
# training ends (or fails) instead of leaking an open session.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(2001):
        # Fetch the cost and predictions and apply one update in a single run
        cost_val, hy_val, _ = sess.run([cost, hypo, train], feed_dict=feed)
        if step % 100 == 0:
            print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)

step: 0 cost: 57407.0 
Prediction:
 [[-51.79951859]
 [-77.09215546]
 [-68.389534  ]
 [-76.0223999 ]
 [-60.76807022]]
step: 100 cost: 37.6019 
Prediction:
 [[ 160.48045349]
 [ 178.50872803]
 [ 183.22105408]
 [ 198.00256348]
 [ 134.28187561]]
step: 200 cost: 35.6248 
Prediction:
 [[ 160.24520874]
 [ 178.66999817]
 [ 183.14894104]
 [ 197.95069885]
 [ 134.49328613]]
step: 300 cost: 33.752 
Prediction:
 [[ 160.01622009]
 [ 178.82695007]
 [ 183.07873535]
 [ 197.90016174]
 [ 134.69900513]]
step: 400 cost: 31.978 
Prediction:
 [[ 159.79336548]
 [ 178.97970581]
 [ 183.01043701]
 [ 197.85099792]
 [ 134.89926147]]
step: 500 cost: 30.2976 
Prediction:
 [[ 159.57646179]
 [ 179.12835693]
 [ 182.94392395]
 [ 197.80314636]
 [ 135.09413147]]
step: 600 cost: 28.7058 
Prediction:
 [[ 159.36535645]
 [ 179.27305603]
 [ 182.87919617]
 [ 197.75660706]
 [ 135.28379822]]
step: 700 cost: 27.1981 
Prediction:
 [[ 159.15989685]
 [ 179.41387939]
 [ 182.81620789]
 [ 197.71131897]
 [ 135.46838379]]
step: 800 cost: 2

Now consider how to load the data from a file.

In [19]:
%clear -f
import tensorflow as tf
import numpy as np

[H[2J

In [25]:
# Read the whole CSV into a single float32 matrix
xy = np.loadtxt(
    '../data/data-01-test-score.csv',
    delimiter=',',
    dtype=np.float32,
)

# Every column except the last is a feature; the last column is the target.
x_data = xy[:, :-1]
# Indexing with the list [-1] keeps the result two-dimensional (a column)
y_data = xy[:, [-1]]

# Show the shapes and contents of the loaded data set
print(x_data.shape, x_data, len(x_data))
print(y_data.shape, y_data)

(6, 3) [[  73.   80.   75.]
 [  93.   88.   93.]
 [  89.   91.   90.]
 [  96.   98.  100.]
 [  73.   66.   70.]
 [  53.   46.   55.]] 6
(6, 1) [[ 152.]
 [ 185.]
 [ 180.]
 [ 196.]
 [ 142.]
 [ 101.]]


In [58]:
# Graph inputs: any number of rows (None), three features per row,
# and one target value per row.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

# Trainable parameters, randomly initialised
W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Linear hypothesis H(X) = XW + b, via matrix multiplication
hypo = tf.add(tf.matmul(X, W), b)

# Mean squared error of the hypothesis against the targets
residual = tf.subtract(hypo, Y)
cost = tf.reduce_mean(tf.square(residual))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# The session is left open on purpose: it is reused after training
# to query the fitted model.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

fetches = [cost, hypo, train]
feed = {X: x_data, Y: y_data}
for step in range(5001):
    cost_val, hy_val, _ = sess.run(fetches, feed_dict=feed)
    if step % 1000:
        # Only report every 1000th step
        continue
    print("step:", step, "cost:", cost_val, "\nPrediction:\n", hy_val)

step: 0 cost: 35911.2 
Prediction:
 [[-32.64051437]
 [-27.05799866]
 [-33.05258942]
 [-32.98287201]
 [-20.38162994]
 [ -9.51700115]]
step: 1000 cost: 15.1309 
Prediction:
 [[ 147.01899719]
 [ 185.84634399]
 [ 178.32501221]
 [ 196.76850891]
 [ 141.61300659]
 [ 108.85575867]]
step: 2000 cost: 9.88694 
Prediction:
 [[ 148.34786987]
 [ 185.22676086]
 [ 178.88638306]
 [ 197.02812195]
 [ 140.87832642]
 [ 107.50975037]]
step: 3000 cost: 7.04906 
Prediction:
 [[ 149.3039093 ]
 [ 184.78862   ]
 [ 179.29640198]
 [ 197.19044495]
 [ 140.38310242]
 [ 106.5185318 ]]
step: 4000 cost: 5.45917 
Prediction:
 [[ 149.99243164]
 [ 184.4803772 ]
 [ 179.59762573]
 [ 197.28387451]
 [ 140.0584259 ]
 [ 105.78275299]]
step: 5000 cost: 4.52074 
Prediction:
 [[ 150.48899841]
 [ 184.26506042]
 [ 179.8205719 ]
 [ 197.32875061]
 [ 139.85491943]
 [ 105.2310791 ]]


In [39]:
# Query the trained model: one row of three feature values per prediction

my_exams = [[100, 70, 101]]
my_score = sess.run(hypo, feed_dict={X: my_exams})
print("Your score will be", my_score)

other_exams = [[60, 70, 110], [90, 100, 80]]
other_scores = sess.run(hypo, feed_dict={X: other_exams})
print("Other scores will be", other_scores)

Your score will be [[ 182.27368164]]
Other scores will be [[ 187.31791687]
 [ 174.16117859]]


We can use a queue to read large or multiple files.

In [60]:
%clear -f
# NOTE: the alias must be `tf` -- every call below uses `tf.`; importing under
# another alias only works by accident when an earlier cell already bound `tf`.
import tensorflow as tf
import numpy as np

# Queue of input file names; more files can be added to the list,
# e.g. ['file1', 'file2', ...]
filename_queue = tf.train.string_input_producer(
    ['../data/data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Reads one line of text per call from the files in the queue
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, used in case of empty columns. They also specify the type
# of the decoded result (four float columns here).
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect decoded CSV rows into batches of 10:
# all columns but the last are features, the last column is the target.
train_x_batch, train_y_batch = \
  tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# Placeholders
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypo = tf.matmul(X, W) + b

# Cost: mean squared error between the hypothesis and the targets
cost = tf.reduce_mean(tf.square(hypo - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Start the threads that populate the filename queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for step in range(2001):
        # Pull the next batch out of the queue, then feed it to the model
        x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
        cost_val, hy_val, _ = sess.run([cost, hypo, train],
                                       feed_dict={X: x_batch, Y: y_batch})
        if step % 100 == 0:
            print("step:",step,"cost:",cost_val,"\nPrediction:\n",hy_val)
finally:
    # Always stop and join the queue-runner threads, even if training raised,
    # so no background threads are left running.
    coord.request_stop()
    coord.join(threads)



    

[H[2Jstep: 0 cost: 118680.0 
Prediction:
 [[-158.61079407]
 [-187.89002991]
 [-186.11689758]
 [-206.62063599]
 [-139.61180115]
 [-109.13677216]
 [-158.61079407]
 [-187.89002991]
 [-186.11689758]
 [-206.62063599]]
step: 100 cost: 2.46122 
Prediction:
 [[ 144.84068298]
 [ 100.55780792]
 [ 151.64483643]
 [ 185.05117798]
 [ 181.3276062 ]
 [ 193.53787231]
 [ 144.84068298]
 [ 100.55780792]
 [ 151.64483643]
 [ 185.05117798]]
step: 200 cost: 3.18529 
Prediction:
 [[ 181.2359314 ]
 [ 193.44297791]
 [ 144.74012756]
 [ 100.48659515]
 [ 151.58354187]
 [ 184.93772888]
 [ 181.2359314 ]
 [ 193.44297791]
 [ 144.74012756]
 [ 100.48659515]]
step: 300 cost: 2.40466 
Prediction:
 [[ 151.59950256]
 [ 184.91847229]
 [ 181.23635864]
 [ 193.44851685]
 [ 144.71165466]
 [ 100.46885681]
 [ 151.59950256]
 [ 184.91847229]
 [ 181.23635864]
 [ 193.44851685]]
step: 400 cost: 2.36735 
Prediction:
 [[ 144.76823425]
 [ 100.51408386]
 [ 151.70678711]
 [ 185.01034546]
 [ 181.34559631]
 [ 193.57261658]
 [ 144.76823425]
 