# Queue Runners
### Introduction to tf.train.batch
https://www.tensorflow.org/programmers_guide/reading_data

In [1]:
import tensorflow as tf

# string_input_producer
`tf.train.string_input_producer` creates a FIFO queue for holding the filenames until the reader needs them.  
It has options for shuffling (`shuffle`, set to **False** in this notebook) and for setting a maximum number of epochs (`num_epochs`).  
A queue runner adds the whole list of filenames to the queue once for each epoch,  
*shuffling the filenames within an epoch if* **shuffle=True**.

Each execution of read reads a ***single line*** from the file. 


In [2]:
import numpy as np
import time
 
# Reference timestamp; the training cell further down subtracts it from a
# second time.clock() reading to print "Time used".
# NOTE(review): time.clock() was removed in Python 3.8. If it is replaced,
# the matching call in the training cell must switch to the same clock.
start = time.clock()

# Queue of input file names; shuffle=False keeps them in the listed order.
filename_queue = tf.train.string_input_producer(
    string_tensor=['data-01-test-score.csv'],
    shuffle=False,
    name='filename_queue',
)

# Line-oriented reader: each execution of the read op takes its work from the
# filename queue and yields one (key, value) pair, value being one text line.
reader = tf.TextLineReader()
key, value = reader.read(queue=filename_queue)

***Default values, in case of empty columns.***  
Also specifies the type of the decoded result.

The ***decode_csv*** op then parses the result into a list of tensors.

The ***record_defaults*** argument determines the type of the resulting tensors   
and sets the default value to use if a value is missing in the input string.


In [3]:
# One default per CSV column: it is substituted when a field is empty, and
# its float type determines the type of the decoded column.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(records=value, record_defaults=record_defaults)

# decode_csv gives back a list of per-column tensors rather than a 2-D array,
# so NumPy-style indexing such as xy[:, 0:-1] or xy[:, [-1]] raises an error.
# Plain list slicing is used instead: all columns but the last form the x
# group and the last column forms the y group, batched 10 records at a time.
train_x_batch, train_y_batch = tf.train.batch(
    tensors=[xy[:-1], xy[-1:]],
    batch_size=10,
)

In [4]:
# Placeholders that are fed one mini-batch per training step:
# X carries 3 columns per row, Y carries 1 column per row.
X = tf.placeholder(dtype=tf.float32, shape=[None, 3])
Y = tf.placeholder(dtype=tf.float32, shape=[None, 1])

# Trainable parameters, initialised from a normal distribution.
# No seed is given, so the starting values differ from run to run.
W = tf.Variable(initial_value=tf.random_normal(shape=[3, 1]), name='weight')
b = tf.Variable(initial_value=tf.random_normal(shape=[1]), name='bias')

# Hypothesis: linear model X.W + b
hypothesis = tf.matmul(X, W) + b

# Cost: mean of the squared difference between hypothesis and Y
squared_error = tf.square(hypothesis - Y)
cost = tf.reduce_mean(squared_error)

# One gradient-descent step on the cost per execution of `train`
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Open a session and initialise the global variables (W and b).
# NOTE(review): an epoch counter created via string_input_producer(num_epochs=...)
# would be a *local* variable and would need tf.local_variables_initializer()
# in addition to this call.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)


# tf.train.Coordinator

The coordinator is responsible for letting all the threads know when anything has signalled a shutdown.


#  tf.train.start_queue_runners 

Call `tf.train.start_queue_runners` to populate the queue before you call `run` or `eval` to execute the read.

Otherwise the read will **block** (hang) while it waits for filenames from the queue.

https://www.tensorflow.org/programmers_guide/threading_and_queues

In [5]:
# Start input enqueue threads.
# The coordinator is the object through which the threads are later asked to
# stop; the returned thread list is kept so it can be joined after training.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

In [6]:
# The fetch lists never change between steps, so build them once.
batch_fetches = [train_x_batch, train_y_batch]
train_fetches = [cost, hypothesis, train]

for step in range(2001):
    # Pull the next mini-batch out of the batching queue ...
    x_batch, y_batch = sess.run(batch_fetches)
    # ... and feed it to the placeholders for one training step.
    cost_val, hy_val, _ = sess.run(train_fetches,
                                   feed_dict={X: x_batch, Y: y_batch})
    # Report only every 10th step.
    if step % 10:
        continue
    print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# `start` comes from the cell that builds the filename queue and was taken
# with time.clock(); the same clock is used here so the difference is valid.
elapsed = time.clock() - start
print("Time used:", elapsed)


0 Cost:  1892.98 
Prediction:
 [[ 109.53697968]
 [ 133.37252808]
 [ 130.52285767]
 [ 143.58514404]
 [ 100.71166992]
 [  76.17424774]
 [ 108.7512207 ]
 [  82.46498108]
 [ 126.54227448]
 [ 120.20107269]]
10 Cost:  4.04989 
Prediction:
 [[ 152.63623047]
 [ 185.17926025]
 [ 181.565979  ]
 [ 199.17636108]
 [ 140.22294617]
 [ 105.30709076]
 [ 150.36376953]
 [ 113.31462097]
 [ 174.96234131]
 [ 165.05342102]]
20 Cost:  4.67571 
Prediction:
 [[ 152.92849731]
 [ 185.52770996]
 [ 181.91078186]
 [ 199.55160522]
 [ 140.48822021]
 [ 105.50189209]
 [ 150.64501953]
 [ 113.52372742]
 [ 175.28697205]
 [ 165.35321045]]
30 Cost:  4.67868 
Prediction:
 [[ 152.93190002]
 [ 185.52891541]
 [ 181.91346741]
 [ 199.554245  ]
 [ 140.48864746]
 [ 105.50141907]
 [ 150.64735413]
 [ 113.52606201]
 [ 175.2870636 ]
 [ 165.35238647]]
40 Cost:  4.67684 
Prediction:
 [[ 152.93333435]
 [ 185.52775574]
 [ 181.91384888]
 [ 199.55436707]
 [ 140.48730469]
 [ 105.49963379]
 [ 150.64782715]
 [ 113.52700043]
 [ 175.28497314]
 [ 1

# Wait for threads to finish.

In [7]:
# Ask the queue-runner threads to stop, then wait until they have finished.
coord.request_stop()
coord.join(threads)

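# Comparing the results with and without `tf.train.batch`

## *Note: the training result is not stable from run to run*
*(`W` and `b` are initialised with `tf.random_normal` without a seed, so every run starts from different weights.)*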

## without tf.train.batch
**2000 Cost: 17.8371**

Prediction:
 [[ 156.09724426]
 [ 183.48483276]
 [ 182.92565918]
 [ 198.81103516]
 [ 139.30680847]
 [ 101.70875549]
 [ 149.45846558]
 [ 111.7910614 ]
 [ 170.08323669]
 [ 156.3453064 ]
 [ 143.18544006]
 [ 139.24273682]
 [ 189.29745483]
 [ 157.68014526]
 [ 149.4931488 ]
 [ 186.20089722]
 [ 149.38374329]
 [ 180.01205444]
 [ 181.16925049]
 [ 162.50370789]
 [ 174.76806641]
 [ 172.03723145]
 [ 167.09498596]
 [ 158.15000916]
 [ 190.96975708]]
 
**your score will be [[ 173.36975098]]**  
**other scores will be [[ 154.59194946] [ 186.58578491]]**  
**Time used: 10.840138999999999**

## with tf.train.batch
**2000 Cost:  7.65403**  
Prediction:
 [[ 152.10678101]
 [ 186.7678833 ]
 [ 181.92788696]
 [ 198.92625427]
 [ 142.76208496]
 [ 106.92510223]
 [ 148.96435547]
 [ 110.40462494]
 [ 176.42631531]
 [ 165.51098633]]
 
**your score will be [[ 193.04504395]]**  
**other scores will be [[ 161.95559692] [ 175.77114868]]**  
**Time used: 9.697963999999999**

## with tf.train.shuffle_batch
**2000 Cost:  6.70459**
Prediction:
 [[ 184.94348145]
 [ 151.80395508]
 [ 151.68878174]
 [ 177.66270447]
 [ 149.9175415 ]
 [ 183.26254272]
 [ 184.94348145]
 [ 147.39813232]
 [ 149.9175415 ]
 [ 151.68878174]
 [ 168.65119934]
 [ 189.71528625]
 [ 199.2883606 ]
 [ 184.38189697]
 [ 116.21501923]
 [ 184.38189697]
 [ 149.9175415 ]
 [ 151.68878174]
 [ 184.38189697]
 [ 144.53039551]
 [ 147.39813232]
 [ 177.66270447]
 [ 153.13598633]
 [ 190.19641113]
 [ 106.59677887]
 [ 151.68878174]
 [ 168.46600342]
 [ 156.65238953]
 [ 175.60734558]
 [ 144.49568176]]    
**your score will be [[ 191.89680481]]**  
**other scores will be [[ 182.5514679 ] [ 169.17152405]]**  
**Time used: 24.799417**

# Complete code (`tf.train.batch`)

In [8]:
import tensorflow as tf
import numpy as np
import time
 
# End-to-end tf.train.batch example: CSV input queue -> linear model -> training.

# Reference time for the "Time used" report.  time.perf_counter() is used
# because time.clock() was removed in Python 3.8; it measures elapsed
# wall-clock time with the highest available resolution.
start = time.perf_counter()

filename_queue = tf.train.string_input_producer(
   ['data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Reads one text line per execution of the read op.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

# Collect batches of 10 records: all columns but the last as x, the last as y.
train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

# Start the threads that populate the queues.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for step in range(2001):
        # Dequeue one mini-batch, then feed it to the model for one update.
        x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
        cost_val, hy_val, _ = sess.run(
            [cost, hypothesis, train],
            feed_dict={X: x_batch, Y: y_batch})
        if step % 10 == 0:
            print(step, "Cost: ", cost_val,
                  "\nPrediction:\n", hy_val)

    # Ask my Score
    print("your score will be",
          sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
    print("other scores will be",
          sess.run(hypothesis, feed_dict={X: [[60, 70, 100], [90, 100, 80]]}))

    elapsed = time.perf_counter() - start
    print("Time used:", elapsed)
finally:
    # Always stop and join the queue-runner threads, even when training
    # raised, so no background thread is left running in the kernel.
    coord.request_stop()
    coord.join(threads)


0 Cost:  11992.5 
Prediction:
 [[ 262.60955811]
 [ 307.27093506]
 [ 306.93695068]
 [ 337.09307861]
 [ 229.7208252 ]
 [ 172.29496765]
 [ 258.52471924]
 [ 200.66975403]
 [ 288.44003296]
 [ 272.34579468]]
10 Cost:  35.8659 
Prediction:
 [[ 157.71305847]
 [ 181.19764709]
 [ 182.7141571 ]
 [ 201.7973175 ]
 [ 133.57640076]
 [ 101.39997864]
 [ 157.23887634]
 [ 125.56539917]
 [ 170.60671997]
 [ 163.18663025]]
20 Cost:  34.252 
Prediction:
 [[ 156.99621582]
 [ 180.35877991]
 [ 181.87620544]
 [ 200.88111877]
 [ 132.94522095]
 [ 100.93172455]
 [ 156.54135132]
 [ 125.03121185]
 [ 169.82362366]
 [ 162.45535278]]
30 Cost:  34.1 
Prediction:
 [[ 156.98239136]
 [ 180.36495972]
 [ 181.87084961]
 [ 200.87173462]
 [ 132.95852661]
 [ 100.93870544]
 [ 156.52261353]
 [ 125.00029755]
 [ 169.83032227]
 [ 162.45574951]]
40 Cost:  33.9549 
Prediction:
 [[ 156.9733429 ]
 [ 180.37683105]
 [ 181.87112427]
 [ 200.8684845 ]
 [ 132.97616577]
 [ 100.94885254]
 [ 156.5085144 ]
 [ 124.97284698]
 [ 169.84234619]
 [ 162.4

# shuffle_batch
***min_after_dequeue*** defines how big a buffer we will randomly sample from  
-- ***bigger means better shuffling but slower start-up and more memory used.***  
**capacity** must be larger than **min_after_dequeue**,  
and the amount by which it is larger determines the maximum we will prefetch.  
##### Recommendation:
`capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size`

In [9]:
import tensorflow as tf
import numpy as np
import time
 
# Reference timestamp; the training cell subtracts it from a second
# time.clock() reading to print "Time used".
# NOTE(review): time.clock() was removed in Python 3.8. If it is replaced,
# the matching call in the training cell must switch to the same clock.
start = time.clock()

# Queue of input file names; shuffle=False keeps them in the listed order.
filename_queue = tf.train.string_input_producer(
    string_tensor=['data-01-test-score.csv'],
    shuffle=False,
    name='filename_queue',
)

# Line-oriented reader fed from the filename queue.
reader = tf.TextLineReader()
key, value = reader.read(queue=filename_queue)

# One default per CSV column: used when a field is empty, and its float type
# also determines the type of the decoded result.
record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(records=value, record_defaults=record_defaults)


In [10]:
# Collect shuffled batches of CSV records.
# batch_size: number of records per dequeued batch.
batch_size = 30
# min_after_dequeue: size of the buffer that records are randomly sampled
# from; larger values shuffle better but start slower and use more memory.
min_after_dequeue = 100
# capacity must exceed min_after_dequeue; the surplus (3 batches here) is the
# most that will be prefetched.
capacity = min_after_dequeue + 3 * batch_size
train_x_batch, train_y_batch = tf.train.shuffle_batch(
   [xy[0:-1], xy[-1:]], batch_size=batch_size, capacity=capacity,
   min_after_dequeue=min_after_dequeue)

In [11]:
# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

# Start the threads that populate the queues.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for step in range(2001):
        # Dequeue one shuffled mini-batch, then feed it to the model.
        x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
        cost_val, hy_val, _ = sess.run(
            [cost, hypothesis, train],
            feed_dict={X: x_batch, Y: y_batch})
        if step % 10 == 0:
            print(step, "Cost: ", cost_val,
                  "\nPrediction:\n", hy_val)

    # Ask my Score
    print("your score will be",
          sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
    print("other scores will be",
          sess.run(hypothesis, feed_dict={X: [[60, 70, 100], [90, 100, 80]]}))

    # `start` is set with time.clock() in the cell that builds the input
    # pipeline, so the same clock is used here.
    # NOTE(review): time.clock() was removed in Python 3.8; replace both calls
    # together (e.g. with time.perf_counter()).
    elapsed = (time.clock() - start)
    print("Time used:", elapsed)
finally:
    # Always stop and join the queue-runner threads, even when training
    # raised, so no background thread is left running in the kernel.
    coord.request_stop()
    coord.join(threads)

0 Cost:  42196.1 
Prediction:
 [[-27.72947884]
 [-34.30932236]
 [-38.2382164 ]
 [-33.52061081]
 [-34.30932236]
 [-34.07260132]
 [-24.22107124]
 [-45.36829758]
 [-33.89299774]
 [-49.82388687]
 [-30.22100258]
 [-59.40832901]
 [-38.2382164 ]
 [-49.95827866]
 [-57.03979874]
 [-16.59227943]
 [-38.02487183]
 [-53.23507309]
 [-45.36829758]
 [-24.22107124]
 [-57.03979874]
 [-34.07260132]
 [-33.84267044]
 [-49.95827866]
 [-30.22100258]
 [-33.84267044]
 [-39.66466141]
 [-33.89299774]
 [-49.82388687]
 [-59.40832901]]
10 Cost:  82.4467 
Prediction:
 [[ 171.74865723]
 [ 143.28683472]
 [ 187.32063293]
 [ 171.01728821]
 [ 150.15858459]
 [ 188.60983276]
 [ 143.28683472]
 [ 171.62289429]
 [ 150.15858459]
 [ 171.74865723]
 [ 187.32063293]
 [  90.76651764]
 [ 171.01728821]
 [ 130.27871704]
 [ 130.27871704]
 [ 171.74865723]
 [ 153.67047119]
 [ 130.68635559]
 [ 166.20631409]
 [ 200.30015564]
 [ 171.74865723]
 [ 188.28027344]
 [ 156.99423218]
 [ 130.27871704]
 [ 188.28027344]
 [ 153.67047119]
 [ 188.2802734