## Text Line Reader

In [1]:
import tensorflow as tf

  return f(*args, **kwds)


**string_input_producer** returns a queue of file names; **TextLineReader** reads from that queue and yields one line of the file per `sess.run` call

In [2]:
# Build the queue that holds the name of the CSV file to be read.
filename_q = tf.train.string_input_producer(["50_Startups.csv"])

In [3]:
# Line-oriented reader. skip_header_lines=1 skips the first line of the file,
# which is why the first key read later in this notebook is b'50_Startups.csv:2'.
reader = tf.TextLineReader(skip_header_lines=1)

In [4]:
# key identifies the record as b'<file name>:<line number>'; value is the raw text of that line.
# Both are graph tensors here; nothing is read until they are evaluated in a session.
key, value = reader.read(filename_q)

In [5]:
# Session used by the following cells to evaluate `key` and `value`.
sess = tf.InteractiveSession()

Since the queues run asynchronously, we need to create a coordinator and start the queue-runner threads

In [6]:
# The coordinator is what later lets us stop and join the reader threads
# (see coord.request_stop() / coord.join(threads) further down).
coord = tf.train.Coordinator()
# No session is passed explicitly — presumably the default session (the
# InteractiveSession created above) is used; confirm against the TF docs.
threads = tf.train.start_queue_runners(coord=coord)

In [7]:
# Read a single record: evaluate key and value together in one run call.
k,v = sess.run([key,value])
# Display the (key, value) pair as the cell output.
k,v

(b'50_Startups.csv:2', b'165349.2,136897.8,471784.1,New York,192261.83')

In [8]:
# Each sess.run call advances the reader by one line, so this prints 51 consecutive records.
# After the loop, k and v still hold the last record read (v is reused by the decode_csv cell).
for i in range(51):
    k,v = sess.run([key,value])
    print(k,v)

b'50_Startups.csv:3' b'162597.7,151377.59,443898.53,California,191792.06'
b'50_Startups.csv:4' b'153441.51,101145.55,407934.54,Florida,191050.39'
b'50_Startups.csv:5' b'144372.41,118671.85,383199.62,New York,182901.99'
b'50_Startups.csv:6' b'142107.34,91391.77,366168.42,Florida,166187.94'
b'50_Startups.csv:7' b'131876.9,99814.71,362861.36,New York,156991.12'
b'50_Startups.csv:8' b'134615.46,147198.87,127716.82,California,156122.51'
b'50_Startups.csv:9' b'130298.13,145530.06,323876.68,Florida,155752.6'
b'50_Startups.csv:10' b'120542.52,148718.95,311613.29,New York,152211.77'
b'50_Startups.csv:11' b'123334.88,108679.17,304981.62,California,149759.96'
b'50_Startups.csv:12' b'101913.08,110594.11,229160.95,Florida,146121.95'
b'50_Startups.csv:13' b'100671.96,91790.61,249744.55,California,144259.4'
b'50_Startups.csv:14' b'93863.75,127320.38,249839.44,Florida,141585.52'
b'50_Startups.csv:15' b'91992.39,135495.07,252664.93,California,134307.35'
b'50_Startups.csv:16' b'119943.24,156547.42,25651

In [9]:
# Ask the queue-runner threads to stop, then block until they have finished.
coord.request_stop()
coord.join(threads)

**decoding CSV**

In [22]:
# v holds the raw bytes of the last line read above. decode_csv splits it into one
# value per column; record_defaults gives each column's type (float, float, float,
# string, float) and the value used when a field is empty.
decoded_row = sess.run(tf.decode_csv(v,record_defaults=[[1.], [1.], [1.], ["na"], [1.]]))

# This is the last use of the first session. Close it here so it is not leaked when
# the next section rebinds `sess` to a new InteractiveSession.
# (Re-running this cell on its own requires re-creating the session first.)
sess.close()

decoded_row

[162597.7, 151377.6, 443898.53, b'California', 191792.06]

**Example**

Getting features and labels from multiple CSV files

In [31]:
# Queue of input file names; the reader takes the next name from it when the
# current file has been read to the end.
filename_q = tf.train.string_input_producer(["50_Startups.csv","50_Startups_2.csv"])
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(filename_q)

# One tensor per CSV column: three numeric columns, one string column, and the
# numeric dependent variable. The three numeric columns are stacked into a
# single feature vector.
col1, col2, col3, col4, dep_var = tf.decode_csv(value, record_defaults=[[1.], [1.], [1.], ["na"], [1.]])
features = tf.stack([col1, col2, col3])

sess = tf.InteractiveSession()

coord = tf.train.Coordinator()
# Pass the session explicitly so the queue runners do not depend on which
# session happens to be the default one.
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for i in range(51):
        k,f,c,v = sess.run([key,features,col4,dep_var])
        print(k,f,c,v)
finally:
    # Runs even if a read fails (for example, a missing input file), so the
    # queue-runner threads and the session are never left dangling.
    coord.request_stop()
    try:
        coord.join(threads)
    finally:
        sess.close()

b'50_Startups_2.csv:2' [165349.2 136897.8 471784.1] b'a' 192261.83
b'50_Startups_2.csv:3' [162597.7  151377.6  443898.53] b'a' 191792.06
b'50_Startups_2.csv:4' [153441.52 101145.55 407934.53] b'b' 191050.39
b'50_Startups_2.csv:5' [144372.4  118671.85 383199.62] b'b' 182901.98
b'50_Startups_2.csv:6' [142107.34  91391.77 366168.4 ] b'd' 166187.94
b'50_Startups_2.csv:7' [131876.9   99814.71 362861.38] b'd' 156991.12
b'50_Startups_2.csv:8' [134615.45 147198.88 127716.82] b'a' 156122.52
b'50_Startups.csv:2' [165349.2 136897.8 471784.1] b'New York' 192261.83
b'50_Startups.csv:3' [162597.7  151377.6  443898.53] b'California' 191792.06
b'50_Startups.csv:4' [153441.52 101145.55 407934.53] b'Florida' 191050.39
b'50_Startups.csv:5' [144372.4  118671.85 383199.62] b'New York' 182901.98
b'50_Startups.csv:6' [142107.34  91391.77 366168.4 ] b'Florida' 166187.94
b'50_Startups.csv:7' [131876.9   99814.71 362861.38] b'New York' 156991.12
b'50_Startups.csv:8' [134615.45 147198.88 127716.82] b'California'

### Batching

In [32]:
# Input pipeline that produces shuffled mini-batches of (features, label) from the two CSV files.
# shuffle=True presumably shuffles the order of the file names within the queue — confirm in the TF docs.
filename_q = tf.train.string_input_producer(["50_Startups.csv","50_Startups_2.csv"],shuffle=True)
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(filename_q)

# One tensor per CSV column; the three numeric columns form the example, the last column is the label.
# col4 (the string column) is decoded but not used in the batch.
col1, col2, col3, col4, label = tf.decode_csv(value, record_defaults=[[1.], [1.], [1.], ["na"], [1.]])
example = tf.stack([col1, col2, col3])

# NOTE(review): min_after_dequeue (10000) is far larger than the number of rows these
# files appear to contain, so the shuffle buffer is presumably filled by re-reading the
# files many times. The duplicated row in the second batch output below is consistent
# with that — confirm this is intended, or use a smaller value.
min_after_dequeue = 10000
batch_size = 5
# Leave room in the queue for a few batches on top of the minimum that must stay buffered.
capacity = min_after_dequeue + 3 * batch_size

# Each evaluation of example_batch / label_batch yields batch_size examples and their labels.
example_batch, label_batch = tf.train.shuffle_batch( [example, label], batch_size=batch_size, capacity=capacity,
      min_after_dequeue=min_after_dequeue)

In [33]:
# New session for the batching pipeline; it is closed by the shutdown cell further down.
sess = tf.InteractiveSession()

# Start the queue-runner threads that feed the pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
# Fetch the first batch: a (5, 3) float32 array of examples and a length-5 array of labels.
sess.run([example_batch,label_batch])

[array([[131876.9 ,  99814.71, 362861.38],
        [ 67532.53, 105751.03, 304768.72],
        [ 64664.71, 139553.16, 137962.62],
        [ 38558.51,  82982.09, 174999.3 ],
        [ 72107.6 , 127864.55, 353183.8 ]], dtype=float32),
 array([156991.12, 108733.99, 107404.34,  81005.76, 105008.31],
       dtype=float32)]

In [34]:
# Running the same fetch again returns a different batch, as the output shows.
sess.run([example_batch,label_batch])

[array([[134615.45, 147198.88, 127716.82],
        [ 23640.93,  96189.63, 148001.11],
        [ 75328.87, 144135.98, 134050.06],
        [ 75328.87, 144135.98, 134050.06],
        [ 78389.47, 153773.44, 299737.28]], dtype=float32),
 array([156122.52,  71498.49, 105733.54, 105733.54, 111313.02],
       dtype=float32)]

In [35]:
# Ask the queue-runner threads to stop and wait for them to finish...
coord.request_stop()
coord.join(threads)

# ...then close the batching session.
sess.close()

In [36]:
# Inspect the graph tensor for the first CSV column: a scalar float32 tensor, not a value.
col1

<tf.Tensor 'DecodeCSV_14:0' shape=() dtype=float32>

In [37]:
# TensorFlow version this notebook was executed with.
tf.__version__

'1.6.0-rc1'