## Reading CSV Data

Reference:
https://www.tensorflow.org/versions/r0.9/how_tos/reading_data/index.html


--------------

`data1.csv` — each row holds four integer feature columns followed by one label column:

11,12,13,14,0

21,22,23,24,0

31,32,33,34,1

41,42,43,44,0

51,52,53,54,1

--------------

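`data2.csv` — same layout; the empty fields in two of the rows are filled from `record_defaults` (here `1`) when the row is decoded: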

61,62,63,64,0

71,72,,,1

81,82,,,0

91,92,93,94,0

--------------


## Reading Single Example

In [1]:
import tensorflow as tf

In [2]:
def read_single_example(filename, shuffle=False):
    """Build the graph ops that read one CSV row at a time.

    Each row is decoded into five integer columns: the first four are packed
    into a feature tensor and the fifth is used as the label.

    Args:
        filename: List of CSV file paths. Despite the singular name, the value
            is handed directly to `tf.train.string_input_producer`, which
            expects a list of strings.
        shuffle: Passed to the filename queue; when True the file order is
            shuffled. Defaults to False (files are read in the given order).

    Returns:
        A `(features, labels)` tuple of tensors for a single example.
    """
    filename_queue = tf.train.string_input_producer(filename, shuffle=shuffle)

    reader = tf.TextLineReader()
    # Only the line content is needed; the key returned alongside it is unused.
    _, value = reader.read(filename_queue)

    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    record_defaults = [[1], [1], [1], [1], [1]]
    columns = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.pack(columns[:4])
    labels = columns[4]
    return features, labels

In [3]:
def run_data_reader(features, labels, num_steps=20):
    """Evaluate the reader tensors repeatedly and print what they yield.

    Opens a session, starts the queue-runner threads, then fetches
    `features` and `labels` together `num_steps` times, printing the step
    index followed by the fetched values.

    Args:
        features: Tensor to fetch (a single example or a batch).
        labels: Tensor fetched in the same `sess.run` call as `features`.
        num_steps: Number of fetches to perform. Defaults to 20.
    """
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for step in range(num_steps):
                print(step)
                feature_val, label_val = sess.run([features, labels])
                # Format explicitly so the line reads "features label" under
                # both the Python 2 print statement and the Python 3 function.
                print("%s %s" % (feature_val, label_val))
        finally:
            # Stop and join the queue-runner threads even when a step raises,
            # so they are not left running once the session is closed.
            coord.request_stop()
            coord.join(threads)

In [4]:
# Read both CSV files through the unshuffled single-example reader and print
# the fetched (features, label) pairs; the output wraps back to data1.csv once
# both files have been consumed.
features,labels = read_single_example(["data1.csv","data2.csv"])
run_data_reader(features,labels)

0
[11 12 13 14] 0
1
[21 22 23 24] 0
2
[31 32 33 34] 1
3
[41 42 43 44] 0
4
[51 52 53 54] 1
5
[61 62 63 64] 0
6
[71 72  1  1] 1
7
[81 82  1  1] 0
8
[91 92 93 94] 0
9
[11 12 13 14] 0
10
[21 22 23 24] 0
11
[31 32 33 34] 1
12
[41 42 43 44] 0
13
[51 52 53 54] 1
14
[61 62 63 64] 0
15
[71 72  1  1] 1
16
[81 82  1  1] 0
17
[91 92 93 94] 0
18
[11 12 13 14] 0
19
[21 22 23 24] 0


In [5]:
## Reading Shuffled Single Example

In [15]:
def read_shuffle_single_example(filename, seed=None):
    """Build the graph ops that read one CSV row at a time, shuffling files.

    Identical in shape to the unshuffled reader, except that the filename
    queue is created with `shuffle=True`. Each row is decoded into five
    integer columns: four features and one label.

    Args:
        filename: List of CSV file paths (passed directly to
            `tf.train.string_input_producer`).
        seed: Optional seed forwarded to the filename queue so the shuffled
            file order can be made repeatable. Defaults to None.

    Returns:
        A `(features, labels)` tuple of tensors for a single example.
    """
    filename_queue = tf.train.string_input_producer(
        filename, shuffle=True, seed=seed)

    reader = tf.TextLineReader()
    # Only the line content is needed; the key returned alongside it is unused.
    _, value = reader.read(filename_queue)

    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    record_defaults = [[1], [1], [1], [1], [1]]
    columns = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.pack(columns[:4])
    labels = columns[4]
    return features, labels

In [16]:
# Same two files, but read through the reader whose filename queue is created
# with shuffle=True. In the output below the file order varies while the rows
# of each file still appear in their original order.
features_shuffle, labels_shuffle = read_shuffle_single_example(["data1.csv","data2.csv"])
run_data_reader(features_shuffle,labels_shuffle)

0
[61 62 63 64] 0
1
[71 72  1  1] 1
2
[81 82  1  1] 0
3
[91 92 93 94] 0
4
[11 12 13 14] 0
5
[21 22 23 24] 0
6
[31 32 33 34] 1
7
[41 42 43 44] 0
8
[51 52 53 54] 1
9
[11 12 13 14] 0
10
[21 22 23 24] 0
11
[31 32 33 34] 1
12
[41 42 43 44] 0
13
[51 52 53 54] 1
14
[61 62 63 64] 0
15
[71 72  1  1] 1
16
[81 82  1  1] 0
17
[91 92 93 94] 0
18
[61 62 63 64] 0
19
[71 72  1  1] 1


## Reading Batch Example

In [8]:
def read_shuffle_batch(filenames, batch_size, min_after_dequeue=10000,
                       seed=None):
    """Build ops that yield randomly shuffled batches of CSV examples.

    Single examples come from `read_single_example` and are then mixed by
    `tf.train.shuffle_batch`.

    Args:
        filenames: List of CSV file paths.
        batch_size: Number of examples per batch.
        min_after_dequeue: Size of the buffer that is randomly sampled from.
            Bigger means better shuffling but slower start up and more memory
            used. Defaults to 10000.
        seed: Optional seed forwarded to `tf.train.shuffle_batch` so the
            shuffling can be made repeatable. Defaults to None.

    Returns:
        An `(example_batch, label_batch)` tuple of tensors.
    """
    example, label = read_single_example(filenames)
    # capacity must be larger than min_after_dequeue and the amount larger
    #   determines the maximum we will prefetch.  Recommendation:
    #   min_after_dequeue + (num_threads + a small safety margin) * batch_size
    capacity = min_after_dequeue + 3 * batch_size
    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label], batch_size=batch_size, capacity=capacity,
        min_after_dequeue=min_after_dequeue, seed=seed)
    return example_batch, label_batch

In [9]:
# Draw shuffled batches of 5 examples from the two CSV files and print each
# fetched batch with its labels.
features_shuffle_batch, labels_shuffle_batch = read_shuffle_batch(["data1.csv","data2.csv"],5)
run_data_reader(features_shuffle_batch, labels_shuffle_batch)


0
[[31 32 33 34]
 [61 62 63 64]
 [91 92 93 94]
 [51 52 53 54]
 [21 22 23 24]] [1 0 0 1 0]
1
[[41 42 43 44]
 [51 52 53 54]
 [61 62 63 64]
 [61 62 63 64]
 [51 52 53 54]] [0 1 0 0 1]
2
[[61 62 63 64]
 [11 12 13 14]
 [31 32 33 34]
 [91 92 93 94]
 [51 52 53 54]] [0 0 1 0 1]
3
[[11 12 13 14]
 [81 82  1  1]
 [71 72  1  1]
 [91 92 93 94]
 [91 92 93 94]] [0 0 1 0 0]
4
[[61 62 63 64]
 [21 22 23 24]
 [61 62 63 64]
 [31 32 33 34]
 [71 72  1  1]] [0 0 0 1 1]
5
[[11 12 13 14]
 [61 62 63 64]
 [21 22 23 24]
 [91 92 93 94]
 [91 92 93 94]] [0 0 0 0 0]
6
[[21 22 23 24]
 [71 72  1  1]
 [81 82  1  1]
 [41 42 43 44]
 [81 82  1  1]] [0 1 0 0 0]
7
[[51 52 53 54]
 [71 72  1  1]
 [91 92 93 94]
 [71 72  1  1]
 [21 22 23 24]] [1 1 0 1 0]
8
[[91 92 93 94]
 [41 42 43 44]
 [41 42 43 44]
 [61 62 63 64]
 [21 22 23 24]] [0 0 0 0 0]
9
[[21 22 23 24]
 [91 92 93 94]
 [41 42 43 44]
 [51 52 53 54]
 [91 92 93 94]] [0 0 0 1 0]
10
[[91 92 93 94]
 [81 82  1  1]
 [21 22 23 24]
 [71 72  1  1]
 [31 32 33 34]] [0 0 0 1 1]
11
[[31 3

In [12]:
def read_batch(filenames, batch_size, capacity=None):
    """Build ops that yield batches of CSV examples via `tf.train.batch`.

    Single examples come from `read_single_example` and are grouped with
    `tf.train.batch`; no `min_after_dequeue` shuffle buffer is involved.

    Args:
        filenames: List of CSV file paths.
        batch_size: Number of examples per batch.
        capacity: Capacity passed to `tf.train.batch`. When None (the
            default) it is `10000 + 3 * batch_size`, the value this function
            has always used.

    Returns:
        An `(example_batch, label_batch)` tuple of tensors.
    """
    example, label = read_single_example(filenames)
    if capacity is None:
        # Historical default, kept so existing callers see the same queue size.
        capacity = 10000 + 3 * batch_size
    example_batch, label_batch = tf.train.batch(
        [example, label], batch_size=batch_size, capacity=capacity)
    return example_batch, label_batch

In [14]:
# Draw batches of 5 examples from the two CSV files with the non-shuffling
# batch reader; in the output below the rows keep their file order across
# batches.
features_batch, labels_batch = read_batch(["data1.csv","data2.csv"],5)
run_data_reader(features_batch, labels_batch)


0
[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]
 [41 42 43 44]
 [51 52 53 54]] [0 0 1 0 1]
1
[[61 62 63 64]
 [71 72  1  1]
 [81 82  1  1]
 [91 92 93 94]
 [11 12 13 14]] [0 1 0 0 0]
2
[[21 22 23 24]
 [31 32 33 34]
 [41 42 43 44]
 [51 52 53 54]
 [61 62 63 64]] [0 1 0 1 0]
3
[[71 72  1  1]
 [81 82  1  1]
 [91 92 93 94]
 [11 12 13 14]
 [21 22 23 24]] [1 0 0 0 0]
4
[[31 32 33 34]
 [41 42 43 44]
 [51 52 53 54]
 [61 62 63 64]
 [71 72  1  1]] [1 0 1 0 1]
5
[[81 82  1  1]
 [91 92 93 94]
 [11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]] [0 0 0 0 1]
6
[[41 42 43 44]
 [51 52 53 54]
 [61 62 63 64]
 [71 72  1  1]
 [81 82  1  1]] [0 1 0 1 0]
7
[[91 92 93 94]
 [11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]
 [41 42 43 44]] [0 0 0 1 0]
8
[[51 52 53 54]
 [61 62 63 64]
 [71 72  1  1]
 [81 82  1  1]
 [91 92 93 94]] [1 0 1 0 0]
9
[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]
 [41 42 43 44]
 [51 52 53 54]] [0 0 1 0 1]
10
[[61 62 63 64]
 [71 72  1  1]
 [81 82  1  1]
 [91 92 93 94]
 [11 12 13 14]] [0 1 0 0 0]
11
[[21 2