In [38]:
import tensorflow as tf
import numpy as np
import spectreader
from PIL import Image
import os

In [39]:
# Number of threads used to fill the shuffle-batch queue.
NUM_THREADS = 4

# The run consumes whichever of these two sample counts is smaller:
#    k_nepochs * (number of data records in the combined list of TFRecord files)
#  or
#    k_batchsize * k_nbatches
#
# Note: unless the run covers a whole number of epochs, some samples may be
# over- or under-represented.

k_nepochs = 2     # the data files can be fed through the queues at most this many times
k_batchsize = 1   # examples per batch
k_nbatches = 100  # upper bound on the number of batches pulled in the session loop
k_shownum = 0     # maximum number of images displayed

# Data-dependent parameters
k_nClass = 2      # number of classes (length of the one-hot label)
height = 256      # image height used when reshaping
width = 856       # image width used when reshaping

def getImage(fname, nepochs):
    """Read (label, image) from the prepared TFRecord file(s) and preprocess them.

    The reading is delegated to spectreader.getImage; this wrapper flattens the
    image to a vector of height*width values and turns the label into a
    one-hot vector of length k_nClass.

    Args:
        fname: forwarded unchanged to spectreader.getImage. The notebook passes
            a list of paths, e.g. get_datafiles('data', 'train-').
        nepochs: forwarded unchanged to spectreader.getImage.

    Returns:
        (label, image): the one-hot label and the flattened image.
    """
    raw_label, raw_image = spectreader.getImage(fname, nepochs)

    flat_image = tf.reshape(raw_image, [height * width])

    # Re-express the label as a "one-hot" vector: [1,0] or [0,1] for two classes.
    # The same construction extends to more classes via k_nClass.
    # NOTE(review): the `- 1` presumably means stored labels are 1-based -- confirm
    # against spectreader.
    one_hot_label = tf.one_hot(raw_label - 1, k_nClass)
    return tf.stack(one_hot_label), flat_image

def get_datafiles(a_dir, startswith):
    """Return the paths of the entries in a_dir whose names start with startswith.

    Args:
        a_dir: directory to scan (not recursive).
        startswith: required filename prefix, e.g. 'train-'.

    Returns:
        A list of paths (a_dir joined with each matching name), sorted by name.
        os.listdir returns entries in arbitrary order, so sorting makes the
        result reproducible from run to run. The list is empty when nothing
        matches.
    """
    # os.path.join avoids a doubled separator when a_dir already ends with one.
    return [os.path.join(a_dir, name)
            for name in sorted(os.listdir(a_dir))
            if name.startswith(startswith)]


In [40]:
# Graph construction only: this runs without a session and just sets up the
# reading/preprocessing ops, returning the per-example label and image tensors.
# Author's note: getImage itself is (as understood) *not* called again while
# the session runs.
# Earlier single-file variants, kept for reference:
#target, data = getImage("data/train-00000-of-00001", k_nepochs)
#target, data = getImage(["data/train-00000-of-00001"], k_nepochs)
target, data = getImage(get_datafiles('data', 'train-'), k_nepochs)

getImage ['data/train-00000-of-00002', 'data/train-00001-of-00002']


In [41]:
# Group the single-example (image, label) tensors into shuffled batches.
imageBatch, labelBatch = tf.train.shuffle_batch(
    [data, target],
    batch_size=k_batchsize,
    capacity=10,                     # author's alternative value: 1000
    min_after_dequeue=5,             # author's alternative value: 500
    num_threads=NUM_THREADS,
    enqueue_many=False,              # IMPORTANT to get right; False is also the default
    allow_smaller_final_batch=True)  # finish an epoch even if the data size is not a multiple of the batch size


In [42]:
# Running total of how many examples of each class were drawn
# (one slot per column of the one-hot label).
label_count = np.zeros(k_nClass)

with tf.Session() as sess:
    # Local variables have to be initialised too: this is needed whenever
    # getImage's string_input_producer is given a num_epochs other than None.
    sess.run(tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer()))

    # Create a coordinator and launch the queue-runner threads that feed the batches.
    coord = tf.train.Coordinator()
    enqueue_threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    shownum = 0  # number of images displayed so far

    try:
        for step in range(k_nbatches):
            if coord.should_stop():
                break
            data_batch, label_batch = sess.run([imageBatch, labelBatch])

            print('------  step: ' + str(step))
            print('batch size is ' + str(data_batch.shape[0]))  # rows: examples in this batch
            print('data size is ' + str(data_batch.shape[1]))   # columns: values per example
            print(label_batch)

            # Iterate over the rows actually returned rather than k_batchsize:
            # allow_smaller_final_batch=True means the last batch can be short,
            # and indexing past its end would raise IndexError.
            for i in range(data_batch.shape[0]):
                if shownum < k_shownum:
                    foo = Image.fromarray(np.reshape(data_batch[i]*255, (height, width)))
                    foo.show()
                    shownum += 1

            label_count += np.sum(label_batch, axis=0)

    except Exception as e:
        # Typically the end-of-data error: getImage has run all its epochs and
        # no longer feeds shuffle_batch. Hand the exception to the coordinator;
        # it treats OutOfRangeError as a clean stop and re-raises anything else
        # from join().
        coord.request_stop(e)

    finally:
        # Always stop and wait for the queue-runner threads before leaving the session.
        coord.request_stop()
        coord.join(enqueue_threads)

print('label count is ' + str(label_count))

------  step: 0
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 1
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 2
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 3
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 4
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 5
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 6
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 7
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 8
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 9
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 10
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 11
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 12
batch size is 1
data size is 219136
[[ 0.  1.]]
------  step: 13
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 14
batch size is 1
data size is 219136
[[ 1.  0.]]
------  step: 15
batch size is 1
da