In [22]:
#!/usr/bin/env ipython
# coding: utf-8

import numpy as np
import tensorflow as tf
import scipy.io as io

all_label_dict = {
    'N':0,
    'A':1,
    'O':2,
    '~':3
}

all_data = []
all_label = []
all_lens = []
annotations = open('./raw/REFERENCE.csv', 'r').read().splitlines()
for i, line in enumerate(annotations):
    fname, all_label_str = line.split(',')
    
    x = io.loadmat('./raw/'+fname+'.mat')['val']\
        .astype(np.float32).squeeze()
    
    # [0, 1]
    # x -= x.min()
    # x /= x.max()
    
    # [-1, 1]
    # x -= x.min()
    # x /= x.max()
    # x *= 2
    # x -= 0
    
    # Normal
    x -= x.mean()
    x /= x.std()
    
    all_data.append(x)
    
    y = all_label_dict[all_label_str]
    all_label.append(y)
    
    all_lens.append(len(x))
    if i%50==0: 
        print('\rReading files: %05d   ' % i, end='', flush=True)

print('\rReading files: %05d   ' % i, end='', flush=True)
    
assert(len(all_label) == len(all_data) == len(all_lens))
print('\nReading successful!')
all_data_size = len(all_data)

# No problem with different lengths
# Using np.array because slice indexing does not copy the all_data
# While native python slicing does
all_data = np.array(all_data)
all_label = np.array(all_label)
all_lens = np.array(all_lens)
class_weights = np.histogram(all_label, bins=len(all_label_dict))[0]/len(all_label)
all_weight = class_weights[all_label]

def shuffle():
    global all_data
    global all_label
    global all_lens
    p = np.random.permutation(all_data_size)
    # Using fancy indexing for Unison Shuffle
    all_data = all_data[p]
    all_label = all_label[p]
    all_lens = all_lens[p]
    
def join_samples(sample_list, sample_all_lens):
    res = np.zeros((len(sample_list), sample_all_lens.max(), 1))
    for idx, (sample, l) in enumerate(zip(sample_list, sample_all_lens)):
        res[idx, :l] = sample[None, :, None]
    return res 

def random_batch(batch_size=8):
    shuffle()
    n = batch_size
    for i in range(0, all_data_size, batch_size):
        yield all_data[i:i+n], all_label[i:i+n], all_lens[i:i+n]



Reading files: 08527   
Reading successful!


In [24]:
all_weight[:5]

array([ 0.6043621 ,  0.6043621 ,  0.6043621 ,  0.09040807,  0.29983583])

In [21]:
class_weights = np.histogram(all_label, bins=len(all_label_dict))[0]/len(all_label)
print(class_weights, all_label[:5])
class_weights[all_label[:5]]


[ 0.6043621   0.09040807  0.29983583  0.005394  ] [0 0 0 1 2]


array([ 0.6043621 ,  0.6043621 ,  0.6043621 ,  0.09040807,  0.29983583])

In [11]:
x_input_data = tf.placeholder(tf.float32, [None], name='in_node')
q = tf.PaddingFIFOQueue(capacity=32, dtypes=[tf.float32], shapes=[x_input_data.shape])
enqueue_op = q.enqueue(x_input_data)
padded_batch = q.dequeue_many(2)

In [13]:
with tf.Session() as sess:
    print('session started')
    for i, sample in enumerate(all_data[:10]):
        enqueue_op.run({x_input_data:sample})
        print('\rEnqueued: %d'%i, end='', flush=True)
    print('\ndone')
    for i in range(3): 
        print(padded_batch.eval().shape)

session started
Enqueued: 9
done
(2, 9000)
(2, 18000)
(2, 18000)


In [17]:
tf.convert_to_tensor(all_data[:2].tolist())

<tf.Tensor 'Const_1:0' shape=(9000,) dtype=float32>

# tf.train.batch usage

* Must use new queue
* The `train.batch`  uses threading, and has its own queue
* Feeding only with a **new queue**

In [4]:
x_input_data = tf.placeholder(tf.float32, [None], name='in_node')
q = tf.FIFOQueue(capacity=all_data_size, dtypes=tf.float32)
#x_input_data = tf.Print(x_input_data, data=[x_input_data], message="Raw inputs data generated:", summarize=6)
enqueue_op = q.enqueue(x_input_data)
input = q.dequeue()
input.set_shape([None])
batched_data = tf.train.batch(
    tensors=[input], 
    batch_size=3, 
    dynamic_pad=True, 
    enqueue_many=False
)
qsize = q.size()
numberOfThreads = 1 
qr = tf.train.QueueRunner(q, [enqueue_op] * numberOfThreads)
tf.train.add_queue_runner(qr)
#input = tf.Print(input, data=[q.size()], message="Nb elements left:")

In [6]:
with tf.Session() as sess:
    print('Session started')
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    '''for i, sample in enumerate(all_data):
        sess.run(enqueue_op, {x_input_data:sample})
        print('\rEnqueued: %d'%i, end='', flush=True)
        #print('Current size of queue:', q.size().eval())
    '''
    sess.run(batched_data, {x_input_data:all_data}).shape
    print()
    print('done')
    #while not coord.should_stop():
    '''res = sess.run(batched_data)
    print(qsize.eval())
    res = sess.run(batched_data)
    print(qsize.eval())
    res = sess.run(batched_data)  
    print(qsize.eval())
    '''
    #print(res.shape)             
    #print('Current size of queue:', q.size().eval())
    coord.request_stop()
    coord.join(threads)
    
    
#res

Session started
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.CancelledError'>, Run call was cancelled


ValueError: setting an array element with a sequence.

In [2]:
import tensorflow as tf

# We simulate some raw input data
# let's start with only 3 samples of 1 data point
x_input_data = tf.random_normal([3], mean=-1, stddev=4)

# We build a FIFOQueue inside the graph 
# You can see it as a waiting line that holds waiting data
# In this case, a line with only 3 positions
q = tf.FIFOQueue(capacity=3, dtypes=tf.float32)

# We need an operation that will actually fill the queue with our data
# "enqueue_many" slices "x_input_data" along the 0th dimension to make multiple queue elements
enqueue_op = q.enqueue_many(x_input_data) # <- x1 - x2 -x3 |

# We need a dequeue op to get the next elements in the queue following the FIFO policy.
input = q.dequeue() 
# The input tensor is the equivalent of a placeholder now 
# but directly connected to the data sources in the graph

# Each time we use the input tensor, we print the number of elements left
# in the queue
input = tf.Print(input, data=[q.size()], message="Nb elements left:")

# fake graph: START
y = input + 1
# fake graph: END 

# We start the session as usual
with tf.Session() as sess:
    print('start')
    # We first run the enqueue_op to load our data into the queue
    sess.run(enqueue_op)
    # Now, our queue holds 3 elements, it's full. 
    # We can start to consume our data
    sess.run(y)
    sess.run(y) 
    sess.run(y) 
    print('end')
    # Now our queue is empty, if we call it again, our program will hang right here
    # waiting for the queue to be filled by at least one more datum
    #sess.run(y) 

start
end


In [None]:
enqueue_op.run({x_input_data:[0, 1, 2, 3, 4, 5, 6]})

In [1]:
import tensorflow as tf

x = tf.range(1, 10, name="x")

# A queue that outputs 0,1,2,3,...
range_q = tf.train.range_input_producer(limit=5, shuffle=False)
slice_end = range_q.dequeue()

# Slice x to variable length, i.e. [0], [0, 1], [0, 1, 2], ....
y = tf.slice(x, [0], [slice_end], name="y")

batched_data = tf.train.batch(
    tensors=[y],
    batch_size=5,
    dynamic_pad=True,
    name="y_batch",
    enqueue_many=False
)

#res = tf.contrib.learn.run_n({"y": batched_data}, n=1, feed_dict=None)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    res = sess.run(batched_data)
    coord.request_stop()
    coord.join(threads)
res

array([[0, 0, 0, 0],
       [1, 0, 0, 0],
       [1, 2, 0, 0],
       [1, 2, 3, 0],
       [1, 2, 3, 4]], dtype=int32)

In [3]:
batched_data

<tf.Tensor 'y_batch_1:0' shape=(5, ?) dtype=int32>

In [4]:
import tensorflow as tf
tensor_list_initializer = [tf.placeholder(tf.float32, [None])]*20
tensor_list = [tf.Variable(init) for init in tensor_list_initializer]


ValueError: initial_value must have a shape specified: Tensor("Placeholder_1:0", shape=(?,), dtype=float32)

# TFRecords

Write **`tf.train.SequenceExample`**

In [25]:
def make_example(sequence, label, weight):
    # The object we return
    ex = tf.train.SequenceExample()
    # A non-sequential feature of our example
    sequence_length = len(sequence)
    ex.context.feature['length'].int64_list.value.append(sequence_length)
    ex.context.feature['label'].int64_list.value.append(label)
    ex.context.feature['weight'].float_list.value.append(weight)
    
    fl_val = ex.feature_lists.feature_list['data']
    for token in sequence:
        fl_val.feature.add().float_list.value.append(token)

    return ex

In [28]:
def write_TFRecord(data, label, weight, fname='train'):
    with open(fname + '.TFRecord', 'w') as fp:
        writer = tf.python_io.TFRecordWriter(fp.name)
        print('Sampling...')
        for i, (x, y, w) in enumerate(zip(data, label, weight)):
            ex = make_example(x, y, w)
            
            writer.write(ex.SerializeToString())
            print('\r%05d'%i, end=' ', flush=True)
        writer.close()
        print("\nWrote to {}".format(fp.name))

In [39]:
write_TFRecord(all_data[:20], all_label[:20], all_weight[:20])

Sampling...
00019  
Wrote to train.TFRecord


# Read

In [30]:
import tensorflow as tf
def parse_TFRecords_example(filename_queue):
    # Define how to parse the example
    
    reader = tf.TFRecordReader()
    _, example = reader.read(filename_queue)
    
    context_features = {
        'length': tf.FixedLenFeature([], dtype=tf.int64),
        'label': tf.FixedLenFeature([], dtype=tf.int64),
        'weight': tf.FixedLenFeature([], dtype=tf.float32),
    }
    sequence_features = {
        'data': tf.FixedLenSequenceFeature([], dtype=tf.float32)
    }
    con_parsed, seq_parsed = tf.parse_single_sequence_example(
        serialized=example,
        context_features=context_features,
        sequence_features=sequence_features
    )
    res = (seq_parsed['data'], 
        con_parsed['length'], 
        con_parsed['label'], 
        con_parsed['weight'])
    return res
parse_example = parse_TFRecords_example

# TensorFlow fetching

In [35]:
filename_queue = tf.train.string_input_producer(['train.TFRecord'])
data, seq_len, label, weight = parse_example(filename_queue)
q = tf.PaddingFIFOQueue(
    capacity=32, 
    dtypes=[tf.float32, tf.int64, tf.int64, tf.float32],
    shapes=[[None], [], [], []])
enqueue_op = q.enqueue([data, seq_len, label, weight])
qr = tf.train.QueueRunner(q, [enqueue_op])
tf.train.add_queue_runner(qr)

batch_size=7
batch_op = q.dequeue_many(n=batch_size)
res = []
with tf.Session() as sess:
    print('Sess started')
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(5):
        res.append(sess.run(batch_op))
        print('fetched batch:', _)
    coord.request_stop()
    coord.join(threads)
    sess.close()
    


Sess started
fetched batch: 0
fetched batch: 1
fetched batch: 2
fetched batch: 3
fetched batch: 4


In [36]:
res[0][-1]

array([ 0.60436213,  0.60436213,  0.60436213,  0.09040806,  0.29983583,
        0.60436213,  0.60436213], dtype=float32)