In [1]:
# follow TensorFlow demo for time series windowing
# https://www.tensorflow.org/guide/data#time_series_windowing

In [None]:
# this is an example provided by Google on how to use TensorFlow functionality
# to build a supervised-learning database (time series windowing)
# initially, this functionality will not be used for DPLSTM

In [1]:
import tensorflow as tf
# record the installed version: the cells below use the TF 1.x graph-mode API
# (tf.Session and one-shot iterators)
print(tf.__version__)

1.15.0


In [2]:
# synthetic time series: the consecutive integers 0 .. 99999
source = tf.data.Dataset.range(0, 100000)

In [3]:
# group consecutive elements into rows of 7, dropping a final partial row
batches = source.batch(batch_size=7, drop_remainder=True)

In [4]:
# iterating over a dataset with a Python loop is only valid in eager execution,
# so the following loop is replaced with a TensorFlow iterator

# for batch in batches.take(5):
#   print(batch.numpy())

In [5]:
# one-shot iterator over only the first 5 rows of the batched dataset
# (tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator)
iterator = tf.compat.v1.data.make_one_shot_iterator(batches.take(5))
next_element = iterator.get_next()

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


In [6]:
# evaluate tensors in the temporary dataset to print them
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

[0 1 2 3 4 5 6]
[ 7  8  9 10 11 12 13]
[14 15 16 17 18 19 20]
[21 22 23 24 25 26 27]
[28 29 30 31 32 33 34]


In [7]:
# now use slicing and a map transformation to also get the targets

In [8]:
def get_features_and_label(batch):
    '''
    Split one SLDB row into two overlapping sequences shifted by one step.

    feature: readings of the row, from the first one to the one before the last
    target: readings of the row, from the second one to the last one
    '''
    features = batch[:-1]
    targets = batch[1:]
    return features, targets

In [9]:
# turn every 7-element row into a (features, targets) pair
test = batches.map(map_func=get_features_and_label)

In [10]:
# one-shot iterator over the first 5 (features, targets) rows
# (tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator)
iterator = tf.compat.v1.data.make_one_shot_iterator(test.take(5))
next_element = iterator.get_next()

In [11]:
# evaluate tensors in the temporary dataset to print them
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

(array([0, 1, 2, 3, 4, 5]), array([1, 2, 3, 4, 5, 6]))
(array([ 7,  8,  9, 10, 11, 12]), array([ 8,  9, 10, 11, 12, 13]))
(array([14, 15, 16, 17, 18, 19]), array([15, 16, 17, 18, 19, 20]))
(array([21, 22, 23, 24, 25, 26]), array([22, 23, 24, 25, 26, 27]))
(array([28, 29, 30, 31, 32, 33]), array([29, 30, 31, 32, 33, 34]))


In [12]:
# now use tf.data.Dataset.window for finer control of time series windowing

In [13]:
# number of elements per SLDB row (embedding dimension)
window_size = 10
# offset between the first elements of consecutive SLDB rows
shift = 3
windows = source.window(size=window_size, shift=shift)

In [14]:
def sub_to_batch(sub):
    '''
    Collapse one window (a sub-dataset) into a single row.

    Reads the module-level `window_size`; windows that do not fill a complete
    batch of that size are dropped.
    '''
    row = sub.batch(window_size, drop_remainder=True)
    return row

In [15]:
# flatten the dataset of windows into rows and keep only the first 5
some_windows = windows.flat_map(map_func=sub_to_batch).take(count=5)

In [16]:
# tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(some_windows)
next_element = iterator.get_next()

In [17]:
# evaluate the windowed rows in a session and print them
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

[0 1 2 3 4 5 6 7 8 9]
[ 3  4  5  6  7  8  9 10 11 12]
[ 6  7  8  9 10 11 12 13 14 15]
[ 9 10 11 12 13 14 15 16 17 18]
[12 13 14 15 16 17 18 19 20 21]


In [21]:
# using the make windows dataset from TensorFlow demo

def make_window_dataset(ds, window_size=5, shift=1, stride=1):
    '''
    Slide a window over `ds` and return one row per complete window.

    ds: time series dataset
    window_size: number of elements per row (the embedding dimension)
    shift: offset between the starting points of consecutive rows
    stride: spacing between the elements inside a row (time lag or tau)
    '''
    # window() produces a dataset of sub-datasets, one per window
    nested = ds.window(window_size, shift=shift, stride=stride)
    # batching each sub-dataset to window_size turns it into a single row;
    # drop_remainder discards windows with fewer than window_size elements
    return nested.flat_map(
        lambda sub: sub.batch(window_size, drop_remainder=True)
    )

In [22]:
# try the function: rows of 10 elements, 3 apart, each row starting 5 later
demo_dataset = make_window_dataset(ds=source, window_size=10, shift=5, stride=3)

In [23]:
# tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(demo_dataset.take(5))
next_element = iterator.get_next()
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

[ 0  3  6  9 12 15 18 21 24 27]
[ 5  8 11 14 17 20 23 26 29 32]
[10 13 16 19 22 25 28 31 34 37]
[15 18 21 24 27 30 33 36 39 42]
[20 23 26 29 32 35 38 41 44 47]


In [24]:
# a more convenient example: shift=1 starts a row at every element,
# so all available data is used
one_window_dataset = make_window_dataset(ds=source, window_size=10, shift=1, stride=3)

In [25]:
# tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(one_window_dataset.take(5))
next_element = iterator.get_next()
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

[ 0  3  6  9 12 15 18 21 24 27]
[ 1  4  7 10 13 16 19 22 25 28]
[ 2  5  8 11 14 17 20 23 26 29]
[ 3  6  9 12 15 18 21 24 27 30]
[ 4  7 10 13 16 19 22 25 28 31]


In [26]:
def split_at_last_element(batch):
    '''
    Split a row into features and a single label.

    features: every element of the row except the last one
    label: the last element of the row (a single element, not a slice)
    '''
    features, label = batch[:-1], batch[-1]
    return features, label

In [27]:
def split_at_nth_backwards_element(batch, n):
    '''
    Split a row into features and labels at the n-th element from the end.

    batch: sliceable row (e.g. a list or a 1-D tensor)
    n: number of trailing elements that become the labels

    returns: (features, labels), where labels are the last n elements of the
    row and features are everything before them
    '''
    if isinstance(n, int) and n == 0:
        # -0 is 0, so batch[:-n] would be empty and batch[-n:] the whole row;
        # with no labels requested the whole row is features
        return batch[:], batch[:0]
    return batch[:-n], batch[-n:]

In [28]:
# the last 2 elements of every row become the labels
split_dataset = one_window_dataset.map(
    lambda window: split_at_nth_backwards_element(window, n=2)
)

In [29]:
# tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(split_dataset.take(5))
next_element = iterator.get_next()
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

(array([ 0,  3,  6,  9, 12, 15, 18, 21]), array([24, 27]))
(array([ 1,  4,  7, 10, 13, 16, 19, 22]), array([25, 28]))
(array([ 2,  5,  8, 11, 14, 17, 20, 23]), array([26, 29]))
(array([ 3,  6,  9, 12, 15, 18, 21, 24]), array([27, 30]))
(array([ 4,  7, 10, 13, 16, 19, 22, 25]), array([28, 31]))


In [30]:
# finally, a function that includes the complete functionality
# use variable names consistent within the research team
# m: embedding dimension
# tau: time lag
# shift: shift value from one sample to the next one
# n_targets: (how many steps to predict at once?)

In [31]:
def make_supervised_learning_database(batch, m, tau, shift=1, n_targets=1):
    '''
    Build a supervised-learning database (SLDB) of (features, targets) rows.

    batch: time series dataset to be windowed (despite the name, this is the
        whole dataset, not a single row)
    m: embedding dimension, number of feature values per row
    tau: time lag between consecutive elements of a row
    shift: offset between the starting points of consecutive rows
    n_targets: number of target values per row, taken right after the features

    returns: dataset of (features, targets) pairs holding m and n_targets
    elements respectively. Assumes m and n_targets are non-negative Python
    ints (they are used as slice bounds).
    '''
    window_size = m + n_targets

    # window() returns a dataset of sub-datasets, so it must be flat_mapped
    windows = batch.window(window_size, shift=shift, stride=tau)

    def sub_to_batch(sub):
        # one complete window -> one row; incomplete windows are dropped
        return sub.batch(window_size, drop_remainder=True)

    windows = windows.flat_map(sub_to_batch)

    def split_features_and_targets(row):
        # every row has exactly m + n_targets elements, so splitting at index
        # m equals splitting n_targets from the end, and unlike row[:-n] it
        # stays correct for n_targets == 0 (where -0 would slice from the start)
        return row[:m], row[m:]

    features_and_labels = windows.map(split_features_and_targets)

    return features_and_labels

In [34]:
# demo SLDB: each row has 8 consecutive readings as features and the
# following 3 readings as targets
features_and_labels = make_supervised_learning_database(
    batch=source, m=8, tau=1, shift=1, n_targets=3
)

In [35]:
# tf.compat.v1 helper replaces the deprecated Dataset.make_one_shot_iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(features_and_labels.take(25))
next_element = iterator.get_next()
with tf.Session() as sess:
    try:
        while True:
            row = sess.run(next_element)
            print(row)
    except tf.errors.OutOfRangeError:
        # raised by the iterator once the dataset is exhausted: normal end of
        # the loop. Any other exception is a real failure and propagates.
        pass

(array([0, 1, 2, 3, 4, 5, 6, 7]), array([ 8,  9, 10]))
(array([1, 2, 3, 4, 5, 6, 7, 8]), array([ 9, 10, 11]))
(array([2, 3, 4, 5, 6, 7, 8, 9]), array([10, 11, 12]))
(array([ 3,  4,  5,  6,  7,  8,  9, 10]), array([11, 12, 13]))
(array([ 4,  5,  6,  7,  8,  9, 10, 11]), array([12, 13, 14]))
(array([ 5,  6,  7,  8,  9, 10, 11, 12]), array([13, 14, 15]))
(array([ 6,  7,  8,  9, 10, 11, 12, 13]), array([14, 15, 16]))
(array([ 7,  8,  9, 10, 11, 12, 13, 14]), array([15, 16, 17]))
(array([ 8,  9, 10, 11, 12, 13, 14, 15]), array([16, 17, 18]))
(array([ 9, 10, 11, 12, 13, 14, 15, 16]), array([17, 18, 19]))
(array([10, 11, 12, 13, 14, 15, 16, 17]), array([18, 19, 20]))
(array([11, 12, 13, 14, 15, 16, 17, 18]), array([19, 20, 21]))
(array([12, 13, 14, 15, 16, 17, 18, 19]), array([20, 21, 22]))
(array([13, 14, 15, 16, 17, 18, 19, 20]), array([21, 22, 23]))
(array([14, 15, 16, 17, 18, 19, 20, 21]), array([22, 23, 24]))
(array([15, 16, 17, 18, 19, 20, 21, 22]), array([23, 24, 25]))
(array([16, 17, 