In [1]:
# Learning TensorFlow's tf.data API

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
# Create dummy data: five 2-element rows (array_a) and five scalars (array_b)
array_a = np.arange(10).reshape(5, 2)   # rows [0 1], [2 3], ..., [8 9]
array_b = -np.arange(1, 6)              # -1, -2, ..., -5
print(array_a)
print(array_b)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
[-1 -2 -3 -4 -5]


In [4]:
# Define tensorflow dataset: slicing along the first axis pairs each row of
# array_a with the entry of array_b at the same position
dataset = tf.data.Dataset.from_tensor_slices((array_a, array_b))

# Each element is a (row, scalar) tuple of tensors; print them side by side
for element in dataset:
    features, target = element
    print(features.numpy(), ' , ', target.numpy())

[0 1]  ,  -1
[2 3]  ,  -2
[4 5]  ,  -3
[6 7]  ,  -4
[8 9]  ,  -5


In [5]:
# Preprocessing: Apply Python functions
def tf_some_python_function(input_a, input_b):
    """Apply ``np.log1p`` to ``input_a`` through ``tf.py_function``.

    Intended to be passed to ``Dataset.map``. ``tf.py_function`` lets plain
    Python/NumPy code run inside a tf.data pipeline, but the tensor it returns
    has no static shape information, so the shape is restored explicitly here.

    Args:
        input_a: Tensor to transform elementwise with ``log1p``.
        input_b: Tensor passed through unchanged.

    Returns:
        A tuple ``(log1p(input_a) as tf.float32, input_b)``.
    """
    # Remember the static shape before it is lost by py_function.
    static_shape = input_a.shape
    [input_a,] = tf.py_function(np.log1p, [input_a], [tf.float32])
    # log1p is elementwise, so the output has the same shape as the input.
    input_a.set_shape(static_shape)
    return input_a, input_b

# Run the preprocessing function on every (input_a, input_b) element
mapped_dataset = dataset.map(map_func=tf_some_python_function)

# Show each transformed first component next to its unchanged second component
for transformed, target in mapped_dataset:
    print(transformed.numpy(), ' , ', target.numpy())

[0.        0.6931472]  ,  -1
[1.0986123 1.3862944]  ,  -2
[1.609438  1.7917595]  ,  -3
[1.9459101 2.0794415]  ,  -4
[2.1972246 2.3025851]  ,  -5


In [6]:
# Batches
batch_size = 2
take_n_batch = 5


def _print_batches(batches, limit):
    """Print at most `limit` batches: a 'batch <i>' header, then each component."""
    for index, components in enumerate(batches.take(limit)):
        print('batch', index)
        for component in components:
            print(component.numpy())


# Without repeat the dataset is exhausted after the final, partial batch
print('\nBatched data, keep partial:')
batched_dataset = dataset.batch(batch_size, drop_remainder=False)
_print_batches(batched_dataset, take_n_batch)

# With repeat the batches start over once the dataset has been consumed
print('\nBatched data, keep partial, repeat:')
batched_dataset = dataset.batch(batch_size, drop_remainder=False).repeat()
_print_batches(batched_dataset, take_n_batch)


Batched data, keep partial:
batch 0
[[0 1]
 [2 3]]
[-1 -2]
batch 1
[[4 5]
 [6 7]]
[-3 -4]
batch 2
[[8 9]]
[-5]

Batched data, keep partial, repeat:
batch 0
[[0 1]
 [2 3]]
[-1 -2]
batch 1
[[4 5]
 [6 7]]
[-3 -4]
batch 2
[[8 9]]
[-5]
batch 3
[[0 1]
 [2 3]]
[-1 -2]
batch 4
[[4 5]
 [6 7]]
[-3 -4]


In [7]:
# Shuffle
# Keep the order shuffle -> batch -> repeat: elements are shuffled individually
# before being grouped into batches, and repeating comes last.
# No seed is passed to shuffle, so the printed order differs between runs.
shuffled = (
    dataset
    .shuffle(buffer_size=100)
    .batch(batch_size)
    .repeat()
)

for index, components in enumerate(shuffled.take(take_n_batch)):
    print('batch', index)
    for component in components:
        print(component.numpy())

batch 0
[[8 9]
 [6 7]]
[-5 -4]
batch 1
[[0 1]
 [2 3]]
[-1 -2]
batch 2
[[4 5]]
[-3]
batch 3
[[4 5]
 [0 1]]
[-3 -1]
batch 4
[[6 7]
 [8 9]]
[-4 -5]


In [8]:
# Time series forecasting: building windowed input/label datasets

In [9]:
# Define the time series: a dataset of the consecutive integers 0 .. 99999
timeseries = tf.data.Dataset.range(100_000)

In [10]:
# Simple windows: batching cuts the series into consecutive, non-overlapping
# chunks of 10 steps; drop_remainder=True discards an incomplete last chunk
batches = timeseries.batch(10, drop_remainder=True)

print('Rolling window:')
for window_index, window in enumerate(batches.take(3)):
    print('window', window_index, ':', window.numpy())

Rolling window:
window 0 : [0 1 2 3 4 5 6 7 8 9]
window 1 : [10 11 12 13 14 15 16 17 18 19]
window 2 : [20 21 22 23 24 25 26 27 28 29]


In [11]:
# Rolling window, window = feature data + data to be predicted
def label_next_5_steps(batch):
    """Split a window into (everything but the last 5 steps, the last 5 steps).

    Args:
        batch: Sliceable sequence of time steps (a 1-D window).

    Returns:
        Tuple ``(input_features, labels)`` where ``labels`` holds the final
        five steps and ``input_features`` holds all steps before them.
    """
    input_features = batch[:-5]
    labels = batch[-5:]
    return input_features, labels

# Turn every 10-step window into an (input features, labels) pair
inputs_and_labels = batches.map(map_func=label_next_5_steps)

print('Split rolling window into input features and labels(predicted features):')
for features, labels in inputs_and_labels.take(3):
    print(features.numpy(), " => ", labels.numpy())

Split rolling window into input features and labels(predicted features):
[0 1 2 3 4]  =>  [5 6 7 8 9]
[10 11 12 13 14]  =>  [15 16 17 18 19]
[20 21 22 23 24]  =>  [25 26 27 28 29]


In [12]:
# Rolling window generalized
# Each window holds the input steps followed by the steps to predict.
input_feature_steps = 5   # steps used as model input
predict_steps = 3         # steps to be predicted
stride = 3                # distance between consecutive elements inside a window
shift = 2                 # distance between the starts of consecutive windows
window_size = input_feature_steps + predict_steps


def _window_to_tensor(window_dataset):
    """Collapse one window (itself a dataset) into a single full-size batch."""
    # drop_remainder=True discards windows shorter than window_size
    return window_dataset.batch(window_size, drop_remainder=True)


# window() yields a dataset of datasets; flat_map batches each inner dataset
# and flattens the result into a dataset of window tensors
windows = (
    timeseries
    .window(size=window_size, shift=shift, stride=stride)
    .flat_map(_window_to_tensor)
)

print('Rolling window:')
for window_index, window in enumerate(windows.take(3)):
    print('window', window_index, ':', window.numpy())

# Split rolling window into input features and labels(predicted features)
def split_at_n(batch, n):
    """Split a window into (all but the last ``n`` steps, the last ``n`` steps).

    Args:
        batch: Sliceable sequence of time steps (a 1-D window).
        n: Number of trailing steps to use as labels; expected to be a
            non-negative integer.

    Returns:
        Tuple ``(input_features, labels)``. For ``n == 0`` the whole window is
        returned as input features and the labels are empty.
    """
    if n == 0:
        # -0 == 0, so batch[:-n] would be empty and batch[-n:] the whole
        # window; handle the "no labels" case explicitly instead.
        return batch, batch[:0]
    return batch[:-n], batch[-n:]

def _split_features_and_labels(window):
    """Use the last `predict_steps` entries of a window as labels."""
    return split_at_n(window, n=predict_steps)


inputs_and_labels = windows.map(_split_features_and_labels)

print()
print('Split rolling window into input features and labels(predicted features):')
for features, labels in inputs_and_labels.take(3):
    print(features.numpy(), "=>", labels.numpy())


Rolling window:
window 0 : [ 0  3  6  9 12 15 18 21]
window 1 : [ 2  5  8 11 14 17 20 23]
window 2 : [ 4  7 10 13 16 19 22 25]

Split rolling window into input features and labels(predicted features):
[ 0  3  6  9 12] => [15 18 21]
[ 2  5  8 11 14] => [17 20 23]
[ 4  7 10 13 16] => [19 22 25]
