In [1]:
# Learning the TensorFlow `tf.data` API

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
# Toy inputs: a (5, 2) matrix holding 0..9 row by row and a vector -1..-5
array_a = np.arange(10).reshape(5, 2)
array_b = -np.arange(1, 6)
print(array_a, array_b, sep='\n')

In [None]:
# Build a tf.data dataset whose elements are (row of array_a, entry of array_b) pairs
dataset = tf.data.Dataset.from_tensor_slices((array_a, array_b))
for features, target in dataset:
    print(features.numpy(), ' , ', target.numpy())

In [None]:
# Preprocessing: Apply Python functions
def tf_some_python_function(input_a, input_b):
    """Apply ``np.log1p`` to the first component of a dataset element.

    The NumPy call is wrapped in ``tf.py_function`` (declared output dtype
    ``tf.float32``) so that it can be used as a ``Dataset.map`` function.

    Args:
        input_a: component that is passed through ``np.log1p``.
        input_b: component that is returned unchanged.

    Returns:
        Tuple ``(log1p(input_a), input_b)``.
    """
    outputs = tf.py_function(func=np.log1p, inp=[input_a], Tout=[tf.float32])
    # Tout is a one-element list, so the op returns a one-element list.
    log_a = outputs[0]
    return log_a, input_b

# Run the wrapper over every element, then show the transformed pairs
mapped_dataset = dataset.map(tf_some_python_function)

for log_a, target in mapped_dataset:
    print(log_a.numpy(), ' , ', target.numpy())

In [None]:
# Batches
batch_size = 2
take_n_batch = 5


def _print_batches(ds, n_batches):
    """Print up to `n_batches` batches of `ds`, one component per line."""
    for index, components in enumerate(ds.take(n_batches)):
        print('batch', index)
        for component in components:
            print(component.numpy())


# Without repeat() the dataset simply ends when its elements are used up,
# even if fewer than `take_n_batch` batches were produced.
print('\nBatched data, keep partial:')
batched_dataset = dataset.batch(batch_size, drop_remainder=False)
_print_batches(batched_dataset, take_n_batch)

# With repeat() the batched dataset restarts from the beginning when exhausted,
# so take() can always deliver `take_n_batch` batches.
print('\nBatched data, keep partial, repeat:')
batched_dataset = dataset.batch(batch_size, drop_remainder=False).repeat()
_print_batches(batched_dataset, take_n_batch)

In [None]:
# Shuffle
# Keep the order shuffle -> batch -> repeat: shuffling first mixes individual
# elements (not whole batches), and repeating last restarts the whole pipeline.
shuffled = dataset.shuffle(buffer_size=100).batch(batch_size).repeat()
for batch_index, components in enumerate(shuffled.take(take_n_batch)):
    print('batch', batch_index)
    for component in components:
        print(component.numpy())

In [None]:
# Timeseries forecasting

In [None]:
# Stand-in time series: the integers 0 .. 99999, one scalar per element
timeseries = tf.data.Dataset.range(0, 100000)

In [None]:
# Simple windowing: cut the series into consecutive chunks of 10 steps
# (batch() yields non-overlapping chunks; an incomplete last chunk is dropped)
batches = timeseries.batch(batch_size=10, drop_remainder=True)

print('Rolling window:')
for window_index, chunk in enumerate(batches.take(3)):
    print('window', window_index, ':', chunk.numpy())

In [None]:
# Rolling window, window = feature data + data to be predicted
def label_next_5_steps(batch):
    """Split a window into (inputs, labels), the labels being its last 5 steps.

    Args:
        batch: a window that supports slicing along its first axis.

    Returns:
        Tuple ``(batch[:-5], batch[-5:])``.
    """
    horizon = 5
    inputs = batch[:-horizon]
    labels = batch[-horizon:]
    return (inputs, labels)

# Turn every 10-step chunk into an (inputs, labels) pair
inputs_and_labels = batches.map(label_next_5_steps)

print('Split rolling window into input features and labels(predicted features):')
for past_steps, future_steps in inputs_and_labels.take(3):
    print(past_steps.numpy(), " => ", future_steps.numpy())

In [None]:
# Rolling window generalized
# Window geometry: each window holds the input steps followed by the steps to predict
input_feature_steps = 5
predict_steps = 3
stride = 3
shift = 2
window_size = input_feature_steps + predict_steps


def _window_to_tensor(window):
    """Collapse one nested window dataset into a single `window_size`-long element."""
    return window.batch(window_size, drop_remainder=True)


# window() yields a dataset of sub-datasets; batching each sub-dataset and
# flat-mapping turns that into a flat dataset with one element per full window.
windows = timeseries.window(size=window_size, shift=shift, stride=stride)
windows = windows.flat_map(_window_to_tensor)

print('Rolling window:')
for window_index, sample in enumerate(windows.take(3)):
    print('window', window_index, ':', sample.numpy())

# Split rolling window into input features and labels(predicted features)
def split_at_n(batch, n):
    """Split a window into (input features, labels) `n` steps before its end.

    Args:
        batch: a window that supports slicing along its first axis
            (tensor, array, list, ...).
        n: number of trailing steps that become the labels.

    Returns:
        Tuple ``(batch[:-n], batch[-n:])``. For ``n == 0`` the whole window is
        returned as features together with an empty label slice.
    """
    if isinstance(n, int) and n == 0:
        # -0 == 0, so the plain slices below would give (empty, whole window),
        # i.e. features and labels swapped.
        return batch[:], batch[:0]
    return batch[:-n], batch[-n:]

def _features_and_labels(window):
    """Use the last `predict_steps` entries of a window as its labels."""
    return split_at_n(window, n=predict_steps)


inputs_and_labels = windows.map(_features_and_labels)

print()
print('Split rolling window into input features and labels(predicted features):')
for past_steps, future_steps in inputs_and_labels.take(3):
    print(past_steps.numpy(), "=>", future_steps.numpy())


In [None]:
# The dataset pipeline in 'real' action: windowing generated signals and training a model on them

In [None]:
# Generating data and labels
def dummy_function(x):
    """Evaluate three synthetic signals at `x`.

    Args:
        x: a number (or NumPy array; every operation used is element-wise).

    Returns:
        List ``[a, b, c]`` with
            a = 0.4 * sin(0.3 * x) * |x|
            b = log(1 + |x|) * cos(x)
            c = exp(-x**2 / 100) * cos(x)
    """
    magnitude = np.abs(x)
    cosine = np.cos(x)
    signal_a = 0.4 * np.sin(0.3 * x) * magnitude
    signal_b = np.log1p(magnitude) * cosine
    signal_c = np.exp(-x**2 / 100) * cosine
    return [signal_a, signal_b, signal_c]

# Sample the three signals on a regular grid and collect them in a DataFrame.
data_columns = ['Data_col_1', 'Data_col_2', 'Data_col_3']
x = np.arange(-10, 10, 0.01)
# dummy_function only uses element-wise NumPy operations, so it is evaluated on
# the whole grid at once instead of once per sample in a Python loop; the three
# returned arrays become the three columns.
df = pd.DataFrame(np.column_stack(dummy_function(x)), columns=data_columns)

# Trailing ';' keeps the notebook from echoing the Axes repr below the figure.
df[data_columns].plot();

In [None]:
# Rolling window generalized
# Create timeseries dataset: one element per row of the three signal columns
signal_values = df[['Data_col_1', 'Data_col_2', 'Data_col_3']].values
timeseries = tf.data.Dataset.from_tensor_slices(signal_values)

# Window geometry: each window holds the input steps followed by the steps to predict
input_feature_steps = 1
predict_steps = 3
stride = 2
shift = 3
window_size = input_feature_steps + predict_steps

# window() yields a dataset of sub-datasets; batching each sub-dataset and
# flat-mapping turns that into a flat dataset with one element per full window.
windows = (
    timeseries
    .window(size=window_size, shift=shift, stride=stride)
    .flat_map(lambda window: window.batch(window_size, drop_remainder=True))
)

print('Rolling window:')
for window_index, sample in enumerate(windows.take(3)):
    print('window', window_index, ':', sample.numpy())

# Split rolling window into input features and labels(predicted features)
def split_at_n(batch, n):
    """Split a window into (input features, labels) `n` steps before its end.

    Args:
        batch: a window that supports slicing along its first axis
            (tensor, array, list, ...).
        n: number of trailing steps that become the labels.

    Returns:
        Tuple ``(batch[:-n], batch[-n:])``. For ``n == 0`` the whole window is
        returned as features together with an empty label slice.
    """
    if isinstance(n, int) and n == 0:
        # -0 == 0, so the plain slices below would give (empty, whole window),
        # i.e. features and labels swapped.
        return batch[:], batch[:0]
    return batch[:-n], batch[-n:]

# Turn every window into an (input features, labels) pair
inputs_and_labels = windows.map(lambda x: split_at_n(x, n=predict_steps))

print()
print('Split rolling window into input features and labels(predicted features):')
for input_feature, label in inputs_and_labels.take(3):
    print(input_feature.numpy(), "=>", label.numpy())

# Shuffle, then batch, before feeding the neural network.
# Shuffling must come first: shuffling an already batched dataset only reorders
# whole batches and leaves every batch filled with consecutive windows.
# NOTE: shuffle() draws from a sliding buffer; raise buffer_size to at least the
# number of windows if a uniform shuffle over the whole series is wanted.
inputs_and_labels = inputs_and_labels.shuffle(buffer_size=100)
inputs_and_labels = inputs_and_labels.batch(100)

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Flatten, Dense, Reshape

def create_model(input_steps=None, output_steps=None, n_features=3):
    """Build and compile a small dense network for multi-step forecasting.

    Args:
        input_steps: number of time steps in one input window. Defaults to the
            notebook-level `input_feature_steps`.
        output_steps: number of time steps to predict. Defaults to the
            notebook-level `predict_steps`.
        n_features: number of values per time step (default 3).

    Returns:
        A Keras `Model` mapping inputs of shape `(input_steps, n_features)` to
        outputs of shape `(output_steps, n_features)`, compiled with the Adam
        optimizer and mean-squared-error loss.
    """
    # Fall back to the notebook globals so that `create_model()` keeps working
    # exactly as before.
    if input_steps is None:
        input_steps = input_feature_steps
    if output_steps is None:
        output_steps = predict_steps

    input0 = Input(shape=(input_steps, n_features))

    # Dense layers work on flat vectors, so merge the time and feature axes.
    flatten = Flatten()(input0)

    dense0 = Dense(128, activation='relu', name='1st')(flatten)
    dense1 = Dense(32, activation='relu', name='2nd')(dense0)

    # No activation on the output layer; one unit per predicted value, then
    # reshaped back to (time step, feature).
    output0 = Dense(output_steps * n_features, name='3rd')(dense1)
    output0 = Reshape((output_steps, n_features))(output0)

    model = Model(input0, output0)
    model.compile(optimizer='adam', loss='mse')

    return model

# Build the compiled network and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Train on the (inputs, labels) dataset for 10 epochs with progress output
model.fit(
    x=inputs_and_labels,
    epochs=10,
    verbose=1,
)