In [1]:
import random
import numpy as np
import keras.utils

Using TensorFlow backend.


We generate a sequence in the following data format:
* *x* and *y* describing a position in a grid of `100 x 100`
* *c* describing a control status with 3 possible states (0 = starting, 1 = holding, 2 = pausing)

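State transitions follow this diagram (starting always leads to holding; holding may repeat, restart or pause; pausing may repeat or restart):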

```
+-+     +-+     +-+
|0| --> |1| --> |2|
+++  ^  +++  ^  +++
 ^   |   |   |   |
 |   |   |   |   |
 +---+---+   +---+
 |               |
 |               |
 +---------------+
```

This results in `100 x 100 x 3 = 30,000` possible one-hot encoded values, indexed from 1 to 30,000. Index 0 is reserved for the start symbol, giving 30,001 classes in total.

In [30]:
# Control states of a single sequence step (0 = starting, 1 = holding, 2 = pausing).
STATE_STARTING, STATE_HOLDING, STATE_PAUSING = 0, 1, 2

# Number of distinct control states; sizes the last axis of the index space.
STATES_COUNT = 3

# Position used while pausing and before the first start.
DEFAULT_POSITION = [0, 0]


def random_position(grid_size):
    """Draw a random cell of a square grid.

    Args:
        grid_size: Side length of the grid; each coordinate is drawn with
            ``random.randint`` from ``0 .. grid_size - 1`` (inclusive).

    Returns:
        A new two-element list ``[x, y]`` of ints.
    """
    upper = grid_size - 1
    # Two separate draws, first coordinate first, to keep the RNG order stable.
    x = random.randint(0, upper)
    y = random.randint(0, upper)
    return [x, y]


def flip_position(pos):
    """Swap the two coordinates of a position.

    Args:
        pos: Indexable with at least two items; only indices 0 and 1 are read.

    Returns:
        A new list ``[pos[1], pos[0]]``; ``pos`` itself is left untouched.
    """
    first, second = pos[0], pos[1]
    return [second, first]
    

def next_state(previous_state):
    """Pick the control state that follows ``previous_state``.

    Transitions:
        * ``STATE_STARTING`` -> always ``STATE_HOLDING`` (no random draw).
        * ``STATE_HOLDING``  -> random choice of starting, holding or pausing.
        * anything else (``STATE_PAUSING``, or e.g. ``None`` at the very
          beginning of a sequence) -> random choice of starting or pausing.

    Args:
        previous_state: State of the preceding step, or any other value
            (such as ``None``) when there is no preceding step.

    Returns:
        One of the ``STATE_*`` constants.
    """
    if previous_state == STATE_STARTING:
        return STATE_HOLDING
    if previous_state == STATE_HOLDING:
        candidates = [STATE_STARTING, STATE_HOLDING, STATE_PAUSING]
    else:
        # Pausing and the "no previous state" case share the same options.
        candidates = [STATE_STARTING, STATE_PAUSING]
    return random.choice(candidates)
    

def generate_sequence(grid_size, seq_len):
    """Generate a random sequence of ``[x, y, state]`` vectors.

    The state of each step is chosen by ``next_state`` from the previous one
    (``None`` before the first step). The position follows the state:
    starting draws a new random position, pausing resets it to
    ``DEFAULT_POSITION``, and holding keeps the previous position.

    Args:
        grid_size: Side length of the grid positions are drawn from.
        seq_len: Number of steps to generate.

    Returns:
        A list of ``seq_len`` NumPy arrays, each holding ``x``, ``y`` and the
        control state of one step.
    """
    steps = []
    state, position = None, DEFAULT_POSITION
    for _ in range(seq_len):
        state = next_state(state)
        if state == STATE_PAUSING:
            position = DEFAULT_POSITION
        elif state == STATE_STARTING:
            position = random_position(grid_size)
        # Any other state (holding) carries the previous position over.
        steps.append(np.concatenate((position, [state])))
    return steps


def generate_alternative_sequence(seq, grid_size):
    """Generate a second sequence that "reacts" to ``seq``.

    A fresh sequence of the same length is generated independently. Wherever
    that new sequence is in ``STATE_STARTING``, its position is overwritten
    with the coordinate-flipped position of ``seq`` at the same step.

    NOTE(review): only the starting steps are overwritten; holding steps that
    follow keep the position originally drawn by ``generate_sequence`` rather
    than the flipped one -- confirm this is intended.

    Args:
        seq: Reference sequence of ``[x, y, state]`` vectors.
        grid_size: Side length of the grid, forwarded to ``generate_sequence``.

    Returns:
        The new sequence as a list of ``[x, y, state]`` vectors.
    """
    reaction = generate_sequence(grid_size, len(seq))
    for theirs, ours in zip(seq, reaction):
        if ours[2] != STATE_STARTING:
            continue
        # "React" to the other sequence: take over its position, flipped.
        ours[0], ours[1] = flip_position([theirs[0], theirs[1]])
    return reaction
    

def encode_sequence_indexes(sequence, grid_size):
    """Map every ``[x, y, state]`` vector of a sequence to a flat class index.

    The index is the row-major position of ``(x, y, state)`` in a
    ``grid_size x grid_size x STATES_COUNT`` volume, shifted by one so that
    index 0 stays free (``generate_dataset`` uses it as the start symbol).

    Args:
        sequence: Iterable of integer ``[x, y, state]`` vectors.
        grid_size: Side length of the grid.

    Returns:
        A list with one index per vector, each in
        ``1 .. grid_size * grid_size * STATES_COUNT``.

    Raises:
        ValueError: Raised by ``np.ravel_multi_index`` when a coordinate or
            state lies outside the volume.
    """
    # Only the shape is needed; there is no reason to allocate an array of
    # that size on every call just to read its ``.shape``.
    dims = (grid_size, grid_size, STATES_COUNT)
    return [np.ravel_multi_index(vector, dims) + 1 for vector in sequence]


def generate_dataset(grid_size, seq_len, n_samples):
    """Build one-hot encoded source/target pairs for sequence-to-sequence training.

    For every sample a source sequence and a "reacting" target sequence are
    generated and one-hot encoded. In addition a decoder-input variant of the
    target is produced: the target shifted right by one step, beginning with
    the start symbol (class 0).

    Args:
        grid_size: Side length of the square grid.
        seq_len: Number of steps per sequence.
        n_samples: Number of (source, target) pairs to generate.

    Returns:
        A tuple ``(src_data, target_pad_data, target_data)`` of lists with
        ``n_samples`` entries each. Every entry is the
        ``keras.utils.to_categorical`` encoding of one sequence using
        ``grid_size * grid_size * STATES_COUNT + 1`` classes.
    """
    # all x/y positions in grid * state variants + 1 start symbol
    num_classes = (grid_size * grid_size * STATES_COUNT) + 1
    start_symbol = 0

    def one_hot(indexes):
        # Single place for the encoding so all three outputs share num_classes.
        return keras.utils.to_categorical(indexes, num_classes=num_classes)

    src_data, target_pad_data, target_data = [], [], []
    for _ in range(n_samples):
        # Source first, then target: keeps the order of random draws stable.
        src = generate_sequence(grid_size, seq_len)
        target = generate_alternative_sequence(src, grid_size)

        src_indexed = encode_sequence_indexes(src, grid_size)
        target_indexed = encode_sequence_indexes(target, grid_size)
        # Decoder input = target shifted right by one, led by the start symbol.
        # Reuses the already encoded target instead of encoding target[:-1]
        # again; prepending and then dropping the last element keeps its
        # length equal to the target's, also for empty sequences.
        target_pad_indexed = ([start_symbol] + target_indexed)[:-1]

        src_data.append(one_hot(src_indexed))
        target_pad_data.append(one_hot(target_pad_indexed))
        target_data.append(one_hot(target_indexed))
    return src_data, target_pad_data, target_data

In [33]:
# Smoke test: generate a single sample of 10 steps on a 100 x 100 grid.
src, target_pad, target = generate_dataset(grid_size=100,
                                           seq_len=10,
                                           n_samples=1)
# Expected shape is (seq_len, num_classes) = (10, 100 * 100 * 3 + 1).
print(src[0].shape)

(10, 30001)
