<a href="https://colab.research.google.com/github/goku2130/workspace/blob/master/Parity%20XOR%20using%20LSTM(Open%20AI%20Challenge).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf

# Upper bound on the length of a generated sequence; also the time dimension every batch is padded to.
MAX_SEQ_LEN = 100
# Batch size used when the training generator is created.
BATCH = 1024
# Passed to generator_xor: False -> a random length is drawn per sample instead of always using MAX_SEQ_LEN.
IS_FIXED_LEN = False
# Number of samples generator_xor yields in one pass before it is exhausted.
NUM_SAMPLES = 100000
# False -> padded batches are reversed along the time axis, so the padding ends up in front of the data.
POST_PADDING = False

def generator_xor(is_fixed_len = True, num_samples = None, max_seq_len = None):
    """
    Generates random binary sequences together with their parity (the XOR of all bits), using numpy.
    Sequences are either all of the maximum length or of a random length between 1 and the maximum
    (both inclusive).
    @param is_fixed_len(Bool): Whether every sequence has the maximum length or a random length
    @param num_samples(Int): number of samples to yield; defaults to the module-level NUM_SAMPLES
    @param max_seq_len(Int): largest sequence length; defaults to the module-level MAX_SEQ_LEN
    @return: yields tuples (output, series) of numpy arrays, where output is the one-hot parity
             [1 - parity, parity] of shape (2,) and series is the bit sequence of shape (seq_len, 1)
    """
    # Resolve the defaults at call time so the module-level constants stay the single source of truth.
    if num_samples is None:
        num_samples = NUM_SAMPLES
    if max_seq_len is None:
        max_seq_len = MAX_SEQ_LEN

    for _ in range(num_samples):

        if is_fixed_len:
            seq_len = max_seq_len
        else:
            # randint's upper bound is exclusive, so +1 is needed for max_seq_len itself to be drawn.
            seq_len = np.random.randint(1, max_seq_len + 1)

        series = np.random.randint(low=0, high=2, size=seq_len)
        parity = np.sum(series) % 2
        output = np.array([1 - parity, parity])
        yield output, np.reshape(series, (seq_len, 1))


In [2]:
# Pull a single sample from the generator and show it; nothing is printed if the generator is empty.
first_sample = next(generator_xor(is_fixed_len = IS_FIXED_LEN), None)
if first_sample is not None:
    op, seq = first_sample
    print('Sample output: XOR =>{} \n Sequence => {}'.format(op, seq))

Sample output: XOR =>[0 1] 
 Sequence => [[0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]]


In [7]:
# Sequence classifier: masking -> single-unit LSTM -> 2-unit dense output.
model = tf.keras.models.Sequential()
# Time dimension is None so batches of any padded length are accepted; one feature (the bit) per step.
model.add(tf.keras.layers.Input(shape=(None, 1)))
# Timesteps equal to -10 are skipped by the LSTM; this must match the padding value used when batching.
# (No input_shape here: the Input layer above already defines the model's input.)
model.add(tf.keras.layers.Masking(mask_value=-10))
model.add(tf.keras.layers.LSTM(1, return_sequences=False, stateful=False))
# NOTE(review): targets are one-hot [1 - parity, parity]; sigmoid + binary_crossentropy treats the two
# units as independent labels. softmax + categorical_crossentropy may be the intended pairing - confirm.
model.add(tf.keras.layers.Dense(2, activation='sigmoid'))
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)
# summary() prints the table itself and returns None, so wrapping it in print() emits a stray "None".
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_1 (Masking)          (None, None, 1)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1)                 12        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 4         
Total params: 16
Trainable params: 16
Non-trainable params: 0
_________________________________________________________________
None


In [4]:
def split_dataset(dataset: "tf.data.Dataset", validation_data_fraction: float):
    """
    Splits a dataset of type tf.data.Dataset into a training and validation dataset using given ratio.
    The fraction is rounded to a whole percent (two decimal places). Elements are assigned by index:
    within every run of 100 consecutive elements, the first `percent` go to validation and the rest
    go to training.
    @param dataset: the input dataset to split.
    @param validation_data_fraction: the fraction of the validation data as a float between 0 and 1.
    @return: a tuple of two tf.data.Datasets as (training, validation)
    @raise ValueError: if the fraction, once rounded to a percent, falls outside [0, 100].
    """

    validation_data_percent = round(validation_data_fraction * 100)
    if not (0 <= validation_data_percent <= 100):
        raise ValueError("validation data fraction must be ∈ [0,1]")

    dataset = dataset.enumerate()
    # Index residues run 0..99, so a strict "<" selects exactly `validation_data_percent` of every 100
    # elements ("<=" would select one too many, e.g. 21% for 0.2 and 1% for 0.0).
    train_dataset = dataset.filter(lambda f, data: f % 100 >= validation_data_percent)
    validation_dataset = dataset.filter(lambda f, data: f % 100 < validation_data_percent)

    # remove enumeration
    train_dataset = train_dataset.map(lambda f, data: data)
    validation_dataset = validation_dataset.map(lambda f, data: data)

    return train_dataset, validation_dataset

In [8]:
def xor_data_generator_wrapper(batch_size, train_flag = True):
  """
  Endless Python generator of padded training batches built on top of generator_xor.
  Sequences are padded with -10 up to MAX_SEQ_LEN. When POST_PADDING is False each batch is reversed
  along the time axis so that the padding precedes the data (parity does not depend on bit order).
  @param batch_size(Int): number of sequences per batch; the last batch of a pass over the data may be smaller
  @param train_flag(Bool): only True is supported; with False the generator yields nothing
  @return: yields tuples (batch_seq, batch_output): the padded sequences of shape
           (batch, MAX_SEQ_LEN, 1) and their one-hot parity of shape (batch, 2)
  """
  # A validation split (see split_dataset) is not wired in, so there is nothing to yield in that mode.
  if not train_flag:
    return

  data_series = tf.data.Dataset.from_generator(generator_xor, args=[IS_FIXED_LEN],
                                                output_types=(tf.int32, tf.int32),
                                                output_shapes=((2,), (None, 1)))

  train_series = data_series.padded_batch(batch_size = batch_size, padding_values=-10,
                                          padded_shapes=([2,], [MAX_SEQ_LEN, 1]))

  pre_padding = not POST_PADDING

  # Iterate a single repeated dataset instead of calling next(iter(...)) per batch, which rebuilt the
  # iterator and restarted generator_xor for every batch.
  for batch_output, batch_seq in train_series.repeat():
    if pre_padding:
      # Flip the time axis: padding appended at the end moves to the front.
      batch_seq = tf.reverse(batch_seq, [1])
    yield batch_seq, batch_output


# Number of full batches in one pass over NUM_SAMPLES samples.
num_batches = NUM_SAMPLES // BATCH
train_data_generator = xor_data_generator_wrapper(batch_size=BATCH)
# Model.fit consumes Python generators directly; Model.fit_generator is deprecated in its favour.
# steps_per_epoch is required because the generator never ends.
model.fit(train_data_generator,
          steps_per_epoch=num_batches, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f2012d62780>