In [3]:
from transformer import *
from utils import *
import tensorflow as tf
import numpy
import time
import matplotlib.pyplot as plt

In [4]:
class parameters():
    number_sentence = 3
    num_layers = 4
    d_model = 90
    dff = 512
    num_heads = 10
    input_vocab_size = 0
    target_vocab_size = 0
    dropout_rate = 0.1
    freq = 100
    feature = 1
    data = 'eeg'
    seed = 1234
    n_batches = 10

In [5]:
params = parameters()
input_set, target_set, seq_len_set, original_set = load_data(params)
input_set,_ = pad_sequences(input_set,dtype = np.float32)
target_set,_ = pad_sequences(target_set,dtype = np.int64)
params.d_model = input_set.shape[-1]
params.target_vocab_size = len(params.dictionary)

In [7]:
create_padding_mask(input_set[:3,:,:])

<tf.Tensor: id=7, shape=(3, 1, 1, 401, 90), dtype=float32, numpy=
array([[[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]]]],



       [[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]]]],



       [[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]]]]], dtype=float32)>

In [8]:
full_dataset = tf.data.Dataset.from_tensor_slices((input_set, target_set)).shuffle(
        8192, seed=params.seed).batch(params.n_batches)

In [6]:
learning_rate = CustomSchedule(params.d_model)

optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,
                                     epsilon=1e-9)
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')

transformer = Transformer(
        num_layers=params.num_layers,
        d_model=params.d_model, num_heads=params.num_heads,
        dff=params.dff,target_vocab_size=params.target_vocab_size,
        pe_target=6000)
checkpoint_path = "./checkpoints/train"

ckpt = tf.train.Checkpoint(transformer=transformer,
                           optimizer=optimizer)

ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

# if a checkpoint exists, restore the latest checkpoint.
if ckpt_manager.latest_checkpoint:
  ckpt.restore(ckpt_manager.latest_checkpoint)
  print ('Latest checkpoint restored!!')

In [20]:
EPOCHS = 20
# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to the variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.

train_step_signature = [
    tf.TensorSpec(shape=(None,None, None), dtype=tf.float32),
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
]

@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
    tar_inp = tar[:, :-1]
    tar_real = tar[:, 1:]

    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, 
                                     True, 
                                     enc_padding_mask, 
                                     combined_mask, 
                                     dec_padding_mask)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)    
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(tar_real, predictions)

In [21]:
for epoch in range(EPOCHS):
    start = time.time()

    train_loss.reset_states()
    train_accuracy.reset_states()
  
  # inp -> portuguese, tar -> english
    for (batch, (inp, tar)) in enumerate(full_dataset):
        train_step(inp, tar)

    if batch % 50 == 0:
        print ('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
          epoch + 1, batch, train_loss.result(), train_accuracy.result()))

    if (epoch + 1) % 5 == 0:
        ckpt_save_path = ckpt_manager.save()
        print ('Saving checkpoint for epoch {} at {}'.format(epoch+1,
                                                         ckpt_save_path))

    print ('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(epoch + 1, 
                                                train_loss.result(), 
                                                train_accuracy.result()))

    print ('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

ValueError: in converted code:

    <ipython-input-7-1c25a515b170>:21 train_step  *
        predictions, _ = transformer(inp, tar_inp,
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/gautam-admin/Brainology/transformer/codes/transformer/transformer.py:154 call  *
        enc_output = self.encoder(inp, training, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/gautam-admin/Brainology/transformer/codes/transformer/transformer.py:97 call  *
        x = self.enc_layers[i](x, training, mask)
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/gautam-admin/Brainology/transformer/codes/transformer/transformer.py:29 call  *
        attn_output, _ = self.mha(x, x, x, mask)  # (batch_size, input_seq_len, d_model)
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:847 __call__
        outputs = call_fn(cast_inputs, *args, **kwargs)
    /home/gautam-admin/Brainology/transformer/codes/transformer/attention.py:68 call  *
        q = self.wq(q)  # (batch_size, seq_len, d_model)
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:817 __call__
        self._maybe_build(inputs)
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:2141 _maybe_build
        self.build(input_shapes)
    /home/gautam-admin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/core.py:1015 build
        raise ValueError('The last dimension of the inputs to `Dense` '

    ValueError: The last dimension of the inputs to `Dense` should be defined. Found `None`.


In [11]:
for (batch, (inp, tar)) in enumerate(full_dataset):
    a = 1
tar_inp = tar[:, :-1]
tar_real = tar[:, 1:]
enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

In [7]:
params.dictionary

{'<start>': 0,
 '<end>': 1,
 'this': 3,
 'was': 4,
 'easy': 5,
 'for': 6,
 'us': 7,
 'jane': 8,
 'may': 9,
 'earn': 10,
 'more': 11,
 'money': 12,
 'by': 13,
 'working': 14,
 'hard': 15,
 'she': 16,
 'is': 17,
 'thinner': 18,
 'than': 19,
 'i': 20,
 'am': 21}

In [15]:
enc_padding_mask

<tf.Tensor: id=65, shape=(10, 1, 1, 401, 90), dtype=float32, numpy=
array([[[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]]]],



       [[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]]]],



       [[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.],
          [1., 1., 1., ..., 1., 1., 1.]]]],



       ...,



       [[[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0.