In [13]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
import os
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
assert tf.config.list_physical_devices('GPU')

%autoreload
import dataset

import schedulers

%autoreload
import transformer

In [54]:
# NOTE(review): BATCH_SIZE is not referenced by any cell visible in this notebook.
BATCH_SIZE = 32

# Maximum sequence length: used for both positional-encoding sizes below and
# passed as `maxlen` to dataset.load_data by the load_data helper.
MAXLEN = 60

# Vocabulary sizes, taken from the lookup tables exposed by the dataset module.
LETTERS_SIZE = len(dataset.letters_table)
NIQQUD_SIZE = len(dataset.niqqud_table)
DAGESH_SIZE = len(dataset.dagesh_table)
SIN_SIZE = len(dataset.sin_table)

# Model width; also the first argument of the learning-rate schedule below.
d_model = 1000

# Input vocabulary is the letters table, target vocabulary is the niqqud table.
# `rate` is presumably the dropout rate (0.0 = disabled) — TODO confirm in transformer.py.
model = transformer.Transformer(
    num_layers=1,
    d_model=d_model,
    num_heads=1,
    dff=256,
    input_vocab_size=LETTERS_SIZE,
    target_vocab_size=NIQQUD_SIZE, 
    maximum_position_encoding_input=MAXLEN, 
    maximum_position_encoding_target=MAXLEN,
    rate=0.0
)
# Learning-rate schedule parameterised by the model width and a 1000-step warmup.
learning_rate = transformer.CustomSchedule(d_model, warmup_steps=1000)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9),
    loss=transformer.train_loss
)

# model.build((None, MAXLEN))
# model.summary()
# Snapshot the freshly constructed model so a later cell can restore this starting point.
# NOTE(review): model.build is commented out above, so the model may not have created
# its variables yet — confirm this checkpoint really contains the layer weights.
model.save_weights('./checkpoints/uninit')

In [55]:
def load_data(source, validation=0.1):
    """Load the train/validation splits for the given text files.

    Args:
        source: Iterable of file names; each one is resolved relative to the
            ``texts`` directory.
        validation: Forwarded unchanged as the second positional argument of
            ``dataset.load_data`` (default 0.1).

    Returns:
        tuple: ``(train, valid)`` exactly as produced by ``dataset.load_data``,
        which is called with ``maxlen=MAXLEN``.
    """
    paths = []
    for name in source:
        paths.append(os.path.join('texts', name))

    train_split, valid_split = dataset.load_data(paths, validation, maxlen=MAXLEN)
    return train_split, valid_split

def fit(data, batch_size=1):
    """Run one pass of training steps over the training split ``data[0]``.

    ``transformer.train_loss`` and ``transformer.train_accuracy`` are reset
    first; then ``model.train_step`` is called on consecutive slices of
    ``data[0].normalized`` (inputs) and ``data[0].niqqud`` (targets). Progress
    is printed on a single line that is overwritten via a carriage return.

    Args:
        data: Sequence whose first element supports ``len()`` and exposes
            sliceable ``normalized`` and ``niqqud`` attributes.
        batch_size: Number of consecutive samples handed to each training
            step. The default of 1 keeps the historical one-slice-per-index
            behaviour; pass e.g. the notebook's ``BATCH_SIZE`` for mini-batches
            (NOTE(review): confirm ``model.train_step`` accepts larger slices).

    Returns:
        list[dict]: The rounded metrics reported by every step, in step order,
        so that ``history = fit(...)`` yields something usable.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f'batch_size must be >= 1, got {batch_size}')

    transformer.train_loss.reset_states()
    transformer.train_accuracy.reset_states()

    train = data[0]
    history = []
    for step, start in enumerate(range(0, len(train), batch_size)):
        s = slice(start, start + batch_size)
        res = model.train_step(train.normalized[s], train.niqqud[s])
        metrics = {k: round(v, 4) for k, v in res.items()}
        history.append(metrics)
        # Trailing spaces + '\r' overwrite the previous progress line in place.
        print(step, metrics, end='      \r')
    return history

In [56]:
# Restore the checkpoint written right after model construction, train on
# `data_modern`, then save the trained weights under a separate checkpoint name.
# NOTE(review): `data_modern` is not defined in any cell visible here — it must be
# created (presumably via load_data) before this cell can run on a fresh kernel.
model.load_weights('./checkpoints/uninit')
history = fit(data_modern)
# NOTE(review): `true_accuracy` below is not defined in any visible cell either.
# print(true_accuracy(data_modern))
model.save_weights('./checkpoints/modern')

17826 {'loss': 1.5824, 'acc': 0.4748}loss': 2.1412, 'acc': 0.3864} {'loss': 2.0333, 'acc': 0.4071} {'loss': 1.7163, 'acc': 0.4662} {'loss': 1.7141, 'acc': 0.466} {'loss': 1.6958, 'acc': 0.4669} {'loss': 1.69, 'acc': 0.4674} {'loss': 1.6893, 'acc': 0.4674} {'loss': 1.6732, 'acc': 0.4691} {'loss': 1.6717, 'acc': 0.4691} {'loss': 1.664, 'acc': 0.4698} {'loss': 1.6617, 'acc': 0.47} {'loss': 1.6438, 'acc': 0.4709} {'loss': 1.6414, 'acc': 0.4712} {'loss': 1.6383, 'acc': 0.4709} {'loss': 1.6331, 'acc': 0.4711} {'loss': 1.6323, 'acc': 0.4712} {'loss': 1.6322, 'acc': 0.4711} {'loss': 1.6279, 'acc': 0.4715} {'loss': 1.6262, 'acc': 0.4718} {'loss': 1.6229, 'acc': 0.472} {'loss': 1.6226, 'acc': 0.472} {'loss': 1.6201, 'acc': 0.4722} {'loss': 1.6151, 'acc': 0.4724} {'loss': 1.615, 'acc': 0.4724} {'loss': 1.6121, 'acc': 0.4724} {'loss': 1.612, 'acc': 0.4725} {'loss': 1.6116, 'acc': 0.4725} {'loss': 1.6101, 'acc': 0.4727} {'loss': 1.6098, 'acc': 0.4727} {'loss': 1.608, 'acc': 0.4729} {'loss': 1.6079,

In [None]:
# Reload the weights that the training cell saved to './checkpoints/modern'.
model.load_weights('./checkpoints/modern')

def print_predictions(data, s):
    """Print predicted vs. expected text for a slice of ``data`` with niqqud accuracy.

    For every sample in the slice, the merged predicted text and the merged
    expected text are printed (under the Hebrew labels used below), followed by
    the fraction of matching niqqud labels and the mismatch count. A final line
    prints the accuracy over all compared positions. Only the niqqud labels are
    scored; dagesh and sin are used solely to build the printed text.

    Args:
        data: Object exposing sliceable ``normalized``, ``text``, ``niqqud``,
            ``dagesh`` and ``sin`` attributes.
        s: Slice selecting the samples to show.
    """
    batch = data.normalized[s]
    prediction = model.predict(batch)

    # The prediction is indexed as three outputs: niqqud, dagesh, sin.
    actual_niqqud, actual_dagesh, actual_sin = (
        dataset.from_categorical(prediction[k]) for k in range(3)
    )
    expected_niqqud = data.niqqud[s]
    expected_dagesh = data.dagesh[s]
    expected_sin = data.sin[s]

    actual = dataset.merge(data.text[s], ts=batch, ns=actual_niqqud, ds=actual_dagesh, ss=actual_sin)
    expected = dataset.merge(data.text[s], ts=batch, ns=expected_niqqud, ds=expected_dagesh, ss=expected_sin)

    total = []
    for i, (a, e) in enumerate(zip(actual, expected)):
        print('מצוי: ', a)
        print('רצוי: ', e)
        # Compare only up to the first 0 in the expected labels (presumably the
        # padding id). A row without any 0 is compared in full instead of
        # raising ValueError from list.index.
        row = expected_niqqud[i].tolist()
        last = row.index(0) if 0 in row else len(row)
        res = expected_niqqud[i][:last] == actual_niqqud[i][:last]
        total.extend(res)
        print(round(np.mean(res), 2), f'({last - sum(res)} out of {last})')
        print()
    print(round(np.mean(total), 3))

# Show predictions for every sample of data_modern[1] (slice(0, None) selects all);
# presumably the validation split, given load_data returns (train, valid) — confirm.
print_predictions(data_modern[1], slice(0, None))

In [None]:
def plot_attention_weights(attention, sentence, result, layer):
    """Plot one attention map per head for the chosen layer.

    NOTE(review): ``plt``, ``tokenizer_pt`` and ``tokenizer_en`` are not defined
    in any cell visible in this notebook; they must exist before this is called.

    Args:
        attention: Mapping/sequence indexed by ``layer``; the selected entry is
            squeezed on axis 0 and then iterated head by head along its new
            first axis.
        sentence: Input text; encoded with ``tokenizer_pt`` for the x tick labels.
        result: Iterable of output token ids; decoded with ``tokenizer_en`` for
            the y tick labels (ids >= ``tokenizer_en.vocab_size`` are skipped).
        layer: Key selecting which entry of ``attention`` to plot.
    """
    fig = plt.figure(figsize=(16, 8))

    sentence = tokenizer_pt.encode(sentence)

    attention = tf.squeeze(attention[layer], axis=0)

    num_heads = attention.shape[0]
    # Keep the historical 2x4 grid for up to 8 heads, and add rows (ceil
    # division) beyond that instead of failing on an out-of-range subplot index.
    n_cols = 4
    n_rows = max(2, -(-num_heads // n_cols))

    # Tick labels and font settings do not depend on the head: build them once.
    fontdict = {'fontsize': 10}
    x_labels = ['<start>']+[tokenizer_pt.decode([i]) for i in sentence]+['<end>']
    y_labels = [tokenizer_en.decode([i]) for i in result
                if i < tokenizer_en.vocab_size]

    for head in range(num_heads):
        ax = fig.add_subplot(n_rows, n_cols, head+1)

        # plot the attention weights (the last row is dropped)
        ax.matshow(attention[head][:-1, :], cmap='viridis')

        ax.set_xticks(range(len(sentence)+2))
        ax.set_yticks(range(len(result)))

        # Inverted limits keep the first output token at the top.
        ax.set_ylim(len(result)-1.5, -0.5)

        ax.set_xticklabels(x_labels, fontdict=fontdict, rotation=90)
        ax.set_yticklabels(y_labels, fontdict=fontdict)

        ax.set_xlabel('Head {}'.format(head+1))

    plt.tight_layout()
    plt.show()