In [None]:
%load_ext autoreload

In [None]:
import os
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
assert tf.config.list_physical_devices('GPU')

import collections

%autoreload
import dataset

%autoreload
import transformer

In [None]:
# Number of samples per training step; fit() uses it to slice the training arrays.
BATCH_SIZE = 32

# Sequence length handed to dataset.load_data (maxlen=) and to the model's
# positional encodings below.
MAXLEN = 40

# Vocabulary sizes, taken from the lookup tables exposed by the dataset module.
# Only LETTERS_SIZE and NIQQUD_SIZE are consumed in this cell.
LETTERS_SIZE = len(dataset.letters_table)
NIQQUD_SIZE = len(dataset.niqqud_table)
DAGESH_SIZE = len(dataset.dagesh_table)
SIN_SIZE = len(dataset.sin_table)

# NOTE(review): presumably d_model has to be divisible by num_heads (54 / 6 = 9)
# -- confirm against transformer.py.
d_model = 54

# Letters are the input vocabulary, niqqud marks the target vocabulary.
model = transformer.Transformer(
    num_layers=1,
    d_model=d_model,
    num_heads=6,
    dff=1024,
    input_vocab_size=LETTERS_SIZE,
    target_vocab_size=NIQQUD_SIZE, 
    maximum_position_encoding_input=MAXLEN,
    maximum_position_encoding_target=MAXLEN,
    rate=0.1
)

# NOTE(review): this schedule is created but never handed to the optimizer below,
# which uses a constant 1e-5 learning rate -- confirm whether that is intentional.
learning_rate = transformer.CustomSchedule(d_model, warmup_steps=3000)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='sparse_categorical_crossentropy',  # transformer.MaskedCategoricalCrossentropy(),  # tf.keras.losses.sparse_categorical_crossentropy,
    metrics=['accuracy']
)
# pseudo "build" step, to allow printing a summary:
h = model.pseudo_build(MAXLEN, MAXLEN)
model.summary()
# Snapshot the weights before any training; the first training cell reloads this
# checkpoint so a run can be restarted from the same starting point.
model.save_weights('./checkpoints/uninit')

In [None]:
def load_data(source, validation=0.1):
    """Load the named corpora from the ``texts`` directory.

    Args:
        source: iterable of file names, each resolved relative to ``texts``.
        validation: forwarded unchanged as the second positional argument of
            ``dataset.load_data``.

    Returns:
        The ``(train, valid)`` pair produced by ``dataset.load_data`` with
        ``maxlen=MAXLEN``.
    """
    paths = []
    for name in source:
        paths.append(os.path.join('texts', name))
    train_split, valid_split = dataset.load_data(paths, validation, maxlen=MAXLEN)
    return train_split, valid_split


def fit(data, epochs=1):
    """Train the global ``model`` on ``data`` and return the per-epoch metrics.

    Args:
        data: ``(train, valid)`` pair as returned by ``load_data``. Only ``train``
            is used; it must support ``len()`` and expose ``normalized`` and
            ``niqqud`` sequences that can be sliced.
        epochs: number of passes over the training split.

    Returns:
        A ``collections.defaultdict(list)`` mapping every key of the last
        ``train_step`` result of an epoch to a list holding one value per epoch.
        It stays empty when the training split holds fewer than ``BATCH_SIZE``
        samples, because no training step runs in that case.
    """
    train, _valid = data
    # Only full batches are used; a trailing partial batch is dropped.
    total = len(train) // BATCH_SIZE
    history = collections.defaultdict(list)
    for _epoch in range(epochs):
        model.reset_metrics()
        # Reset per epoch so that an epoch without a single step records nothing
        # instead of failing on an unbound name.
        res = {}
        for i in range(total):
            s = slice(i * BATCH_SIZE, (i + 1) * BATCH_SIZE)
            res = model.train_step(train.normalized[s], train.niqqud[s])
            # "predictions" is not a scalar metric, so it is left out of the progress line.
            out = ' - '.join(f"{k}: {v:.4f}" for k, v in res.items() if k != "predictions")
            # Carriage return keeps the progress on a single, overwritten line.
            print(f"{i:4d}/{total:4d} - {out}", end='      \r')
        print()
        for k, v in res.items():
            history[k].append(v.numpy())
    return history

In [None]:
# (train, valid) pair for the 'biblical' and 'garbage' corpora; used by the first training stage.
data_other = load_data(['biblical', 'garbage'])

In [None]:
# (train, valid) pair for the 'poetry', 'rabanit' and 'pre_modern' corpora; used by the second training stage.
data_mix = load_data(['poetry', 'rabanit', 'pre_modern'])

In [None]:
# (train, valid) pair for the 'modern' corpus; used by the last training stage and the evaluation cells.
data_modern = load_data(validation=0.1, source=['modern'])

In [None]:
# First training stage: restart from the untrained snapshot, train one epoch on
# data_other, then checkpoint the result for the next stage.
model.load_weights('./checkpoints/uninit')
history = fit(data_other, epochs=1)
model.save_weights('./checkpoints/other')

In [217]:
# Second training stage: continue from the 'other' checkpoint, train one epoch on
# data_mix, then checkpoint the result for the next stage.
model.load_weights('./checkpoints/other')
history = fit(data_mix, epochs=1) # (102, 2048, 6) warmup=270 : 974,746 - 0.8323  (7 - same)
model.save_weights('./checkpoints/mix')

7237/7238 - loss: 0.0142 - accuracy: 0.9991                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             

In [218]:
# Last training stage: continue from the 'mix' checkpoint, train one epoch on
# data_modern, then write the checkpoint the evaluation cells reload.
model.load_weights('./checkpoints/mix')
history = fit(data_modern, epochs=1)
# print(true_accuracy(data_modern)
model.save_weights('./checkpoints/modern')

 868/ 869 - loss: 0.0015 - accuracy: 1.0000                                                                                                                                                                        


In [None]:
# Reload the checkpoint written after training on data_modern, so this cell can be
# run without repeating the training cells.
model.load_weights('./checkpoints/modern')

def _unpadded_length(labels):
    """Return the index of the first 0 in ``labels``, or ``len(labels)`` if there is none."""
    values = labels.tolist()
    return values.index(0) if 0 in values else len(values)


def print_predictions(data, s):
    """Print predicted vs. expected text for ``data[s]`` together with niqqud accuracy.

    For every sentence in the slice the merged predicted text, the merged expected
    text and the fraction of matching niqqud labels are printed; the overall
    fraction over all printed sentences is printed last.

    Args:
        data: object exposing ``normalized``, ``niqqud``, ``dagesh``, ``sin`` and
            ``text`` sequences that can be indexed with ``s``.
        s: slice selecting the sentences to report.

    NOTE(review): ``model.predict`` is expected to return three outputs here
    (niqqud, dagesh, sin) -- confirm this matches the model built in this notebook.
    """
    batch = data.normalized[s]
    prediction = model.predict(batch)
    actual_niqqud = dataset.from_categorical(prediction[0])
    actual_dagesh = dataset.from_categorical(prediction[1])
    actual_sin = dataset.from_categorical(prediction[2])
    expected_niqqud, expected_dagesh, expected_sin = data.niqqud[s], data.dagesh[s], data.sin[s]
    actual = dataset.merge(data.text[s], ts=batch, ns=actual_niqqud, ds=actual_dagesh, ss=actual_sin)
    expected = dataset.merge(data.text[s], ts=batch, ns=expected_niqqud, ds=expected_dagesh, ss=expected_sin)
    total = []
    for i, (a, e) in enumerate(zip(actual, expected)):
        # The printed labels are Hebrew for "actual" and "desired".
        print('מצוי: ', a)
        print('רצוי: ', e)
        # Compare only up to the first 0 label (treated as the start of padding);
        # a sentence that fills the whole row has no 0 and is compared in full.
        last = _unpadded_length(expected_niqqud[i])
        res = expected_niqqud[i][:last] == actual_niqqud[i][:last]
        total.extend(res)
        # Guard the empty case so np.mean does not warn about an empty selection.
        accuracy = np.mean(res) if last else float('nan')
        print(f'{accuracy:.2f} ({last - sum(res)} out of {last})')
        print()
    overall = np.mean(total) if total else float('nan')
    print(round(overall, 3))

# Report every sentence of the second element of data_modern (the split load_data returns as `valid`).
print_predictions(data_modern[1], slice(0, None))

In [None]:
import matplotlib.pyplot as plt

# One panel per entry of `history` (the metrics recorded by the last fit() call).
# squeeze=False makes `ax` a 2-D array even for a single panel, so indexing works
# for any number of metrics; max(..., 1) avoids asking for a zero-column grid.
fig, ax = plt.subplots(nrows=1, ncols=max(len(history), 1), squeeze=False)

for i, (name, values) in enumerate(history.items()):
    ax[0][i].plot(values)
    ax[0][i].set_title(name)  # label the panel so the figure can be read on its own

plt.tight_layout()

In [None]:
# Reload the checkpoint written after training on data_modern before decoding step by step.
model.load_weights('./checkpoints/modern')
def evaluate(inp_sentence):
    """Decode niqqud labels for one encoded sentence position by position, printing each step.

    At step ``i`` the model is called with the labels decoded so far as its second
    argument and a mask that hides positions ``i`` and later; the arg-max label at
    position ``i`` is then written into the output. Decoding stops at the first 0
    in ``inp_sentence`` (treated as the start of padding) or at the end of the
    sentence when it contains no 0. Nothing is returned; the masks, predictions
    and the final label list are printed.

    Args:
        inp_sentence: 1-D array of letter indices supporting ``len()`` and
            ``tolist()`` -- assumed to be a NumPy array as produced by
            ``dataset.load_data``; TODO confirm.
    """
    encoder_input = tf.expand_dims(inp_sentence, 0)
    seq_len = len(inp_sentence)
    output = [0] * seq_len
    # Number of positions to decode; an input without any 0 is decoded in full
    # instead of raising ValueError from list.index.
    tokens = inp_sentence.tolist()
    last = tokens.index(0) if 0 in tokens else seq_len
    # encoder_input never changes inside the loop, so the padding mask is built once
    # (assumes create_padding_mask is a pure function of its argument).
    padding_mask = transformer.create_padding_mask(encoder_input)
    for i in range(last):
        # 0 = visible, 1 = hidden: only the i positions decoded so far are visible.
        # The width follows the actual input length rather than the global MAXLEN,
        # so the mask always lines up with the padding mask and the decoder input.
        look_ahead_mask = tf.cast(tf.constant([[[[0]*i + [1]*(seq_len-i)]]]), dtype=float)
        combined_mask = tf.maximum(padding_mask, look_ahead_mask)

        output_tensor = tf.constant([output], dtype=tf.int32)
        print(combined_mask)
        # predictions.shape == (batch_size, seq_len, vocab_size)
        predictions, _ = model(encoder_input, output_tensor, False, padding_mask, combined_mask, padding_mask)
        print(predictions)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # Keep only the label predicted for the current position; it becomes part
        # of the decoder input on the next step.
        print(predicted_id)
        output[i] = predicted_id.numpy()[0][i]
        print(output)
        print()
    print(output)
# Decode the first sentence of the first element of data_modern (the split load_data returns as `train`).
evaluate(data_modern[0].normalized[0])

In [216]:

def merge(normalized, prediction):
    """Interleave letters with their niqqud marks and return the resulting string.

    Args:
        normalized: iterable of letter indices into ``dataset.letters_table``.
        prediction: iterable of niqqud indices into ``dataset.niqqud_table``,
            paired position by position with ``normalized``.

    Returns:
        The concatenation of each letter followed by its mark, stopping at the
        first letter equal to ``dataset.letters_table.PAD_TOKEN`` or at the end of
        the shorter input.
    """
    letters = dataset.letters_table
    marks = dataset.niqqud_table
    pieces = []
    for letter_id, mark_id in zip(normalized, prediction):
        if letter_id == letters.PAD_TOKEN:
            break
        pieces.extend((letters.indices_char[letter_id], marks.indices_char[mark_id]))
    return ''.join(pieces)

# Sanity check on the second batch of the second element (`valid`) of data_other.
d = data_other
text = d[1].normalized[BATCH_SIZE:2*BATCH_SIZE]
actual = d[1].niqqud[BATCH_SIZE:2*BATCH_SIZE]
enc_padding_mask, combined_mask, dec_padding_mask = transformer.create_masks(text, actual)
# NOTE(review): the ground-truth labels (`actual`) are passed as the model's second
# argument, so the agreement printed below is not a free-running prediction --
# confirm how much of the target the model can see through combined_mask.
prediction = model(text, actual, False, enc_padding_mask, combined_mask, dec_padding_mask)[0]  # np.argmax(history['predictions'], axis=-1)[0]
prediction = np.argmax(prediction, axis=-1)
# Index, within the batch, of the sentence shown in detail.
n = 15
print(text[n])
print(prediction[n])
print(actual[n])
# Fraction of matching labels over the whole batch; every position is counted,
# including the trailing 0 entries of each row.
print(np.mean(prediction == actual))
print(merge(text[n], prediction[n]))
print(merge(text[n], actual[n]))

[22 35 29  5 24 40 26 27  5 42 21 22 20 35 43 33 22 13  5 22 35 29  5 24
 26 26 30  5 24 32  5 22 24 34 20  5  0  0  0  0]
[ 2 15  1  1 13  8  1 10  1  8  1 11 15  2 10  1 14  1  1  2 15  1  1 15
  6  1  1  1  7  1  1 10  8  8  1  1  0  0  0  0]
[ 2 15  1  1 13  8  1 10  1  8  1 11 15  2 10  1 14  1  1  2 15  1  1 15
  6  1  1  1  7  1  1 10  8  8  1  1  0  0  0  0]
1.0
וְעַל חֻקֶיךָ שֶהוֹדַעְתָנוּ. וְעַל חַיִים חֵן וָחֶסֶד 
וְעַל חֻקֶיךָ שֶהוֹדַעְתָנוּ. וְעַל חַיִים חֵן וָחֶסֶד 
