In [5]:
import os
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

import dataset
import schedulers

import tensorflow as tf
# Stop right away when TensorFlow reports no physical GPU device
# (list_physical_devices returns an empty, falsy list in that case).
assert tf.config.list_physical_devices('GPU')

from tensorflow_addons.layers.crf import CRF
from tensorflow_addons.text.crf import crf_log_likelihood

In [6]:

class ModelWithCRFLoss(tf.keras.Model):
    """Keras model whose train/test steps are driven by a CRF negative log-likelihood.

    Every element of the model output is unpacked into four values. The last
    three are handed to ``crf_log_likelihood`` as potentials, sequence lengths
    and chain kernel; the first one is ignored when computing the loss.
    """

    def compute_loss(self, data, training=False):
        """Run a forward pass and return one CRF loss per output head.

        Args:
            data: ``(x, ys)`` pair. ``ys`` is a mapping; its values are taken in
                iteration order and paired positionally with the model outputs.
            training: forwarded unchanged to the model call.

        Returns:
            A list with, for each head, the negated first element of the
            ``crf_log_likelihood`` result.

        Side effects:
            Updates the compiled metrics with the labels and the potentials.
        """
        inputs, labels_by_head = data
        labels = list(labels_by_head.values())
        head_outputs = self(inputs, training=training)

        potentials, crf_losses = [], []
        for (_, potential, sequence_length, chain_kernel), target in zip(head_outputs, labels):
            log_likelihood = crf_log_likelihood(potential, target, sequence_length, chain_kernel)[0]
            potentials.append(potential)
            crf_losses.append(-log_likelihood)

        # Metrics are fed the potentials; no sample weights are supplied.
        self.compiled_metrics.update_state(labels, potentials, None)
        return crf_losses

    def log_outputs(self, crf_losses):
        """Return the current metric results keyed by metric name.

        ``crf_losses`` is accepted for symmetry with the step methods but is
        not included in the returned logs.
        """
        logs = {}
        for metric in self.metrics:
            logs[metric.name] = metric.result()
        return logs

    def train_step(self, data):
        """Apply one optimizer update using the gradients of the CRF losses."""
        with tf.GradientTape() as tape:
            crf_losses = self.compute_loss(data, training=True)
            # Only the CRF losses are differentiated; anything collected in
            # self.losses is not added to them.

        variables = self.trainable_variables
        gradients = tape.gradient(crf_losses, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return self.log_outputs(crf_losses)

    def test_step(self, data):
        """Evaluate one batch (no weight update) and return the metric logs."""
        crf_losses = self.compute_loss(data, training=False)
        return self.log_outputs(crf_losses)


BATCH_SIZE = 32  # mini-batch size; also passed to model.fit() by fit()


def build_model(UNITS=128):
    """Build the tagger: embedding -> bidirectional LSTM -> three CRF heads.

    Args:
        UNITS: width of both the embedding and the LSTM.

    Returns:
        A ``ModelWithCRFLoss`` with CRF heads named 'N', 'D' and 'S', sized
        from the niqqud, dagesh and sin tables of the ``dataset`` module.
    """
    vocab_size = len(dataset.letters_table)
    head_sizes = {
        'N': len(dataset.niqqud_table),
        'D': len(dataset.dagesh_table),
        'S': len(dataset.sin_table),
    }

    # NOTE(review): the printed summary shows the input as (None, None), so
    # batch_size appears to have no effect next to batch_shape - confirm.
    inp = keras.Input(batch_shape=(None, None), batch_size=BATCH_SIZE)
    # mask_zero=True makes index 0 act as a mask value for downstream layers.
    embedded = layers.Embedding(vocab_size, UNITS, mask_zero=True)(inp)
    # Forward and backward LSTM outputs are summed, so the width stays UNITS.
    encoder = layers.Bidirectional(layers.LSTM(UNITS, return_sequences=True), merge_mode='sum')
    encoded = encoder(embedded)

    # One CRF head per label type, all reading the same encoded sequence.
    outputs = [CRF(size, name=head)(encoded) for head, size in head_sizes.items()]

    model = ModelWithCRFLoss(inputs=inp, outputs=outputs)
    model.build((None, None))
    return model

# Module-level model instance; fit() and print_predictions() use this global.
model = build_model()

# Print the architecture, then snapshot the freshly built weights so that a
# later cell can reload them before the first training stage.
model.summary()
model.save_weights('./checkpoints/crf_uninit')

Model: "model_with_crf_loss"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, None, 128)    5632        input_1[0][0]                    
__________________________________________________________________________________________________
bidirectional (Bidirectional)   (None, None, 128)    263168      embedding[0][0]                  
__________________________________________________________________________________________________
N (CRF)                         [(None, None), (None 2352        bidirectional[0][0]              
________________________________________________________________________________

In [7]:
def accuracy(y_true, y_pred):
    """Fraction of non-padding positions whose predicted label is correct.

    Positions where ``y_true`` equals 0 are treated as padding and are excluded
    from both the numerator and the denominator. Previously a padding position
    predicted as 0 was counted as a match while being left out of the
    denominator, which could push the result above 1.

    Args:
        y_true: label ids; 0 marks padding.
        y_pred: per-label scores; the predicted label is the argmax over the
            last axis.

    Returns:
        Scalar ratio of correct non-padding positions; 0 when every position
        is padding (instead of dividing by zero).
    """
    K = keras.backend
    f = K.floatx()
    # convert dense predictions to labels
    y_pred_labels = K.cast(K.argmax(y_pred, axis=-1), f)

    mask = K.cast(K.not_equal(y_true, 0), f)
    # Only matches at non-padding positions count.
    matches = K.cast(K.equal(y_true, y_pred_labels), f) * mask
    # The mask sum is a whole number, so clamping at 1 only changes the
    # all-padding case.
    return K.sum(matches) / K.maximum(K.sum(mask), 1.0)


def fit(train_validation, lr):
    """Compile the global ``model`` with Adam and train it for one epoch.

    Args:
        train_validation: ``(train, valid)`` pair; each side exposes
            ``normalized`` (inputs) and ``niqqud`` / ``dagesh`` / ``sin`` (labels).
        lr: learning rate for the Adam optimizer.

    Returns:
        Whatever ``model.fit`` returns.
    """
    train, valid = train_validation

    def labels_of(split):
        # Keys follow the CRF head names N, D, S.
        return {'N': split.niqqud, 'D': split.dagesh, 'S': split.sin}

    # NOTE(review): metrics is the string "accuracy", not the accuracy()
    # function defined in this cell - confirm which one is intended.
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(metrics="accuracy", loss="ce", optimizer=optimizer)

    train_labels = labels_of(train)
    valid_labels = labels_of(valid)
    return model.fit(
        train.normalized,
        train_labels,
        validation_data=(valid.normalized, valid_labels),
        batch_size=BATCH_SIZE,
        epochs=1,
    )


def load_data(source, maxlen=82, validation=0.1):
    """Load the named corpora from the ``texts`` directory.

    Args:
        source: iterable of names; each is joined onto ``'texts'``.
        maxlen: forwarded to ``dataset.load_data`` as ``maxlen``.
        validation: forwarded to ``dataset.load_data`` as its second argument.

    Returns:
        ``(train, valid)`` as produced by ``dataset.load_data``.
    """
    paths = []
    for name in source:
        paths.append(os.path.join('texts', name))
    train_split, valid_split = dataset.load_data(paths, validation, maxlen=maxlen)
    return train_split, valid_split

In [8]:
# (train, valid) pair built from the 'poetry', 'rabanit' and 'pre_modern' sources under texts/.
data_mix = load_data(validation=0.1, source=['poetry', 'rabanit', 'pre_modern'])

In [9]:
# (train, valid) pair built from the 'modern' source under texts/.
data_modern = load_data(validation=0.1, source=['modern'])

In [10]:
# Stage 1: restart from the untrained snapshot, train one epoch on the mixed
# corpora, and checkpoint the result as 'crf_mix'.
# NOTE(review): the saved output of this cell is an InternalError raised in the
# cuDNN RNN backward pass during fit(), so save_weights was not reached in that run.
model.load_weights('./checkpoints/crf_uninit')
history = fit(data_mix, lr=2e-4)
model.save_weights('./checkpoints/crf_mix')



InternalError:  [_Derived_]  Failed to call ThenRnnBackward with model config: [rnn_mode, rnn_input_mode, rnn_direction_mode]: 2, 0, 0 , [num_layers, input_size, num_units, dir_count, max_seq_length, batch_size, cell_num_units]: [1, 128, 128, 1, 82, 32, 128] 
	 [[{{node gradients/cond_grad/If/then/_0/gradients/CudnnRNNV3_grad/CudnnRNNBackpropV3}}]]
	 [[StatefulPartitionedCall]]
	 [[gradient_tape/model_with_crf_loss/embedding/embedding_lookup/Reshape/_400]] [Op:__inference_train_function_25576]

Function call stack:
train_function -> train_function -> train_function


In [None]:
# Stage 2: continue from the 'crf_mix' checkpoint, train one epoch on the
# modern corpus, and checkpoint the result as 'crf_modern'.
# NOTE(review): this cell has no execution count in the saved notebook.
model.load_weights('./checkpoints/crf_mix')
history = fit(data_modern, lr=2e-4)
model.save_weights('./checkpoints/crf_modern')

In [110]:

def print_predictions(data, s):
    """Print predicted vs. expected text for ``data[s]`` with niqqud accuracy.

    For every selected row the merged predicted and expected texts are
    printed, followed by the share of niqqud labels that match up to the first
    0 label in the expected row, and the number of mismatches. A final line
    prints the match rate pooled over all rows.

    Args:
        data: object exposing ``normalized``, ``niqqud``, ``dagesh``, ``sin``
            and ``text``, each indexable by ``s``.
        s: index (typically a slice) selecting the rows to show.

    Uses the module-level ``model`` for prediction.
    """
    batch = data.normalized[s]
    # Each head's prediction unpacks into four values; only the first is used.
    [actual_niqqud, _, _, _], [actual_dagesh, _, _, _], [actual_sin, _, _, _] = model.predict(batch)
    expected_niqqud, expected_dagesh, expected_sin = data.niqqud[s], data.dagesh[s], data.sin[s]
    actual = dataset.merge(data.text[s], ts=batch, ns=actual_niqqud, ds=actual_dagesh, ss=actual_sin)
    expected = dataset.merge(data.text[s], ts=batch, ns=expected_niqqud, ds=expected_dagesh, ss=expected_sin)
    total = []
    for i, (a, e) in enumerate(zip(actual, expected)):
        print('מצוי: ', a)
        print('רצוי: ', e)
        labels = expected_niqqud[i].tolist()
        # Score up to the first 0 label. A row containing no 0 at all is scored
        # in full; a bare index(0) would raise ValueError for such a row.
        last = labels.index(0) if 0 in labels else len(labels)
        res = expected_niqqud[i][:last] == actual_niqqud[i][:last]
        total.extend(res)
        # Second figure is the number of mismatches out of the scored positions.
        print(round(np.mean(res), 2), f'({last - sum(res)} out of {last})')
        print()
    print(round(np.mean(total), 3))

# Restore the 'crf_modern' checkpoint and print the first 32 rows of the
# validation half (index 1) of data_modern.
model.load_weights('./checkpoints/crf_modern')
print_predictions(data_modern[1], slice(0, 32))

מצוי:  מִבַחְינִתנו", אומר סודרי, "אנשִים צְמאים למידע על תנְועת שְ"ס". נְכון ליום שְישִי בְצהריים, 
רצוי:  מִבְּחִינָתֵנוּ", אוֹמֵר סוּדְרִי, "אֲנָשִׁים צְמֵאִים לְמֵידָע עַל תְּנוּעַת שָׁ"ס". נָכוֹן לְיוֹם שִׁישִּׁי בַּצָּהֳרַיִים, 
0.59 (33 out of 81)

מצוי:  לְהיות נִמַוְך בְהרבה. "ייתכן שְחלק מְהגְולשִים נִכַנְסו לאתר כְמה פְעמים אך גם זה נתון מִעודד 
רצוי:  לִהְיוֹת נָמוּךְ בְּהַרְבֵּה. "יִיתָּכֵן שֶׁחֵלֶק מֵהַגּוֹלְשִׁים נִכְנְסוּ לַאֲתַר כַּמָּה פְּעָמִים אַךְ גַּם זֶה נָתוּן מְעוֹדֵד 
0.51 (39 out of 80)

מצוי:  וְלא מַפְרט. אטרקציות נְוספְות שַמְתוִכַנְנות מְהאתר: תפילות בַכְותל בַשְידור חי. לסגור בַשְבת או 
רצוי:  וְלֹא מְפָרֵט. אַטְרַקְצִיּוֹת נוֹסָפוֹת שֶׁמְּתוּכְנָנוֹת מֵהָאֲתָר: תְּפִילּוֹת בַּכּוֹתֶל בְּשִׁידּוּר חַי. לִסְגּוֹר בְּשַׁבָּת אוֹ 
0.49 (41 out of 81)

מצוי:  שַׁקְל ליורוקום שְישִמַשְ לפְרעון חלק מְהחְוב הבְנקאי שְל יורוקום בְסך 961 מיליון שְקל. בַמְקביל 
רצוי:  שֶׁקֶל לְיוּרוֹקוֹם שֶׁיְּשַׁמֵּשׁ לְפִרְעוֹן חֵלֶק מֵהַחוֹב הַבַּנְקָאִי שֶׁל יוּרוֹקוֹם בְּסַךְ