In [1]:
import os
import datetime

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras import layers

import dataset
# Fail fast when TensorFlow reports no GPU device: an empty device list is falsy.
# NOTE(review): `assert` statements are skipped when Python runs with -O.
assert tf.config.list_physical_devices('GPU')


Using TensorFlow backend.


In [142]:
# Fixed sequence length: the width of the model input and the `maxlen` passed
# to dataset.load_file.
MAXLEN = 64
# Samples per batch: used by model.fit, dataset.load_file and the slicing in
# print_predictions.
BATCH_SIZE = 32

def build_model():
    """Build and compile the three-headed sequence tagging model.

    Architecture:
      * an embedding over letter ids (`mask_zero=True`, so id 0 is masked),
      * a bidirectional LSTM whose two directions are summed,
      * a second bidirectional LSTM of the same shape, added to its own input
        as a residual connection,
      * three per-timestep Dense + Softmax heads named 'N', 'D' and 'S', sized
        by the niqqud, dagesh and sin tables of the `dataset` module.

    Side effects: attempts to write a diagram to 'model.png' (needs pydot and
    graphviz) and prints the model summary.

    Returns:
        The compiled `tf.keras.Model` (Adam optimizer, sparse categorical
        cross-entropy on every head, accuracy metric).
    """
    EMBED_DIM = 512
    UNITS = 256

    LETTERS_SIZE = len(dataset.letters_table)
    NIQQUD_SIZE = len(dataset.niqqud_table)
    DAGESH_SIZE = len(dataset.dagesh_table)
    SIN_SIZE = len(dataset.sin_table)

    def bi_lstm():
        # Both recurrent layers share the same configuration.
        return layers.Bidirectional(layers.LSTM(UNITS, return_sequences=True, dropout=0.1), merge_mode='sum')

    # Only the per-sample shape is declared; the batch axis stays None so the
    # model accepts any batch size. Passing `batch_shape` together with
    # `batch_size` is contradictory, so neither is used here.
    common_input = tf.keras.Input(shape=(MAXLEN,))

    common = layers.Embedding(LETTERS_SIZE, EMBED_DIM, mask_zero=True)(common_input)
    common = bi_lstm()(common)
    # Residual connection around the second recurrent layer.
    common = layers.add([common, bi_lstm()(common)])

    # One softmax head per kind of mark, all reading the shared representation.
    heads = [
        layers.Softmax(name=head_name)(layers.Dense(head_size)(common))
        for head_name, head_size in (('N', NIQQUD_SIZE), ('D', DAGESH_SIZE), ('S', SIN_SIZE))
    ]

    model = tf.keras.Model(inputs=[common_input], outputs=heads)

    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    tf.keras.utils.plot_model(model, to_file='model.png')
    model.summary()
    return model
    
# Build the model once and snapshot its freshly initialised weights, so a
# later cell can restore this starting point before training.
model = build_model()
model.save_weights('./checkpoint_uninit')
def fit(data, learning_rates):
    """Train the global `model` on `data`, running one epoch per learning rate.

    Args:
        data: object exposing `normalized_texts`, `niqqud_texts`,
            `dagesh_texts`, `sin_texts` and the matching `*_validation`
            attributes.
        learning_rates: sequence indexed by epoch number; its length is the
            number of epochs.

    Returns:
        Whatever `model.fit` returns (the training history).
    """
    def rate_for_epoch(epoch, lr):
        # The optimizer's current rate is ignored: the schedule is explicit.
        return learning_rates[epoch]

    train_inputs = data.normalized_texts
    train_targets = [data.niqqud_texts, data.dagesh_texts, data.sin_texts]
    validation_inputs = data.normalized_validation
    validation_targets = [data.niqqud_validation, data.dagesh_validation, data.sin_validation]

    callbacks = [
        tf.keras.callbacks.LearningRateScheduler(rate_for_epoch, verbose=0),
        # Optional per-epoch checkpointing, currently disabled:
        # tf.keras.callbacks.ModelCheckpoint(filepath='checkpoints/ckpt_{epoch}', save_weights_only=True),
    ]

    return model.fit(
        train_inputs,
        train_targets,
        batch_size=BATCH_SIZE,
        epochs=len(learning_rates),
        validation_data=(validation_inputs, validation_targets),
        callbacks=callbacks,
    )

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.
Model: "model_37"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_39 (InputLayer)           [(None, 64)]         0                                            
__________________________________________________________________________________________________
embedding_38 (Embedding)        (None, 64, 512)      22016       input_39[0][0]                   
__________________________________________________________________________________________________
bidirectional_103 (Bidirectiona (None, 64, 256)      1574912     embedding_38[0][0]               
__________________________________________________________________________________________________
bidirectional_104 (Bidirectiona (None, 64, 256)      1050624     bidirectional_103[0][0]          
_____

In [143]:
# Corpus sources, given relative to the 'texts' directory (joined in load_data).
# `rabanit` is the corpus trained on first; `modern` is trained on afterwards.
# NOTE(review): entries without a '.txt' suffix are presumably directories —
# confirm how dataset.load_file handles them.
rabanit = ['birkat_hamazon.txt', 'kuzari.txt', 'hakdama_leorot.txt', 'hartzaat_harav.txt', 'orhot_hayim.txt', 'rambam_mamre.txt', 'short_table.txt',
           'tomer_dvora.txt', 'elef_layla.txt', 'bialik']
modern = ['itzhak_berkman', 'zevi_scharfstein', 'pesah_kaplan', 'abraham_regelson', 'elisha_porat', 'uriel_ofek', 'yisrael_dushman', 'zvi_zviri',
          'sipurim.txt' ,'atar_hashabat.txt', 'ali_baba.txt', 'ricky.txt', 'imagination.txt', 'adamtsair.txt', 'katarsis.txt']

def load_data(source):
    """Load the corpus entries named in `source` from the 'texts' directory.

    Args:
        source: iterable of file or directory names relative to 'texts'.

    Returns:
        The result of `dataset.load_file` for those paths, called with
        BATCH_SIZE, 0.1 (presumably the validation fraction — TODO confirm),
        `maxlen=MAXLEN` and `shuffle=True`.
    """
    paths = []
    for name in source:
        paths.append(os.path.join('texts', name))
    return dataset.load_file(paths, BATCH_SIZE, 0.1, maxlen=MAXLEN, shuffle=True)

# Load both corpora up front; each result exposes the training and validation
# arrays that fit() and print_predictions() read.
data_rabanit = load_data(rabanit)
data_modern = load_data(modern)

In [None]:
# Stage 1: restore the untrained weights, train on the rabanit corpus for two
# epochs (learning rates 3e-3, then 3e-4) and checkpoint the result.
model.load_weights('./checkpoint_uninit')
history = fit(data_rabanit, [3e-3, 3e-4])
model.save_weights('./checkpoint_rabanit')

Train on 102211 samples, validate on 11357 samples
Epoch 1/2
Epoch 2/2

In [139]:
# Stage 2: resume from the rabanit checkpoint and continue training on the
# modern corpus with the same two-epoch schedule.
# Note: this rebinds `history`, so later cells only see this run's curves.
model.load_weights('./checkpoint_rabanit')
history = fit(data_modern, [3e-3, 3e-4])

Train on 23356 samples, validate on 2596 samples
Epoch 1/2
Epoch 2/2


In [None]:
# Training vs. validation curves from the most recent fit() call.
# Rows are metrics (accuracy, loss); columns are output heads (D, N).
# The 2x2 grid only has room for two heads, so the 'S' head is not plotted.
fig, ax = plt.subplots(nrows=2, ncols=2)

for row, metric in enumerate(['accuracy', 'loss']):
    for col, head in enumerate(['D', 'N']):
        panel = ax[row][col]
        key = head + '_' + metric
        panel.plot(history.history[key])
        # The '_Test' legend entry is the validation curve ('val_' keys).
        panel.plot(history.history['val_' + key])
        # Label every panel so the figure can be read without the code.
        panel.set_title(key)
        panel.set_xlabel('epoch')
        panel.legend([head + '_Train', head + '_Test'], loc='center right')

plt.tight_layout()

In [141]:
# Export the current `model` through the TensorFlow.js converter; the output
# is written to the current working directory ('.').
# NOTE(review): tensorflowjs is imported here rather than in the top import
# cell — presumably so the rest of the notebook runs without it; confirm.
import tensorflowjs as tfjs
tfjs.converters.save_keras_model(model, '.')

In [140]:
def print_predictions(data, k):
    """Print predicted and expected text for the k-th validation batch.

    Takes validation samples [k*BATCH_SIZE, (k+1)*BATCH_SIZE), runs the global
    `model` on them, and merges both the predicted and the reference marks
    back onto the input with `data.merge`. Each sample is printed as a pair of
    lines, 'מצוי' (what the model produced) and 'רצוי' (the reference),
    followed by a blank line.

    Args:
        data: loaded corpus object providing the `*_validation` arrays and
            a `merge` method.
        k: zero-based index of the validation batch to show.
    """
    window = slice(k*BATCH_SIZE, (k+1)*BATCH_SIZE)
    batch = data.normalized_validation[window]

    # model.predict returns one array per output head, in the order N, D, S.
    outputs = model.predict(batch)
    predicted_marks = dict(
        ns=dataset.from_categorical(outputs[0]),
        ds=dataset.from_categorical(outputs[1]),
        ss=dataset.from_categorical(outputs[2]),
    )
    reference_marks = dict(
        ns=data.niqqud_validation[window],
        ds=data.dagesh_validation[window],
        ss=data.sin_validation[window],
    )

    predicted_lines = data.merge(batch, **predicted_marks)
    reference_lines = data.merge(batch, **reference_marks)

    for got, want in zip(predicted_lines, reference_lines):
        print('מצוי: ', got)
        print('רצוי: ', want)
        print()

# Show the second validation batch (k=1) of the modern corpus.
print_predictions(data_modern, 1)

מצוי:  מַרְגִימוֹתֵיהֶם הַפְּרוּעוֹת, וְהִקְשִׁיבוּ לְקוֹלוֹ שֶׁל נִימֶר הַמְנַסָּר בַּחֲשֵׁכָה," לֹא הָיְתָה
רצוי:  מֵרְגִימוֹתֵיהֶם הַפְּרוּעוֹת, וְהִקְשִׁיבוּ לְקוֹלוֹ שֶׁל נִימֶר הַמְנַסֵר בַּחֲשֵׁכָה," לֹא הָיְתָה

מצוי:  בַּמָקוֹם הַזֶּה. אַתֶּם לֹא מְסֻגָּלִים לְהִלָּחֵם כְּמוֹ גְבָרִים וּפִירָטִים, אָז לְכָל הַשֵׁדִים
רצוי:  בַּמָקוֹם הַזֶה. אַתֶּם לֹא מְסֻגָלִים לְהִלָחֵם כְּמוֹ גְבָרִים וּפִּירָטִים, אָז לְכָל הַשֵׁדִים

מצוי:  שֶׁלָּנוּ הֶחְלִיט לְאַרְגֵן נְסִיעָה לַצָפוֹן לְכָל הַמִּשְׁפָּחוֹת שֶׁמִּתְפַּלְּלוֹת בְּבֵית הַכְּנֶסֶת.
רצוי:  שֶׁלָּנוּ הֶחְלִיט לְאַרְגֵּן נְסִיעָה לַצָּפוֹן לְכֹל הַמִּשְׁפָּחוֹת שֶׁמִּתְפַּלְּלוֹת בְּבֵּית הַכְּנֶסֶת.

מצוי:  בַּחֹרֶשׁ מִבַּעַד לַחַלּוֹן. פַּרְלוּד לִפַסְנְתֵּר שֶׁל שׁוֹפָן. מְאַט אֶת הַצְּלִילָה לַיָּם
רצוי:  בַּחֹרֶשׁ מִבַּעַד לַחַלּוֹן. פְּרֶלוּד לִפְסַנְתֵּר שֶׁל שׁוֹפֶּן. מֵאֵט אֶת הַצְּלִילָה לְיַם

מצוי:  הָרַכִּים? וּבְכֵן רֹאשׁ וְרִאשׁוֹן לַיּוֹצְאִים הָיָה חֲבֵרְנוּ חִזְקוּ. אוֹ כְּמוֹ שֶׁהָיָה נוֹהֵג
רצוי:  הָרַכִּים