In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

import dataset
import utils
# Abort early if TensorFlow reports no physical GPU device (an empty list is falsy).
assert tf.config.list_physical_devices('GPU')

Using TensorFlow backend.


In [17]:
# Batch size and fixed sequence length shared by the model definition and the helper functions.
BATCH_SIZE = 32
MAXLEN = 64

# Vocabulary / class counts, taken from the lookup tables exposed by the `dataset` module.
LETTERS_SIZE, NIQQUD_SIZE, DAGESH_SIZE, SIN_SIZE = map(
    len,
    (dataset.letters_table, dataset.niqqud_table, dataset.dagesh_table, dataset.sin_table),
)
# Number of classes predicted by the per-sequence 'K' head.
KINDS_SIZE = 2

def build_model(EMBED_DIM=110, UNITS=220):
    """Build the (uncompiled) multi-head sequence tagging network.

    Architecture: an embedding over letter ids feeds two stacked bidirectional
    LSTM layers whose forward/backward outputs are summed; the second one is
    wrapped in a residual (add) connection. Four softmax heads read the result:

    * ``N``, ``D``, ``S`` -- per-position Dense heads sized by ``NIQQUD_SIZE``,
      ``DAGESH_SIZE`` and ``SIN_SIZE``.
    * ``K`` -- one prediction per sequence over ``KINDS_SIZE`` classes, produced
      by an LSTM that returns only its final output.

    Args:
        EMBED_DIM: size of each letter embedding vector.
        UNITS: units per LSTM direction. Because the two directions are summed,
            this is also the per-position feature width fed to the heads.

    Returns:
        A ``tf.keras.Model`` with a single input of shape ``(batch, MAXLEN)`` and
        the outputs ``[N, D, S, K]``.
    """
    def bi_lstm():
        # merge_mode='sum' keeps the output width at UNITS, which the residual add below relies on.
        return layers.Bidirectional(
            layers.LSTM(UNITS, return_sequences=True, dropout=0.0),
            merge_mode='sum',
        )

    # Only the per-sample shape is declared; the batch dimension stays None,
    # so the model accepts any batch size at training and prediction time.
    input_text = tf.keras.Input(shape=(MAXLEN,))

    # mask_zero=True makes id 0 act as padding that downstream layers skip.
    layer = layers.Embedding(LETTERS_SIZE, EMBED_DIM, mask_zero=True)(input_text)
    layer = bi_lstm()(layer)
    layer = layers.add([layer, bi_lstm()(layer)])

    outputs = [
        layers.Softmax(name='N')(layers.Dense(NIQQUD_SIZE)(layer)),
        layers.Softmax(name='D')(layers.Dense(DAGESH_SIZE)(layer)),
        layers.Softmax(name='S')(layers.Dense(SIN_SIZE)(layer)),
        # No return_sequences here: the LSTM collapses the sequence to one vector.
        layers.Softmax(name='K')(layers.LSTM(KINDS_SIZE)(layer)),
    ]
    model = tf.keras.Model(inputs=[input_text], outputs=outputs)

    # Uncomment to render the architecture diagram:
    # tf.keras.utils.plot_model(model, to_file='model.png')
    return model

# Instantiate the network with the default embedding / LSTM sizes.
model = build_model()

model.summary()
# Snapshot the freshly initialised weights so a later cell can restore this starting point.
model.save_weights('./checkpoints/uninit')

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 64)]         0                                            
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, 64, 110)      4840        input_5[0][0]                    
__________________________________________________________________________________________________
bidirectional_8 (Bidirectional) (None, 64, 220)      582560      embedding_4[0][0]                
__________________________________________________________________________________________________
bidirectional_9 (Bidirectional) (None, 64, 220)      776160      bidirectional_8[0][0]            
____________________________________________________________________________________________

In [23]:
def fit(data, scheduler, verbose=1):
    """Compile the global ``model`` and train it for one epoch on ``data``.

    ``model.compile`` is called on every invocation, before training starts.

    Args:
        data: dataset object exposing ``normalized_texts`` /
            ``normalized_validation`` (inputs) and the ``niqqud_*``, ``dagesh_*``
            and ``sin_*`` arrays (targets) for the training and validation splits.
        scheduler: callback passed to ``model.fit``; a falsy value means no
            callback. A ``utils.CircularLearningRate`` instance is first bound to
            the dataset and batch size through ``set_dataset``.
        verbose: forwarded to ``model.fit``.

    Returns:
        The history object returned by ``model.fit``.
    """
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    callbacks = []
    if isinstance(scheduler, utils.CircularLearningRate):
        scheduler.set_dataset(data, BATCH_SIZE)
    if scheduler:
        callbacks.append(scheduler)

    x = data.normalized_texts
    vx = data.normalized_validation

    # The 'K' head gets a constant target: every sample is labelled class 1.
    kind_texts = np.ones((x.shape[0], 1))
    kind_validation = np.ones((vx.shape[0], 1))

    # Targets are keyed by output-layer name; only names that exist on the
    # model (N, D, S, K) are supplied.
    y = {
        'N': data.niqqud_texts,
        'D': data.dagesh_texts,
        'S': data.sin_texts,
        'K': kind_texts,
    }
    vy = {
        'N': data.niqqud_validation,
        'D': data.dagesh_validation,
        'S': data.sin_validation,
        'K': kind_validation,
    }
    return model.fit(x, y, validation_data=(vx, vy), batch_size=BATCH_SIZE, epochs=1,
                     verbose=verbose, callbacks=callbacks)


In [6]:
def load_data(source, maxlen=MAXLEN, validation=0.1):
    """Load one or more corpora from the ``texts`` directory via ``dataset.load_file``.

    Args:
        source: iterable of file names relative to ``texts``. A single name given
            as a plain string is accepted and treated as a one-element list
            (otherwise it would be iterated character by character).
        maxlen: forwarded to ``dataset.load_file`` as ``maxlen``.
        validation: forwarded to ``dataset.load_file`` as its validation argument.

    Returns:
        Whatever ``dataset.load_file`` returns for the resolved paths, using
        ``BATCH_SIZE`` and ``shuffle=True``.
    """
    if isinstance(source, str):
        source = [source]
    filenames = [os.path.join('texts', name) for name in source]
    return dataset.load_file(filenames, BATCH_SIZE, validation, maxlen=maxlen, shuffle=True)

In [25]:
# 'rabanit' corpus (texts/rabanit), loaded with load_data's default validation fraction of 0.1.
data_rabanit = load_data(['rabanit'])

In [26]:
# 'pre_modern' corpus (texts/pre_modern), loaded with load_data's default validation fraction of 0.1.
data_pre_modern = load_data(['pre_modern'])

In [37]:
# 'modern' corpus (texts/modern), loaded with a validation fraction of 0.2 instead of the default 0.1.
data_modern = load_data(validation=0.2, source=['modern'])

In [28]:
# Stage 1: restore the saved untrained weights, run one fit() pass on the 'rabanit' data,
# then checkpoint the result for the next stage.
# NOTE(review): the meaning of the three CircularLearningRate arguments is defined in `utils` -- confirm there.
model.load_weights('./checkpoints/uninit')
history = fit(data_rabanit, utils.CircularLearningRate(20e-4, 50e-4, 5e-4))
model.save_weights('./checkpoints/rabanit')

Train on 79891 samples, validate on 8877 samples


In [29]:
# Stage 2: continue from the 'rabanit' checkpoint with one fit() pass on the 'pre_modern' data,
# then checkpoint the result for the next stage.
model.load_weights('./checkpoints/rabanit')
history = fit(data_pre_modern, utils.CircularLearningRate(20e-4, 40e-4, 0.1e-4))
model.save_weights('./checkpoints/pre_modern')

Train on 79084 samples, validate on 8788 samples


In [38]:
# Stage 3: continue from the 'pre_modern' checkpoint with one fit() pass on the 'modern' data.
# No checkpoint is written after this stage; the weights stay in the in-memory `model`
# and `history` keeps this stage's metrics.
model.load_weights('./checkpoints/pre_modern')
history = fit(data_modern, utils.CircularLearningRate(6e-3, 6e-3, 0.5e-3))

Train on 13619 samples, validate on 3405 samples


In [None]:
import matplotlib.pyplot as plt

# 2x2 grid: one row per metric (accuracy, loss), one column per output head (D, N).
fig, ax = plt.subplots(nrows=2, ncols=2)

for row, metric in enumerate(['accuracy', 'loss']):
    for col, head in enumerate(['D', 'N']):
        panel = ax[row][col]
        # fit() trains for a single epoch per call, so each series may hold just one
        # point; markers keep such a series visible (a bare line would draw nothing).
        panel.plot(history.history[head + '_' + metric], marker='o')
        panel.plot(history.history['val_' + head + '_' + metric], marker='o')
        panel.set_title(head + ' ' + metric)
        panel.set_xlabel('epoch')
        panel.set_ylabel(metric)
        panel.legend([head + '_Train', head + '_Test'], loc='center right')

plt.tight_layout()

In [40]:
# Export the in-memory Keras `model` with the TensorFlow.js converter; the output goes to the current directory ('.').
import tensorflowjs as tfjs
tfjs.converters.save_keras_model(model, '.')

In [39]:
def print_predictions(data, k):
    """Print predicted vs. expected text for the k-th batch-sized validation slice.

    Rows ``k*BATCH_SIZE`` up to ``(k+1)*BATCH_SIZE`` of ``data.normalized_validation``
    are run through the global ``model``. The first three model outputs (niqqud,
    dagesh, sin) are decoded with ``dataset.from_categorical`` and merged back
    onto the input with ``data.merge``; the same merge is done with the reference
    labels. Each predicted line is printed above its expected line, followed by
    a blank line.

    Args:
        data: dataset object providing the validation arrays and ``merge``.
        k: zero-based index of the batch-sized slice to show.
    """
    window = slice(k*BATCH_SIZE, (k+1)*BATCH_SIZE)
    inputs = data.normalized_validation[window]
    outputs = model.predict(inputs)

    # Only the first three heads are decoded; any further model output is not used here.
    predicted_niqqud = dataset.from_categorical(outputs[0])
    predicted_dagesh = dataset.from_categorical(outputs[1])
    predicted_sin = dataset.from_categorical(outputs[2])

    reference_niqqud = data.niqqud_validation[window]
    reference_dagesh = data.dagesh_validation[window]
    reference_sin = data.sin_validation[window]

    predicted_lines = data.merge(inputs, ns=predicted_niqqud, ds=predicted_dagesh, ss=predicted_sin)
    reference_lines = data.merge(inputs, ns=reference_niqqud, ds=reference_dagesh, ss=reference_sin)

    for predicted, reference in zip(predicted_lines, reference_lines):
        print('מצוי: ', predicted)
        print('רצוי: ', reference)
        print()

# Show predicted vs. expected text for the second batch-sized validation slice (k=1) of the modern corpus.
print_predictions(data_modern, 1)

מצוי:  פְּגִיעָה בִּבְסִיסִי מְחַבְּלִים מִחוּץ לַשֶׁטַח יִשְׂרָאֵל. לְדוּגְמָה, בְּאַפְּרִיל 5555 נֶעֱרַךְ
רצוי:  פְּגִיעָה בִּבְסִיסֵי מְחַבְּלִים מִחוּץ לְשֶׁטַח יִשְׂרָאֵל. לְדוּגְמָה, בְּאַפְּרִיל 5555 נֶעֱרַךְ

מצוי:  עֲלוֹנִים שֶׁחִילֵּק לְצוֹרֶךְ פִּרְסוּם סִפְרוּ הֶחָדָשׁ, גְּנִיבַת צְמִיגִים בְּטֶרֶם כְּנִיסָתוֹ
רצוי:  עֲלוֹנִים שֶׁחִילֵּק לְצוֹרֶךְ פִּרְסוּם סִפְרוֹ הֶחָדָשׁ, גְּנֵיבַת צְמִיגִים בְּטֶרֶם כְּנִיסָתוֹ

מצוי:  מַעֲרֶכֶת שְׁמַע בְּ-555 אֶלֶף שֶׁקַל עַל יְדֵי הָאֲזְנָה לְשִׁידּוּרֵי עֲרוּץ הַסְּפּוֹרְט. מָה
רצוי:  מַעֲרֶכֶת שָׁמַע בְּ-555 אֶלֶף שֶׁקֶל עַל יְדֵי הַאֲזָנָה לְשִׁידּוּרֵי עֲרוּץ הַסְּפּוֹרְט. מָה

מצוי:  הוֹלִידַיי, שֶׁרָצוּ 5.55 מַיְילִים לְמִשְׂחָק (כְּ-5.55 ק"מ). הָעוּבְדָּה שֶׁשְּׁנֵיהֶם הֵם
רצוי:  הוֹלִידֵיי, שֶׁרָצוּ 5.55 מָיְילִים לְמִשְׂחָק (כִּ-5.55 ק"מ). הָעוּבְדָּה שֶׁשְּׁנֵיהֶם הֵם

מצוי:  "נִרְאָה לִי הַרְבֵּה יוֹתֵר כֵּיף. וּפָחוֹת מֵעִיף." לוֹלִי הִפָנְתָה אֶת פָּנֶיהָ וְהִבִּיטָה
רצוי:  "נִרְאֶה לִי הַרְבֵּה יוֹתֵר כֵּיף. וּפָחוֹת מְע

In [None]:
# Imported here because no other cell brings these modules into scope;
# without them this cell fails with NameError.
import shutil
import tempfile

# Remove the '.tensorboard-info' directory under the system temp dir and any
# previous local 'logs' directory, then recreate an empty 'logs' directory.
shutil.rmtree(os.path.join(tempfile.gettempdir(), '.tensorboard-info'), ignore_errors=True)
shutil.rmtree('logs', ignore_errors=True)
# exist_ok=True: rmtree above ignores errors, so 'logs' may still exist.
os.makedirs('logs', exist_ok=True)
# %tensorboard --logdir logs

In [None]:
# Toy sanity check, independent of the main network: a small bidirectional GRU is
# trained to output the first element of a 2-step sequence at both time steps.
# Every name is prefixed with `toy_` so that the trained global `model` (read by
# fit() and print_predictions()) and the builtin `input` are not overwritten.
toy_input = layers.Input((2, 1))
toy_hidden = layers.Bidirectional(layers.GRU(units=50, return_sequences=True))(toy_input)
toy_output = layers.Dense(1, activation='sigmoid')(toy_hidden)
toy_model = tf.keras.Model(inputs=[toy_input], outputs=[toy_output])

toy_lr = 1
for _ in range(4):
    # Learning rate shrinks by 3x per round: 1/3, 1/9, 1/27, 1/81.
    toy_lr /= 3
    # Fresh uniform samples in [0, 1) each round, shaped (samples, steps, features).
    toy_x = np.random.random((100000, 2, 1))
    # Target: the first step's value repeated at both steps.
    toy_y = np.repeat(toy_x[:, :1, :], 2, axis=1)
    # Re-compiling installs a new Adam optimizer with the reduced learning rate.
    toy_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=toy_lr), loss="mean_squared_error")
    toy_model.fit(toy_x, toy_y, epochs=1, verbose=1)

# Probe with the sequence (1, 0.5); the matching target is (1, 1).
toy_probe = np.array([[[1.0], [0.5]]])
print(toy_model.predict(toy_probe))
print(toy_model.evaluate(toy_probe, np.array([[[1.0], [1.0]]])))