## Import dependencies

In [1]:
import json
import os
import time
import unicodedata

import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import TimeDistributed, LSTM, Dense, Input, Conv2D, ReLU
from tensorflow.keras.layers import MaxPool2D, Dropout, Reshape, Flatten, BatchNormalization
from tensorflow.keras.losses import CategoricalCrossentropy


## Data augmentation functions

In [2]:
def add_noise(data, noise_factor):
    """Return a copy of ``data`` with white Gaussian noise mixed in.

    Args:
        data: Array-like of audio samples.
        noise_factor: Scale applied to the standard-normal noise before it
            is added to the signal.

    Returns:
        A new NumPy array with the same shape and dtype as ``data``.
    """
    data = np.asarray(data)
    # Draw one noise value per sample. Using data.shape (instead of len(data))
    # consumes the global NumPy RNG identically for 1-D input.
    noise = np.random.standard_normal(data.shape)
    augmented_data = data + noise_factor * noise
    # Take the dtype from the array itself: indexing data[0] to discover the
    # element type raises IndexError on an empty signal.
    return augmented_data.astype(data.dtype)

def change_pitch(data, sampling_rate, pitch_factor):
    """Pitch-shift an audio signal with ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples.
        sampling_rate: Sampling rate of ``data`` in Hz.
        pitch_factor: Number of steps to shift by (negative values shift
            down); passed to librosa as ``n_steps``.

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these arguments.
    """
    # ``sr`` and ``n_steps`` are keyword-only from librosa 0.10 on, so a
    # positional call raises TypeError there. Keywords work on older releases too.
    return librosa.effects.pitch_shift(data, sr=sampling_rate, n_steps=pitch_factor)

def change_speed(data, speed_factor):
    """Time-stretch an audio signal with ``librosa.effects.time_stretch``.

    Args:
        data: Audio samples.
        speed_factor: Stretch rate passed to librosa as ``rate``.

    Returns:
        Whatever ``librosa.effects.time_stretch`` returns for these arguments.
    """
    # ``rate`` is keyword-only from librosa 0.10 on, so a positional call
    # raises TypeError there. The keyword form works on older releases too.
    return librosa.effects.time_stretch(data, rate=speed_factor)

## Helpers and labels used to generate the data:

In [3]:
def gimatria(letters):
    """Convert a Hebrew-letter numeral into its integer value.

    Each of the 22 letters contributes a fixed value (the first nine letters
    1..9, the next nine 10..90, the last four 100..400) and the contributions
    are summed.

    Args:
        letters: Iterable of Hebrew letters, typically a ``str``. Final-form
            letters are not part of the lookup alphabet.

    Returns:
        The summed value. When an unsupported item is encountered a message
        is printed and the sum accumulated up to that point is returned
        (0 if nothing was accumulated).
    """
    alphabet = 'אבגדהוזחטיכלמנסעפצקרשת'
    result = 0
    try:
        for letter in letters:
            position = alphabet.index(letter) + 1
            # magnitude is 0 for units, 1 for tens, 2 for hundreds; adding it
            # to the last digit turns positions 10..19 into 1..10 (x10) and
            # positions 20..22 into 2..4 (x100).
            magnitude, digit = divmod(position, 10)
            result += (digit + magnitude) * (10 ** magnitude)
    except (ValueError, TypeError):
        # ValueError: item is not in the alphabet. TypeError: item is not a
        # string, or ``letters`` is not iterable. Anything else (for example
        # KeyboardInterrupt) must propagate instead of being swallowed.
        print('Wrong parameter, hebrew letters only expected! Got: ', letters)
    return result

# Label vocabulary for the model. The position of a name in this array is its
# class id: generate_data() builds one-hot vectors by comparing a character's
# Unicode name against this array, and the model uses len(accent_names) as the
# number of output labels. Do not reorder or insert entries without retraining.
# The first two entries are sequence delimiters, not Unicode character names.
accent_names = np.asarray([
    '[START]',
    '[END]',
    "HEBREW ACCENT ETNAHTA",
    "HEBREW ACCENT SEGOL",
    "HEBREW ACCENT SHALSHELET",
    "HEBREW ACCENT ZAQEF QATAN",
    "HEBREW ACCENT ZAQEF GADOL",
    "HEBREW ACCENT TIPEHA",
    "HEBREW ACCENT REVIA",
    "HEBREW ACCENT ZARQA",
    "HEBREW ACCENT PASHTA",
    "HEBREW ACCENT YETIV",
    "HEBREW ACCENT TEVIR",
    "HEBREW ACCENT GERESH",
    "HEBREW ACCENT GERESH MUQDAM",
    "HEBREW ACCENT GERSHAYIM",
    "HEBREW ACCENT QARNEY PARA",
    "HEBREW ACCENT TELISHA GEDOLA",
    "HEBREW ACCENT PAZER",
    "HEBREW ACCENT ATNAH HAFUKH",
    "HEBREW ACCENT MUNAH",
    "HEBREW ACCENT MAHAPAKH",
    "HEBREW ACCENT MERKHA",
    "HEBREW ACCENT MERKHA KEFULA",
    "HEBREW ACCENT DARGA",
    "HEBREW ACCENT QADMA",
    "HEBREW ACCENT TELISHA QETANA",
    "HEBREW ACCENT YERAH BEN YOMO",
    "HEBREW ACCENT OLE",
    "HEBREW ACCENT ILUY",
    "HEBREW ACCENT DEHI",
    "HEBREW ACCENT ZINOR",
    "HEBREW POINT METEG",
    "HEBREW PUNCTUATION PASEQ",
])

## The function that actually generates the data

In [4]:
def _chapter_verses(book):
    """Split a book's flat verse list into one list of verses per chapter."""
    chapters = []
    offset = 0
    for chap in book['chapters']:
        verse_count = book['chapters'][chap]
        chapters.append(book['psukim'][offset:offset + verse_count])
        offset += verse_count
    return chapters


def _parse_verse_ref(reference):
    """Decode a '<chapter>-<verse>' reference in Hebrew numerals to 1-based ints."""
    parts = reference.split('-')
    # The verse part is wrapped in one extra character on each side, which is
    # dropped before decoding (presumably brackets or quotes -- TODO confirm
    # against the JSON files).
    return gimatria(parts[0]), gimatria(parts[1][1:-1])


def _aliyah_verses(chapters, f_chap, f_pasuk, to_chap, to_pasuk):
    """Collect the verses from (f_chap, f_pasuk) through (to_chap, to_pasuk), inclusive."""
    if f_chap == to_chap:
        return chapters[f_chap - 1][f_pasuk - 1:to_pasuk]
    verses = []
    for c in range(f_chap - 1, to_chap):
        if c == f_chap - 1:
            # First chapter: from the starting verse to the end of the chapter.
            verses.extend(chapters[c][f_pasuk - 1:])
        elif c == to_chap - 1:
            # Last chapter: from its first verse through the ending verse.
            verses.extend(chapters[c][:to_pasuk])
        else:
            verses.extend(chapters[c])
    return verses


def _encode_accents(verses):
    """One-hot encode every known accent mark in ``verses``, wrapped in [START]/[END]."""
    known_names = set(accent_names.tolist())

    def one_hot(label):
        return (accent_names == label).astype(int)

    vectors = [one_hot('[START]')]
    for verse in verses:
        for char in verse:
            # Supply a default: unicodedata.name() raises ValueError for code
            # points that have no name (for example control characters).
            name = unicodedata.name(char, '')
            if name in known_names:
                vectors.append(one_hot(name))
    vectors.append(one_hot('[END]'))
    return vectors


def _spectrogram_frames(samples, sample_rate):
    """Cut ``samples`` into ``sample_rate``-long frames and return one STFT magnitude per frame."""
    remainder = samples.shape[0] % sample_rate
    if remainder:
        # Zero-pad the tail so the signal divides into whole frames.
        samples = np.pad(samples, (0, sample_rate - remainder))
    frames = np.reshape(samples, (samples.shape[0] // sample_rate, sample_rate))
    seq_of_specs = []
    for frame in frames:
        spectrogram = tf.abs(tf.signal.stft(frame, frame_length=255, frame_step=128))
        # Append a trailing channel axis for the convolutional encoder.
        spectrogram = tf.expand_dims(spectrogram, axis=-1)
        seq_of_specs.append(np.asarray(spectrogram))
    return seq_of_specs


def generate_data():
    """Yield (spectrogram sequence, accent sequence) training pairs.

    For every book, 'outputs/<book>.json' is read. For every weekly portion
    that is not listed under 'double_chaps', and for every aliyah in it, the
    aliyah's verses are collected and their accent marks are one-hot encoded
    against ``accent_names``. The matching recording
    'inputs/<book>/<portion>/<n>.mp3' is loaded at 16 kHz and augmented (noise,
    two pitch shifts, two speed changes), and one pair is yielded per variant.

    Yields:
        tuple: ``(seq_of_specs, alia_accents)`` where ``seq_of_specs`` is a
        list with one STFT-magnitude array per second of audio (shape
        (124, 129, 1) for 16000-sample frames) and ``alia_accents`` is the list
        of one-hot label vectors, starting with [START] and ending with [END].
        The same ``alia_accents`` list is yielded for every augmented variant.
    """
    books = [
        'בראשית',
        'שמות',
        'ויקרא',
        'במדבר',
        'דברים',
    ]
    # Aliyah name -> number used as the audio file name.
    aliyot = {
        'ראשון': 1,
        'שני': 2,
        'שלישי': 3,
        'רביעי': 4,
        'חמישי': 5,
        'שישי': 6,
        'שביעי': 7,
    }
    EXPECTED_SAMPLE_RATE = 16000

    for book_name in books:
        with open(os.path.join('outputs', book_name + '.json'), 'r', encoding='utf-8') as file:
            data = json.load(file)
        chapters_in_book = _chapter_verses(data)

        for w_chap in data['weekly_chaps']:
            if w_chap in data['double_chaps']:
                continue
            for alia in data['weekly_chaps'][w_chap]['aliyot']:
                bounds = data['weekly_chaps'][w_chap]['aliyot'][alia]
                f_chap, f_pasuk = _parse_verse_ref(bounds[0])
                to_chap, to_pasuk = _parse_verse_ref(bounds[1])
                alia_p = _aliyah_verses(chapters_in_book, f_chap, f_pasuk, to_chap, to_pasuk)
                alia_accents = _encode_accents(alia_p)

                # w_chap[5:] drops the first five characters of the portion key
                # to get the directory name (presumably a fixed prefix -- TODO confirm).
                audio_file = os.path.join('inputs', book_name, w_chap[5:], str(aliyot[alia]) + '.mp3')
                # ``sr`` must be passed by keyword: it is keyword-only in librosa >= 0.10.
                y, sr = librosa.load(audio_file, sr=EXPECTED_SAMPLE_RATE)
                if y.ndim == 2:
                    # librosa lays multi-channel audio out as (channels, samples),
                    # so down-mixing averages over axis 0.
                    y = y.mean(axis=0)
                y = y.astype(np.float32)
                all_audio_data_after_augumentation = [
                    y,
                    add_noise(y, 0.05),
                    change_pitch(y, sr, 4),
                    change_pitch(y, sr, -6),
                    change_speed(y, 1.5),
                    change_speed(y, .75),
                ]
                for aud_data in all_audio_data_after_augumentation:
                    yield _spectrogram_frames(aud_data, EXPECTED_SAMPLE_RATE), alia_accents



## Here we build our generator for `Dataset`

In [5]:
# generate_data() is a generator function, so this only creates a lazy iterator:
# no file is read until it is iterated, and it can be consumed a single time.
dataset = generate_data() 

## And finally, we build our training model

In [6]:
num_labels = len(accent_names)

# Encoder input: a variable-length sequence of spectrograms, each
# 124 STFT frames x 129 frequency bins x 1 channel.
enc_input = Input(shape=(None, 124, 129, 1), name='input', dtype='float32')

# Convolutional feature extractor applied to every spectrogram in the sequence.
# ReLU's first positional argument is max_value, so ReLU(0.01) would clip every
# activation to [0, 0.01]; the leak coefficient has to be passed as negative_slope.
enc_conv = Sequential([
    Conv2D(32, 3),
    ReLU(negative_slope=0.01),
    TimeDistributed(MaxPool2D(2, padding='same')),
    Conv2D(64, 3),
    ReLU(negative_slope=0.01),
    TimeDistributed(MaxPool2D(2, padding='same')),
    Conv2D(128, 3),
    ReLU(negative_slope=0.01),
    TimeDistributed(MaxPool2D(2, padding='same')),
    Conv2D(256, 3),
    ReLU(negative_slope=0.01),
    TimeDistributed(MaxPool2D(3, padding='same')),
    Conv2D(256, 3),
    ReLU(negative_slope=0.01),
    TimeDistributed(MaxPool2D(3, padding='same')),
])(enc_input)
# Output shape is (batch, sequence_length, 1, 1, 256)

enc_drop = Dropout(0.2)(enc_conv)
# Collapse the 1x1 spatial dimensions so each time step is a 256-vector.
enc_reshape = TimeDistributed(Reshape((256,)))(enc_drop)

# Only the encoder's final hidden and cell states are used; they seed the decoder.
enc_output, enc_h, enc_c = LSTM(num_labels, return_state=True)(enc_reshape)

# Decoder input: a sequence of one-hot accent vectors.
dec_input = Input(
    shape=(None, num_labels),
    name='decoder_input',
    dtype='float32'
)

decoder = LSTM(num_labels, return_sequences=True)(dec_input, initial_state=[enc_h, enc_c])
# The training cell compiles the model with 'categorical_crossentropy', which
# expects probabilities by default, so the output layer must apply softmax.
decoder = Dense(num_labels, activation='softmax')(decoder)

model = Model([enc_input, dec_input], decoder)
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, None, 124,   0           []                               
                                129, 1)]                                                          
                                                                                                  
 sequential (Sequential)        (None, None, 1, 1,   977920      ['input[0][0]']                  
                                256)                                                              
                                                                                                  
 dropout (Dropout)              (None, None, 1, 1,   0           ['sequential[0][0]']             
                                256)                                                          

## Now we are training

In [None]:
adam = tf.keras.optimizers.Adam()
# CategoricalAccuracy compares the arg-max label per time step. BinaryAccuracy
# scores every one of the num_labels outputs separately, so with one-hot targets
# it is dominated by the zeros and looks high even for a useless model.
accuracy = tf.keras.metrics.CategoricalAccuracy()
# NOTE(review): 'categorical_crossentropy' expects probabilities by default;
# confirm the model's output layer applies softmax.
model.compile(adam, 'categorical_crossentropy')

# `dataset` is a single-pass generator, so this loop is exactly one epoch with
# one (spectrogram sequence, accent sequence) pair per step.
for step, (spectrograms, accents) in enumerate(dataset, start=1):
    start = time.time()
    # Add a leading batch axis of size 1.
    enc_batch = np.asarray([spectrograms], dtype=np.float32)
    # Teacher forcing: the decoder sees the sequence without its last element
    # and is trained to predict the same sequence shifted left by one.
    dec_batch = np.asarray([accents[:-1]], dtype=np.float32)
    target_batch = np.asarray([accents[1:]], dtype=np.float32)

    accuracy.reset_state()
    loss = model.train_on_batch([enc_batch, dec_batch], target_batch)
    # Call the model directly for the single-batch forward pass: predict() adds
    # per-call overhead and prints its own progress bar on every step.
    output = model([enc_batch, dec_batch], training=False)
    accuracy.update_state(target_batch, output)
    # end='\r' rewrites the same line each step; a '\r' inside the string was
    # cancelled by print's newline.
    print(
        f'Step {step}, time taken {time.time()-start:.2f} sec, '
        f'Loss {loss:.2f}, Accuracy {float(accuracy.result()):.2f}',
        end='\r',
        flush=True,
    )
print('\n')

## For now we just save the weights for future work

In [None]:
# Write the model's weights to 'askenaz_taamim_model.h5', resolved relative to
# the notebook's working directory.
model.save_weights('askenaz_taamim_model.h5')