In [15]:
from tensorflow.keras.layers import Input, Embedding, LSTM, TimeDistributed, Dense, Concatenate, Bidirectional
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.preprocessing.sequence import pad_sequences
from BahdanauAttention import AttentionLayer
import numpy as np
import random
import eng_to_ipa as ipa
import pickle

In [16]:
class bi_model:
    """Seq2seq network: bidirectional-LSTM encoder, LSTM decoder with attention.

    Constructing an instance builds the encoder and decoder graphs and wraps
    them in ``training_model``. After training (or loading weights),
    ``build_inference_model`` derives separate encoder/decoder models that
    reuse the trained layers, and ``translate`` / ``decode_sequence`` run a
    greedy, token-by-token decode with them.
    """

    def __init__(self, max_encoder_len, max_decoder_len, build_inference_model_encoder_vocab, num_decoder_vocab):
        """Store the hyper-parameters and build the training graph.

        Args:
            max_encoder_len: fixed length of the padded encoder input.
            max_decoder_len: maximum number of tokens ``decode_sequence``
                emits before it stops.
            build_inference_model_encoder_vocab: size of the encoder
                vocabulary (input dimension of the encoder embedding). The
                parameter name is kept as-is for backward compatibility.
            num_decoder_vocab: size of the decoder vocabulary (input dimension
                of the decoder embedding and width of the softmax output).
        """
        self.latent_dim = 256
        self.embedding_dim = 200
        self.max_encoder_len = max_encoder_len
        self.max_decoder_len = max_decoder_len
        # Fix: take the value from the constructor argument. The previous code
        # read a name `num_encoder_vocab` that is not a parameter, so it only
        # worked when a module-level variable of that name happened to exist.
        self.num_encoder_vocab = build_inference_model_encoder_vocab
        self.num_decoder_vocab = num_decoder_vocab
        # Populated by build_inference_model().
        self.encoder_model_inference = None
        self.decoder_model_inference = None

        self.build_encoder()
        self.build_decoder()
        self.training_model = Model([self.encoder_inputs, self.decoder_inputs], self.decoder_outputs)

    def build_encoder(self):
        """Build the encoder: embedding followed by a bidirectional LSTM."""
        self.encoder_inputs = Input(shape=(self.max_encoder_len,))
        # Trainable embedding with output dimension self.embedding_dim
        self.enc_emb = Embedding(self.num_encoder_vocab, self.embedding_dim, trainable = True)(self.encoder_inputs)

        # Bidirectional lstm layer; returns the full output sequence plus the
        # final h/c states of the forward and the backward LSTM
        self.enc_lstm1 = Bidirectional(LSTM(self.latent_dim,return_sequences=True,return_state=True))
        self.encoder_outputs1, self.forw_state_h, self.forw_state_c, self.back_state_h, self.back_state_c = self.enc_lstm1(self.enc_emb)

        # Concatenate forward and backward states, separately for h and c
        self.final_enc_h = Concatenate()([self.forw_state_h,self.back_state_h])
        self.final_enc_c = Concatenate()([self.forw_state_c,self.back_state_c])

        # These states are used as the decoder LSTM's initial state
        self.encoder_states =[self.final_enc_h, self.final_enc_c]

    def build_decoder(self):
        """Build the decoder: embedding, LSTM, attention and softmax output."""
        self.decoder_inputs = Input(shape=(None,))

        # Decoder embedding uses the same output dimension as the encoder's.
        # The layer object is kept because the inference decoder reuses it.
        self.dec_emb_layer = Embedding(self.num_decoder_vocab, self.embedding_dim)
        self.dec_emb = self.dec_emb_layer(self.decoder_inputs)

        # The encoder states are concatenations of two latent_dim-sized
        # states, so the single decoder LSTM needs latent_dim*2 units to
        # accept them as its initial state.
        self.decoder_lstm = LSTM(self.latent_dim*2, return_sequences=True, return_state=True)
        self.decoder_outputs, _, _ = self.decoder_lstm(self.dec_emb, initial_state=self.encoder_states)

        # Attention over the encoder output sequence (AttentionLayer comes
        # from the BahdanauAttention module and returns two tensors)
        self.attention_layer = AttentionLayer()
        self.attention_result, self.attention_weights = self.attention_layer([self.encoder_outputs1, self.decoder_outputs])

        # Concat attention output and decoder LSTM output
        self.decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([self.decoder_outputs, self.attention_result])

        # Dense layer with softmax over the decoder vocabulary
        self.decoder_dense = Dense(self.num_decoder_vocab, activation='softmax')
        self.decoder_outputs = self.decoder_dense(self.decoder_concat_input)

    def compile(self):
        """Compile ``training_model`` with rmsprop and sparse categorical cross-entropy."""
        self.training_model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics = ['acc'])

    def fit(self, x_tr, y_tr_in, y_tr_out, x_test, y_test_in, y_test_out, ep, batch_size):
        """Train ``training_model``.

        Args:
            x_tr, y_tr_in, y_tr_out: encoder inputs, decoder inputs and
                decoder targets used for training.
            x_test, y_test_in, y_test_out: the same three arrays, passed to
                Keras as validation data.
            ep: number of epochs.
            batch_size: mini-batch size.

        Training stops early once ``val_loss`` has not improved for 2 epochs,
        and the model is checkpointed to 'segmenter_best_weights.h5' whenever
        ``val_acc`` improves.
        """
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)
        ck = ModelCheckpoint(filepath='segmenter_best_weights.h5', monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        Callbacks = [es, ck]
        self.training_model.fit([x_tr,y_tr_in], y_tr_out, epochs = ep, callbacks=Callbacks, batch_size = batch_size, validation_data=(([x_test,y_test_in]), y_test_out))

    def build_inference_model(self):
        """Build the encoder and decoder inference models and save both to disk.

        Side effects: sets ``encoder_model_inference`` and
        ``decoder_model_inference`` and writes
        'final_encoder_model_segmenter.h5' and
        'final_decoder_model_segmenter.h5'.
        """
        self.encoder_model_inference = Model(self.encoder_inputs, outputs = [self.encoder_outputs1, self.final_enc_h, self.final_enc_c])
        self.encoder_model_inference.save('final_encoder_model_segmenter.h5')

        # Decoder Inference
        # State inputs must have as many units as the trained decoder LSTM
        self.decoder_state_h = Input(shape=(self.latent_dim*2,))
        self.decoder_state_c = Input(shape=(self.latent_dim*2,))

        # The attention layer needs the encoder output sequence, supplied
        # here as an input of shape (max_encoder_len, latent_dim*2)
        self.decoder_hidden_state_input = Input(shape=(self.max_encoder_len,self.latent_dim*2))
        # get decoder states
        self.dec_states = [self.decoder_state_h, self.decoder_state_c]

        # Reuse the trained embedding and LSTM layers
        self.dec_emb2 = self.dec_emb_layer(self.decoder_inputs)
        self.decoder_outputs2, self.state_h2, self.state_c2 = self.decoder_lstm(self.dec_emb2, initial_state=self.dec_states)

        # Attention inference
        self.attention_result_inf, self.attention_weights_inf = self.attention_layer([self.decoder_hidden_state_input, self.decoder_outputs2])
        self.decoder_concat_input_inf = Concatenate(axis=-1, name='concat_layer')([self.decoder_outputs2, self.attention_result_inf])

        self.dec_states2= [self.state_h2, self.state_c2]
        self.decoder_outputs2 = self.decoder_dense(self.decoder_concat_input_inf)

        # Decoder model: (tokens, encoder outputs, h, c) -> (probabilities, h, c)
        self.decoder_model_inference= Model(
                            [self.decoder_inputs] + [self.decoder_hidden_state_input, self.decoder_state_h, self.decoder_state_c],
                             [self.decoder_outputs2]+ self.dec_states2)
        self.decoder_model_inference.save('final_decoder_model_segmenter.h5')

    def decode_sequence(self, input_seq, i2o, o2i):
        """Greedily decode one encoded input sequence.

        Requires ``build_inference_model`` to have been called first.

        Args:
            input_seq: encoder input passed to the inference encoder's
                ``predict``.
            i2o: mapping from output index to output token.
            o2i: mapping from output token to index; must contain the start
                token '<'.

        Returns:
            List of decoded tokens, without the stop token '>'. Decoding ends
            at the first '>' or once ``max_decoder_len`` tokens were emitted.
        """
        e_out ,e_h, e_c = self.encoder_model_inference.predict(input_seq, verbose = 0)
        # Seed the decoder with the start token
        target_seq = np.zeros((1,1))
        target_seq[0,0] = o2i['<']

        stop_condition = False
        decoded_sentence = []

        while not stop_condition:
            (output_tokens, h, c) = self.decoder_model_inference.predict([target_seq] + [e_out, e_h, e_c], verbose = 0)

            # Pick the most probable token of the last time step
            # NOTE(review): raises KeyError if that index is missing from i2o
            # (e.g. a padding index) - confirm i2o covers every output index.
            sampled_token_index = np.argmax(output_tokens[0, -1, :])
            sampled_token = i2o[sampled_token_index]

            if sampled_token != '>':
                decoded_sentence += [sampled_token]

            # Exit condition: either hit max length or find the stop token.
            if (sampled_token == '>') or (len(decoded_sentence) >= self.max_decoder_len):
                stop_condition = True

            # The sampled token becomes the next decoder input (length 1)
            target_seq = np.zeros((1, 1))
            target_seq[0, 0] = sampled_token_index

            # Carry the decoder states over to the next step
            (e_h, e_c) = (h, c)
        return decoded_sentence

    def word2seq(self, a2i, input_word):
        """Map each character of ``input_word`` through ``a2i`` and post-pad to ``max_encoder_len``.

        Raises:
            KeyError: if a character of ``input_word`` is not a key of ``a2i``.
        """
        final_seq = [a2i[c] for c in input_word]
        final_seq = pad_sequences([final_seq], maxlen=self.max_encoder_len, padding='post')[0]
        return final_seq

    def translate(self, input_word, a2i, i2o, o2i):
        """Encode ``input_word`` with ``a2i`` and return its decoded token list."""
        seq = self.word2seq(a2i, input_word).reshape(1, self.max_encoder_len)
        return self.decode_sequence(seq, i2o, o2i)

In [46]:
# Read the dataset, one entry per line, without the trailing newline.
# The context manager closes the file even if reading fails.
with open('dataset.txt') as dataset_file:
    dataset = [word.strip('\n') for word in dataset_file]

# NOTE(review): the shuffle is unseeded, so the order (and therefore the
# dataset[:2000] subset used later in this notebook) differs between runs.
random.shuffle(dataset)

In [47]:
# prepare parameters

# fetch metadata
# Every line of metadata_ipa.txt is tab-separated; only the last field of
# each line is kept. The first four lines hold, in order: max encoder length,
# max decoder length, encoder vocabulary size and decoder vocabulary size.
with open('metadata_ipa.txt') as metadata_file:
    metadata = [line.strip('\n').split('\t')[-1] for line in metadata_file]

max_encoder_len = int(metadata[0])
max_decoder_len = int(metadata[1])
num_encoder_vocab = int(metadata[2])
num_decoder_vocab = int(metadata[3])

# fetch dictionaries
# SECURITY: pickle.load can execute arbitrary code; only load .pkl files that
# come from a trusted source.
# Context managers close each file even if unpickling raises.
with open('ipa_e2i.pkl', 'rb') as e2i_file:
    e2i = pickle.load(e2i_file)
with open('ipa_i2e.pkl', 'rb') as i2e_file:
    i2e = pickle.load(i2e_file)
with open('ipa_d2i.pkl', 'rb') as d2i_file:
    d2i = pickle.load(d2i_file)
with open('ipa_i2d.pkl', 'rb') as i2d_file:
    i2d = pickle.load(i2d_file)

In [48]:
# Rebuild the network with the dimensions read from the metadata file,
# restore its weights, then derive the encoder/decoder inference models
# (build_inference_model also saves both models to disk).
syl2ipa = bi_model(
    max_encoder_len,
    max_decoder_len,
    num_encoder_vocab,
    num_decoder_vocab,
)
syl2ipa.training_model.load_weights('syllable_translator_best_weights.h5')
syl2ipa.build_inference_model()









In [53]:
# Output files for the generated data set. Both handles stay open across
# cells: ipa_splitter writes to training_data_ipa, and the generation cell
# writes the metadata and closes both files.
training_data_ipa = open('training_data_ipa2ipa.txt', mode='w+', encoding='utf-8')
metadata_ipa_file = open('metadata_ipa2ipa.txt', mode='w+')

# Running statistics, updated by ipa_splitter for every accepted word.
data_counter = 0
ipa_max_encoder_len, ipa_max_decoder_len = 0, 0

# Distinct characters seen so far (two separate lists).
ipa_encoder_vocab, ipa_decoder_vocab = [], []

def ipa_splitter(line):
    """Create one training example from a ';'-separated, syllabified word.

    The whole word (separators removed) is converted with ``ipa.convert``;
    if that result ends with '*', the line is skipped. Otherwise each
    syllable is translated on its own with ``syl2ipa`` and the pieces are
    concatenated into the "attempted" translation. One line
    ``word<TAB>attempted<TAB><actual>`` is written to ``training_data_ipa``.

    Module-level state updated for every accepted word: ``data_counter``,
    ``ipa_max_encoder_len``, ``ipa_max_decoder_len``, ``ipa_encoder_vocab``
    and ``ipa_decoder_vocab``.

    Args:
        line: a word whose syllables are separated by ';'.
    """
    # Only names that are rebound need a global declaration; the vocabulary
    # lists are mutated in place.
    global ipa_max_encoder_len
    global ipa_max_decoder_len
    global data_counter

    word = line.replace(';', '')

    actual_translation = ipa.convert(word)
    # A trailing '*' is presumably eng_to_ipa's marker for a word it could
    # not convert; such words are not usable as targets.
    if actual_translation.endswith('*'):
        return

    # Wrap the target in the start/stop tokens used by the decoder
    actual_translation = '<' + actual_translation + '>'

    # Translate syllable by syllable. The token lists are joined inline so
    # this function does not depend on the seq2word helper, which is only
    # defined in a later cell of the notebook.
    attempted_translation = "".join(
        "".join(syl2ipa.translate(syl, e2i, i2d, d2i))
        for syl in line.split(';')
    )

    ipa_max_encoder_len = max(len(attempted_translation), ipa_max_encoder_len)
    ipa_max_decoder_len = max(len(actual_translation), ipa_max_decoder_len)

    training_data_ipa.write(word + '\t' + attempted_translation + '\t' + actual_translation + '\n')
    data_counter += 1

    # Record characters not seen before, in first-seen order
    for c in attempted_translation:
        if c not in ipa_encoder_vocab:
            ipa_encoder_vocab.append(c)
    for c in actual_translation:
        if c not in ipa_decoder_vocab:
            ipa_decoder_vocab.append(c)

In [54]:
# creating syllable to ipa data
# The try/finally guarantees that both output files are closed even when
# ipa_splitter raises part-way through (e.g. on a character that is missing
# from the encoder dictionary); previously the handles leaked in that case.
try:
    # Only the first 2000 entries of the dataset are processed
    for line in dataset[:2000]:
        ipa_splitter(line)
        print(data_counter, end='\r')
    print("completed")

    ipa_encoder_vocab = sorted(ipa_encoder_vocab)
    ipa_decoder_vocab = sorted(ipa_decoder_vocab)

    # Indices start at 1; the vocabulary sizes below add one extra slot for
    # index 0.
    ipa_encoder_to_int = {a: i for i, a in enumerate(ipa_encoder_vocab, 1)}
    ipa_int_to_encoder = {i: a for i, a in enumerate(ipa_encoder_vocab, 1)}

    ipa_decoder_to_int = {a: i for i, a in enumerate(ipa_decoder_vocab, 1)}
    ipa_int_to_decoder = {i: a for i, a in enumerate(ipa_decoder_vocab, 1)}

    ipa_num_encoder_vocab = len(ipa_encoder_vocab) + 1
    ipa_num_decoder_vocab = len(ipa_decoder_vocab) + 1

    # One "label<TAB>value" line per parameter
    metadata_ipa_file.write("ipa_max_encoder_len: " + '\t' + str(ipa_max_encoder_len) + '\n')
    metadata_ipa_file.write("ipa_max_decoder_len: " + '\t' + str(ipa_max_decoder_len) + '\n')
    metadata_ipa_file.write("ipa_num_encoder_vocab: " + '\t' + str(ipa_num_encoder_vocab) + '\n')
    metadata_ipa_file.write("ipa_num_decoder_vocab: " + '\t' + str(ipa_num_decoder_vocab) + '\n')
finally:
    training_data_ipa.close()
    metadata_ipa_file.close()

completed


In [None]:
# Persist the four lookup tables. Each file is written inside a context
# manager so it is flushed and closed even if pickling raises.
with open('ipa2ipa_e2i.pkl', 'wb') as e2i_file:
    pickle.dump(ipa_encoder_to_int, e2i_file)
with open('ipa2ipa_i2e.pkl', 'wb') as i2e_file:
    pickle.dump(ipa_int_to_encoder, i2e_file)
with open('ipa2ipa_d2i.pkl', 'wb') as d2i_file:
    pickle.dump(ipa_decoder_to_int, d2i_file)
with open('ipa2ipa_i2d.pkl', 'wb') as i2d_file:
    pickle.dump(ipa_int_to_decoder, i2d_file)

In [19]:
def seq2word(array):
    """Concatenate an iterable of strings into a single string.

    Args:
        array: iterable of ``str`` (e.g. a list of decoded tokens).

    Returns:
        The items joined in order with no separator; '' for an empty input.
    """
    # str.join builds the result in one pass instead of repeated
    # concatenation.
    return "".join(array)