In [21]:
from tensorflow.keras.layers import Input, Embedding, LSTM, TimeDistributed, Dense
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.translate.bleu_score import sentence_bleu
import eng_to_ipa as ipa
import numpy as np
import random
import pickle

In [61]:
class s2s_model:
    def __init__(self, max_encoder_len, max_decoder_len, num_encoder_vocab, num_decoder_vocab):
        self.latent_dim = 256
        self.embedding_dim = 200
        self.max_encoder_len = max_encoder_len
        self.max_decoder_len = max_decoder_len
        self.num_encoder_vocab = num_encoder_vocab
        self.num_decoder_vocab = num_decoder_vocab
        
        self.build_encoder()
        self.build_decoder()
        
        self.training_model = Model([self.encoder_inputs, self.decoder_inputs], self.decoder_outputs)
        
    def build_encoder(self):
        self.encoder_inputs = Input(shape=(self.max_encoder_len, ))
        self.encoder_embed = Embedding(self.num_encoder_vocab, self.embedding_dim, trainable=True)(self.encoder_inputs)

        self.encoder_LSTM1 = LSTM(self.latent_dim, return_sequences=True, return_state=True, dropout = 0.4, recurrent_dropout = 0.4)
        self.encoder_output1, self.state_h1, self.state_c1 = self.encoder_LSTM1(self.encoder_embed)

        self.encoder_LSTM2 = LSTM(self.latent_dim, return_sequences=True, return_state=True, dropout=0.4, recurrent_dropout=0.4)
        self.encoder_output2, self.state_h2, self.state_c2 = self.encoder_LSTM2(self.encoder_output1) # encoder LSTMs feed into each other

        self.encoder_LSTM3 = LSTM(self.latent_dim, return_sequences=True, return_state=True, dropout=0.4, recurrent_dropout=0.4)
        self.encoder_output, self.state_h, self.state_c = self.encoder_LSTM3(self.encoder_output2) # final outputs and states to pass to decoder LSTM
        
    def build_decoder(self):
        self.decoder_inputs = Input(shape=(None,))

        # define layer architecture, then match to inputs
        self.decoder_embed_layer = Embedding(self.num_decoder_vocab, self.embedding_dim, trainable=True)
        self.decoder_embed = self.decoder_embed_layer(self.decoder_inputs)

        # decoder LSTM layer
        self.decoder_LSTM = LSTM(self.latent_dim, return_sequences=True, return_state= True, dropout=0.4, recurrent_dropout=0.2)
        self.decoder_outputs, self.decoder_fwd_state, self.decoder_back_state = self.decoder_LSTM(self.decoder_embed, initial_state=[self.state_h, self.state_c])

        # dense layer (output layer)
        # keras.layers.TimeDistributed layer considers temporal dimension
        # Every input should be at least 3D, and the dimension of index one of the first input will be considered to be the temporal dimension.
        self.decoder_dense = TimeDistributed(Dense(self.num_decoder_vocab, activation='softmax'))
        self.decoder_outputs = self.decoder_dense(self.decoder_outputs)
        
    def compile(self):
        self.training_model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics = ['acc'])
        
    def fit(self, x_tr, y_tr_in, y_tr_out, x_test, y_test_in, y_test_out, ep, batch_size):
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)
        ck = ModelCheckpoint(filepath='model_best_weights.h5', monitor='val_acc', verbose=2, save_best_only=True, mode='max')
        Callbacks = [es, ck]
        self.training_model.fit([x_tr,y_tr_in], y_tr_out, epochs = ep, callbacks=Callbacks, batch_size = batch_size, validation_data=(([x_test,y_test_in]), y_test_out))
    
    def build_inference_model(self):
        self.inference_encoder_model = Model(inputs= self.encoder_inputs, outputs=[self.encoder_output, self.state_h, self.state_c])

        self.inference_encoder_model.save('final_encoder_model.h5')

        # decoder setup
        self.decoder_state_input_h = Input(shape=(self.latent_dim,))
        self.decoder_state_input_c = Input(shape=(self.latent_dim,))
        self.decoder_hidden_state_input = Input(shape=(self.max_encoder_len, self.latent_dim))

        self.decoder_embed_i = self.decoder_embed_layer(self.decoder_inputs)

        self.decoder_output_i, self.state_h_i, self.state_c_i = self.decoder_LSTM(self.decoder_embed_i, initial_state = [self.decoder_state_input_h, self.decoder_state_input_c])

        self.decoder_output_i = self.decoder_dense(self.decoder_output_i)

        # final decoder inference model
        self.inference_decoder_model = Model([self.decoder_inputs] + [self.decoder_hidden_state_input, self.decoder_state_input_h, self.decoder_state_input_c], [self.decoder_output_i] + [self.state_h_i, self.state_c_i])

        # save the final inference model
        self.inference_decoder_model.save('final_segmenter_model.h5')
        
    def decode_sequence(self, input_seq, i2o, o2i):
        e_out,e_h, e_c = self.inference_encoder_model.predict(input_seq, verbose = 0)
        target_seq = np.zeros((1,1))
        target_seq[0,0] = o2i['<']

        stop_condition = False
        decoded_sentence = []

        while not stop_condition:
            (output_tokens, h, c) = self.inference_decoder_model.predict([target_seq] + [e_out, e_h, e_c], verbose = 0)

            # Sample a token
            sampled_token_index = np.argmax(output_tokens[0, -1, :])
            sampled_token = i2o[sampled_token_index]   

            if sampled_token != '>':
                decoded_sentence += [sampled_token]

            # Exit condition: either hit max length or find the stop word.
            if (sampled_token == '>') or (len(decoded_sentence) >= self.max_decoder_len):
                stop_condition = True

            # Update the target sequence (of length 1)
            target_seq = np.zeros((1, 1))
            target_seq[0, 0] = sampled_token_index

            # Update internal states
            (e_h, e_c) = (h, c)
        return decoded_sentence
    def word2seq(self, a2i, input_word):
        final_seq = []
        for c in input_word:
            final_seq += [a2i[c]]
        final_seq = pad_sequences([final_seq], maxlen=self.max_encoder_len, padding='post')[0]
        return final_seq
    
    def translate(self, input_word, a2i, i2o, o2i):
        seq = self.word2seq(a2i, input_word).reshape(1, self.max_encoder_len)
        return self.decode_sequence(seq, i2o, o2i)
    def load_inference_model(self, encoder_filename, decoder_filename):
        self.inference_encoder_model = load_model(encoder_filename, compile=False)
        self.inference_decoder_model = load_model(decoder_filename, compile=False)

In [62]:
testing_translator = s2s_model(10, 12, 28, 44)
testing_segmenter = s2s_model(9, 12, 27, 30)

In [77]:
testing_translator.load_inference_model('final_encoder_model_translator.h5', 'final_decoder_model_translator.h5')
testing_segmenter.load_inference_model('final_encoder_model_segmenter.h5', 'final_decoder_model_segmenter.h5')

In [78]:
ipa2i_file = open("ipa2i.pkl","rb")
o2i_file = open("o2i.pkl","rb")
i2ipa_file = open("i2ipa.pkl","rb")
i2o_file = open("i2o.pkl","rb")

a2i_file = open("a2i.pkl","rb")
o2i_file_seg = open("o2i_seg.pkl","rb")
i2a_file = open("i2a.pkl","rb")
i2o_file_seg = open("i2o_seg.pkl","rb")

new_words_file = open('valid_validation.txt')
new_words_list = new_words_file.readlines()
new_words_list = [word.strip('\n') for word in new_words_list]

ipa2i = pickle.load(ipa2i_file)
o2i = pickle.load(o2i_file)
i2ipa = pickle.load(i2ipa_file)
i2o = pickle.load(i2o_file)

a2i = pickle.load(a2i_file)
o2i_seg = pickle.load(o2i_file_seg)
i2a = pickle.load(i2a_file)
i2o_seg = pickle.load(i2o_file_seg)

In [76]:
output_file = open('output.txt', 'w+', encoding='UTF-8')

num_words = 0
sum_bleu = 0
quit

for new_word in new_words_list:
    segmented_array = testing_segmenter.translate(new_word, a2i, i2o_seg, o2i_seg)
    segmented_string = convert_to_string(segmented_array)
    ipa_array = testing_translator.translate(segmented_string, o2i, i2ipa, ipa2i)
    translated_string = convert_to_string(ipa_array)
    actual_translation = ipa.convert(new_word)
    bleu_score = -1.0
    if(actual_translation != '*'):
        bleu_score = calc_bleu(actual_translation, translated_string)
        sum_bleu += bleu_score
        num_words += 1
    output_file.write(new_word + '\t' + segmented_string + '\t' + translated_string + '\t' + actual_translation + '\t' + '%f'%bleu_score + '\n')
output_file.write("Number of words with translations = " + str(num_words) + '\n')
output_file.write("Average  Bleu Score = " + '%f'%(sum_bleu/num_words) + '\n')

print("Number of words with translations = ", num_words)
print("Average  Bleu Score = " + '%f'%(sum_bleu/num_words))

output_file.close()

Number of words with translations =  496
Average  Bleu Score = 0.538431


In [10]:
def convert_to_string(array):
    final = ""
    for c in array:
        final += c
    return final

In [42]:
def calc_bleu(ref, cand):
    reference = [*ref]
    candidate = [*cand]
    return sentence_bleu(reference, candidate, weights=(1, 0, 0, 0))

In [89]:
exit_words = ['quit', 'exit', 'stop']

input_word = ""

print("This program does its best to segment a given mono syllabic word into its constituent onset-rime pair, and outputs an IPA transcription of the word.")
print("This network works best on single syllable words with common rimes.")

while(True):
    input_word = input("Enter a monosyllabic word that does not contain the letter X, or 'quit' to exit: ")
    if(input_word in exit_words):
        print("Goodbye!")
        break
    segmented_array = testing_segmenter.translate(input_word, a2i, i2o_seg, o2i_seg)
    segmented_string = convert_to_string(segmented_array)
    ipa_array = testing_translator.translate(segmented_string, o2i, i2ipa, ipa2i)
    translated_string = convert_to_string(ipa_array)
    actual_translation = ipa.convert(input_word)
    print("Word segmented by onset-rime pair: " + segmented_string)
    print("Best guess ipa translation: " + translated_string)
    print("Actual ipa translation: " + actual_translation)

This program does its best to segment a given mono syllabic word into its constituent onset-rime pair, and outputs an IPA transcription of the word.
This network works best on single syllable words with common rimes.


Enter a monosyllabic word that does not contain the letter X, or 'quit' to exit:  trop


Word segmented by onset-rime pair: tr,op
Best guess ipa translation: trop*
Actual ipa translation: trop*


Enter a monosyllabic word that does not contain the letter X, or 'quit' to exit:  mlop


Word segmented by onset-rime pair: m,op
Best guess ipa translation: mɑp
Actual ipa translation: mlop*


Enter a monosyllabic word that does not contain the letter X, or 'quit' to exit:  krhing


Word segmented by onset-rime pair: kh,ump 
Best guess ipa translation: khump*
Actual ipa translation: krhing*


Enter a monosyllabic word that does not contain the letter X, or 'quit' to exit:  exit


Goodbye!
