In [1]:
import pandas as pd
from models.utils import get_all_characters, get_vocab, get_inv_vocab, preprocess_data
from models.rnn_attention import initialize_shared_weights, custom_model
from keras.optimizers import Adam
import numpy as np

Using TensorFlow backend.


In [2]:
# Load the parallel corpus; later cells only read the 'en' and 'ig' columns.
# NOTE(review): the recorded preview also shows 'Unnamed: 0.1' and 'Unnamed: 0',
# which look like index columns saved into the CSV — consider
# usecols=['en', 'ig'] once the preview output is regenerated.
dataset = pd.read_csv('./data/data.csv')

In [3]:
# Preview the first 10 rows to eyeball the sentence pairs.
dataset.head(10)

Unnamed: 0.1,Unnamed: 0,en,ig
0,0,into two,yiwa
1,1,"11 Like a dog that returns to its vomit, The s...",11 Dị nnọọ ka nkịta nke na-alaghachi n’agbọ ọ ...
2,2,rush,kpọwa
3,3,"4 So his father-in-law, the young woman’s fath...","4 Ọgọ nwoke ahụ, bụ́ nna nwa agbọghọ ahụ, ekwe..."
4,4,trap for animals,igbụdụ
5,5,"16 A day of the horn and of the battle cry, Ag...","16 ụbọchị ụda opi na nke iti mkpu agha, megide..."
6,6,"15 After all these things, Abʹsa·lom acquired ...",15 O wee ruo mgbe ihe ndị ahụ gasịrị na Absalọ...
7,7,cat,bụusụ
8,8,Rhynchophorus phoenicis,akpa nkwu
9,9,7 The length of time that David lived in the c...,7 Ụbọchị niile Devid biri n’ime ime obodo ndị ...


In [4]:
# Pull both text columns out of the frame as plain Python lists,
# English first and Igbo second.
english_texts, igbo_texts = (dataset[column].tolist() for column in ('en', 'ig'))

In [5]:
# Build the character inventory of each language from its sentences.
# get_all_characters comes from models.utils; presumably it returns the distinct
# characters seen in the texts — verify against the helper.
english_characters = get_all_characters(english_texts)
igbo_characters = get_all_characters(igbo_texts)

In [6]:
# Size of each character inventory.
num_english_tokens = len(english_characters)
num_igbo_tokens = len(igbo_characters)
# Sequence lengths handed to the model builders below: Tx on the English (input)
# side, Ty on the Igbo (output) side. Presumably the padded/truncated lengths
# used when encoding the texts — confirm against preprocess_data.
Tx = 150
Ty = 170

In [7]:
# Report the corpus size and the per-language character counts.
print("Number of input", len(english_texts))
print("Number of unique igbo characters", num_igbo_tokens)
print("Number of unique english characters", num_english_tokens)

Number of input 45165
Number of unique igbo characters 117
Number of unique english characters 96


In [8]:
# Map each language's characters to integer ids (the igbo_vocab printed in the
# next cell starts with '<bos>', '<pad>', '<eos>', '<unk>' at ids 0-3).
igbo_vocab = get_vocab(igbo_characters)
english_vocab = get_vocab(english_characters)
# Reverse lookup for the Igbo side; presumably id -> character for decoding
# predictions — confirm against models.utils.get_inv_vocab.
igbo_inv_vocab = get_inv_vocab(igbo_vocab)

In [9]:
# NOTE(review): these two counts repeat the computation from an earlier cell;
# the values only differ if get_vocab changed the character lists — confirm,
# then drop the duplicate.
num_english_tokens = len(english_characters)
num_igbo_tokens = len(igbo_characters)
# Dump both Igbo mappings and their sizes for a visual consistency check
# (the two lengths are expected to match if the inverse map is complete).
print(igbo_vocab)
print(igbo_inv_vocab)
print(len(igbo_vocab))
print(len(igbo_inv_vocab))

defaultdict(<function get_vocab.<locals>.<lambda> at 0x000001386730E048>, {'<bos>': 0, '<pad>': 1, '<eos>': 2, '<unk>': 3, '\x1f': 4, ' ': 5, '!': 6, '"': 7, '#': 8, "'": 9, '(': 10, ')': 11, ',': 12, '-': 13, '.': 14, '/': 15, '0': 16, '1': 17, '2': 18, '3': 19, '4': 20, '5': 21, '6': 22, '7': 23, '8': 24, '9': 25, ':': 26, ';': 27, '?': 28, 'A': 29, 'B': 30, 'C': 31, 'D': 32, 'E': 33, 'F': 34, 'G': 35, 'H': 36, 'I': 37, 'J': 38, 'K': 39, 'L': 40, 'M': 41, 'N': 42, 'O': 43, 'P': 44, 'R': 45, 'S': 46, 'T': 47, 'U': 48, 'V': 49, 'W': 50, 'Y': 51, 'Z': 52, '[': 53, ']': 54, '`': 55, 'a': 56, 'b': 57, 'c': 58, 'd': 59, 'e': 60, 'f': 61, 'g': 62, 'h': 63, 'i': 64, 'j': 65, 'k': 66, 'l': 67, 'm': 68, 'n': 69, 'o': 70, 'p': 71, 'q': 72, 'r': 73, 's': 74, 't': 75, 'u': 76, 'v': 77, 'w': 78, 'y': 79, 'z': 80, '{': 81, '}': 82, '\xa0': 83, 'À': 84, 'Á': 85, 'È': 86, 'É': 87, 'Ì': 88, 'Í': 89, 'Ò': 90, 'Ó': 91, 'Ù': 92, 'à': 93, 'á': 94, 'è': 95, 'é': 96, 'ì': 97, 'í': 98, 'ò': 99, 'ó': 100, 'ù'

In [10]:
# Encode both sides of the corpus with their vocabularies. The length argument
# reuses Tx / Ty instead of repeating the literals 150 / 170, so the encoded data
# cannot drift away from the lengths the model is built with.
# Each call returns a pair; the second element (Xoh / Yoh) is what the model is
# trained on — presumably the one-hot form of the first element; verify against
# models.utils.preprocess_data.
X, Xoh = preprocess_data(english_texts, english_vocab, Tx)
Y, Yoh = preprocess_data(igbo_texts, igbo_vocab, Ty)

In [27]:
# State sizes used by the cells below: n_s sizes the zero initial states (s0, c0)
# and is passed to both model builders; n_a is passed to custom_model only.
# NOTE(review): the recorded model.summary() shows s0 with width 64, which does
# not match n_s = 256 — that output looks stale; re-run the notebook top to
# bottom after changing these values.
n_s = 256
n_a = 128

In [12]:
# Create the layer objects up front and keep a handle on each one; they are all
# handed to custom_model later (presumably so the same weights are reused at
# every decoding step — see models.rnn_attention).
(
    repeator,
    concatenator,
    densor_1,
    densor_2,
    activator,
    dotor,
    post_attention_LSTM,
    output_layer,
) = initialize_shared_weights(Tx, n_s, igbo_vocab)

In [19]:
# Check the dimensions of the encoded English input
# (recorded output: (45165, 150, 100)).
Xoh.shape

(45165, 150, 100)

In [14]:
# Bring the second axis of Yoh to the front and split along it: the result is a
# list with one target array per output position, the form passed to model.fit.
outputs = [step_targets for step_targets in Yoh.swapaxes(0, 1)]

In [15]:
# Number of per-position target arrays (recorded output: 170, matching Ty).
len(outputs)

170

In [22]:
# Assemble the attention model from the sequence lengths, the state sizes, the
# two vocabulary sizes and the layer objects created earlier.
model = custom_model(
    Tx, Ty,
    n_a, n_s,
    len(english_vocab), len(igbo_vocab),
    repeator, concatenator,
    densor_1, densor_2,
    activator, dotor,
    post_attention_LSTM, output_layer,
)

In [23]:
# Print the layer-by-layer overview of the assembled model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 150, 100)     0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 64)           0                                            
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 150, 64)      34048       input_2[0][0]                    
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector)  (None, 150, 64)      0           s0[0][0]                         
                                                                 lstm_1[170][0]                   
          

                                                                 bidirectional_2[0][0]            
                                                                 attentional_weights[291][0]      
                                                                 bidirectional_2[0][0]            
                                                                 attentional_weights[292][0]      
                                                                 bidirectional_2[0][0]            
                                                                 attentional_weights[293][0]      
                                                                 bidirectional_2[0][0]            
                                                                 attentional_weights[294][0]      
                                                                 bidirectional_2[0][0]            
                                                                 attentional_weights[295][0]      
          

In [24]:
# Adam optimizer with an explicit learning rate and decay; the model is trained
# with categorical cross-entropy and reports accuracy.
opt = Adam(decay=0.01, lr=0.005)
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [25]:
# One row of zeros per training example for the two extra model inputs fed to
# model.fit alongside Xoh (presumably the initial hidden and cell state of the
# decoder LSTM — confirm against models.rnn_attention).
m = Xoh.shape[0]
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
# Per-position targets for model.fit. The name was previously misspelled as
# `ouputs`, which left a dead variable and made training depend on an earlier
# cell having defined `outputs`.
# NOTE(review): the recorded NameError for `np` suggests this cell was last run
# in a kernel where the import cell had not been executed — re-run from the top.
outputs = list(Yoh.swapaxes(0, 1))

NameError: name 'np' is not defined

In [None]:
# Train on the encoded English input plus the two zero-state inputs, against the
# list of per-position targets.
model.fit(
    x=[Xoh, s0, c0],
    y=outputs,
    batch_size=128,
    epochs=10,
)