In [13]:
import numpy as np
from tensorflow.keras.layers import Input, GRU, Dense, RepeatVector
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau

In [14]:
# Read the tab-separated "code<TAB>word" pairs, one pair per line. The context
# manager closes the file handle as soon as the contents have been read,
# instead of leaving it open for the lifetime of the kernel.
with open("enc-dec.txt") as source_file:
    text = source_file.read().split("\n")

In [15]:
# Alphabet for one-hot encoding: the 26 lowercase letters plus the space,
# which is also the padding character appended by add_spaces.
symbols = "abcdefghijklmnopqrstuvwxyz "
n_symbols = len(symbols)  # width of each one-hot vector
n_lines = len(text)  # number of entries read from the data file
# NOTE(review): despite the name, this is used as the fixed number of
# characters per sequence (padding length / time steps), not a word count.
n_words = 10

In [16]:
# Training hyperparameters, passed to model.fit further down.
EPOCHS = 30  # value for fit(epochs=...)
BATCH_SIZE = 32  # value for fit(batch_size=...)

In [17]:
def add_spaces(string, length):
    """Right-pad ``string`` with spaces to at least ``length`` characters.

    Args:
        string: Text to pad.
        length: Minimum length of the returned string.

    Returns:
        ``string`` followed by as many spaces as needed to reach ``length``.
        A string that is already ``length`` characters or longer is returned
        unchanged; it is never truncated.
    """
    # str.ljust pads with spaces by default and leaves longer strings alone,
    # which is exactly the behaviour of the former hand-written version.
    return string.ljust(length)

In [18]:
# One-hot tensors of shape (lines, characters per sequence, alphabet size).
# float32 is Keras' default float type and halves the memory that NumPy's
# default float64 would need for arrays of this size.
x_train = np.zeros((n_lines, n_words, n_symbols), dtype=np.float32)
y_train = np.zeros((n_lines, n_words, n_symbols), dtype=np.float32)

# Resolve characters through a dict instead of scanning `symbols` with
# str.find for every single character.
symbol_to_index = {symbol: index for index, symbol in enumerate(symbols)}
# str.find returned -1 for characters outside `symbols`, and NumPy's negative
# indexing silently turned that into the last slot (the space). The result is
# kept, but the fallback is now explicit rather than accidental.
fallback_index = n_symbols - 1

for i in range(n_lines):
    # Each line holds "<code>\t<word>"; pad both sides to n_words characters.
    code, word = text[i].split("\t")
    code = add_spaces(code, n_words)
    word = add_spaces(word, n_words)
    for j in range(n_words):
        x_train[i, j, symbol_to_index.get(code[j], fallback_index)] = 1
        y_train[i, j, symbol_to_index.get(word[j], fallback_index)] = 1

print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)

x_train shape: (152273, 10, 27)
y_train shape: (152273, 10, 27)


In [19]:
sample = "onmltsrqpoihgrezcba lknrvjihgfueiizltflk"
# Cut the encoded sample into chunks of n_words characters. The last chunk is
# space-padded with add_spaces so that a sample whose length is not a multiple
# of n_words still produces full-length chunks; a short chunk would otherwise
# raise IndexError when it is one-hot encoded character by character.
codes = [
    add_spaces(sample[start:start + n_words], n_words)
    for start in range(0, len(sample), n_words)
]

print(codes)

['onmltsrqpo', 'ihgrezcba ', 'lknrvjihgf', 'ueiizltflk']


In [20]:
# One-hot encode every chunk of the sample into a
# (chunks, n_words, n_symbols) tensor, mirroring the training encoding.
n_codes = len(codes)
x_test = np.zeros((n_codes, n_words, n_symbols))

for row, chunk in enumerate(codes):
    for col in range(n_words):
        x_test[row, col, symbols.find(chunk[col])] = 1

print("x_test shape:", x_test.shape)

x_test shape: (4, 10, 27)


In [21]:
def model_factory(input_shape, units=256, n_decoder_layers=3, learning_rate=0.001):
    """Build and compile a GRU encoder-decoder for sequence-to-sequence decoding.

    The encoder GRU compresses the whole input sequence into one vector, which
    RepeatVector copies once per output time step; a stack of
    sequence-returning GRUs then decodes it, and a softmax Dense layer emits a
    distribution over the symbols at every step.

    Args:
        input_shape: ``(time_steps, n_symbols)`` of one one-hot encoded sample.
        units: Hidden size of every GRU layer.
        n_decoder_layers: Number of stacked decoder GRU layers.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy loss, accuracy
        metric). With the default arguments the architecture is the original
        fixed one: a 256-unit encoder followed by three 256-unit decoder GRUs.
    """
    n_words, n_symbols = input_shape
    inputs = Input(shape=input_shape)

    # Encoder: only the final state is kept (return_sequences is left off).
    x = GRU(units=units)(inputs)
    # Feed the same encoded vector to the decoder at each output step.
    x = RepeatVector(n_words)(x)
    # Decoder stack: every layer returns the full sequence for the next one.
    for _ in range(n_decoder_layers):
        x = GRU(units=units, return_sequences=True)(x)
    # Dense is applied per time step, giving one softmax over symbols per step.
    outputs = Dense(units=n_symbols, activation="softmax")(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        loss=CategoricalCrossentropy(),
        optimizer=Adam(learning_rate),
        metrics=["accuracy"],
    )
    return model

In [22]:
# Shape of one sample: n_words time steps, each an n_symbols-wide one-hot vector.
input_shape = (n_words, n_symbols)
model = model_factory(input_shape)
# Print the layer-by-layer overview shown below.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 10, 27)]          0         
                                                                 
 gru_4 (GRU)                 (None, 256)               218880    
                                                                 
 repeat_vector_1 (RepeatVect  (None, 10, 256)          0         
 or)                                                             
                                                                 
 gru_5 (GRU)                 (None, 10, 256)           394752    
                                                                 
 gru_6 (GRU)                 (None, 10, 256)           394752    
                                                                 
 gru_7 (GRU)                 (None, 10, 256)           394752    
                                                           

In [23]:
# Lower the learning rate when the monitored metric plateaus. The metric is
# the training loss: fit() below is given no validation data.
# (Per the Keras docs: after `patience` epochs without an improvement of at
# least `min_delta`, the learning rate is multiplied by `factor`.)
lr_reducer = ReduceLROnPlateau(
    monitor='loss',
    factor=0.1,
    patience=3,
    verbose=0,
    min_delta=0.01,
    cooldown=0,
    min_lr=0,
)
# Train on the one-hot (code -> word) pairs; `history` holds whatever fit() returns.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[lr_reducer],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [24]:
# Predict a symbol distribution for every position of every chunk, pick the
# most likely symbol per position and stitch the chunks back into one string.
prediction = model.predict(x_test)
best_symbol_ids = np.argmax(prediction, axis=-1)
decoded = "".join(symbols[symbol_id] for symbol_id in best_symbol_ids.ravel())
print(decoded)

    i        love     deep    clearning 
