In [1]:
# Read the whole corpus into memory and keep it as a list of lines
# (line terminators are dropped by splitlines()).
with open('enc-dec.txt') as f:
  corpus_text = f.read()
lines = corpus_text.splitlines()

In [2]:
import numpy as np

word_len = 10  # fixed length, in characters, of every model input/output word
alphabet = " abcdefghijklmnopqrstuvwxyz"  # index 0 is the space character
# Lookup tables between a character and its position in `alphabet`.
alpha_index = {c: i for i, c in enumerate(alphabet)}
index_alpha = dict(enumerate(alphabet))

def convert(data):
  """One-hot encode a sequence of characters over `alphabet`.

  Args:
    data: iterable of single characters (typically a str); every character
      must be present in `alphabet`.

  Returns:
    A float64 array of shape (len(data), len(alphabet)) holding exactly one
    1 per row. Empty input yields an array of shape (0, len(alphabet)) so
    the second dimension is always the alphabet size.

  Raises:
    KeyError: if a character is not part of `alphabet`.
  """
  # Explicit integer dtype keeps the index array valid even when it is empty.
  indices = np.array([alpha_index[ch] for ch in data], dtype=np.intp)
  one_hot = np.zeros((len(indices), len(alphabet)))
  # Set one cell per row in a single vectorised assignment.
  one_hot[np.arange(len(indices)), indices] = 1
  return one_hot

# Smoke test: a 10-letter word should encode to a (10, 27) matrix.
x = convert("sehlckaohz")
print(np.shape(x))

(10, 27)


In [3]:
# Build the one-hot data set. Each corpus line is split on a tab: the first
# field is the encoded word (model input), the second the decoded word
# (model target).
x = []
y = []

for line in lines:
  fields = line.split('\t')
  # Right-pad both words with spaces up to word_len so every sample has the
  # same number of time steps (same constant the model is built with).
  enc = fields[0].ljust(word_len)
  dec = fields[1].ljust(word_len)

  x.append(convert(enc))
  y.append(convert(dec))

In [4]:
# Stack the per-sample matrices into single arrays and report their shapes.
x = np.array(x)
y = np.array(y)
print(x.shape, y.shape, sep='\n')

(152273, 10, 27)
(152273, 10, 27)


In [5]:
from sklearn.model_selection import train_test_split

# Shuffle the samples and hold out 10% of them; random_state pins the split
# so it is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=4)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep='\n')

(137045, 10, 27)
(137045, 10, 27)
(15228, 10, 27)
(15228, 10, 27)


In [6]:
# Mini-batch size passed as batch_size to the model.fit() calls below.
BATCH_SIZE = 128

In [7]:
from keras.models import Sequential
from keras.layers import Dense, GRU, Dropout, RepeatVector

def build_model(layers, hidden):
  """Assemble an (uncompiled) GRU encoder-decoder over one-hot characters.

  Args:
    layers: number of stacked GRU layers in the decoder.
    hidden: number of units in every GRU layer (encoder and decoder).

  Returns:
    A Sequential model taking input of shape (word_len, len(alphabet)) and
    ending in a softmax Dense layer with len(alphabet) units.
  """
  model = Sequential()
  n_symbols = len(alphabet)

  # Encoder: a single GRU reading the whole input word.
  encoder = GRU(hidden, input_shape=(word_len, n_symbols))

  # Feed the encoder's output to the decoder at every time step by
  # repeating it word_len times.
  bridge = RepeatVector(word_len)

  # Decoder: one or more stacked GRUs that emit an output per time step.
  decoder = [GRU(hidden, return_sequences=True) for _ in range(layers)]

  # Softmax over the alphabet.
  head = Dense(n_symbols, activation='softmax')

  for layer in [encoder, bridge, *decoder, head]:
    model.add(layer)

  return model

In [8]:
# model1: 4 GRU layers (1 encoder + 3 decoder) with 128 hidden units each
model1 = build_model(layers=3, hidden=128)
model1.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, 128)               60288     
                                                                 
 repeat_vector (RepeatVector  (None, 10, 128)          0         
 )                                                               
                                                                 
 gru_1 (GRU)                 (None, 10, 128)           99072     
                                                                 
 gru_2 (GRU)                 (None, 10, 128)           99072     
                                                                 
 gru_3 (GRU)                 (None, 10, 128)           99072     
                                                                 
 dense (Dense)               (None, 10, 27)            3483      
                                                        

In [10]:
# Train model1 for 15 epochs, evaluating on the held-out split after each one.
history = model1.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=15,
    validation_data=(x_test, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [16]:
# Persist the trained model1 to disk.
model1.save(filepath='saved_model/model1.h5')

In [11]:
# model2: 6 GRU layers (1 encoder + 5 decoder) with 128 hidden units each
model2 = build_model(layers=5, hidden=128)
model2.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_4 (GRU)                 (None, 128)               60288     
                                                                 
 repeat_vector_1 (RepeatVect  (None, 10, 128)          0         
 or)                                                             
                                                                 
 gru_5 (GRU)                 (None, 10, 128)           99072     
                                                                 
 gru_6 (GRU)                 (None, 10, 128)           99072     
                                                                 
 gru_7 (GRU)                 (None, 10, 128)           99072     
                                                                 
 gru_8 (GRU)                 (None, 10, 128)           99072     
                                                      

In [12]:
# Train model2 for 15 epochs, evaluating on the held-out split after each one.
history2 = model2.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=15,
    validation_data=(x_test, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [17]:
# Persist the trained model2 to disk; this file is reloaded later as model3.
model2.save(filepath='saved_model/model2.h5')

In [20]:
from keras.models import load_model

# Resume from the saved model2 file and train for 10 more epochs
# (15 + 10 = 25 epochs in total). The result lives in model3, so the
# in-memory model2 is not trained any further.
model3 = load_model('saved_model/model2.h5')
history3 = model3.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=10,
    validation_data=(x_test, y_test),
)
# Author's note from the original run: improved 4% (presumably validation
# accuracy; the per-epoch logs are not preserved here).

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [21]:
# Persist the further-trained model3 to disk.
model3.save(filepath='saved_model/model3.h5')

In [22]:
# model4: 4 GRU layers (1 encoder + 3 decoder) with 256 hidden units each
model4 = build_model(layers=3, hidden=256)
model4.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model4.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_10 (GRU)                (None, 256)               218880    
                                                                 
 repeat_vector_2 (RepeatVect  (None, 10, 256)          0         
 or)                                                             
                                                                 
 gru_11 (GRU)                (None, 10, 256)           394752    
                                                                 
 gru_12 (GRU)                (None, 10, 256)           394752    
                                                                 
 gru_13 (GRU)                (None, 10, 256)           394752    
                                                                 
 dense_2 (Dense)             (None, 10, 27)            6939      
                                                      

In [23]:
# Train model4 for 15 epochs, evaluating on the held-out split after each one.
history4 = model4.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=15,
    validation_data=(x_test, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [24]:
# Persist the trained model4 to disk.
model4.save(filepath='saved_model/model4.h5')

In [25]:
# model5: 6 GRU layers (1 encoder + 5 decoder) with 256 hidden units each
model5 = build_model(layers=5, hidden=256)
model5.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
model5.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_14 (GRU)                (None, 256)               218880    
                                                                 
 repeat_vector_3 (RepeatVect  (None, 10, 256)          0         
 or)                                                             
                                                                 
 gru_15 (GRU)                (None, 10, 256)           394752    
                                                                 
 gru_16 (GRU)                (None, 10, 256)           394752    
                                                                 
 gru_17 (GRU)                (None, 10, 256)           394752    
                                                                 
 gru_18 (GRU)                (None, 10, 256)           394752    
                                                      

In [26]:
# Train model5 for 15 epochs, evaluating on the held-out split after each one.
history5 = model5.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=15,
    validation_data=(x_test, y_test),
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [27]:
# Persist the trained model5 to disk.
model5.save(filepath='saved_model/model5.h5')

In [36]:
# model4 has the best val accuracy
# so use it to decode the password
coded = "onmltsrqpoihgrezcba lknrvjihgfueiizltflk"

# Preprocess the coded string: cut it into word_len-character chunks and
# one-hot encode each one. A final chunk shorter than word_len is padded with
# spaces (the same padding applied to the training words) so that all chunks
# stack into one rectangular array.
one_hot_coded = np.array([
    convert(coded[start:start + word_len].ljust(word_len))
    for start in range(0, len(coded), word_len)
])
print(one_hot_coded.shape)

(4, 10, 27)


In [38]:
# Run model4 on the encoded chunks; the softmax output layer yields one
# probability vector over the alphabet for every character position.
decoded = model4.predict(x=one_hot_coded)
print(np.shape(decoded))

(4, 10, 27)


In [44]:
# Take the most probable alphabet index at every position, then map the
# indices back to characters to rebuild each chunk as a string.
max_prob = np.argmax(decoded, axis=-1)
print(max_prob.shape)
sentence = ["".join(index_alpha[idx] for idx in row) for row in max_prob]

(4, 10)


In [45]:
# Show the decoded message: one string per word_len-character chunk of `coded`.
print(sentence)

['    i     ', '   love   ', 's deep    ', 'plearning ']
