In [27]:
from IPython import get_ipython
# Enable the autoreload extension only when running under IPython/Jupyter.
# get_ipython() returns None outside an IPython session, which is the
# reliable check: IPython publishes '__IPYTHON__' through builtins rather
# than through this namespace's globals(), so testing globals() misses it.
ipython = get_ipython()

if ipython is not None:
    # run_line_magic() replaces the deprecated InteractiveShell.magic().
    ipython.run_line_magic('load_ext', 'autoreload')
    # Mode 1 reloads only the modules registered with %aimport.
    ipython.run_line_magic('autoreload', '1')

import helper
from keras.models import Sequential
from keras.layers import GRU, Dense, TimeDistributed, RepeatVector, Bidirectional, Dropout
from keras.layers.embeddings import Embedding
from keras.optimizers import Adam
from keras.losses import sparse_categorical_crossentropy
from keras import callbacks
import collections
#Verify access to the GPU
from tensorflow.python.client import device_lib
# Report the devices TensorFlow can see.
print(device_lib.list_local_devices())

# Load data: preprocessed sentences and the two tokenizers.
(preproc_source_sentences,
 preproc_target_sentences,
 source_tokenizer,
 target_tokenizer) = helper.Pickle_out_data("Preprocessed_Data.pickle")

# Train/test split stored in its own pickle.
X_train, X_test, Y_train, Y_test = helper.Pickle_out_data("Split_Test_Train_Data.pickle")
print('Preprocessed data Loaded')

# Embedding weights handed to the Embedding layer of the model.
embedding_matrix = helper.Pickle_out_data("embedded.pickle")
print('Embedding Matrix Loaded')

# Sequence lengths are read from axis 1 of the preprocessed arrays.
max_source_sequence_length = preproc_source_sentences.shape[1]
max_target_sequence_length = preproc_target_sentences.shape[1]

# Vocabulary sizes: one more than the number of tokenizer entries
# (presumably index 0 is reserved for padding - TODO confirm).
source_vocab_size = 1 + len(source_tokenizer.word_index)
target_vocab_size = 1 + len(target_tokenizer.word_index)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 12831845383859684815
]
Preprocessed data Loaded
Embedding Matrix Loaded


In [28]:
# Informational only: the raw dataset is loaded here to report some basic
# statistics. It is not needed for training the model; the dataset was
# already preprocessed and saved to the pickle files loaded earlier.
# NOTE(review): load_data is not defined or imported in the visible cells -
# confirm where it comes from before running on a fresh kernel.
source_path = 'data/small_vocab_en'
target_path = 'data/small_vocab_fr'
source_sentences = load_data(source_path)
target_sentences = load_data(target_path)
print('Dataset Loaded')

# Count every whitespace-separated token once; the totals printed below are
# derived from these counters instead of rebuilding the full word lists.
source_words_counter = collections.Counter(
    word for sentence in source_sentences for word in sentence.split())
target_words_counter = collections.Counter(
    word for sentence in target_sentences for word in sentence.split())

print('{} English words.'.format(sum(source_words_counter.values())))
print('{} unique English words.'.format(source_vocab_size-1))

print()
print('{} French words.'.format(sum(target_words_counter.values())))
# The target side is French; this line was previously labelled "English".
print('{} unique French words.'.format(target_vocab_size-1))

Dataset Loaded
1823250 English words.
199 unique English words.

1961295 French words.
345 unique English words.


In [24]:
# Define Model
def model_final(input_shape, output_sequence_length, s_size, t_size,
                learning_rate=0.005, embedding_weights=None):
    """
    Build and compile a model that combines a frozen pre-trained embedding,
    a bidirectional GRU encoder and a bidirectional GRU decoder.

    :param input_shape: Tuple with the shape of the source data; index 1 is
        used as the input sequence length
    :param output_sequence_length: Length of the output sequence (number of
        times the encoder output is repeated for the decoder)
    :param s_size: Source vocabulary size (input dimension of the embedding)
    :param t_size: Target vocabulary size (units of the final softmax layer)
    :param learning_rate: Learning rate passed to the Adam optimizer
    :param embedding_weights: Weight matrix for the embedding layer; when
        None, the module-level ``embedding_matrix`` is used
    :return: Keras model built and compiled, but not trained
    """
    # Fall back to the notebook-level matrix so existing callers keep working.
    if embedding_weights is None:
        embedding_weights = embedding_matrix
    # The embedding width must match the width of the supplied weight matrix.
    embedding_dim = len(embedding_weights[0])

    # Build the layers
    model = Sequential()
    # Embedding (pre-trained weights, not updated during training)
    model.add(Embedding(s_size, embedding_dim, input_length=input_shape[1],
                        input_shape=input_shape[1:], weights=[embedding_weights], trainable=False))
    # Encoder: collapses the input sequence into a single vector
    model.add(Bidirectional(GRU(100)))
    # Repeat the encoded vector once per output time step
    model.add(RepeatVector(output_sequence_length))
    # Decoder
    model.add(Bidirectional(GRU(100, return_sequences=True)))
    model.add(TimeDistributed(Dense(512, activation='relu')))
    model.add(Dropout(0.5))
    # Per-time-step distribution over the target vocabulary
    model.add(TimeDistributed(Dense(t_size, activation='softmax')))
    model.compile(loss=sparse_categorical_crossentropy,
                  optimizer=Adam(learning_rate),
                  metrics=['accuracy'])
    return model

# Instantiate the network from the sizes computed when the data was loaded,
# then print its layer-by-layer summary.
model = model_final(
    input_shape=preproc_source_sentences.shape,
    output_sequence_length=max_target_sequence_length,
    s_size=source_vocab_size,
    t_size=target_vocab_size,
)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 15, 100)           20000     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 200)               120600    
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 21, 200)           0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 21, 200)           180600    
_________________________________________________________________
time_distributed_3 (TimeDist (None, 21, 512)           102912    
_________________________________________________________________
dropout_2 (Dropout)          (None, 21, 512)           0         
_________________________________________________________________
time_distributed_4 (TimeDist (None, 21, 346)          

In [4]:
# Callbacks
# Weights of the best epoch (judged by the 'accuracy' metric) are written here.
mfile = 'models/Glove_training_bach512.model.h5'
model_checkpoint = callbacks.ModelCheckpoint(mfile, monitor='accuracy', save_best_only=True, save_weights_only=True)
# Per-epoch metrics as CSV.
logger = callbacks.CSVLogger('results/training_bach_512.log')
# Event files for TensorBoard.
tensorboard = callbacks.TensorBoard(log_dir='results/training_bach_512')
# The list gets its own name: rebinding `callbacks` would shadow the imported
# keras `callbacks` module and make this cell fail when it is run a second
# time. The checkpoint callback is included so it is not silently dropped.
training_callbacks = [model_checkpoint, logger, tensorboard]

In [14]:
"""
some keras version have problem in calculating val loss
it will be most likely included in the next release 2.2.5.
Until then, you can update to the HEAD of master from pip by doing:
"""
!Pip install git+https://github.com/keras-team/keras.git -U

Collecting git+https://github.com/keras-team/keras.git
  Cloning https://github.com/keras-team/keras.git to c:\users\hadis\appdata\local\temp\pip-req-build-wqwmslvg
Building wheels for collected packages: Keras
  Building wheel for Keras (setup.py): started
  Building wheel for Keras (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Hadis\AppData\Local\Temp\pip-ephem-wheel-cache-rzb7nkh7\wheels\da\a4\7e\6b7bd9af18cc2e23b8dd5ed6de07a7e13bd80a17214eb88932
Successfully built Keras
Installing collected packages: Keras
  Found existing installation: Keras 2.3.0
    Uninstalling Keras-2.3.0:
      Successfully uninstalled Keras-2.3.0
Successfully installed Keras-2.3.0


  Running command git clone -q https://github.com/keras-team/keras.git 'C:\Users\Hadis\AppData\Local\Temp\pip-req-build-wqwmslvg'


In [7]:
import os

# The checkpoint and the CSV log are written into these folders; create them
# up front so training does not fail on a missing directory.
for out_dir in ('models', 'results'):
    os.makedirs(out_dir, exist_ok=True)

# Fit the model. The callbacks defined earlier only take effect when they are
# passed to fit(), so they are handed over explicitly here.
model.fit(X_train, Y_train, batch_size=512, epochs=10, validation_split=0.01,
          callbacks=[model_checkpoint, logger, tensorboard])

# Save model
helper.save_model(model, 'models/Glove_training_bach_512')

ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 7892.

In [12]:
"""Run this for opening tensorboard on your Jupyter Notebook
Yuo can also open that in the new browser."""
!pip install -q tf-nightly-2.0-preview
# Load the TensorBoard notebook extension
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [13]:
import datetime, os

logs_base_dir = "./logs"
os.makedirs(logs_base_dir, exist_ok=True)
%tensorboard --logdir {logs_base_dir}

Reusing TensorBoard on port 6006 (pid 11676), started 0:08:13 ago. (Use '!kill 11676' to kill it.)