In [1]:
#https://github.com/rohansawant7978/grammar_correction/blob/main/05%20Vanilla%20Encoder%20Decoder(Word).ipynb.ipynb
import zipfile
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense,RNN
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import seaborn as sns
import os
from datetime import datetime
import pytz
from tensorflow.keras.callbacks import LearningRateScheduler,ModelCheckpoint,EarlyStopping,TensorBoard,ReduceLROnPlateau
from tensorflow.keras.optimizers import Nadam,Adam
import warnings
warnings.filterwarnings('ignore')
import nltk.translate.bleu_score as bleu
import random
from tqdm import tqdm
import shutil
import io
from nltk.translate.gleu_score import sentence_gleu
import pickle

In [2]:
def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file afterwards.

    NOTE: unpickling can execute arbitrary code, so this must only be used on
    files that come from a trusted source.
    """
    # The context manager guarantees the handle is closed even if loading fails.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Fitted tokenizers for the encoder and decoder sides.
tokenizer_enc = _load_pickle(r"tokenizer_enc.pkl")
tokenizer_dec = _load_pickle(r"tokenizer_dec.pkl")

# Encoder inputs for the train / validation / test splits.
train_enc_inp = _load_pickle(r"train_enc_inp.pkl")
val_enc_inp = _load_pickle(r"val_enc_inp.pkl")
test_enc_inp = _load_pickle(r"test_enc_inp.pkl")

# Decoder inputs for the train / validation / test splits.
train_dec_inp = _load_pickle(r"train_dec_inp.pkl")
val_dec_inp = _load_pickle(r"val_dec_inp.pkl")
test_dec_inp = _load_pickle(r"test_dec_inp.pkl")

# Decoder targets for the train / validation / test splits.
train_dec_out = _load_pickle(r"train_dec_out.pkl")
val_dec_out = _load_pickle(r"val_dec_out.pkl")
test_dec_out = _load_pickle(r"test_dec_out.pkl")

In [3]:
class Encoder(tf.keras.Model):
    """Encoder half of the seq2seq model: an embedding layer followed by an LSTM."""

    def __init__(self,vocab_size,output_dim,enc_units,input_length):
        """Store the hyper-parameters and create the embedding and LSTM layers.

        vocab_size   -- size of the embedding's input vocabulary
        output_dim   -- dimensionality of each embedding vector
        enc_units    -- number of units in the encoder LSTM
        input_length -- sequence length forwarded to the embedding layer
        """
        super().__init__()

        # Hyper-parameters kept on the instance.
        self.vocab_size = vocab_size
        self.output_dim = output_dim
        self.input_length = input_length
        self.enc_units = enc_units

        # Placeholders; call() overwrites them with the latest LSTM results.
        self.enc_output = 0
        self.enc_state_h = 0
        self.enc_state_c = 0

        # mask_zero=True makes the embedding mask out token id 0.
        self.embedding = Embedding(
            input_dim=self.vocab_size,
            output_dim=self.output_dim,
            input_length=self.input_length,
            mask_zero=True,
            name="embedding_layer_encoder",
        )

        # The LSTM returns the per-step outputs as well as the final h and c states.
        self.enc = LSTM(
            self.enc_units,
            return_state=True,
            return_sequences=True,
            name="Encoder_LSTM",
        )

    def call(self,input_sequence,states):
        """Embed `input_sequence` and run the LSTM starting from `states`.

        Returns the LSTM output sequence together with the final hidden and
        cell states; the same three values are also stored on the instance.
        """
        embedded = self.embedding(input_sequence)
        lstm_result = self.enc(embedded, initial_state=states)
        self.enc_output, self.enc_state_h, self.enc_state_c = lstm_result

        return self.enc_output, self.enc_state_h, self.enc_state_c

    def initialize_states(self,batch_size):
        """Return all-zero (hidden, cell) states of shape (batch_size, enc_units)."""
        state_shape = (batch_size, self.enc_units)

        return tf.zeros(shape=state_shape), tf.zeros(shape=state_shape)

In [4]:
class Decoder(tf.keras.Model):
    """Decoder half of the seq2seq model: an embedding layer followed by an LSTM."""

    def __init__(self,vocab_size,output_dim,dec_units,input_length):
        """Store the hyper-parameters and create the embedding and LSTM layers.

        vocab_size   -- size of the embedding's input vocabulary
        output_dim   -- dimensionality of each embedding vector
        dec_units    -- number of units in the decoder LSTM
        input_length -- sequence length forwarded to the embedding layer
        """
        super().__init__()

        self.vocab_size = vocab_size
        self.output_dim = output_dim
        self.dec_units = dec_units
        self.input_length = input_length

        # mask_zero=True makes the embedding mask out token id 0.
        self.embedding = Embedding(
            input_dim=self.vocab_size,
            output_dim=self.output_dim,
            input_length=self.input_length,
            mask_zero=True,
            name="embedding_layer_decoder",
        )

        # The LSTM returns the per-step outputs as well as the final h and c states.
        self.dec = LSTM(
            self.dec_units,
            return_sequences=True,
            return_state=True,
            name="Decoder_LSTM",
        )

    def call(self,input_sequence,initial_states):
        """Embed `input_sequence` and run the LSTM starting from `initial_states`.

        Returns the LSTM output sequence and its final hidden and cell states.
        """
        embedded = self.embedding(input_sequence)
        sequence_out, final_h, final_c = self.dec(embedded, initial_state=initial_states)
        return sequence_out, final_h, final_c

In [5]:
class Encoder_decoder(tf.keras.Model):
    """Vanilla encoder-decoder: Encoder -> Decoder -> softmax over the decoder vocabulary."""

    def __init__(self,enc_vocab_size,enc_output_dim,enc_inp_length,enc_units,
                 dec_vocab_size,dec_output_dim,dec_inp_length,dec_units,batch_size):
        """Create the encoder, the decoder and the output projection.

        enc_* arguments are forwarded to Encoder, dec_* arguments to Decoder.
        batch_size is only used to pre-build `self.ini_states`; call() no longer
        depends on it (see below).
        """
        super().__init__() # https://stackoverflow.com/a/27134600/4084039
        self.encoder = Encoder(vocab_size=enc_vocab_size, output_dim=enc_output_dim,
                               input_length=enc_inp_length, enc_units=enc_units)
        self.decoder = Decoder(vocab_size=dec_vocab_size, output_dim=dec_output_dim,
                               input_length=dec_inp_length, dec_units=dec_units)
        # Softmax activation: the model outputs probabilities, not logits.
        self.dense   = Dense(dec_vocab_size, activation='softmax')
        # Kept so existing code reading `ini_states` keeps working.
        self.ini_states = self.encoder.initialize_states(batch_size=batch_size)

    def call(self,data):
        """Run a forward pass.

        data -- pair (encoder token ids, decoder token ids).

        Returns the softmax output of the dense layer applied to every decoder
        time step.
        """
        enc_tokens, dec_tokens = data[0], data[1]
        # Build the zero initial states from the batch actually received, so a
        # batch whose size differs from the constructor's `batch_size` (a final
        # partial batch, single-sample inference) no longer causes a shape mismatch.
        ini_states = self.encoder.initialize_states(batch_size=tf.shape(enc_tokens)[0])
        enc_output,enc_h,enc_c = self.encoder(enc_tokens,ini_states)
        # The encoder's final states seed the decoder.
        dec_output,dec_h,dec_c = self.decoder(dec_tokens, [enc_h, enc_c])
        return self.dense(dec_output)


In [6]:
#https://www.tensorflow.org/tutorials/text/image_captioning#model
# from_logits=False: the model's final Dense layer already applies softmax, so
# the predictions handed to this loss are probabilities rather than raw logits.
# reduction='none' keeps one loss value per position so padding can be masked.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False, reduction='none')

def loss_function(real, pred):
    """Sparse categorical cross-entropy that ignores positions whose target id is 0.

    real -- integer target ids; id 0 is treated as padding
    pred -- predicted class probabilities for every position

    Returns a scalar: the mean of the per-position losses, where positions with
    target id 0 contribute zero (they are still counted in the mean).
    """
    per_position_loss = loss_object(real, pred)

    # 1.0 where the target is a real token, 0.0 where it is padding.
    keep = tf.cast(tf.math.not_equal(real, 0), dtype=per_position_loss.dtype)
    per_position_loss *= keep

    return tf.reduce_mean(per_position_loss)


In [7]:
# Vocabulary sizes: one more than the number of words each tokenizer knows
# (presumably to leave room for the padding index 0 - confirm).
enc_voc_size = 1 + len(tokenizer_enc.word_index)
dec_voc_size = 1 + len(tokenizer_dec.word_index)

# Embedding dimensionality, used for both encoder and decoder.
embedd_dim = 100

# Sequence lengths passed to the encoder / decoder embedding layers.
enc_inp_len, dec_inp_len = 12, 13

# Number of LSTM units and training batch size.
lstm_size = 64
batch_size = 1024

In [8]:

# Largest multiple of batch_size that fits in each split, so that only whole
# batches of `batch_size` samples are kept.
train_trunc_idx = batch_size * (train_enc_inp.shape[0] // batch_size)
val_trunc_idx = batch_size * (val_enc_inp.shape[0] // batch_size)

# Drop the trailing samples of every training array.
train_enc_inp_truncated, train_dec_inp_truncated, train_dec_out_truncated = (
    split[:train_trunc_idx] for split in (train_enc_inp, train_dec_inp, train_dec_out)
)

# Same for the validation arrays.
val_enc_inp_truncated, val_dec_inp_truncated, val_dec_out_truncated = (
    split[:val_trunc_idx] for split in (val_enc_inp, val_dec_inp, val_dec_out)
)

In [9]:
# Build the encoder-decoder model from the hyper-parameters defined earlier.
model  = Encoder_decoder(enc_vocab_size=enc_voc_size,enc_output_dim=embedd_dim,
                         enc_inp_length=enc_inp_len,enc_units=lstm_size,
                         dec_vocab_size=dec_voc_size,dec_output_dim=embedd_dim,
                         dec_inp_length=dec_inp_len,dec_units=lstm_size,
                         batch_size=batch_size)

#Tensorboard
# Portable replacement for the `!rm -rf ./logs/fit` shell escape: removes the
# directory tree and silently does nothing if it does not exist.
# NOTE(review): the TensorBoard logs below are written under
# 05_InitialEncodeDecoder/tb_logs, not ./logs/fit - confirm this cleanup is still wanted.
shutil.rmtree('./logs/fit', ignore_errors=True)
# NOTE(review): in the tz database the sign of 'Etc/GMT+N' zones is inverted, so
# 'Etc/GMT+3' is UTC-3, which is not India Standard Time despite the variable
# name. It only affects the timestamp in the log directory name - confirm the
# intended zone.
IST = pytz.timezone('Etc/GMT+3')
# One log directory per run, named after the current timestamp.
log_dir=f'05_InitialEncodeDecoder/tb_logs/{datetime.now(IST).strftime("%Y%m%d%H%M%S")}'
tensorboard_callback = TensorBoard(log_dir=log_dir,histogram_freq=1,write_graph=True)
# Stop when val_loss has not improved by at least 0.001 for 10 epochs.
earlystop = EarlyStopping(monitor='val_loss', min_delta=0.001, patience=10, verbose=1,mode='min')
# Multiply the learning rate by 0.9 when val_loss has not improved by 0.001 for 5 epochs.
reducelr = ReduceLROnPlateau(monitor='val_loss', min_delta=0.001, patience=5, verbose=1, factor=0.9)
# Save only the weights, and only when val_loss reaches a new minimum.
check_point = ModelCheckpoint('05_InitialEncodeDecoder', monitor='val_loss',
                              save_best_only=True, save_weights_only=True,mode='min', verbose=0)

2023-11-26 07:27:58.312201: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-11-26 07:27:58.312242: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2023-11-26 07:27:58.312250: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2023-11-26 07:27:58.312297: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-26 07:27:58.312323: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [10]:
# Configure training: Adam optimizer with a 0.001 learning rate and the custom
# `loss_function` as the objective.
model.compile(
    loss=loss_function,
    optimizer=Adam(learning_rate=0.001),
)



In [11]:
# Train for up to 30 epochs on the truncated training arrays, validating on the
# truncated validation arrays; the callbacks handle logging, early stopping,
# learning-rate reduction and checkpointing.
model.fit(
    x=[train_enc_inp_truncated, train_dec_inp_truncated],
    y=train_dec_out_truncated,
    batch_size=batch_size,
    epochs=30,
    validation_data=(
        [val_enc_inp_truncated, val_dec_inp_truncated],
        val_dec_out_truncated,
    ),
    callbacks=[tensorboard_callback, earlystop, reducelr, check_point],
)

NameError: name 'tensorboard_callback' is not defined