In [2]:
from tensorflow.keras.layers import Bidirectional,Concatenate,Permute,Dot,Input,LSTM,Multiply
from tensorflow.keras.layers import RepeatVector,Dense,Activation, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model, Model
import tensorflow.keras.backend as K
import tensorflow as tf
import numpy as np

from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
import matplotlib.pyplot as plt
%matplotlib inline

2024-06-11 09:54:45.667263: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-11 09:54:45.667359: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-11 09:54:45.804940: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [1]:
!pip install faker

Collecting faker
  Downloading Faker-25.8.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-25.8.0-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: faker
Successfully installed faker-25.8.0


# 1. Preparing Dataset 
Here we synthetically generate a dataset of (human-readable date, machine-readable ISO date) pairs and write the helper functions needed to build and encode it.

In [3]:
# Date formats used to render the human-readable side of every example.
# 'short', 'medium', 'long' and 'full' are Babel's predefined format names; the
# remaining entries are explicit date patterns (see the babel.dates docs).
# load_date() draws uniformly from this list, so entries that appear more than
# once ('full', 'd MMMM yyyy') are sampled proportionally more often.
#
# The year fields use lowercase 'y' (calendar year). Uppercase 'Y' is the
# week-numbering year, which can differ from the calendar year for dates close
# to 1 January and would then contradict the ISO target string.
FORMATS = ['short',
           'medium',
           'long',
           *(['full'] * 10),
           'd MMM yyyy',
           'd MMMM yyyy',
           'dd MMM yyyy',
           'd MMM, yyyy',
           'd MMMM, yyyy',
           'dd, MMM yyyy',
           'd MM yy',
           'd MMMM yyyy',
           'MMMM d yyyy',
           'MMMM d, yyyy',
           'dd.MM.yy']


_FAKER = None  # shared Faker instance, created on the first load_date() call


def load_date() -> tuple:
    """Generate one random date in human-readable and machine-readable form.

    Returns:
        A ``(human_readable, machine_readable, date)`` tuple.
        ``human_readable`` is the date rendered with a randomly chosen entry of
        ``FORMATS`` (``en_US`` locale), lower-cased and with commas removed.
        ``machine_readable`` is ``date.isoformat()`` and ``date`` is the object
        returned by Faker. ``(None, None, None)`` is returned when formatting
        raises ``AttributeError``.
    """
    global _FAKER
    if _FAKER is None:
        # Constructing a Faker is much more expensive than drawing a date from
        # one, so build it once and reuse it for every subsequent call.
        _FAKER = Faker()
    date = _FAKER.date_object()

    try:
        human_readable = format_date(date, random.choice(FORMATS), locale="en_US")
        human_readable = human_readable.lower().replace(",", "")
        machine_readable = date.isoformat()
    except AttributeError:
        return None, None, None

    return human_readable, machine_readable, date


def load_dataset(m):
    """Build a synthetic date-translation dataset and its character vocabularies.

    Args:
        m: Number of dates to generate. Dates for which ``load_date`` returns
            ``None`` are skipped, so the dataset may hold fewer than ``m`` pairs.

    Returns:
        A ``(dataset, human, machine, inv_machine)`` tuple:
            dataset: list of ``(human_readable, machine_readable)`` string pairs.
            human: dict mapping every input character to an index, in sorted
                character order, followed by ``'<unk>'`` and ``'<pad>'``.
            machine: dict mapping every output character to an index.
            inv_machine: dict mapping each index back to its output character.
    """
    human_vocab = set()
    machine_vocab = set()
    dataset = []

    for _ in tqdm(range(m)):
        # Use names that do not shadow the `m` parameter or the loop variable.
        human_readable, machine_readable, _date = load_date()
        if human_readable is not None:
            dataset.append((human_readable, machine_readable))
            # set.update on a string adds its individual characters.
            human_vocab.update(human_readable)
            machine_vocab.update(machine_readable)

    human_tokens = sorted(human_vocab) + ['<unk>', '<pad>']
    human = {token: index for index, token in enumerate(human_tokens)}
    inv_machine = dict(enumerate(sorted(machine_vocab)))
    machine = {char: index for index, char in inv_machine.items()}

    return dataset, human, machine, inv_machine

In [4]:
# Request 10,000 synthetic examples and build the character vocabularies.
dataset, human, machine, inv_machine = load_dataset(10000)

100%|██████████| 10000/10000 [04:12<00:00, 39.59it/s]


In [5]:
# Peek at the first ten (human-readable, machine-readable) pairs.
dataset[:10]

[('apr 16 1980', '1980-04-16'),
 ('mar 16 1983', '1983-03-16'),
 ('25 september 2000', '2000-09-25'),
 ('21 01 71', '1971-01-21'),
 ('9 august 2019', '2019-08-09'),
 ('friday november 15 1974', '1974-11-15'),
 ('thursday november 25 2004', '2004-11-25'),
 ('06 sep 2012', '2012-09-06'),
 ('17.09.87', '1987-09-17'),
 ('friday august 12 2005', '2005-08-12')]

In [6]:
def preprocess_data(dataset,human,machine,Tx,Ty):
    """Turn string pairs into index sequences and their one-hot encodings.

    Args:
        dataset: Iterable of ``(human_readable, machine_readable)`` string pairs.
        human: Character-to-index vocabulary for the input strings.
        machine: Character-to-index vocabulary for the target strings.
        Tx: Length every encoded input sequence is padded/truncated to.
        Ty: Length every encoded target sequence is padded/truncated to.

    Returns:
        ``(X, Y, Xoh, Yoh)``: the index-encoded inputs and targets as NumPy
        arrays, followed by their one-hot encodings with ``len(human)`` and
        ``len(machine)`` classes respectively.
    """
    sources, targets = zip(*dataset)

    X = np.array([string_to_int(text, Tx, human) for text in sources])
    Y = [string_to_int(text, Ty, machine) for text in targets]

    human_size = len(human)
    machine_size = len(machine)

    # One-hot encode row by row, then stack the rows into a single array.
    Xoh = np.array([to_categorical(row, num_classes=human_size) for row in X])
    Yoh = np.array([to_categorical(row, num_classes=machine_size) for row in Y])

    return X, np.array(Y), Xoh, Yoh


def string_to_int(string,length,vocab):
    """Encode a string as a fixed-length list of vocabulary indices.

    The string is lower-cased, stripped of commas and truncated to ``length``
    characters; shorter strings are right-padded with ``vocab['<pad>']``.

    Args:
        string: Text to encode.
        length: Exact length of the returned list.
        vocab: dict mapping single characters to integer indices. Characters
            missing from ``vocab`` are encoded as ``vocab['<unk>']``.

    Returns:
        A list of ``length`` indices taken from ``vocab``.

    Raises:
        KeyError: If a character is not in ``vocab`` and ``vocab`` has no
            ``'<unk>'`` entry, or if padding is required and ``vocab`` has no
            ``'<pad>'`` entry.
    """
    string = string.lower().replace(",", "")[:length]

    if '<unk>' in vocab:
        # Fall back to the *index* of '<unk>' so the result only ever contains
        # vocabulary indices (a string placeholder would corrupt the arrays
        # built from this list).
        unknown = vocab['<unk>']
        rep = [vocab.get(char, unknown) for char in string]
    else:
        rep = [vocab[char] for char in string]

    if len(rep) < length:
        rep += [vocab['<pad>']] * (length - len(rep))

    return rep


# Tx: fixed length of every encoded human-readable input; string_to_int
# truncates longer strings and pads shorter ones.
Tx=30
# Ty: fixed length of every encoded machine-readable target (ISO dates such as
# '1980-04-16' are 10 characters long).
Ty = 10
# X / Y hold vocabulary indices; X_oh / Y_oh are their one-hot encodings.
X , Y , X_oh, Y_oh = preprocess_data(dataset,human,machine,Tx,Ty)



In [7]:
def softmax(x, axis=1):
    """Softmax for tensors with at least two dimensions.

    # Arguments
        x : Tensor.
        axis: Integer, axis that is normalized when `x` has more than two
            dimensions. It is not used for 2D input, which is passed to
            `K.softmax` with that function's default arguments.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    rank = K.ndim(x)

    if rank == 2:
        return K.softmax(x)
    if rank < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

    # Shift by the maximum along `axis` before exponentiating (keeps the
    # exponentials bounded), then normalize along the same axis.
    shifted = x - K.max(x, axis=axis, keepdims=True)
    exponentials = K.exp(shifted)
    normalizer = K.sum(exponentials, axis=axis, keepdims=True)
    return exponentials / normalizer

In [23]:
# Defining layers as global variables.
# They are created once here and reused by every call to one_step_attention,
# so all decoding steps go through the same layer objects.
# Copies the previous decoder state Tx times (one copy per input position).
repeator = RepeatVector(Tx)
# Joins encoder activations and the repeated decoder state on the last axis.
concatenator = Concatenate(axis= -1)
# Two small dense layers map each concatenated vector to one scalar energy.
densor1 = Dense(10,activation='tanh')
densor2 = Dense(1,activation='relu')
# Uses the notebook's custom softmax (default axis=1) to turn the energies into
# attention weights.
activator = Activation(softmax,name="attention_weights")
# Dot product over axis 1 of both inputs.
doter = Dot(axes= 1)

In [22]:
# Now we define the one_step_attention calculation
def one_step_attention(a, s_prev):
    """Compute the attention context for a single decoding step.

    Uses the module-level layers ``repeator``, ``concatenator``, ``densor1``,
    ``densor2``, ``activator`` and ``doter``.

    Args:
        a: Encoder activations; assumed to be (batch, Tx, features) as produced
            by the Bi-LSTM in ``modelf`` (TODO confirm).
        s_prev: Previous hidden state of the post-attention LSTM.

    Returns:
        The context tensor: the dot product, over axis 1, of the attention
        weights and ``a``.
    """
    # Repeat the decoder state so it can be paired with every encoder position.
    s_tiled = repeator(s_prev)
    joined = concatenator([a, s_tiled])

    # Score each position, then normalize the scores into attention weights.
    energies = densor2(densor1(joined))
    weights = activator(energies)

    return doter([weights, a])


Next we build the model function, which calls `one_step_attention` once for each of the `Ty` output time steps.

In [20]:
n_a = 32 # number of hidden units in each direction of the pre-attention Bi-LSTM
n_s = 64 # number of hidden units in the post-attention LSTM

# Created once at module level; modelf calls these same objects at every
# output step.
# return_state=True makes the LSTM also return its hidden and cell state.
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
# One unit per character of the machine vocabulary, with the custom softmax.
output_layer = Dense(len(machine), activation=softmax)

In [18]:
# List the GPU devices visible to TensorFlow.
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [24]:
def modelf(Tx,Ty,n_a,n_s,human_size,machine_size):
    """Assemble the attention-based date-translation model.

    Relies on the module-level ``one_step_attention``,
    ``post_activation_LSTM_cell`` and ``output_layer``.

    Args:
        Tx: Length of the input sequence.
        Ty: Number of decoding steps; the model has one output per step.
        n_a: Units per direction of the pre-attention bidirectional LSTM.
        n_s: Size of the initial hidden-state and cell-state inputs.
        human_size: Size of each one-hot input vector.
        machine_size: Not used by this function; the output width is fixed by
            the module-level ``output_layer``.

    Returns:
        A Keras ``Model`` with inputs ``[X, s0, c0]`` and a list of ``Ty``
        output tensors.
    """
    X = Input(shape=(Tx,human_size))
    s0 = Input(shape=(n_s,),name='s_0')
    c0 = Input(shape=(n_s,), name='c_0')

    # Encode the whole input sequence once.
    a = Bidirectional(LSTM(n_a,return_sequences=True))(X)

    hidden, cell = s0, c0
    outputs = []
    for _step in range(Ty):
        # Attend over the encoder activations using the current decoder state,
        # advance the decoder LSTM by one step and emit one prediction.
        context = one_step_attention(a, hidden)
        _seq, hidden, cell = post_activation_LSTM_cell(context, initial_state=[hidden, cell])
        outputs.append(output_layer(hidden))

    return Model(inputs=[X,s0,c0],outputs=outputs)

# Build the model with the input/output lengths and vocabulary sizes of the
# generated dataset.
model = modelf(Tx=Tx, Ty=Ty, n_a=n_a, n_s=n_s, human_size=len(human), machine_size=len(machine))

In [25]:
# Print the layer-by-layer architecture of the assembled model.
model.summary()

In [33]:
# The legacy `decay=0.01` argument scaled the learning rate as
# lr / (1 + decay * step). Current Keras optimizers no longer apply `decay`,
# so the same schedule is spelled out explicitly.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.005, decay_steps=1, decay_rate=0.01)
opt = Adam(learning_rate=lr_schedule, beta_1=0.9, beta_2=0.999)
# The model has one output per decoding step, so request one accuracy metric
# for each of the Ty outputs instead of hard-coding the count.
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=[['accuracy'] for _ in range(Ty)])

In [34]:
# Take the number of examples from the data itself: load_dataset skips dates
# that fail to format, so it is not guaranteed to equal the number requested.
m = X_oh.shape[0]
# Zero initial hidden and cell states for the post-attention LSTM.
s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
# Swap the first two axes of Y_oh so that iterating yields one target array per
# output step, matching the model's list of outputs.
outputs = list(Y_oh.swapaxes(0,1))

In [38]:
# Train on the one-hot inputs with zero initial decoder states; `outputs`
# supplies one target array per output step.
# NOTE(review): only one epoch is requested, yet the recorded run reports ~100%
# accuracy — presumably this cell was executed several times on the same model.
# Confirm, and set `epochs` accordingly for a reproducible run.
model.fit([X_oh, s0, c0], outputs, epochs=1, batch_size=100)

[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - dense_4_accuracy: 1.0000 - dense_4_accuracy_1: 1.0000 - dense_4_accuracy_2: 1.0000 - dense_4_accuracy_3: 0.9980 - dense_4_accuracy_4: 1.0000 - dense_4_accuracy_5: 1.0000 - dense_4_accuracy_6: 1.0000 - dense_4_accuracy_7: 1.0000 - dense_4_accuracy_8: 1.0000 - dense_4_accuracy_9: 1.0000 - loss: 0.0125


<keras.src.callbacks.history.History at 0x7d2468485750>

In [40]:
EXAMPLES = ['3 May 1979', '5 April 09', '21th of August 2016', 'Tue 10 Jul 2007', 'Saturday May 9 2018', 'March 3 2001', 'March 3rd 2001', '1 March 2001']
# Zero initial decoder states for a batch containing a single example.
s00 = np.zeros((1, n_s))
c00 = np.zeros((1, n_s))
for example in EXAMPLES:
    # Encode the text as Tx vocabulary indices, one-hot the whole sequence in
    # one call and add the leading batch axis.
    source = string_to_int(example, Tx, human)
    source = to_categorical(source, num_classes=len(human))
    source = np.expand_dims(source, axis=0)
    # predict returns one array per output step; stack them, take the most
    # likely character index per step and flatten to a 1-D vector so that each
    # element converts to a plain int without NumPy's array-to-scalar
    # deprecation warning.
    prediction = model.predict([source, s00, c00])
    prediction = np.argmax(np.array(prediction), axis=-1).ravel()
    output = [inv_machine[int(i)] for i in prediction]
    print("source:", example)
    print("output:", ''.join(output),"\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
source: 3 May 1979
output: 1979-05-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
source: 5 April 09
output: 2009-04-05 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
source: 21th of August 2016
output: 2016-01-20 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

  output = [inv_machine[int(i)] for i in prediction]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
source: Tue 10 Jul 2007
output: 2007-07-10 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
source: Saturday May 9 2018
output: 2018-05-09 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
source: March 3 2001
output: 2001-03-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
source: March 3rd 2001
output: 2001-13-03 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
source: 1 March 2001
output: 2001-03-01 



In [41]:
# Persist the trained model to a `.keras` file in the working directory.
# NOTE(review): the model uses the notebook-defined `softmax` function as an
# activation; reloading presumably requires passing it via `custom_objects` —
# confirm before relying on the saved file.
model.save("Neural_Machine_Translation.keras")