In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from bpemb import BPEmb
from sklearn.model_selection import train_test_split
from torchsummary import summary
from tqdm import tqdm
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
device

device(type='cuda')

In [3]:
# Location of the Hindi-English parallel corpus (CSV).
# Set the HINDI_ENGLISH_CSV environment variable to point at a local copy; the
# fallback is the path this notebook was originally authored with.
DATA_PATH = os.environ.get(
    "HINDI_ENGLISH_CSV",
    r"C:\Users\doshi\OneDrive\Desktop\hindi_english_parallel.csv",
)
data = pd.read_csv(DATA_PATH)
data

Unnamed: 0,hindi,english
0,अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें,Give your application an accessibility workout
1,एक्सेर्साइसर पहुंचनीयता अन्वेषक,Accerciser Accessibility Explorer
2,निचले पटल के लिए डिफोल्ट प्लग-इन खाका,The default plugin layout for the bottom panel
3,ऊपरी पटल के लिए डिफोल्ट प्लग-इन खाका,The default plugin layout for the top panel
4,उन प्लग-इनों की सूची जिन्हें डिफोल्ट रूप से नि...,A list of plugins that are disabled by default
...,...,...
1561836,स्पष्टीकरण.–जहां इस उपधारा के अधीन हानि और लाभ...,स्पष्टीकरण.–जहां इस उपधारा के अधीन हानि और लाभ...
1561837,मैंने गौर किया है कि यह न केवल अपने महत्त्वपूर...,है। I note that this is a landmark meeting – n...
1561838,उन्होंने मेरे समक्ष जो प्रदर्शन किया उसमें से ...,है। In the presentations that they made before...
1561839,खाद्य और जल सुरक्षा; पर्यावरण की दृष्टि से वहन...,्त है। Issues such as food and water security;...


In [4]:
# Discard rows with a missing value, then exact duplicate rows, and report what is left.
data = data.dropna().drop_duplicates()
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1353877 entries, 0 to 1561839
Data columns (total 2 columns):
 #   Column   Non-Null Count    Dtype 
---  ------   --------------    ----- 
 0   hindi    1353877 non-null  object
 1   english  1353877 non-null  object
dtypes: object(2)
memory usage: 31.0+ MB


In [5]:
def _word_count(sentence):
    """Return the number of whitespace-separated words in ``sentence``."""
    return len(sentence.split())


# Per-row word counts for each language.
data['english_len'] = data['english'].map(_word_count)
data['hindi_len'] = data['hindi'].map(_word_count)
data.head()

Unnamed: 0,hindi,english,english_len,hindi_len
0,अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें,Give your application an accessibility workout,6,8
1,एक्सेर्साइसर पहुंचनीयता अन्वेषक,Accerciser Accessibility Explorer,3,3
2,निचले पटल के लिए डिफोल्ट प्लग-इन खाका,The default plugin layout for the bottom panel,8,7
3,ऊपरी पटल के लिए डिफोल्ट प्लग-इन खाका,The default plugin layout for the top panel,8,7
4,उन प्लग-इनों की सूची जिन्हें डिफोल्ट रूप से नि...,A list of plugins that are disabled by default,9,12


In [6]:
# Keep only pairs in which both sentences are 5 to 15 words long (bounds inclusive).
english_ok = data.english_len.between(5, 15)
hindi_ok = data.hindi_len.between(5, 15)
data = data[english_ok & hindi_ok]
data.head()

Unnamed: 0,hindi,english,english_len,hindi_len
0,अपने अनुप्रयोग को पहुंचनीयता व्यायाम का लाभ दें,Give your application an accessibility workout,6,8
2,निचले पटल के लिए डिफोल्ट प्लग-इन खाका,The default plugin layout for the bottom panel,8,7
3,ऊपरी पटल के लिए डिफोल्ट प्लग-इन खाका,The default plugin layout for the top panel,8,7
4,उन प्लग-इनों की सूची जिन्हें डिफोल्ट रूप से नि...,A list of plugins that are disabled by default,9,12
6,पहुंचनीय आसंधि (नोड) को चुनते समय हाइलाइट बक्स...,The duration of the highlight box when selecti...,10,10


In [7]:
# Draw a seeded 25,000-row subsample, hold out 10% of it for testing, and give
# both splits a fresh 0..n-1 index.
data = data.sample(n=25000, random_state=0)
train_split, test_split = (
    split.reset_index(drop=True)
    for split in train_test_split(data, test_size=0.1, random_state=0)
)

In [8]:
# Sanity-check the split sizes, then preview the training rows.
print(f'{len(train_split)} {len(test_split)}')
train_split.head()

22500 2500


Unnamed: 0,hindi,english,english_len,hindi_len
0,एक खूब छोटा अंश जिस में तत्वों के गुण होते है।,A very small component acquiring a quality of ...,9,11
1,कोटा जानकारी समर्थित नहीं फ़ोल्डर '% s' के लिए,No IMAP mailbox available for folder '% s',8,9
2,परन्तु यह शीघ्र ही अपर्याप्त प्रतीत हुआ...।,But soon this was found rather inadequate.,7,7
3,उसने गिरफ्तार व्यक्ति के लिए प्रतिभू की भूमिका...,He acted as ad - promisor for the arrested per...,10,9
4,अल्लाह से क्षमा की प्रार्थना करो। निस्संदेह अल...,Ask God for forgiveness: He is most forgiving ...,10,12


In [9]:
# One BPEmb object per language; later cells use their `encode_ids_with_bos_eos`,
# `decode` and `vocab_size` members.
bpemb_en, bpemb_hi = (BPEmb(lang=code) for code in ('en', 'hi'))

In [10]:
class CustomDataset(torch.utils.data.Dataset):
    """Serve fixed-length token-id tensors for English -> Hindi sentence pairs.

    Args:
        data: DataFrame-like object exposing ``english`` and ``hindi`` columns
            that support ``.iloc[index]``.
        max_seq_len: Length every returned tensor is padded (or truncated) to.
        src_tokenizer: Object with ``encode_ids_with_bos_eos(text)`` used for the
            English side. Defaults to the notebook-level ``bpemb_en``.
        trg_tokenizer: Same, for the Hindi side. Defaults to ``bpemb_hi``.

    Each item is a 5-tuple of 1-D tensors of length ``max_seq_len``:
    ``(source ids, decoder input ids, decoder target ids, source mask,
    decoder-input mask)``. Masks hold 1 for real tokens and 0 for padding.
    """

    # Id used to fill padded positions.
    # NOTE(review): confirm 0 does not collide with a real tokenizer id.
    PAD_ID = 0

    def __init__(self, data, max_seq_len=64, src_tokenizer=None, trg_tokenizer=None):
        super().__init__()
        self.data = data
        self.max_seq_len = max_seq_len
        self.src_tokenizer = src_tokenizer
        self.trg_tokenizer = trg_tokenizer

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _pad_to(values, length, fill=0):
        """Return ``values`` right-padded with ``fill`` up to ``length`` items."""
        return values + [fill] * (length - len(values))

    def __getitem__(self, index):
        # Resolve the tokenizers lazily so the notebook globals are only needed
        # when no explicit tokenizer was supplied.
        src_tok = self.src_tokenizer if self.src_tokenizer is not None else bpemb_en
        trg_tok = self.trg_tokenizer if self.trg_tokenizer is not None else bpemb_hi

        eng_sen = self.data.english.iloc[index]
        hin_sen = self.data.hindi.iloc[index]

        # Truncate over-long sequences; without this the pad count goes negative,
        # the tensors come out longer than max_seq_len and batching fails.
        eng_tokens = list(src_tok.encode_ids_with_bos_eos(eng_sen))[:self.max_seq_len]
        # One extra target token is kept because input/output are shifted views.
        hin_tokens = list(trg_tok.encode_ids_with_bos_eos(hin_sen))[:self.max_seq_len + 1]

        # Teacher forcing: the decoder reads tokens[:-1] and predicts tokens[1:].
        trg_input_tokens = hin_tokens[:-1]
        trg_output_tokens = hin_tokens[1:]

        eng_mask = [1] * len(eng_tokens)
        hin_mask = [1] * len(trg_input_tokens)

        length = self.max_seq_len
        return (
            torch.tensor(self._pad_to(eng_tokens, length, self.PAD_ID)),
            torch.tensor(self._pad_to(trg_input_tokens, length, self.PAD_ID)),
            torch.tensor(self._pad_to(trg_output_tokens, length, self.PAD_ID)),
            torch.tensor(self._pad_to(eng_mask, length)),
            torch.tensor(self._pad_to(hin_mask, length)),
        )

In [11]:
def scaled_dot_product_attention(query, key, value, mask=None):
    """Compute softmax(Q K^T / sqrt(d_k)) V over the last two dimensions.

    Args:
        query, key, value: Tensors whose last dimension is the per-head feature
            size; in this notebook they are shaped (b, h, t, d/h).
        mask: Optional tensor broadcastable against the (..., t, t) score
            matrix. Positions where ``mask == 0`` receive no attention.

    Returns:
        Tensor with the same leading dimensions as ``query`` and the last
        dimension of ``value``.
    """
    d_k = query.shape[-1]
    scores = torch.matmul(query, key.transpose(-2, -1)) / (d_k ** 0.5)  # (..., t, t)

    if mask is not None:
        # Use the most negative finite value rather than -inf: a row that is
        # masked everywhere would otherwise turn into NaN after the softmax.
        scores = torch.where(mask == 0, torch.finfo(scores.dtype).min, scores)

    weights = torch.softmax(scores, dim=-1)  # each row sums to 1
    return torch.matmul(weights, value)


In [12]:
class MultiHeadSelfAttention(nn.Module):
    """Multi-head attention built on ``scaled_dot_product_attention``.

    Args:
        d_model: Feature size of the inputs and outputs.
        num_heads: Number of attention heads; must divide ``d_model`` evenly.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide ``d_model``.
    """

    def __init__(self, d_model, num_heads):
        super().__init__()
        # Fail at construction time instead of with an opaque reshape error
        # on the first forward pass.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by a positive num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_heads = num_heads
        self.head_dim = d_model // num_heads

        # Bias-free projections producing queries, keys and values for all heads at once.
        self.wq = nn.Linear(in_features=d_model, out_features=d_model, bias=False)
        self.wk = nn.Linear(in_features=d_model, out_features=d_model, bias=False)
        self.wv = nn.Linear(in_features=d_model, out_features=d_model, bias=False)

        # Linear output projection that mixes the concatenated heads.
        self.fc = nn.Linear(d_model, d_model)

    def _split_heads(self, projected):
        """Reshape (b, t, d) into (b, h, t, d/h)."""
        batch, steps, _ = projected.shape
        return projected.reshape(batch, steps, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

    def forward(self, q, k, v, mask=None):
        """Attend from ``q`` over ``k``/``v``.

        Args:
            q, k, v: Tensors of shape (b, t, d); ``k`` and ``v`` may use a
                different ``t`` than ``q``.
            mask: Optional mask forwarded to ``scaled_dot_product_attention``.

        Returns:
            Tensor of shape (b, t_q, d).
        """
        q = self._split_heads(self.wq(q))
        k = self._split_heads(self.wk(k))
        v = self._split_heads(self.wv(v))

        op = scaled_dot_product_attention(q, k, v, mask)  # (b, h, t, d/h)
        op = op.permute(0, 2, 1, 3)                       # (b, t, h, d/h)
        op = op.reshape(op.shape[0], op.shape[1], self.d_model)  # heads concatenated
        return self.fc(op)

In [13]:
class feedforward(nn.Module):
    """Two-layer MLP applied to the last dimension: Linear -> ReLU -> Linear."""

    def __init__(self, d_model, hidden_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_features=d_model, out_features=hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_dim, out_features=d_model)

    def forward(self, x):
        """Map ``x`` of shape (..., d_model) to a tensor of the same shape."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        return self.fc2(activated)

In [14]:
class encoder_block(nn.Module):
    """One encoder layer: self-attention and a feed-forward network, each
    followed by dropout, a residual connection and layer normalisation."""

    def __init__(self, d_model, num_heads, hidden_dim, dropout_rate=0.1):
        super().__init__()
        self.mhsa = MultiHeadSelfAttention(d_model, num_heads)
        self.fc = feedforward(d_model, hidden_dim)  # keeps the (b, t, d) shape

        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.dropout2 = nn.Dropout(p=dropout_rate)

        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)

    def forward(self, x, mask=None):
        """Transform ``x`` of shape (b, t, d); ``mask`` is passed to the attention layer."""
        # Self-attention sub-layer with residual connection.
        attended = self.dropout1(self.mhsa(x, x, x, mask))
        normed = self.layernorm1(attended + x)

        # Feed-forward sub-layer with residual connection.
        transformed = self.dropout2(self.fc(normed))
        return self.layernorm2(normed + transformed)

In [15]:
# Worked example: multi-head attention assembled by hand from separate
# per-head weight matrices.
b, t, d, h = 1, 3, 12, 3
head_dim = d // h

# One (query, key, value) weight triple per head, each of shape (d, d/h).
wq0, wk0, wv0 = (torch.rand((d, head_dim)) for _ in range(3))
wq1, wk1, wv1 = (torch.rand((d, head_dim)) for _ in range(3))
wq2, wk2, wv2 = (torch.rand((d, head_dim)) for _ in range(3))

# A random input of shape (b, t, d).
x = torch.rand((b, t, d))

# Project the input into queries, keys and values for every head.
q0, k0, v0 = x @ wq0, x @ wk0, x @ wv0
q1, k1, v1 = x @ wq1, x @ wk1, x @ wv1
q2, k2, v2 = x @ wq2, x @ wk2, x @ wv2

# Scaled dot-product attention, one head at a time.
op0, op1, op2 = (
    scaled_dot_product_attention(q, k, v)
    for q, k, v in ((q0, k0, v0), (q1, k1, v1), (q2, k2, v2))
)

# The final output is the concatenation of the three head outputs along the feature axis.
op = torch.cat([op0, op1, op2], dim=2)
print(op.shape)
print(op)

torch.Size([1, 3, 12])
tensor([[[4.3741, 3.4272, 3.7685, 2.7472, 3.1909, 3.5729, 3.0595, 3.6291,
          4.5180, 2.6380, 3.7090, 3.7612],
         [4.3825, 3.4384, 3.7711, 2.7651, 3.1902, 3.5766, 3.0590, 3.6332,
          4.5320, 2.6536, 3.7191, 3.7408],
         [4.3821, 3.4378, 3.7710, 2.7642, 3.1907, 3.5862, 3.0557, 3.6388,
          4.5306, 2.6522, 3.7180, 3.7422]]])


In [16]:
class encoder_transformer(nn.Module):
    """Stack of ``encoder_block`` layers over token + learned position embeddings.

    Args:
        num_blocks: Number of encoder blocks.
        d_model: Embedding / feature size.
        num_heads: Attention heads per block.
        hidden_dim: Hidden size of each block's feed-forward network.
        src_vocab_size: Number of rows in the token embedding table.
        max_seq_len: Largest supported sequence length (number of time steps t).
        dropout_rate: Dropout probability used on the embeddings and in each block.
    """

    def __init__(self, num_blocks, d_model, num_heads, hidden_dim, src_vocab_size, max_seq_len, dropout_rate=0.1):
        super().__init__()
        self.num_blocks = num_blocks
        self.d_model = d_model
        self.num_heads = num_heads
        self.hidden_dim = hidden_dim
        self.vocab_size = src_vocab_size
        self.max_seq_len = max_seq_len

        self.token_embeds = nn.Embedding(src_vocab_size, d_model)
        self.pos_embeds = nn.Embedding(max_seq_len, d_model)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.blocks = nn.ModuleList([encoder_block(d_model, num_heads, hidden_dim, dropout_rate=dropout_rate)
                                     for _ in range(self.num_blocks)])

    def forward(self, source, mask=None):
        """Encode ``source`` token ids.

        Args:
            source: Tensor of shape (b, t) with t <= ``max_seq_len``; it is cast
                to int64 and moved to this module's device.
            mask: Optional attention mask handed to every block.

        Returns:
            Tensor of shape (b, t, d).

        Raises:
            ValueError: If t exceeds ``max_seq_len``.
        """
        # Follow the module's own parameters rather than a notebook-level global.
        module_device = self.token_embeds.weight.device
        source = source.to(device=module_device, dtype=torch.long)

        batch_size, seq_len = source.shape[0], source.shape[1]
        if seq_len > self.max_seq_len:
            raise ValueError(f"sequence length {seq_len} exceeds max_seq_len {self.max_seq_len}")

        t_embeds = self.token_embeds(source)  # (b, t, d)
        # Position ids come from the input itself; the previous version read the
        # batch size from an unrelated global tensor `x`.
        pos_ids = torch.arange(seq_len, device=module_device).unsqueeze(0).expand(batch_size, seq_len)
        p_embeds = self.pos_embeds(pos_ids)  # (b, t, d)

        op = self.dropout(t_embeds + p_embeds)  # (b, t, d)
        for block in self.blocks:
            op = block(op, mask)

        return op
            

In [17]:
class decoder_block(nn.Module):
    """One decoder layer: self-attention over the target, attention over the
    encoder output, then a feed-forward network. Every sub-layer is followed by
    dropout, a residual connection and layer normalisation."""

    def __init__(self, d_model, num_heads, hidden_dim, dropout_rate=0.1):
        super().__init__()
        self.mhsa1 = MultiHeadSelfAttention(d_model, num_heads)  # target self-attention
        self.mhsa2 = MultiHeadSelfAttention(d_model, num_heads)  # attention over encoder output

        self.fc = feedforward(d_model, hidden_dim)

        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.dropout3 = nn.Dropout(p=dropout_rate)

        self.layernorm1 = nn.LayerNorm(d_model)
        self.layernorm2 = nn.LayerNorm(d_model)
        self.layernorm3 = nn.LayerNorm(d_model)

    def forward(self, encoder_ouptut, target, decoder_mask=None, memory_mask=None):
        """Run one decoder layer.

        ``decoder_mask`` is applied to the target self-attention and
        ``memory_mask`` to the attention over ``encoder_ouptut``.
        """
        # Sub-layer 1: the target attends to itself.
        self_attended = self.dropout1(self.mhsa1(target, target, target, decoder_mask))
        self_attended = self.layernorm1(self_attended + target)

        # Sub-layer 2: queries from the target, keys/values from the encoder.
        cross_attended = self.dropout2(
            self.mhsa2(self_attended, encoder_ouptut, encoder_ouptut, memory_mask)
        )
        cross_attended = self.layernorm2(self_attended + cross_attended)

        # Sub-layer 3: feed-forward network.
        transformed = self.dropout3(self.fc(cross_attended))
        return self.layernorm3(transformed + cross_attended)

In [18]:
class decoder_transformer(nn.Module):
    """Stack of ``decoder_block`` layers over token + learned position embeddings.

    Args:
        num_blocks: Number of decoder blocks.
        d_model: Embedding / feature size.
        num_heads: Attention heads per block.
        hidden_dim: Hidden size of each block's feed-forward network.
        trg_vocab_size: Number of rows in the target token embedding table.
        max_seq_len: Largest supported sequence length.
        dropout_rate: Dropout probability used on the embeddings and in each block.
    """

    def __init__(self, num_blocks, d_model, num_heads, hidden_dim, trg_vocab_size, max_seq_len, dropout_rate=0.1):
        super().__init__()
        self.token_embeds = nn.Embedding(trg_vocab_size, d_model)
        self.pos_embeds = nn.Embedding(max_seq_len, d_model)
        self.max_seq_len = max_seq_len
        self.dropout = nn.Dropout(p=dropout_rate)

        self.blocks = nn.ModuleList([decoder_block(d_model, num_heads, hidden_dim, dropout_rate)
                                     for _ in range(num_blocks)])

    def forward(self, encoder_output, target, decoder_mask=None, memory_mask=None):
        """Decode ``target`` token ids against ``encoder_output``.

        Args:
            encoder_output: Encoder features handed to every block.
            target: Tensor of shape (b, t) with t <= ``max_seq_len``; it is cast
                to int64 and moved to this module's device.
            decoder_mask: Optional mask for the target self-attention.
            memory_mask: Optional mask for the attention over ``encoder_output``.

        Returns:
            Tensor of shape (b, t, d).

        Raises:
            ValueError: If t exceeds ``max_seq_len``.
        """
        # Follow the module's own parameters rather than a notebook-level global.
        module_device = self.token_embeds.weight.device
        target = target.to(device=module_device, dtype=torch.long)

        batch_size, seq_len = target.shape[0], target.shape[1]
        if seq_len > self.max_seq_len:
            raise ValueError(f"sequence length {seq_len} exceeds max_seq_len {self.max_seq_len}")

        t_embeds = self.token_embeds(target)  # (b, t, d)
        # Position ids come from the input itself; the previous version read the
        # batch size from an unrelated global tensor `x`.
        pos_ids = torch.arange(seq_len, device=module_device).unsqueeze(0).expand(batch_size, seq_len)
        p_embeds = self.pos_embeds(pos_ids)  # (b, t, d)

        op = self.dropout(t_embeds + p_embeds)  # (b, t, d)
        for block in self.blocks:
            op = block(encoder_output, op, decoder_mask, memory_mask)

        return op

In [19]:
class transformer(nn.Module):
    """Encoder-decoder model that maps source token ids to target-vocabulary logits.

    Args:
        num_blocks: Number of blocks in both the encoder and the decoder.
        d_model: Embedding / feature size.
        num_heads: Attention heads per block.
        hidden_dim: Hidden size of the feed-forward networks.
        src_vocab_size: Size of the source embedding table.
        trg_vocab_size: Size of the target embedding table and of the output layer.
        max_seq_len: Largest supported sequence length.
        dropout_rate: Dropout probability forwarded to the encoder and decoder.
    """

    def __init__(self, num_blocks, d_model, num_heads, hidden_dim, src_vocab_size, trg_vocab_size, max_seq_len,
                 dropout_rate=0.1):
        super().__init__()
        # dropout_rate is now honoured (it used to be overridden with a literal 0.1).
        self.encoder = encoder_transformer(num_blocks, d_model, num_heads, hidden_dim, src_vocab_size, max_seq_len,
                                           dropout_rate=dropout_rate)
        # The decoder embeds TARGET tokens, so it needs the target vocabulary size.
        self.decoder = decoder_transformer(num_blocks, d_model, num_heads, hidden_dim, trg_vocab_size, max_seq_len,
                                           dropout_rate=dropout_rate)

        self.fc = nn.Linear(d_model, trg_vocab_size)
        # Lower-triangular look-ahead mask. Registered as a non-persistent buffer so
        # it moves with `.to(...)` but is not written to (or required in) the state dict.
        self.register_buffer("lookahead", torch.tril(torch.ones((max_seq_len, max_seq_len))), persistent=False)

    def forward(self, src, trg, src_pad_mask=None, trg_pad_mask=None):
        """Compute logits for every target position.

        Args:
            src: Source token ids, shape (b, t).
            trg: Decoder input token ids, shape (b, t).
            src_pad_mask: Optional (b, t) mask, 1 for real source tokens and 0 for padding.
            trg_pad_mask: Optional (b, t) mask, 1 for real target tokens and 0 for padding.

        Returns:
            Tensor of shape (b*t, trg_vocab_size).
        """
        module_device = self.lookahead.device
        seq_len = trg.shape[1]
        # Position i may only attend to positions <= i, even when no padding
        # mask is supplied.
        dec_mask = self.lookahead[:seq_len, :seq_len]

        # Padding masks are reshaped to (b, 1, 1, t) so they broadcast over heads and queries.
        if src_pad_mask is not None:
            src_pad_mask = src_pad_mask.to(module_device).unsqueeze(1).unsqueeze(1)
        if trg_pad_mask is not None:
            trg_pad_mask = trg_pad_mask.to(module_device).unsqueeze(1).unsqueeze(1)
            # Combined mask is 1 only where both the causal and the padding mask allow it.
            dec_mask = torch.minimum(trg_pad_mask, dec_mask)  # (b, 1, t, t)

        enc_op = self.encoder(src, src_pad_mask)
        op = self.decoder(enc_op, trg, dec_mask, src_pad_mask)  # (b, t, d)
        op = op.reshape(-1, op.shape[-1])  # (b*t, d)
        op = self.fc(op)  # (b*t, trg_vocab_size)

        return op

In [20]:
def save_checkpoint(state, filename='my_checkpoint.pth'):
    """Print a notice and serialise ``state`` to ``filename`` with ``torch.save``.

    The training loop passes a dict holding the model and optimizer state dicts.
    """
    print("-> Saving CheckPoint")
    torch.save(state, f=filename)

In [21]:
def load_checkpoint(checkpoint, model):
    """Print a notice and copy ``checkpoint["state_dict"]`` into ``model``.

    Only the model weights are restored; the model can then be trained further
    or used for prediction.
    """
    print("-> Loading CheckPoint")
    weights = checkpoint["state_dict"]
    model.load_state_dict(weights)

In [22]:
def train(loader, model, optimizer, scaler, scheduler, loss_fn, epoch, device=device):
    '''
    Run one training epoch and return the mean batch loss.

    Each batch from ``loader`` is indexed as (source ids, decoder input ids,
    decoder target ids, source pad mask, target pad mask). The forward pass and
    the loss run under CUDA autocast; ``scaler`` scales the loss for the
    backward pass and drives the optimizer step. ``scheduler`` is accepted but
    not stepped here. ``epoch`` is only shown in the progress bar.
    '''
    model.train()
    num_batches = len(loader)
    progress = tqdm(loader)  # progress bar over the batches
    total_loss = 0

    for batch in progress:
        # Move the five tensors of the batch to the target device.
        src, trg_inp, trg_op, src_pad_mask, trg_pad_mask = (
            batch[position].to(device) for position in range(5)
        )

        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            op = model(src, trg_inp, src_pad_mask, trg_pad_mask)  # (batch_size*max_len, vocab)
            flat_targets = trg_op.reshape(trg_op.shape[0] * trg_op.shape[1])  # (batch_size*max_len)
            # loss_fn is expected to ignore index 0 so padding positions do not contribute.
            loss = loss_fn(op, flat_targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        progress.set_postfix(loss=batch_loss, epoch=epoch)
        total_loss += batch_loss

    return total_loss / num_batches

In [23]:
def test():
    """Placeholder for evaluation with metrics such as METEOR and BLEU.

    Not implemented yet; returns ``None``.
    """
    return None

In [24]:
def translate_sentence(sentence, model, max_seq_len, trans_len=50):
    """Greedily translate one English sentence with ``model``.

    Args:
        sentence: English input text.
        model: Model called as ``model(src, trg, src_pad_mask, trg_pad_mask)``
            and returning logits of shape (max_seq_len, trg_vocab_size) for a
            batch of one.
        max_seq_len: Fixed sequence length the model inputs are padded to.
        trans_len: Maximum number of tokens to generate. Generation also stops
            once the decoder input buffer of ``max_seq_len`` slots is full.

    Returns:
        ``bpemb_hi.decode`` applied to the generated token ids (including the
        end-of-sentence id when one was produced).
    """
    BOS_ID, EOS_ID = 1, 2  # 1 means <sos>, 2 means <eos>

    # Encode, truncate to the model's fixed length, then pad ids and mask.
    inp = bpemb_en.encode_ids_with_bos_eos(sentence)[:max_seq_len]
    pad_count = max_seq_len - len(inp)
    enc_mask = [1] * len(inp) + [0] * pad_count
    inp = inp + [0] * pad_count
    inp = torch.tensor(inp).unsqueeze(0).to(device)            # (1, max_seq_len)
    enc_mask = torch.tensor(enc_mask).unsqueeze(0).to(device)  # (1, max_seq_len)

    # Decoder input starts as <sos> followed by padding; the mask marks filled slots.
    trg_inp = torch.zeros(max_seq_len).unsqueeze(0).to(device)   # (1, max_seq_len)
    trg_mask = torch.zeros(max_seq_len).unsqueeze(0).to(device)  # (1, max_seq_len)
    trg_inp[0, 0] = BOS_ID
    trg_mask[0, 0] = 1

    # Each step writes slot step + 1, so at most max_seq_len - 1 tokens fit.
    max_steps = min(trans_len, max_seq_len - 1)
    translation = []

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for inference.
        with torch.no_grad():
            for step in range(max_steps):
                op = model(inp, trg_inp, enc_mask, trg_mask)  # (max_seq_len, trg_vocab_size)
                # The prediction at position `step` is the token that follows
                # the `step + 1` tokens fed so far.
                next_token = op.argmax(dim=1)[step].item()
                translation.append(next_token)
                trg_inp[0, step + 1] = next_token
                trg_mask[0, step + 1] = 1
                if next_token == EOS_ID:
                    break
    finally:
        # Restore whichever mode the caller had the model in.
        model.train(was_training)

    return bpemb_hi.decode(translation)

In [25]:
# training hyperparameters
num_epochs, lr, batch_size = 200, 3e-4, 64

# Wrap both splits in datasets that pad every sequence to 64 tokens.
train_dataset, test_dataset = (
    CustomDataset(split, max_seq_len=64) for split in (train_split, test_split)
)
# Shuffled mini-batch loaders over each dataset.
train_loader, test_loader = (
    torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for dataset in (train_dataset, test_dataset)
)

In [26]:
# model hyperparameters
num_blocks, d_model, num_heads = 3, 512, 8
hidden_dim = 4 * d_model
max_seq_len = 64
# One extra embedding row per vocabulary, reserved for the padding token.
src_vocab_size = bpemb_en.vocab_size + 1
trg_vocab_size = bpemb_hi.vocab_size + 1

# Build the model on the selected device and print a layer-by-layer summary.
model = transformer(
    num_blocks=num_blocks,
    d_model=d_model,
    num_heads=num_heads,
    hidden_dim=hidden_dim,
    src_vocab_size=src_vocab_size,
    trg_vocab_size=trg_vocab_size,
    max_seq_len=max_seq_len,
).to(device)
summary(model, [(max_seq_len,), (max_seq_len,)])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1              [-1, 64, 512]       5,120,512
         Embedding-2              [-1, 64, 512]          32,768
           Dropout-3              [-1, 64, 512]               0
            Linear-4              [-1, 64, 512]         262,144
            Linear-5              [-1, 64, 512]         262,144
            Linear-6              [-1, 64, 512]         262,144
            Linear-7              [-1, 64, 512]         262,656
MultiHeadSelfAttention-8              [-1, 64, 512]               0
           Dropout-9              [-1, 64, 512]               0
        LayerNorm-10              [-1, 64, 512]           1,024
           Linear-11             [-1, 64, 2048]       1,050,624
             ReLU-12             [-1, 64, 2048]               0
           Linear-13              [-1, 64, 512]       1,049,088
      feedforward-14              [

In [27]:
# Smoke test: push the first example of one training batch through the
# untrained model to check that the forward pass does not break.
batch = next(iter(train_loader))
src, trg_inp, trg_op, src_pad_mask, trg_pad_mask = (
    field[0].unsqueeze(0).to(device) for field in batch[:5]
)

op = model(src, trg_inp, src_pad_mask, trg_pad_mask)
op = torch.softmax(op, dim=-1)
out = op.max(dim=-1).indices  # most probable token id at every position

english_text = bpemb_en.decode(src.tolist())
hindi_text = bpemb_hi.decode(trg_inp.tolist())
predicted_text = bpemb_hi.decode(out.tolist())
print(f'Original English Sentence {english_text}\n\nOriginal Hindi Sentence {hindi_text}\n\nPredicted Sentence {predicted_text}')
# The "??" marks in the output come from the padding tokens; they can be stripped when needed.

Original English Sentence ['distribution of profits to the shareholders of a company. ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇ ']

Original Hindi Sentence ['कंपनी के अंशधारियों को लाभ का वितरण। ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇  ⁇ ']

Predicted Sentence ['राष्ट्रपति', 'झा', 'हिंदी', 'जिसकी', 'रेन्द्र', 'स्पर्श', 'विनिमय', 'सि', 'द्रव्य', 'अधिव', 'लम्बे', 'साइड', 'संख्या', 'घूर्', 'कारा', '्वज', 'अवलोक', 'टर्मिन', 'साइड', 'बेथ', 'मातृभाषा', 'ब्ले', 'सम्मिलित', 'रिचर्ड', 'अवलोक', 'जे', 'छि', 'रेन्द्र', 'bit', 'ार्थना', 'उसमें', 'राजनी', 'न्धी', 'रहते', 'आपदा', 'उसमें', 'दायित्व', 'वीर', 'उम्र', 'शीर्ष', 'एशिया', 'दायित्व', 'ज्ञ', 'सभ्यता', 'शुल्क', 'रिचर्ड', 'कहते', 'उसमें', 'पत्रों', 'रहते', 'दायित्व', 'ल्ली', ':-', 'इंदिरा', 'रेन्द्र', 'ज़र', 'खड़ा', 'सैनिकों', 'ज

In [28]:
# setups
loss_fn = nn.CrossEntropyLoss(ignore_index=0)  # index 0 (padding) does not contribute to the loss
scaler = torch.cuda.amp.GradScaler()           # gradient scaler passed to train()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
losses = list()  # mean training loss per epoch

# Fixed sentences translated after every epoch to eyeball progress.
sentences = [
    'another plant, the aluminium corporation of india, came into existence after the war.',
    'He is doing very good these days',
    'this guy is totally mad',
    'what were you saying that day?',
]

In [29]:
def infer(sentences, model, max_seq_len):
    """Translate each sentence with ``translate_sentence`` and print it, numbered from 1,
    next to its source text, as a quick check of how well the model is trained."""
    for number, sentence in enumerate(sentences, start=1):
        translation = translate_sentence(sentence, model, max_seq_len)
        print(f'Example {number}:\n{sentence}\n{translation}\n\n')

In [30]:
# Baseline before any training: translate the probe sentences with the current weights.
infer(sentences=sentences, model=model, max_seq_len=max_seq_len)

Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['रहते', 'भाषित', 'उत्तर', 'fil', 'तथापि', 'भिक', 'इज़', 'परिश', 'one', 'अकाल', 'रूस', 'द', 'कथन', 'fir', 'नाटक', 'भारतीय', 'भाषित', 'भाप', 'थाई', 'उत्', 'चोप', 'निम्नलिखित', 'thumb', 'लोहे', 'धात', 'ऑनलाइन', 'बाज़', 'घ्र', 'छोड़ने', 'ारायण', 'ऐसा', 'दर्शन', 'उनसे', 'ड़ों', 'देखभाल', '्रु', 'सर्विस', 'net', 'जुड़', '==', 'net', 'त्म', 'bc', 'धीरे', 'साधारण', 'ism', 'नामांकन', 'उसमें', 'पार्टी', 'ोई']


Example 2:
He is doing very good these days
['रहते', 'विजेता', 'समीप', 'ंख', 'खगोल', 'ण्व', 'धान', 'और', 'रुचि', 'प्रक्षेपण', 'अपील', 'नाटक', 'क्रिस', 'ुद', 'मशीन', 'शब्दों', 'खरी', 'कठोर', 'चाय', 'उत्', 'चोप', 'निम्नलिखित', 'thumb', 'लोहे', 'सान', 'मुखी', 'कथित', 'sp', 'ons', 'सफलतापूर्वक', 'पत्रश्रेणी', 'चलाया', 'उत', 'आ', 'रहें', 'war', 'आ', 'नो', 'पत्रश्रेणी', 'तरी', 'उग', 'विख्यात', 'अम्ब', 'यिक', 'आंद', 'जूलियन', 'गम', 'शाब्दिक', 'lect', 'ढाका']


Example 3:
this guy is totally mad
['रह

In [31]:
# training
for epoch in range(num_epochs):
    model.train()
    epoch_loss = train(train_loader, model, optimizer, scaler, scheduler, loss_fn, epoch)
    losses.append(epoch_loss)

    # Save a checkpoint after every epoch (default filename, so it is overwritten each time).
    checkpoint = dict(
        state_dict=model.state_dict(),
        optimizer=optimizer.state_dict(),
    )
    save_checkpoint(checkpoint)

    # Qualitative check: translate the fixed probe sentences.
    infer(sentences, model, max_seq_len)

100%|██████████| 352/352 [00:25<00:00, 13.82it/s, epoch=0, loss=5.72]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'व्यक्ति', ',', '0000', 'में', ',', '0000', 'के', 'कारण', ',', '0000', 'में', ',', '0000', '']


Example 2:
He is doing very good these days
['उसने', 'एक', 'ही', 'भी', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'एक', 'है', 'कि', 'यह', 'एक', 'है', '।', '']


Example 4:
what were you saying that day?
['क्या', 'तुम', 'क्या', 'है', 'कि', 'क्या', 'क्या', 'क्या', 'है', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.78it/s, epoch=1, loss=4.84]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['भारत', 'में', 'एक', 'भारत', 'की', 'एक', 'प्रमुख', 'भारत', 'में', 'एक', 'बड़ी', 'भारत', 'में', 'है', '।', '']


Example 2:
He is doing very good these days
['वे', 'ये', 'लोग', 'है', 'कि', 'ये', 'लोग', 'है', '']


Example 3:
this guy is totally mad
['यह', 'एक', 'बार', 'एक', 'नया', 'है', '']


Example 4:
what were you saying that day?
['आप', 'दिन', 'क्या', 'है', 'कि', 'तुम', 'लोग', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.73it/s, epoch=2, loss=4.56]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'देश', ',', 'भारत', 'के', 'बाद', 'भारत', 'में', 'एक', 'अन्य', 'भागों', 'में', 'एक', 'अन्य', 'प्रकार', 'की', 'एक', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'इन', 'बहुत', 'बहुत', 'बड़ी', 'है', '']


Example 3:
this guy is totally mad
['यह', 'बहुत', 'कम', 'है', 'कि', 'यह', 'विकल्प', 'है', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'क्या', 'है', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=3, loss=4.02]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', ',', 'भारत', 'में', 'एक', 'दूसरे', 'के', 'बाद', 'एक', 'दूसरे', 'के', 'बाद', 'एक', 'दूसरे', 'पर', 'स्थित', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'ये', 'बहुत', 'बड़ी', 'बड़ी', 'निश', 'ानी', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'है', 'कि', 'इस', 'प्रकार', 'की', 'है', '।', '']


Example 4:
what were you saying that day?
['तुम', 'क्या', 'तुम', 'ने', 'देखा', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=4, loss=3.28]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', ',', 'भारत', 'के', 'बाद', 'भारत', 'में', 'एक', 'दूसरे', 'के', 'बाद', 'एक', 'और', 'अंग', 'में', 'स्थित', 'है', '.', '']


Example 2:
He is doing very good these days
['वह', 'इन', 'दिनों', 'बहुत', 'बड़ी', 'है', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'से', 'अधिक', 'ृत', 'हैं', '।', '']


Example 4:
what were you saying that day?
['आप', 'दिन', 'क्या', 'हो', 'गया', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=5, loss=2.86]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'देश', ',', 'भारत', 'के', 'एक', 'अन्य', 'भागों', 'में', 'स्थित', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'दिन', 'है', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'है', 'कि', 'इस', 'पर', 'आधारित', 'है', '।', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'कित', 'ने', 'ही', 'चेहरे', 'गिर', 'े', 'रहे', 'थे', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=6, loss=2.42]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'न', 'ियन', '्त्र', 'ण', 'के', 'बाद', ',', 'भारत', 'के', 'बाद', 'एक', 'दम', 'नदी', 'में', 'से', 'एक', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छी', 'तरह', 'से', 'बहुत', 'अच्छा', 'है', ',', '']


Example 3:
this guy is totally mad
['यह', 'नेटवर्क', 'से', 'क्टर', 'काफी', 'जटिल', 'है', '।', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'कित', 'ने', 'ही', 'क्या', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=7, loss=1.89]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरी', 'ओर', 'नी', 'र', 'णीय', 'है', ',', 'भारत', 'में', 'जल', 'ो', 'हि', 'नी', 'पड़ी', '।', '']


Example 2:
He is doing very good these days
['वे', 'बहुत', 'ही', 'अच्छे', 'हैं', ',', '']


Example 3:
this guy is totally mad
['यह', 'शैक्षणिक', 'जटिल', 'की', 'गति', 'से', 'है', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'आप', 'उस', 'दिन', 'क्या', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=8, loss=1.71]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', 'दूसरी', 'छोर', 'के', 'बाद', ',', '‘', 'ग', 'ड़ा', 'हुआ', '’', 'में', 'एक', 'अन्य', 'आरम्भ', 'में', 'लिख', 'ना', '।', '']


Example 2:
He is doing very good these days
['वे', 'बहुत', 'अच्छी', 'तरह', 'हैं', ',', '']


Example 3:
this guy is totally mad
['यह', 'सीमा', 'कम', 'होती', 'है', '।', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'आप', 'उस', 'दिन', 'कित', 'ने', 'ही', 'क्या', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.66it/s, epoch=9, loss=1.4]  


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'स', 'ार्थ', ',', 'भारत', 'के', 'बाद', 'एक', 'अन्य', 'सदस्य', 'निकला', '.', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'काम', 'कर', 'रहा', 'है', '']


Example 3:
this guy is totally mad
['यह', 'शैक्षणिक', 'गतिविधियों', 'एक', 'दम', 'से', 'क्टर', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'था', '']




100%|██████████| 352/352 [00:26<00:00, 13.24it/s, epoch=10, loss=1.01] 


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', 'दूसरी', 'ओर', 'नी', 'प', 'रा', 'ओं', 'में', 'से', 'दूसरी', 'ओर', 'मिली', '.', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छी', 'तरह', 'से', 'ये', 'बहुत', 'अच्छे', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'पाठ्यक्रम', 'सी', '-', 'ड', 'ैक', 'ंत', 'रण', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.64it/s, epoch=11, loss=0.793]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', 'गर्भा', 'शय', 'के', 'बाद', 'विकसित', 'मातृ', 'और', 'फ', 'िस', 'ू', 'ध', 'ियां', '।', '']


Example 2:
He is doing very good these days
['वह', 'यह', 'विश्वास', 'है', 'कि', 'वह', 'दिन', '-', 'सा', 'ता', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'स्व', 'य', 'ली', 'जिले', 'में', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=12, loss=0.57] 


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', 'एक', 'अन्य', 'भागों', 'में', ',', 'उत्तर', 'प्रदेश', 'से', 'विकसित', 'हुआ', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'बहुत', 'अच्छे', 'हैं', ',', '']


Example 3:
this guy is totally mad
['यह', 'पाठ्यक्रम', 'एक', 'दम', 'निरा', 'धार', 'है', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'था', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=13, loss=0.525]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'शहर', ',', 'भारत', ',', 'कार', 'गर', 'ों', 'के', ',', 'फ', 'ंस', 'ू', 'ंगा', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छा', 'है', ',', 'स', 'लेकर', 'बहुत', 'अच्छा', 'काम', 'कर', 'रहा', 'है', '।', '']


Example 3:
this guy is totally mad
['कु', 'ंक', 'ल्प', 'ः', 'यह', 'विकल्प', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=14, loss=0.491]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', 'गर्भा', 'शय', 'के', 'बाद', 'गर्भा', 'शय', 'में', 'एक', 'अन्य', 'स्रोत', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'खुश', 'हो', 'रहा', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'विशाल', 'सी', 'वन', 'की', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम्', 'ह', 'ें', 'क्या', 'माल', 'ूम', 'कि', 'ज', 'ज़ा', 'क्या', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=15, loss=0.375]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारतीय', 'फ', 'ौर', 'प', 'ेक', 'वंश', 'के', 'स्वी', 'त', 'के', 'राज', 'ासन', 'को', 'ओवर', '.', '']


Example 2:
He is doing very good these days
['वह', 'सच', 'मु', 'च', 'सब', 'की', 'सो', 'चना', 'है', '']


Example 3:
this guy is totally mad
['यह', 'क्रिया', 'है', 'वे', 'बदलाव', 'ला', 'ते', 'हैं', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=16, loss=0.354]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'को', 'स', 'ना', 'या', 'गया', ',', 'भारत', 'के', 'बाद', 'में', 'भी', 'दूसरी', '.', '']


Example 2:
He is doing very good these days
['वह', 'मु', 'झ', 'से', 'उसकी', 'अस', 'ल', 'में', 'बहुत', 'अच्छा', 'हो', 'रही', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'घ', 'नात्मक', 'ता', 'है', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=17, loss=0.419]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारतीय', 'औद्योगिक', 'क्रांति', 'के', 'बाद', 'एक', 'दूसरी', 'छोर', 'प', 'र्व', 'बैंक', 'में', 'स', 'स्ता', 'ना', ',', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'ही', 'अच्छा', 'है', '']


Example 3:
this guy is totally mad
['यह', 'शैक्षणिक', 'गतिविधियों', 'से', 'काफी', 'मदद', 'है', '।', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'तुम', 'को', 'क्या', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=18, loss=0.371]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरे', 'के', 'बाद', 'एक', 'अन्य', 'भागों', 'में', 'और', 'घटना', 'एं', 'दिख', 'ाना', 'थ', 'घातक', 'होती', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'लगने', 'लगे', 'हैं', '']


Example 3:
this guy is totally mad
['यह', 'क्रिया', 'समूह', 'एक', 'बदलाव', 'बदलाव', 'अप', 'ार', 'है', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:26<00:00, 13.50it/s, epoch=19, loss=0.298]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारत', 'में', 'एक', 'अन्य', 'रूपों', ',', 'कंपनी', ',', 'स', 'ग', 'र्भ', 'ता', 'वश', 'ता', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छा', 'काम', 'कर', 'रहा', 'है', '']


Example 3:
this guy is totally mad
['यह', 'क्रिया', 'यह', 'जगह', 'घ', 'ोर', 'िट', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम्', 'ह', 'ें', 'क्या', 'ख', 'याल', 'ड़', 'ाने', 'थे', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=20, loss=0.279]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['भारत', 'के', 'उपरांत', ',', 'उत्तर', 'दाताओं', 'के', 'बाद', 'एक', 'अन्य', 'स', 'चे', 'त', 'में', 'भी', 'नहीं', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'हैं', ',', 'बहुत', 'अच्छा', 'है', '']


Example 3:
this guy is totally mad
['यह', 'क्रिया', 'यह', 'जगह', 'है', 'कि', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=21, loss=0.269]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'स', 'ख', 'ोल', ',', 'भारतीय', 'अर्थव्यवस्था', 'के', 'बाद', ',', 'युद्ध', 'के', 'बाद', ',', 'कंपनी', 'को', 'स', 'ना', 'शी', 'ने', 'स', 'की', 'को', 'एक', 'कार', 'ी', 'गर', ',', 'कंपनी', 'के', 'बाद', 'में', 'भेजा', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'हैं', '-', 'सु', 'झे', 'एक', 'बहुत', 'अच्छा', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'भाषण', 'उपयोग', 'इस', 'स्थिति', 'से', 'होती', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.70it/s, epoch=22, loss=0.244]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारतीय', 'सिक्', 'कों', 'के', 'बाद', 'में', 'भारी', 'रंग', 'वा', 'डेविड', 'गोल्ड', 'ता', 'को', 'स', 'ँ', 'व', 'के', 'समान', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'दिन', 'है', '']


Example 3:
this guy is totally mad
['यह', 'शैक्षणिक', 'गतिविधियों', 'से', 'खाली', 'जगह', 'है', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'तु', 'झे', 'कित', 'ने', 'उस', 'दिन', 'क्या', 'मिलता', 'है', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=23, loss=0.298]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरी', 'कंपनी', 'के', 'बाद', ',', 'भारत', 'के', 'बाद', 'एक', 'दूसरा', 'जिसके', 'पृष्ठ', 'पर', 'रोक', 'ता', 'था', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'दिन', 'है', '']


Example 3:
this guy is totally mad
['यह', 'विशाल', 'ष', 'कों', 'का', 'निर्धारण', 'है', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'तु', 'झे', 'क्या', 'हुआ', 'है', 'कि', 'तुम', 'लोग', 'क्या', '?', '']




100%|██████████| 352/352 [00:26<00:00, 13.52it/s, epoch=24, loss=0.241]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'स', 'जीव', 'जानवर', ',', 'भारत', 'के', 'बाद', ',', 'भारत', 'के', 'सभी', 'रूपों', 'में', 'स', 'िस', 'ान', '्व', 'ाह', 'ित', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'जीवन', 'की', 'तेज़', 'होती', 'है', ',', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'है', 'यह', 'मार्ग', 'है', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'कित', 'ने', 'ही', 'चेहरे', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=25, loss=0.242]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरा', 'महा', 'श', 'न्स', 'ेंट', 'ा', 'पश्चिम', 'में', 'स', 'ँ', 'थ', 'ेन', 'मिला', 'प', 'ते', 'हुए', 'पानी', 'का', 'एक', 'दूसरा', 'आई', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'हैं', ',', 'बहुत', 'अच्छी', 'अवस्था', 'में', 'है', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'स्व', 'य', 'कार्यों', 'में', 'है', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'कित', 'ने', 'ही', 'ज़ा', 'ब', 'में', 'क्या', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=26, loss=0.275]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारतीय', 'सिक्', 'खों', 'के', 'बाद', 'एक', 'अन्य', 'अतिरिक्त', 'सु', 'ने', 'से', 'गुजर', 'ने', 'लगे', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'है', ',', 'पह', 'ने', 'की', 'यही', 'वह', 'बहुत', 'अच्छी', 'तरह', 'है', '']


Example 3:
this guy is totally mad
['यह', 'क्रिया', 'संभव', 'है', 'काफी', 'सु', 'विध', 'ाजनक', 'बनाना', 'आवश्यक', 'है', '।', '']


Example 4:
what were you saying that day?
['उस', 'दिन', 'कित', 'ने', 'ही', 'दिया', 'गया', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=27, loss=0.233]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'पृष्ठ', 'के', 'स्वी', 'ो', 'थ', 'में', 'काम', 'कार', 'गर', ',', 'हालांकि', 'की', 'बात', 'स', 'ोप', 'ंथ', 'र', 'के', 'अन्य', 'स्रोत', 'है', '।', '']


Example 2:
He is doing very good these days
['वे', 'बहुत', 'अच्छे', 'हैं', 'कि', 'वह', 'दिन', 'लगते', 'हैं', '।', '']


Example 3:
this guy is totally mad
['यह', 'भाषण', 'कुछ', 'समय', 'से', 'ही', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'कित', 'ने', 'उस', 'दिन', 'कित', 'ने', 'ही', 'क्या', 'है', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.70it/s, epoch=28, loss=0.197]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारतीय', 'संवि', 'दा', 'वन', 'के', 'बाद', 'मे', 'मिश्रण', 'गया', ',', 'स', 'ोप', 'न', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छा', 'खेल', 'ता', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'अप', 'रि', 'हार', 'से', 'बना', 'रहा', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'था', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=29, loss=0.194]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'विकासशील', 'न्याय', 'पालिका', 'के', 'बाद', ',', 'पश्चिम', 'बंगाल', 'के', 'राज', 'दूत', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छे', 'हैं', ',', 'बहुत', 'अच्छे', 'दिन', 'लगते', 'हैं', '।', '']


Example 3:
this guy is totally mad
['यह', 'विकल्प', 'बहुत', 'अधिक', 'महत्व', 'है', '।', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'लोग', 'उस', 'दिन', 'कित', 'ने', 'ही', 'क्या', 'है', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=30, loss=0.234]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'श्रेणी', 'ो', ',', 'भारत', 'में', 'और', 'विदेशी', 'यु', 'वती', 'म', 'ताओं', 'के', 'विदेशी', 'मुद्रा', 'के', '00', 'में', 'भाग', 'दिख', 'ती', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'इन', 'दिनों', 'की', 'क्रिया', 'कला', 'प', 'ता', 'ही', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'असफल', 'अत्यधिक', 'विस्तार', 'ित', 'हैं', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.70it/s, epoch=31, loss=0.202]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरी', 'ओर', 'नी', 'वन', 'कार', 'पोरेशन', ',', 'निकला', ',', 'भारत', 'में', 'कार', '्मिक', 'ों', 'का', 'एक', 'दूसरा', 'टूट', 'गया', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'ही', 'ध्यान', 'से', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'विकल्प', 'यह', 'विकल्प', 'द', 'ेंगे', '।', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'उस', 'दिन', 'क्या', 'हो', 'गया', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.66it/s, epoch=32, loss=0.264]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'कलाकार', 'के', 'शाम', 'को', 'भारत', 'के', 'बाद', ',', 'युद्ध', 'के', 'राज', 'ू', 'पी', 'भर', 'ता', 'है', '.', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'बड़े', 'जीवन', 'का', 'प्रतीक', 'है', '']


Example 3:
this guy is totally mad
['यह', 'कार्य', 'अप', 'रि', 'हार', 'मौ', 'ड', 'करने', 'वाली', 'है', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'उस', 'दिन', 'को', 'तु', 'झे', 'क्या', 'चीज़', 'ों', 'से', 'पैदा', 'किया', 'गया', 'था', '']




100%|██████████| 352/352 [00:25<00:00, 13.67it/s, epoch=33, loss=0.246]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'रूप', 'से', 'स्वी', 'धीन', 'म्', 'भा', 'वत', ',', 'उत्तर', 'प्रदेश', 'के', 'बाद', 'प्रदर्शन', 'के', 'उपरांत', 'पाद', 'प', 'रंग', 'का', 'एक', 'पदार्थ', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'ही', 'ध्यान', 'है', '']


Example 3:
this guy is totally mad
['यह', 'विकल्प', 'यह', 'विकल्प', 'ऊर्जा', 'है', '।', '']


Example 4:
what were you saying that day?
['दिन', 'क्या', 'तुम', 'ने', 'देखा', 'था', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=34, loss=0.195] 


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'दूसरी', 'विद्या', 'के', 'बाद', 'दूसरी', 'ओर', 'नी', 'वन', 'के', 'दशक', 'में', 'दूसरा', 'निकला', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'ही', 'सुख', 'है', '']


Example 3:
this guy is totally mad
['यह', 'हमारी', 'निजी', 'रोग', 'है', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'उस', 'दिन', 'क्या', 'और', 'भाई', 'दिन', '?', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=35, loss=0.171] 


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['भारत', 'के', 'बाद', 'एक', 'स', '्र', 'ांस', 'उत्तर', ',', 'भारत', 'स', 'चे', 'तन', 'में', 'स', 'ँ', 'भ', 'वन', 'बनाई', '।', '']


Example 2:
He is doing very good these days
['वह', 'इन', 'दिनों', 'की', 'आशा', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'काम', 'बहुत', 'अच्छे', 'से', 'ही', 'समझ', 'रहा', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=36, loss=0.203]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'श्रेणी', 'में', 'मु', 'म', 'फ्', 'फ़', 'पानी', 'की', 'एक', 'कार', 'गर', 'निकला', '।', '']


Example 2:
He is doing very good these days
['वह', 'उनका', 'स', 'दिन', 'है', 'कि', 'उनका', 'स', 'द्', 'भावना', 'ामी', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'हिंसा', 'है', 'यह', 'कभी', 'स्व', 'ि', 'ग्ध', 'कर', 'उभर', 'रही', 'है', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.69it/s, epoch=37, loss=0.167]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'रस', 'ूल', 'निकला', ',', 'भारत', 'के', 'उपरांत', 'देश', 'में', 'कार', 'गर', 'हुआ', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'उनका', 'स', 'स्वर', 'पाठ', 'कर', 'रहा', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'स', 'ड़े', 'इस', 'मंच', 'में', 'काफी', 'द', 'ार', 'ती', 'है', '।', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:25<00:00, 13.68it/s, epoch=38, loss=0.198]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'चेन्नई', 'र', '्र', 'न', 'ू', 'तन', 'गर', 'में', 'आई', 'डी', 'गई', '।', '']


Example 2:
He is doing very good these days
['वह', 'दिनों', 'बहुत', 'अच्छे', 'से', 'सु', 'विध', 'ाजनक', 'गर', 'नहीं', 'है', '।', '']


Example 3:
this guy is totally mad
['यह', 'सीमा', 'है', 'यह', 'विचार', 'पूर्ण', 'परिवर्तन', 'ला', 'ए', 'है', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:26<00:00, 13.51it/s, epoch=39, loss=0.149]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'अन्य', 'भारतीय', 'लिप', 'ो', 'ऊ', 'त', ',', 'शिकार', 'ो', '-', 'को', 'च्छेद', 'न', ',', 'युद्ध', 'के', 'समान', 'फ', 'ंस', 'ों', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छी', 'बात', 'कर', 'रहा', 'है', '']


Example 3:
this guy is totally mad
['यह', 'प्रश्न', 'से', 'यह', 'विकल्प', 'काफी', 'है', '']


Example 4:
what were you saying that day?
['और', 'तुम', 'को', 'क्या', 'माल', 'ूम', 'हाव', 'िया', 'क्या', 'है', '']




100%|██████████| 352/352 [00:26<00:00, 13.32it/s, epoch=40, loss=0.182] 


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', '्यु', ',', 'पृष्ठ', 'के', 'स्वी', 'त', 'में', 'कार', 'गर', 'सुन', 'कर', 'मिली', '-', 'म', 'िस', 'में', 'तार', 'का', 'एक', 'पदार्थ', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छी', 'तरह', 'से', 'अ', 'ता', 'हुआ', 'है', '']


Example 3:
this guy is totally mad
['यह', 'हमारी', 'मूलभूत', 'कार्यों', 'को', 'स्', 'मे', 'री', 'है', '']


Example 4:
what were you saying that day?
['तुम्', 'ह', 'ें', 'क्या', 'था', 'कि', 'क्या', 'वह', 'दिन', '?', '']




100%|██████████| 352/352 [00:26<00:00, 13.14it/s, epoch=41, loss=0.127]


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'कलाकार', ',', 'अंड', 'ू', 'तन', 'में', ',', 'कार', 'गर', 'ो', 'लाई', 'दिखाई', 'पड़ता', 'है', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'ही', 'ध्यान', 'से', 'था', '।', '']


Example 3:
this guy is totally mad
['यह', 'सीमा', 'यह', 'उल्लेखनीय', 'की', 'आवश्यकता', 'है', '']


Example 4:
what were you saying that day?
['कित', 'ने', 'तुम्', 'ह', 'ें', 'क्या', 'था', '?', '']




100%|██████████| 352/352 [00:26<00:00, 13.26it/s, epoch=42, loss=0.195] 


-> Saving CheckPoint
Example 1:
another plant, the aluminium corporation of india, came into existence after the war.
['एक', 'और', 'श्रेणी', 'की', 'दूसरी', 'लाल', 'म', 'िय', ',', 'ल', 'ूट', 'सैल', 'ू', 'क्स', 'में', 'एक', 'अन्य', 'ख', 'ज', 'ते', 'हैं', '।', '']


Example 2:
He is doing very good these days
['वह', 'बहुत', 'अच्छी', 'चीज', 'ें', 'है', ',', '']


Example 3:
this guy is totally mad
['यह', 'घटक', 'यह', 'विकल्प', 'काफी', 'स्थान', 'है', '']


Example 4:
what were you saying that day?
['व', 'दिन', 'तुम', 'उस', 'दिन', 'कित', 'ने', 'उस', 'दिन', '-', 'क्या', '']




 28%|██▊       | 98/352 [00:07<00:19, 13.33it/s, epoch=43, loss=0.119] 


KeyboardInterrupt: 