In [None]:
# Mount Google Drive into the Colab filesystem; the dataset is read from
# '/content/gdrive/My Drive' further down.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import html
import os
import pickle
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [None]:
# Load the news dataset (headline + short article text) from the mounted Drive.
data=pd.read_excel('/content/gdrive/My Drive/data.xlsx')

In [None]:
# List the column names; several carry a trailing space (e.g. 'Source ').
data.columns

Index(['Headline', 'Short', 'Source ', 'Time ', 'Publish Date'], dtype='object')

In [None]:
# Drop the publisher column (the key includes the trailing space).
# drop(..., errors='ignore') keeps this cell safe to re-run, whereas
# `del` raises KeyError the second time it executes.
data = data.drop(columns=['Source '], errors='ignore')

In [None]:
# Drop the time-of-day column (the key includes the trailing space).
# drop(..., errors='ignore') keeps this cell safe to re-run, whereas
# `del` raises KeyError the second time it executes.
data = data.drop(columns=['Time '], errors='ignore')

In [None]:
# Drop the publication-date column; only headline and article text are used.
# drop(..., errors='ignore') keeps this cell safe to re-run, whereas
# `del` raises KeyError the second time it executes.
data = data.drop(columns=['Publish Date'], errors='ignore')

In [None]:
# `data` is already a DataFrame (it comes from pd.read_excel), so this only
# wraps it in a second DataFrame object with the same rows and columns.
data_=pd.DataFrame(data)

In [None]:
# Rebind `data` to the `data_` wrapper.
data=data_

In [None]:
# Preview the first rows of the remaining columns.
data.head()

Unnamed: 0,Headline,Short
0,4 ex-bank officials booked for cheating bank o...,The CBI on Saturday booked four former officia...
1,Supreme Court to go paperless in 6 months: CJI,Chief Justice JS Khehar has said the Supreme C...
2,"At least 3 killed, 30 injured in blast in Sylh...","At least three people were killed, including a..."
3,Why has Reliance been barred from trading in f...,Mukesh Ambani-led Reliance Industries (RIL) wa...
4,Was stopped from entering my own studio at Tim...,TV news anchor Arnab Goswami has said he was t...


In [None]:
# (rows, columns) of the trimmed frame.
data.shape

(55104, 2)

In [None]:
# Headlines are the summarisation targets; the 'Short' article text is the model input.
summary=data['Headline']
document=data['Short']

In [None]:
# Inspect one raw article; note that the text can contain HTML entities such as &#34;.
document[75]

'IPS officer Himanshu Kumar was suspended on Saturday after he tweeted that seniors in the police department under the Adityanath government are targeting subordinates with Yadav surname. However, he later claimed that his comments were misinterpreted. After the UP police suspended Himanshu for indiscipline, he tweeted, &#34;Truth alone triumphs.&#34;'

In [None]:
# The headline paired with the article shown above.
summary[75]

'IPS officer suspended for tweeting against Yogi govt in UP'

In [None]:
# Containers for the cleaned, <start>/<end>-wrapped texts.
document_processed=[]
summary_processed=[]

In [None]:
def _clean_text(text):
  """Return `text` with punctuation collapsed to spaces, wrapped in <start>/<end>.

  HTML character references (e.g. ``&#34;`` for a double quote) are decoded
  first. Without that step, stripping non-word characters leaves their numeric
  codes behind as bogus vocabulary entries such as ``34`` and ``39``.
  """
  decoded = html.unescape(text)
  # Raw string: '\W' inside a plain literal is an invalid escape sequence.
  collapsed = re.sub(r'\W+', ' ', decoded)
  return '<start> ' + collapsed + ' <end>'

# Rebuild both lists from scratch so that re-running this cell does not
# append a second copy of every example.
document_processed = [_clean_text(text) for text in document]
summary_processed = [_clean_text(text) for text in summary]

In [None]:
# Raw article text, for comparison with its processed form.
document[1]

'Chief Justice JS Khehar has said the Supreme Court will go paperless in six to seven months in a bid to save funds and make the judiciary eco-friendly. He further said the apex court will collect all the records electronically from the lower courts and the high courts so that there is no need to file hard copies.'

In [None]:
# Same article after cleaning: punctuation removed and <start>/<end> markers added.
document_processed[1]

'<start> Chief Justice JS Khehar has said the Supreme Court will go paperless in six to seven months in a bid to save funds and make the judiciary eco friendly He further said the apex court will collect all the records electronically from the lower courts and the high courts so that there is no need to file hard copies  <end>'

In [None]:
# filters='' disables Keras' default punctuation filter so the '<start>' and
# '<end>' markers survive as single tokens.
document_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
document_tokenizer.fit_on_texts(document_processed)

# Map every article to a list of word ids, then right-pad to a common length.
document_tensor = tf.keras.preprocessing.sequence.pad_sequences(
    document_tokenizer.texts_to_sequences(document_processed),
    padding='post')

In [None]:
# filters='' disables Keras' default punctuation filter so the '<start>' and
# '<end>' markers survive as single tokens.
summary_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
summary_tokenizer.fit_on_texts(summary_processed)

# Map every headline to a list of word ids, then right-pad to a common length.
summary_tensor = tf.keras.preprocessing.sequence.pad_sequences(
    summary_tokenizer.texts_to_sequences(summary_processed),
    padding='post')

In [None]:
# Hold out 20% of the (document, summary) pairs for validation.
# A fixed random_state makes the split, and every number derived from it,
# reproducible across kernel restarts.
document_tensor_train, document_tensor_val, summary_tensor_train, summary_tensor_val = train_test_split(
    document_tensor, summary_tensor, test_size=0.2, random_state=42)

# Sanity check: inputs and targets must have matching lengths within each split.
print(len(document_tensor_train), len(summary_tensor_train), len(document_tensor_val), len(summary_tensor_val))

44083 44083 11021 11021


In [None]:
def get_angles(pos, i, d_model):
  """Return the positional-encoding angles ``pos / 10000**(2*(i//2)/d_model)``.

  Args:
    pos: position index or array of indices.
    i: embedding-dimension index or array of indices; dimensions 2k and 2k+1
      share the same rate because of the ``i // 2``.
    d_model: embedding width used to scale the exponent.

  Returns:
    ``pos`` multiplied by the per-dimension rate (NumPy broadcasting applies).
  """
  pair_index = i // 2
  exponent = (2 * pair_index) / np.float32(d_model)
  return pos * (1 / np.power(10000, exponent))

In [None]:
def positional_encoding(position, d_model):
  """Build the sinusoidal positional-encoding table.

  Args:
    position: number of positions (rows) to generate.
    d_model: embedding width (columns).

  Returns:
    float32 tensor of shape (1, position, d_model): sine of the angle in
    even columns, cosine of the angle in odd columns.
  """
  positions = np.arange(position)[:, np.newaxis]
  dims = np.arange(d_model)[np.newaxis, :]
  angles = get_angles(positions, dims, d_model)

  # Even column index -> sin, odd column index -> cos.
  table = np.where(dims % 2 == 0, np.sin(angles), np.cos(angles))

  # Leading axis of size 1 so the table broadcasts over a batch.
  return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

In [None]:
# Visualise the encoding table for 50 positions and a 512-wide embedding.
pos_encoding = positional_encoding(50, 512)
print(pos_encoding.shape)

fig, ax = plt.subplots()
mesh = ax.pcolormesh(pos_encoding[0], cmap='RdBu')
ax.set_xlabel('Depth')
ax.set_xlim((0, 512))
ax.set_ylabel('Position')
fig.colorbar(mesh, ax=ax)
plt.show()

In [None]:
def create_padding_mask(seq):
  """Return a float mask that is 1.0 wherever `seq` equals 0 (padding).

  Two singleton axes are inserted after the first axis so the mask can be
  broadcast against attention logits; for a (batch_size, seq_len) input the
  result is (batch_size, 1, 1, seq_len).
  """
  is_padding = tf.math.equal(seq, 0)
  mask = tf.cast(is_padding, tf.float32)
  return mask[:, tf.newaxis, tf.newaxis, :]

In [None]:
def create_look_ahead_mask(size):
  """Return a (size, size) mask with 1 strictly above the diagonal, 0 elsewhere."""
  # band_part(..., -1, 0) keeps the lower triangle including the diagonal.
  lower_triangle = tf.linalg.band_part(tf.ones((size, size)), -1, 0)
  return 1 - lower_triangle

In [None]:
def convert(lang, tensor):
  """Print one ``id ----> word`` line for every non-zero id in `tensor`.

  Args:
    lang: object exposing an ``index_word`` mapping from id to word.
    tensor: iterable of integer ids; zeros are skipped.
  """
  nonzero_ids = (token_id for token_id in tensor if token_id != 0)
  for token_id in nonzero_ids:
    print("%d ----> %s" % (token_id, lang.index_word[token_id]))

In [None]:
# Show the id -> word mapping of the first training pair. Each listing gets
# its own header so the document and summary sections can be told apart.
print ("Input Language; index to word mapping")
convert(document_tokenizer, document_tensor_train[0])
print ()
print ("Target Language; index to word mapping")
convert(summary_tokenizer, summary_tensor_train[0])

Input Language; index to word mapping
8 ----> <start>
166 ----> following
134 ----> bjp
3 ----> 39
12 ----> s
947 ----> victory
4 ----> in
1 ----> the
386 ----> uttar
179 ----> pradesh
318 ----> assembly
345 ----> elections
107 ----> party
569 ----> member
2360 ----> subramanian
1768 ----> swamy
17 ----> said
15 ----> that
28 ----> he
21 ----> will
392 ----> move
1 ----> the
354 ----> supreme
83 ----> court
10 ----> on
279 ----> march
405 ----> 21
13 ----> for
26 ----> an
1024 ----> early
1468 ----> hearing
5 ----> of
1 ----> the
712 ----> ram
8157 ----> mandir
219 ----> case
11 ----> 34
65 ----> i
4384 ----> expect
190 ----> now
66 ----> up
8596 ----> govt
21 ----> will
44 ----> not
6608 ----> oppose
443 ----> me
11 ----> 34
28 ----> he
171 ----> tweeted
1768 ----> swamy
36 ----> had
95 ----> earlier
17 ----> said
11 ----> 34
154 ----> there
16 ----> is
132 ----> only
68 ----> one
367 ----> place
5 ----> of
1269 ----> birth
5 ----> of
2328 ----> lord
712 ----> ram
6217 ----> ayodhya
1

array([   1,  810, 5393,   10,    4,  117,   33, 2734,    2,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0],
      dtype=int32)

In [None]:
# Training hyper-parameters.
BATCH_SIZE = 64
BUFFER_SIZE = len(document_tensor_train)  # shuffle over the whole training set
steps_per_epoch = len(document_tensor_train) // BATCH_SIZE
embedding_dim = 256
units = 1024

# +1 accounts for id 0, which pad_sequences uses as the padding value.
vocab_inp_size = 1 + len(document_tokenizer.word_index)
vocab_tar_size = 1 + len(summary_tokenizer.word_index)

# Shuffled, fixed-size batches; the last partial batch is dropped.
dataset = (
    tf.data.Dataset
    .from_tensor_slices((document_tensor_train, summary_tensor_train))
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

In [None]:
# Number of distinct document tokens, plus one for the padding id.
vocab_inp_size

68462

In [None]:
# Number of distinct summary tokens, plus one for the padding id.
vocab_tar_size

28309

In [None]:
# Pull one batch to use as a fixture for the shape checks in later cells.
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 87]), TensorShape([64, 21]))

In [None]:
class Encoder(tf.keras.Model):
  """Embedding followed by a single GRU that encodes a batch of id sequences."""

  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    """Create the layers.

    Args:
      vocab_size: size of the input vocabulary (embedding rows).
      embedding_dim: width of each token embedding.
      enc_units: number of GRU units.
      batch_sz: batch size used when building the initial hidden state.
    """
    super().__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(
        self.enc_units,
        return_sequences=True,
        return_state=True,
        recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    """Encode `x` starting from `hidden`; return (per-step outputs, final state)."""
    embedded = self.embedding(x)
    sequence_output, final_state = self.gru(embedded, initial_state=hidden)
    return sequence_output, final_state

  def initialize_hidden_state(self):
    """Return an all-zero state of shape (batch_sz, enc_units)."""
    state_shape = (self.batch_sz, self.enc_units)
    return tf.zeros(state_shape)

In [None]:
# Build the encoder and run it once on the example batch to confirm the
# shapes of its per-step outputs and final state.
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE)

# sample input
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

Encoder output shape: (batch size, sequence length, units) (64, 87, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)


In [None]:
class BahdanauAttention(tf.keras.layers.Layer):
  """Additive attention: score = V(tanh(W1(query) + W2(values)))."""

  def __init__(self, units):
    """Create the projections; `units` is the width of W1 and W2."""
    super().__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, query, values):
    """Attend over `values` using `query`.

    Args:
      query: decoder hidden state, (batch_size, hidden_size).
      values: encoder outputs, (batch_size, max_len, hidden_size).

    Returns:
      (context_vector, attention_weights): the weighted sum of `values` over
      the time axis, (batch_size, hidden_size), and the softmax weights,
      (batch_size, max_len, 1).
    """
    # Insert a time axis so the query broadcasts against every encoder step.
    expanded_query = tf.expand_dims(query, 1)

    # (batch_size, max_len, units) before V reduces the last axis to 1.
    projected = self.W1(expanded_query) + self.W2(values)
    score = self.V(tf.nn.tanh(projected))

    # Normalise over the time axis.
    attention_weights = tf.nn.softmax(score, axis=1)

    context_vector = tf.reduce_sum(attention_weights * values, axis=1)
    return context_vector, attention_weights

In [None]:
# Shape check only: this throwaway layer uses 10 attention units, whereas the
# layer created inside Decoder is sized by dec_units.
attention_layer = BahdanauAttention(10)
attention_result, attention_weights = attention_layer(sample_hidden, sample_output)

print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Attention result shape: (batch size, units) (64, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (64, 87, 1)


In [None]:
class Decoder(tf.keras.Model):
  """One-step attention decoder: embedding + Bahdanau attention + GRU + vocab projection."""

  def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    """Create the layers.

    Args:
      vocab_size: size of the target vocabulary (embedding rows and logits).
      embedding_dim: width of each token embedding.
      dec_units: number of GRU units; also the attention projection width.
      batch_sz: batch size (stored only).
    """
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru = tf.keras.layers.GRU(self.dec_units,
                                   return_sequences=True,
                                   return_state=True,
                                   recurrent_initializer='glorot_uniform')
    self.fc = tf.keras.layers.Dense(vocab_size)

    # used for attention
    self.attention = BahdanauAttention(self.dec_units)

  def call(self, x, hidden, enc_output):
    """Run one decoding step.

    Args:
      x: ids of the previous target token, (batch_size, 1).
      hidden: previous decoder state, (batch_size, dec_units).
      enc_output: encoder outputs, (batch_size, max_length, hidden_size).

    Returns:
      (logits, state, attention_weights): logits of shape
      (batch_size, vocab), the new GRU state, and the attention weights
      over the encoder steps.
    """
    context_vector, attention_weights = self.attention(hidden, enc_output)

    # x shape after passing through embedding == (batch_size, 1, embedding_dim)
    x = self.embedding(x)

    # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)
    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    # Carry the previous decoder state into the GRU. Without initial_state the
    # GRU restarts from zeros on every step and `hidden` only influences the
    # attention query, so no recurrent information flows between steps.
    output, state = self.gru(x, initial_state=hidden)

    # output shape == (batch_size * 1, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # output shape == (batch_size, vocab)
    x = self.fc(output)

    return x, state, attention_weights

In [None]:
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE)

# Shape check with a dummy (BATCH_SIZE, 1) input: the values are random floats
# rather than real token ids, so only the output shape is meaningful here.
sample_decoder_output, _, _ = decoder(tf.random.uniform((BATCH_SIZE, 1)),
                                      sample_hidden, sample_output)

print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

Decoder output shape: (batch_size, vocab size) (64, 28309)


In [None]:
optimizer = tf.keras.optimizers.Adam()

# reduction='none' keeps one loss value per example so that padded targets
# can be zeroed out before averaging.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  """Cross-entropy between `real` ids and `pred` logits, ignoring padding.

  Entries where `real` is 0 contribute a loss of zero; the mean is still
  taken over every entry, padded or not.
  """
  per_example = loss_object(real, pred)

  not_padding = tf.math.not_equal(real, 0)
  per_example *= tf.cast(not_padding, dtype=per_example.dtype)

  return tf.reduce_mean(per_example)

In [None]:
# A checkpoint bundles the optimizer state with both models' weights.
# NOTE(review): this relative path is not under the Drive mount
# ('/content/gdrive'), so saved checkpoints presumably do not outlive the
# Colab runtime — confirm before relying on them.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                 encoder=encoder,
                                 decoder=decoder)

In [None]:
@tf.function
def train_step(inp, targ, enc_hidden):
  """Run one teacher-forced optimisation step on a batch.

  Args:
    inp: batch of document id sequences.
    targ: batch of summary id sequences; column 0 is the <start> token.
    enc_hidden: initial encoder hidden state.

  Returns:
    The summed per-step loss divided by the target sequence length.
  """
  start_id = summary_tokenizer.word_index['<start>']
  num_steps = targ.shape[1]
  loss = 0

  with tf.GradientTape() as tape:
    enc_output, enc_hidden = encoder(inp, enc_hidden)

    # The decoder starts from the encoder's final state and a <start> token.
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([start_id] * BATCH_SIZE, 1)

    for t in range(1, num_steps):
      predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
      loss += loss_function(targ[:, t], predictions)

      # Teacher forcing: the ground-truth token becomes the next input.
      dec_input = tf.expand_dims(targ[:, t], 1)

  variables = encoder.trainable_variables + decoder.trainable_variables
  gradients = tape.gradient(loss, variables)
  optimizer.apply_gradients(zip(gradients, variables))

  return loss / int(num_steps)

In [None]:
EPOCHS = 2

for epoch in tqdm(range(EPOCHS)):
  start = time.time()

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  # Wrap the dataset itself and pass total=: tqdm cannot infer a length from
  # enumerate(...), which left the bar without a percentage or ETA.
  batches = tqdm(dataset.take(steps_per_epoch), total=steps_per_epoch)
  for (batch, (inp, targ)) in enumerate(batches):
    batch_loss = train_step(inp, targ, enc_hidden)
    total_loss += batch_loss

    if batch % 100 == 0:
      # tqdm.write prints the message without breaking the active bars.
      tqdm.write('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                        batch,
                                                        batch_loss.numpy()))
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  tqdm.write('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                           total_loss / steps_per_epoch))
  tqdm.write('Time taken for 1 epoch {} sec\n'.format(time.time() - start))



  0%|          | 0/2 [00:00<?, ?it/s][A[A


0it [00:00, ?it/s][A[A[A


1it [01:01, 61.33s/it][A[A[A

Epoch 1 Batch 0 Loss 4.8891





2it [01:30, 51.76s/it][A[A[A


3it [02:00, 45.22s/it][A[A[A


4it [02:30, 40.58s/it][A[A[A


5it [03:00, 37.33s/it][A[A[A


6it [03:30, 35.11s/it][A[A[A


7it [03:59, 33.48s/it][A[A[A


8it [04:29, 32.39s/it][A[A[A


9it [04:59, 31.68s/it][A[A[A


10it [05:29, 31.07s/it][A[A[A


11it [05:59, 30.78s/it][A[A[A


12it [06:30, 30.93s/it][A[A[A


13it [07:02, 31.17s/it][A[A[A


14it [07:33, 31.13s/it][A[A[A


15it [08:04, 31.08s/it][A[A[A


16it [08:35, 31.04s/it][A[A[A


17it [09:06, 30.93s/it][A[A[A


18it [09:36, 30.75s/it][A[A[A


19it [10:09, 31.58s/it][A[A[A


20it [10:41, 31.47s/it][A[A[A


21it [11:11, 31.17s/it][A[A[A


22it [11:43, 31.26s/it][A[A[A


23it [12:14, 31.17s/it][A[A[A


24it [12:45, 31.22s/it][A[A[A


25it [13:15, 30.85s/it][A[A[A


26it [13:45, 30.71s/it][A[A[A


27it [14:15, 30.54s/it][A[A[A


28it [14:46, 30.42s/it][A[A[A


29it [15:15, 30.20s/it][A[A[A


30it [15:45, 30.12s/it][

Epoch 1 Batch 100 Loss 3.5252





102it [48:04, 26.10s/it][A[A[A


103it [48:31, 26.24s/it][A[A[A


104it [48:58, 26.56s/it][A[A[A


105it [49:24, 26.47s/it][A[A[A


106it [49:50, 26.27s/it][A[A[A


107it [50:16, 26.21s/it][A[A[A


108it [50:43, 26.54s/it][A[A[A


109it [51:10, 26.72s/it][A[A[A


110it [51:36, 26.44s/it][A[A[A


111it [52:02, 26.26s/it][A[A[A


112it [52:28, 26.05s/it][A[A[A


113it [52:53, 25.94s/it][A[A[A


114it [53:19, 25.94s/it][A[A[A


115it [53:45, 25.94s/it][A[A[A


116it [54:12, 26.12s/it][A[A[A


117it [54:38, 26.07s/it][A[A[A


118it [55:03, 25.93s/it][A[A[A


119it [55:30, 26.03s/it][A[A[A


120it [55:56, 26.16s/it][A[A[A


121it [56:22, 26.09s/it][A[A[A


122it [56:48, 26.01s/it][A[A[A


123it [57:13, 25.85s/it][A[A[A


124it [57:39, 25.71s/it][A[A[A


125it [58:04, 25.74s/it][A[A[A


126it [58:30, 25.60s/it][A[A[A


127it [58:56, 25.68s/it][A[A[A


128it [59:21, 25.62s/it][A[A[A


129it [59:47, 25.60s/it]

Epoch 1 Batch 200 Loss 3.5404





202it [1:35:22, 25.90s/it][A[A[A


203it [1:35:48, 25.80s/it][A[A[A


204it [1:36:14, 25.99s/it][A[A[A


205it [1:36:42, 26.68s/it][A[A[A


206it [1:37:13, 27.89s/it][A[A[A


207it [1:37:43, 28.55s/it][A[A[A


208it [1:38:13, 29.04s/it][A[A[A


209it [1:38:43, 29.38s/it][A[A[A


210it [1:39:14, 29.62s/it][A[A[A


211it [1:39:44, 29.90s/it][A[A[A


212it [1:40:14, 29.89s/it][A[A[A


213it [1:40:43, 29.73s/it][A[A[A


214it [1:41:11, 29.20s/it][A[A[A


215it [1:41:43, 30.09s/it][A[A[A


216it [1:42:14, 30.20s/it][A[A[A


217it [1:42:44, 30.19s/it][A[A[A


218it [1:43:14, 30.09s/it][A[A[A


219it [1:43:44, 30.13s/it][A[A[A


220it [1:44:15, 30.23s/it][A[A[A


221it [1:44:45, 30.18s/it][A[A[A


222it [1:45:14, 29.92s/it][A[A[A


223it [1:45:40, 28.70s/it][A[A[A


224it [1:46:06, 27.95s/it][A[A[A


225it [1:46:32, 27.32s/it][A[A[A


226it [1:46:58, 26.83s/it][A[A[A


227it [1:47:24, 26.64s/it][A[A[A


228it [1:

Epoch 1 Batch 300 Loss 3.3963





302it [2:20:10, 26.51s/it][A[A[A


303it [2:20:36, 26.37s/it][A[A[A


304it [2:21:01, 26.19s/it][A[A[A


305it [2:21:27, 26.09s/it][A[A[A


306it [2:21:53, 25.98s/it][A[A[A


307it [2:22:21, 26.59s/it][A[A[A


308it [2:22:47, 26.38s/it][A[A[A


309it [2:23:13, 26.20s/it][A[A[A


310it [2:23:38, 26.05s/it][A[A[A


311it [2:24:05, 26.20s/it][A[A[A


312it [2:24:32, 26.48s/it][A[A[A


313it [2:24:59, 26.62s/it][A[A[A


314it [2:25:26, 26.71s/it][A[A[A


315it [2:25:52, 26.41s/it][A[A[A


316it [2:26:18, 26.28s/it][A[A[A


317it [2:26:45, 26.47s/it][A[A[A


318it [2:27:11, 26.34s/it][A[A[A


319it [2:27:36, 26.17s/it][A[A[A


320it [2:28:02, 25.97s/it][A[A[A


321it [2:28:28, 25.93s/it][A[A[A


322it [2:28:54, 26.04s/it][A[A[A


323it [2:29:20, 25.93s/it][A[A[A


324it [2:29:46, 26.17s/it][A[A[A


325it [2:30:13, 26.29s/it][A[A[A


326it [2:30:39, 26.29s/it][A[A[A


327it [2:31:06, 26.46s/it][A[A[A


328it [2:

Epoch 1 Batch 400 Loss 3.4423





402it [3:03:43, 26.16s/it][A[A[A


403it [3:04:09, 25.96s/it][A[A[A


404it [3:04:34, 25.85s/it][A[A[A


405it [3:05:00, 25.72s/it][A[A[A


406it [3:05:25, 25.59s/it][A[A[A


407it [3:05:51, 25.77s/it][A[A[A


408it [3:06:18, 25.93s/it][A[A[A


409it [3:06:44, 26.20s/it][A[A[A


410it [3:07:11, 26.22s/it][A[A[A


411it [3:07:37, 26.24s/it][A[A[A


412it [3:08:03, 26.28s/it][A[A[A


413it [3:08:29, 26.23s/it][A[A[A


414it [3:08:56, 26.26s/it][A[A[A


415it [3:09:22, 26.25s/it][A[A[A


416it [3:09:48, 26.26s/it][A[A[A


417it [3:10:15, 26.27s/it][A[A[A


418it [3:10:41, 26.36s/it][A[A[A


419it [3:11:07, 26.28s/it][A[A[A


420it [3:11:33, 26.19s/it][A[A[A


421it [3:12:00, 26.29s/it][A[A[A


422it [3:12:26, 26.42s/it][A[A[A


423it [3:12:53, 26.46s/it][A[A[A


424it [3:13:23, 27.41s/it][A[A[A


425it [3:13:49, 27.03s/it][A[A[A


426it [3:14:15, 26.85s/it][A[A[A


427it [3:14:41, 26.56s/it][A[A[A


428it [3:

Epoch 1 Batch 500 Loss 3.2922





502it [3:47:08, 26.31s/it][A[A[A


503it [3:47:35, 26.36s/it][A[A[A


504it [3:48:01, 26.36s/it][A[A[A


505it [3:48:28, 26.49s/it][A[A[A


506it [3:48:55, 26.62s/it][A[A[A


507it [3:49:22, 26.77s/it][A[A[A


508it [3:49:48, 26.63s/it][A[A[A


509it [3:50:15, 26.64s/it][A[A[A


510it [3:50:42, 26.66s/it][A[A[A


511it [3:51:08, 26.51s/it][A[A[A


512it [3:51:34, 26.46s/it][A[A[A


513it [3:52:00, 26.38s/it][A[A[A


514it [3:52:27, 26.37s/it][A[A[A


515it [3:52:53, 26.32s/it][A[A[A


516it [3:53:19, 26.34s/it][A[A[A


517it [3:53:47, 26.83s/it][A[A[A


518it [3:54:14, 26.80s/it][A[A[A


519it [3:54:40, 26.55s/it][A[A[A


520it [3:55:06, 26.41s/it][A[A[A


521it [3:55:32, 26.28s/it][A[A[A


522it [3:55:58, 26.26s/it][A[A[A


523it [3:56:25, 26.34s/it][A[A[A


524it [3:56:51, 26.27s/it][A[A[A


525it [3:57:17, 26.21s/it][A[A[A


526it [3:57:43, 26.26s/it][A[A[A


527it [3:58:09, 26.22s/it][A[A[A


528it [3:

Epoch 1 Batch 600 Loss 3.3713





602it [4:30:58, 25.85s/it][A[A[A


603it [4:31:23, 25.80s/it][A[A[A


604it [4:31:49, 25.75s/it][A[A[A


605it [4:32:15, 25.77s/it][A[A[A


606it [4:32:40, 25.72s/it][A[A[A


607it [4:33:06, 25.69s/it][A[A[A


608it [4:33:32, 25.72s/it][A[A[A


609it [4:33:57, 25.71s/it][A[A[A


610it [4:34:25, 26.17s/it][A[A[A


611it [4:34:51, 26.19s/it][A[A[A


612it [4:35:17, 26.04s/it][A[A[A


613it [4:35:43, 26.05s/it][A[A[A


614it [4:36:09, 26.26s/it][A[A[A


615it [4:36:36, 26.26s/it][A[A[A


616it [4:37:02, 26.12s/it][A[A[A


617it [4:37:28, 26.12s/it][A[A[A


618it [4:37:53, 26.04s/it][A[A[A


619it [4:38:20, 26.05s/it][A[A[A


620it [4:38:45, 25.84s/it][A[A[A


621it [4:39:11, 25.85s/it][A[A[A


622it [4:39:36, 25.77s/it][A[A[A


623it [4:40:02, 25.75s/it][A[A[A


624it [4:40:28, 25.68s/it][A[A[A


625it [4:40:53, 25.69s/it][A[A[A


626it [4:41:19, 25.74s/it][A[A[A


627it [4:41:45, 25.75s/it][A[A[A


628it [4:

Epoch 1 Loss 3.4962
Time taken for 1 epoch 18504.95499444008 sec






1it [00:26, 26.50s/it][A[A[A

Epoch 2 Batch 0 Loss 3.1952





2it [00:52, 26.40s/it][A[A[A


3it [01:18, 26.35s/it][A[A[A


4it [01:44, 26.26s/it][A[A[A


5it [02:11, 26.36s/it][A[A[A


6it [02:37, 26.31s/it][A[A[A


7it [03:04, 26.35s/it][A[A[A


8it [03:30, 26.35s/it][A[A[A


9it [03:57, 26.41s/it][A[A[A


10it [04:23, 26.44s/it][A[A[A


11it [04:49, 26.42s/it][A[A[A


12it [05:16, 26.40s/it][A[A[A


13it [05:42, 26.36s/it][A[A[A


14it [06:08, 26.35s/it][A[A[A


15it [06:35, 26.30s/it][A[A[A


16it [07:03, 26.99s/it][A[A[A


17it [07:29, 26.67s/it][A[A[A


18it [07:56, 26.62s/it][A[A[A


19it [08:22, 26.57s/it][A[A[A


20it [08:48, 26.42s/it][A[A[A


21it [09:14, 26.32s/it][A[A[A


22it [09:41, 26.45s/it][A[A[A


23it [10:07, 26.44s/it][A[A[A


24it [10:34, 26.60s/it][A[A[A


25it [11:01, 26.48s/it][A[A[A


26it [11:27, 26.42s/it][A[A[A


27it [11:53, 26.43s/it][A[A[A


28it [12:20, 26.38s/it][A[A[A


29it [12:46, 26.39s/it][A[A[A


30it [13:12, 26.30s/it][

Epoch 2 Batch 100 Loss 3.2250





102it [44:27, 25.74s/it][A[A[A


103it [44:53, 25.63s/it][A[A[A


104it [45:18, 25.57s/it][A[A[A


105it [45:44, 25.75s/it][A[A[A


106it [46:11, 25.96s/it][A[A[A


107it [46:37, 26.13s/it][A[A[A


108it [47:03, 26.03s/it][A[A[A


109it [47:31, 26.66s/it][A[A[A