In [1]:
import os

# Set ahead of the torch import (same ordering as before).
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import torch
import logging

logging.basicConfig(level=logging.ERROR)

# Choose the compute device once; later cells move tensors onto `device`.
gpu_available = torch.cuda.is_available()
device = torch.device("cuda" if gpu_available else "cpu")

if not gpu_available:
    print('No GPU available, using the CPU instead.')
else:
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


In [2]:
!pip install transformers



In [3]:
from transformers import AutoTokenizer,AutoModel

# Load the tokenizer that belongs to the 'google/flan-t5-base' checkpoint
# (this is a FLAN-T5 tokenizer, not a BERT one).
# NOTE(review): `do_lower_case` is an option of BERT-style tokenizers; it presumably
# has no effect on this tokenizer — confirm and drop it if so.
print('Loading FLANT5 tokenizer...')
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-base', do_lower_case=True)

Loading FLANT5 tokenizer...


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [4]:
import json

# Load the training claims. The records shown below are dicts with (at least)
# 'claim', 'label', 'doc' and 'retrieved_evidence' keys.
# JSON is UTF-8 by specification; passing the encoding explicitly keeps the read
# independent of the platform's default locale.
with open("/kaggle/input/training/train_claims_quantemp_bm25.json", encoding="utf-8") as f:
    train_data = json.load(f)
len(train_data)

9935

In [5]:
train_data[-1]

{'claim': 'FAKE:  Commandos from &#8220;Berkut&#8221; who refused to kneel have been burned alive in Lviv',
 'label': 'False',
 'doc': "head injuries are rising dramatically--about 1.7 million people have a tbi each year. millions of americans are alive today who have had a head injury and now ...these settlements underwent periodical acts of destruction and re-creation, as they were burned and then rebuilt every 6080 years. ... have been consistently ...the corral fire was a wildfire that burned from november 24 until november 27, 2007 in the malibu creek state park. the fire, which burned 4,901 acres (20 ...the only animals skinned alive are skinned alive by peta, greenpeace and ... fact check: was this 7-month-old white fox skinned alive for lady gaga's fur ...the american dream is alive and well in this memoir of a muslim immigrant from india who arrived planning to start a business, working so hard toward his ...",
 'retrieved_evidence': ['head injuries are rising dramatically--ab

In [6]:
import json

# Load the validation claims (same JSON layout as the training file).
# JSON is UTF-8 by specification; passing the encoding explicitly keeps the read
# independent of the platform's default locale.
with open("/kaggle/input/validation/val_claims_quantemp_bm25.json", encoding="utf-8") as f:
    val_data = json.load(f)
len(val_data)

3084

In [7]:
from sklearn.preprocessing import LabelEncoder
LE = LabelEncoder()

In [8]:
def get_features(data):
    """Build one model-input string per fact.

    Each string has the form
    ``"[Claim]:<claim>[Evidences]:<evidence 1>.<evidence 2>..."``: the claim text
    followed by the retrieved evidence snippets joined with a single ``"."``.

    Args:
        data: Iterable of dicts, each with a ``"claim"`` string and a
            ``"retrieved_evidence"`` list of strings.

    Returns:
        A list of strings, one per input fact, in the same order as ``data``.

    Raises:
        KeyError: If a fact lacks the ``"claim"`` or ``"retrieved_evidence"`` key.
    """
    return [
        "[Claim]:" + fact["claim"] + "[Evidences]:" + ".".join(fact["retrieved_evidence"])
        for fact in data
    ]




In [9]:
train_features = get_features(train_data)

In [10]:
len(train_features)

9935

In [11]:
train_features[0]

"[Claim]:In her budget speech, Nirmala Sitharaman claimed that the Government distributed 35,000 crore LED bulbs in the country.[Evidences]:5 gen 2022  the power ministry has distributed 36.78 crore led lights under the ujala programme in seven years, which saved 47778 million units of ....5 gen 2023  state-owned energy efficiency services ltd (eesl) on thursday said it has distributed over 36.8 crore led bulbs under the government's unnat ....5 gen 2022  ujala completes 7 years of energy-efficient and affordable led distribution. 36.78 crore leds distributed across the country under ujala ujala ....5 gen 2022  with ujala, the cost of led bulbs has come down by 85 per cent. this ... for distribution of led bulbs under the ujala programme, it stated..nov 22, 2013  from incandescent bulbs to fluorescents to leds, we're exploring the long history of the light bulb."

In [12]:
val_features = get_features(val_data)

In [13]:
len(val_features)

3084

In [14]:
val_features[2]

'[Claim]:Says Dino Rossi "stripped" health care "from 45,000 children."[Evidences]:... seniors can be charged five times what anybody else is charged for health care for pre-existing conditions" under the republican health care bill. the ....state and local nondiscrimination laws prohibit health care discrimination against transgender people in many circumstances. ... health care to non-binary people ....spent on these beneficiaries, on average, according to medpac. this overall health care spending trends: higher health care price and cost.south carolina and across the country is health care costs themselves. single-payer health care system. kucinich: i actually introduced the.services provided at the primary health care level, are in fact, a shifting ... ayushman bharat: comprehensive primary health care through health and wellness ...96 pages'

In [15]:
# Gold label strings, in the same order as the records they come from.
train_labels = [record["label"] for record in train_data]
val_labels = [record["label"] for record in val_data]


In [16]:
train_labels_final = LE.fit_transform(train_labels)
train_labels_final

array([1, 1, 1, ..., 0, 0, 1])

In [17]:
train_labels_final[:20]

array([1, 1, 1, 1, 1, 0, 1, 2, 1, 1, 1, 1, 1, 0, 1, 2, 2, 1, 1, 0])

In [18]:
val_labels_final = LE.transform(val_labels)
val_labels_final

array([1, 1, 2, ..., 1, 0, 2])

In [19]:
val_data[-1]

{'claim': 'The Biden administration "published a study concluding 4 (of) 5 new cars on the road by 2050 will still require liquid fuels."',
 'label': 'True',
 'doc': "5 ott 2020  on my second visit to the site of the former huanan seafood wholesale market, at the intersection of new china road and development road, ...5 avr. 2012  africa's urban population will increase from 414 million to over 1.2 billion by 2050 ... india and china. also, the 2011 revision, for the first ...5 apr 2023  the state, heavily dependent on the auto industry, is a case study in whether electric vehicles will create or destroy jobs.5 apr 2021  a new space jam movie is coming, but the original still remains a classic. the promotional site for space jam still exists, and has remained ...5 set 2023  new jersey will raise its gasoline tax by about a penny per gallon next month, gov. phil murphy's administration said friday.",
 'retrieved_evidence': ['5 ott 2020  on my second visit to the site of the former huana

In [20]:
len(val_labels_final)

3084

In [21]:
# Tokenize every training example in one batched call. The tokenizer will:
#   (1) split each text into sub-word tokens and map them to vocabulary IDs,
#   (2) add the special tokens this tokenizer uses,
#   (3) truncate or pad every sequence to exactly `max_length` tokens,
#   (4) build attention masks that tell real tokens apart from padding.
# `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
encoded_train = tokenizer(
    train_features,                # List of texts to encode.
    add_special_tokens=True,
    max_length=256,                # Pad & truncate all sequences to this length.
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt',           # One (num_examples, max_length) tensor per field.
)

input_ids = encoded_train['input_ids']
attention_masks = encoded_train['attention_mask']

# Print example 0, now as a tensor of IDs.
print('Original: ', train_features[0])
print('Token IDs:', input_ids[0])



Original:  [Claim]:In her budget speech, Nirmala Sitharaman claimed that the Government distributed 35,000 crore LED bulbs in the country.[Evidences]:5 gen 2022  the power ministry has distributed 36.78 crore led lights under the ujala programme in seven years, which saved 47778 million units of ....5 gen 2023  state-owned energy efficiency services ltd (eesl) on thursday said it has distributed over 36.8 crore led bulbs under the government's unnat ....5 gen 2022  ujala completes 7 years of energy-efficient and affordable led distribution. 36.78 crore leds distributed across the country under ujala ujala ....5 gen 2022  with ujala, the cost of led bulbs has come down by 85 per cent. this ... for distribution of led bulbs under the ujala programme, it stated..nov 22, 2013  from incandescent bulbs to fluorescents to leds, we're exploring the long history of the light bulb.
Token IDs: tensor([  784,   254,   521,   603,   908,    10,  1570,   160,  1487,  5023,
            6,  2504,    5

In [22]:
# Tokenize every validation example in one batched call. The tokenizer will:
#   (1) split each text into sub-word tokens and map them to vocabulary IDs,
#   (2) add the special tokens this tokenizer uses,
#   (3) truncate or pad every sequence to exactly `max_length` tokens,
#   (4) build attention masks that tell real tokens apart from padding.
# `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
encoded_val = tokenizer(
    val_features,                  # List of texts to encode.
    add_special_tokens=True,
    max_length=256,                # Pad & truncate all sequences to this length.
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt',           # One (num_examples, max_length) tensor per field.
)

val_input_ids = encoded_val['input_ids']
val_attention_masks = encoded_val['attention_mask']

# Print example 0, now as a tensor of IDs. The line is labelled "Token IDs", so
# show the IDs (it previously printed the attention mask by mistake).
print('Original: ', val_features[0])
print('Token IDs:', val_input_ids[0])

Original:  [Claim]:Amit Shah said Narendra Modi sleeps for 24 hours for the welfare of the poor.[Evidences]:mar 23, 2022  he sleeps for 3.5 to 4 hours every day. he sleeps late at around 12 o clock and gets up early around 4 o clock. yet, he is so fresh and energetic throughout the ....mar 20, 2020  the study found that the virus is viable for up to 72 hours on plastics, 48 hours on stainless steel, 24 hours on cardboard, and 4 hours on ....march 12, 2013: the obama administration issued a statement of administration policy on h.r. 890, noting that ultimately, no states formally applied for state waivers of welfare work requirements. friday, july 12, 2013 marks one year since the obama administration first declared that it had the authority to waive the work requirements for welfare recipients. work requirements were created in the 1996 welfare reform law, and they have been key to the success of welfare reform in increasing work and earnings and reducing poverty and welfare dependence

In [23]:
# Convert the encoded labels to int64 tensors, the dtype nn.CrossEntropyLoss
# expects for class-index targets. `torch.as_tensor` accepts NumPy arrays as well
# as tensors, so re-running this cell is harmless (calling `torch.tensor` on an
# existing tensor emits a copy-construct warning).
train_labels_final = torch.as_tensor(train_labels_final, dtype=torch.long)
val_labels_final = torch.as_tensor(val_labels_final, dtype=torch.long)

In [24]:
val_labels_final.shape

torch.Size([3084])

In [25]:
len(val_input_ids)

3084

In [26]:
# Count the distinct label strings seen in the training set.
distinct_labels = set(train_labels)
num_classes = len(distinct_labels)
list(distinct_labels)

['True', 'False', 'Conflicting']

In [27]:
num_classes

3

In [28]:
from torch.utils.data import TensorDataset, random_split

# Bundle token IDs, attention masks and labels so that every dataset item is an
# (input_ids, attention_mask, label) triple.
train_tensors = (input_ids, attention_masks, train_labels_final)
val_tensors = (val_input_ids, val_attention_masks, val_labels_final)

dataset = TensorDataset(*train_tensors)
val_dataset = TensorDataset(*val_tensors)

In [29]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 16

# Training batches are drawn in random order.
train_sampler = RandomSampler(dataset)
train_dataloader = DataLoader(dataset, sampler=train_sampler, batch_size=batch_size)

# Validation batches follow the dataset order.
val_sampler = SequentialSampler(val_dataset)
validation_dataloader = DataLoader(val_dataset, sampler=val_sampler, batch_size=batch_size)

In [30]:
from torch import nn
from transformers import T5EncoderModel
class MultiClassClassifier(nn.Module):
    """T5 encoder followed by a two-layer MLP classification head.

    Args:
        t5_model_path: Name or path handed to ``T5EncoderModel.from_pretrained``.
        labels_count: Number of output classes (size of the returned logits).
        hidden_dim: Width of the encoder's hidden states; must match the
            encoder's ``config.d_model``.
        mlp_dim: Width of the MLP's hidden layer.
        extras_dim: Unused; kept so existing positional call sites keep working.
        dropout: Dropout probability applied to the pooled representation.
        freeze_t5: If True, the encoder parameters get ``requires_grad = False``.
        pooling: How the encoder output is reduced to one vector per example.
            ``"first"`` (default, the original behaviour) takes the hidden state
            at position 0; ``"mean"`` averages the hidden states of all
            positions whose attention mask is non-zero.

    Raises:
        ValueError: If ``pooling`` is not a supported mode, or if ``hidden_dim``
            differs from the encoder's hidden size.
    """

    POOLING_MODES = ("first", "mean")

    def __init__(self, t5_model_path, labels_count, hidden_dim=768, mlp_dim=500, extras_dim=100, dropout=0.1, freeze_t5=False, pooling="first"):
        super().__init__()

        if pooling not in self.POOLING_MODES:
            raise ValueError(
                "pooling must be one of %s, got %r" % (self.POOLING_MODES, pooling)
            )
        self.pooling = pooling

        self.t5_encoder = T5EncoderModel.from_pretrained(t5_model_path)

        # Fail early with a clear message rather than with a shape error inside
        # the first forward pass.
        encoder_dim = self.t5_encoder.config.d_model
        if hidden_dim != encoder_dim:
            raise ValueError(
                "hidden_dim=%d does not match the encoder hidden size %d"
                % (hidden_dim, encoder_dim)
            )

        self.dropout = nn.Dropout(dropout)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_dim, mlp_dim),
            nn.ReLU(),
            nn.Linear(mlp_dim, labels_count)
        )
        if freeze_t5:
            print("Freezing T5 layers")
            for param in self.t5_encoder.parameters():
                param.requires_grad = False

    def forward(self, tokens, masks):
        """Return unnormalised class scores (logits) for a batch.

        Args:
            tokens: Token IDs, passed to the encoder as ``input_ids``.
            masks: Attention mask, passed to the encoder as ``attention_mask``.

        Returns:
            The MLP output for each example: one score per class. No softmax is
            applied here.
        """
        output = self.t5_encoder(input_ids=tokens, attention_mask=masks)
        encoder_hidden_states = output.last_hidden_state

        if self.pooling == "mean":
            # Average over real tokens only; padded positions get weight 0.
            weights = masks.unsqueeze(-1).to(encoder_hidden_states.dtype)
            summed = (encoder_hidden_states * weights).sum(dim=1)
            # Guard against an all-zero mask to avoid dividing by zero.
            counts = weights.sum(dim=1).clamp(min=1.0)
            pooled_output = summed / counts
        else:
            # T5 has no [CLS] token: this is simply the contextualised state of
            # the first input token.
            pooled_output = encoder_hidden_states[:, 0]

        dropout_output = self.dropout(pooled_output)
        mlp_output = self.mlp(dropout_output)

        return mlp_output

In [31]:
from transformers import BertForSequenceClassification, AdamW, BertConfig

# Build the FLAN-T5 encoder + MLP classifier (MultiClassClassifier) from the
# pretrained 'google/flan-t5-base' checkpoint.
model = MultiClassClassifier(
    'google/flan-t5-base',
    num_classes,
    hidden_dim=768,
    mlp_dim=500,
    extras_dim=100,
    dropout=0.1,
    freeze_t5=False,
)

# Move the model to the device selected in the first cell. Unlike `model.cuda()`,
# this also works on the CPU fallback path.
model.to(device)


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

MultiClassClassifier(
  (t5_encoder): T5EncoderModel(
    (shared): Embedding(32128, 768)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32128, 768)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=768, out_features=768, bias=False)
                (k): Linear(in_features=768, out_features=768, bias=False)
                (v): Linear(in_features=768, out_features=768, bias=False)
                (o): Linear(in_features=768, out_features=768, bias=False)
                (relative_attention_bias): Embedding(32, 12)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseGatedActDense(
                (wi_0): Linear(in_features=768, out_features=2048, bias=False)
                (wi_1): Linear

In [32]:
# `transformers.AdamW` is deprecated in favour of PyTorch's implementation.
# weight_decay=0.0 keeps the default of the transformers version
# (torch.optim.AdamW would otherwise default to 0.01).
optimizer = torch.optim.AdamW(model.parameters(),
                              lr = 2e-5,
                              eps = 1e-8,  # args.adam_epsilon  - default is 1e-8.
                              weight_decay = 0.0
                              )



In [33]:
from transformers import get_linear_schedule_with_warmup


# Maximum number of passes over the training set; the training loop can stop
# earlier through its EarlyStopping check.
epochs = 20

# Total number of training steps is [number of batches] x [number of epochs].
total_steps = len(train_dataloader) * epochs



In [34]:
len(train_dataloader)

621

In [35]:
1935 * 32

61920

In [36]:
# Learning-rate schedule with no warm-up steps, spanning `total_steps` updates.
# NOTE(review): the training loop has `scheduler.step()` commented out, so this
# schedule is never advanced and the learning rate presumably stays at its
# initial value — confirm whether that is intended.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

In [37]:
import numpy as np

def flat_accuracy(preds, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        preds: 2-D array of per-class scores, one row per example.
        labels: NumPy array of integer class indices, one per example.

    Returns:
        Number of correct predictions divided by the number of labels.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    n_correct = np.sum(predicted_classes == true_classes)
    return n_correct / len(true_classes)

In [38]:
import time
import datetime

def format_time(elapsed):
    """Format a duration given in seconds as an ``h:mm:ss`` string.

    The duration is rounded to the nearest whole second before formatting.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [39]:
# NOTE(review): this repeats the `os` import and the CUDA_LAUNCH_BLOCKING setting
# already made in the first cell of the notebook; it looks redundant here —
# confirm and remove.
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


In [40]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # Consecutive calls without an improvement.
        self.best_score = None    # Best (negated) validation loss seen so far.
        self.early_stop = False   # Set to True once patience is exhausted.
        # `np.Inf` was removed in NumPy 2.0; `np.inf` works on every NumPy version.
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_loss, model):
        """Record one validation result.

        Saves a checkpoint when the loss improved; otherwise increments the
        patience counter and sets ``early_stop`` once it reaches ``patience``.

        Args:
            val_loss: Validation loss of the current epoch (lower is better).
            model: Model whose ``state_dict()`` is saved on improvement.
        """
        # Work with the negated loss so that "higher is better".
        score = -val_loss

        if self.best_score is None:
            # First call: whatever we have is the best so far.
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            # No sufficient improvement over the best score.
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [41]:
# for param in model.t5_encoder.encoder.layer[0:5].parameters():
#     param.requires_grad=False

In [42]:
loss_func = nn.CrossEntropyLoss()


In [43]:
import random
import numpy as np

# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
# Seed every random number generator in play to make the run reproducible.
seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Per-epoch record of training/validation loss, validation accuracy and timings.
training_stats = []

# Directory that receives the tokenizer and the latest weights after each epoch.
output_dir = "/kaggle/working/model_t5/"

# Measure the total training time for the whole run.
total_t0 = time.time()
early_stopping = EarlyStopping(patience=2, verbose=True)

# For each epoch...
for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the running totals for this epoch.
    total_train_accuracy = 0
    total_train_loss = 0

    # `train()` only switches the mode (it does not perform any training):
    # layers such as dropout behave differently in training and evaluation mode.
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # `batch` holds three tensors: [0] input ids, [1] attention masks,
        # [2] labels. Copy each one to the training device.
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Clear gradients left over from the previous step; PyTorch accumulates
        # them otherwise.
        model.zero_grad()

        # Forward pass. The model returns raw class scores (logits); the loss
        # function is applied directly to them.
        train_logits = model(b_input_ids, b_input_mask)

        loss = loss_func(train_logits, b_labels)
        # `.item()` extracts the Python number so totals do not keep the graph alive.
        total_train_loss += loss.item()

        # Backward pass to compute the gradients.
        loss.backward()

        # Gradient clipping is currently disabled.
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update the parameters from the computed gradients.
        optimizer.step()

        # Learning-rate scheduling is currently disabled.
        # scheduler.step()

        logits = train_logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        total_train_accuracy += flat_accuracy(logits, label_ids)

    # Averages over the batches of this epoch.
    avg_train_accuracy = total_train_accuracy / len(train_dataloader)
    print(" Train Accuracy: {0:.2f}".format(avg_train_accuracy))

    avg_train_loss = total_train_loss / len(train_dataloader)

    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    # ========================================
    #               Validation
    # ========================================
    # After each training epoch, measure performance on the validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Evaluation mode: dropout layers behave differently than during training.
    model.eval()

    # Running totals for this validation pass.
    total_eval_accuracy = 0
    total_eval_loss = 0

    for batch in validation_dataloader:

        # Same batch layout as in training: input ids, attention masks, labels.
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # No gradients are needed for evaluation, so skip building the graph
        # for both the forward pass and the loss.
        with torch.no_grad():
            logits = model(b_input_ids, b_input_mask)
            loss = loss_func(logits, b_labels)

        total_eval_loss += loss.item()

        # Move logits and labels to CPU for the NumPy-based accuracy helper.
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        total_eval_accuracy += flat_accuracy(logits, label_ids)

    # Report the accuracy for this validation run.
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # Average validation loss over all batches; this drives early stopping.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    early_stopping(avg_val_loss, model)

    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)

    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record the statistics before the early-stopping check so that the final
    # (stopping) epoch is reported and logged like every other epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

    if early_stopping.early_stop:
        # Leave before saving so the weights written below still come from the
        # last epoch that did not trigger the stop, as before.
        print("Early stopping")
        break

    # `exist_ok=True` replaces the separate existence check.
    os.makedirs(output_dir, exist_ok=True)

    print("Saving model to %s" % output_dir)
    tokenizer.save_pretrained(output_dir)
    torch.save(model.state_dict(), os.path.join(output_dir, 'model_weights'))

print("")
print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...
  Batch    40  of    621.    Elapsed: 0:00:20.
  Batch    80  of    621.    Elapsed: 0:00:40.
  Batch   120  of    621.    Elapsed: 0:00:59.
  Batch   160  of    621.    Elapsed: 0:01:18.
  Batch   200  of    621.    Elapsed: 0:01:37.
  Batch   240  of    621.    Elapsed: 0:01:56.
  Batch   280  of    621.    Elapsed: 0:02:16.
  Batch   320  of    621.    Elapsed: 0:02:35.
  Batch   360  of    621.    Elapsed: 0:02:54.
  Batch   400  of    621.    Elapsed: 0:03:13.
  Batch   440  of    621.    Elapsed: 0:03:32.
  Batch   480  of    621.    Elapsed: 0:03:52.
  Batch   520  of    621.    Elapsed: 0:04:11.
  Batch   560  of    621.    Elapsed: 0:04:30.
  Batch   600  of    621.    Elapsed: 0:04:49.
 Train Accuracy: 0.59

  Average training loss: 0.91
  Training epcoh took: 0:04:59

Running Validation...
  Accuracy: 0.58
Validation loss decreased (inf --> 0.851258).  Saving model ...
  Validation Loss: 0.85
  Validation took: 0:00:30
Saving model to /kaggle/working/model_t5/



In [44]:
# from google.colab import drive
# drive.mount('/content/drive')

In [45]:
# from huggingface_hub import login

In [46]:
# login()

In [47]:
# LE.classes_