
# **Install libraries**

In [None]:
# Mount Google Drive at /content/drive so later cells can use files stored there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch the working directory to the project folder on Drive; later cells use
# relative paths such as "data/train.tsv" that resolve against this directory.
%cd "/content/drive/MyDrive/T5_paraphrasing_reference_code"

/content/drive/MyDrive/T5_paraphrasing_reference_code


In [None]:
!pip install sentencepiece

Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |▎                               | 10kB 23.9MB/s eta 0:00:01[K     |▌                               | 20kB 25.7MB/s eta 0:00:01[K     |▉                               | 30kB 23.5MB/s eta 0:00:01[K     |█                               | 40kB 17.5MB/s eta 0:00:01[K     |█▍                              | 51kB 14.9MB/s eta 0:00:01[K     |█▋                              | 61kB 16.9MB/s eta 0:00:01[K     |██                              | 71kB 13.5MB/s eta 0:00:01[K     |██▏                             | 81kB 13.1MB/s eta 0:00:01[K     |██▌                             | 92kB 13.8MB/s eta 0:00:01[K     |██▊                             | 102kB 13.7MB/s eta 0:00:01[K     |███                             | 112kB 13.7MB/s eta 0:00:01[K     |███▎        

In [None]:
# Install the exact library versions this notebook was written against.
# NOTE: downgrading torch makes pip report that the preinstalled torchvision and
# torchtext require torch 1.8.1; neither package is imported by this notebook.
!pip install torch==1.4.0
!pip install transformers==2.9.0
!pip install pytorch_lightning==0.7.5

Collecting torch==1.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/1a/3b/fa92ece1e58a6a48ec598bab327f39d69808133e5b2fb33002ca754e381e/torch-1.4.0-cp37-cp37m-manylinux1_x86_64.whl (753.4MB)
[K     |████████████████████████████████| 753.4MB 16kB/s 
[31mERROR: torchvision 0.9.1+cu101 has requirement torch==1.8.1, but you'll have torch 1.4.0 which is incompatible.[0m
[31mERROR: torchtext 0.9.1 has requirement torch==1.8.1, but you'll have torch 1.4.0 which is incompatible.[0m
[?25hInstalling collected packages: torch
  Found existing installation: torch 1.8.1+cu101
    Uninstalling torch-1.8.1+cu101:
      Successfully uninstalled torch-1.8.1+cu101
Successfully installed torch-1.4.0
Collecting transformers==2.9.0
[?25l  Downloading https://files.pythonhosted.org/packages/cd/38/c9527aa055241c66c4d785381eaf6f80a28c224cae97daa1f8b183b5fabb/transformers-2.9.0-py3-none-any.whl (635kB)
[K     |████████████████████████████████| 645kB 14.0MB/s 
Collecting sacremoses
[?25l

In [None]:
# Check that a GPU is attached and list its model (the name includes the memory size).
!nvidia-smi -L

GPU 0: Tesla V100-SXM2-16GB (UUID: GPU-88849243-7e8b-c203-2354-c0f39881bc63)


# **Import packages**

In [None]:
import argparse
import glob
import os
import json
import time
import logging
import random
import re
from itertools import chain
from string import punctuation

import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl

from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup
)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


INFO:transformers.file_utils:PyTorch version 1.4.0 available.
INFO:transformers.file_utils:TensorFlow version 2.4.1 available.


# **Set a seed**

In [None]:
def set_seed(seed):
  """Seed Python's `random`, NumPy's global generator and PyTorch with `seed`."""
  # Same three generators, seeded in the same order.
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)

set_seed(42)

# **T5FineTuner**

In [None]:
class T5FineTuner(pl.LightningModule):
    """LightningModule that fine-tunes a T5 conditional-generation model.

    The module loads its own model and tokenizer, builds its train/validation
    dataloaders through the module-level ``get_dataset`` helper, and steps a
    linear learning-rate schedule manually inside ``optimizer_step``.
    """

    def __init__(self, hparams):
        """Load the pretrained model and tokenizer named in ``hparams``.

        Args:
            hparams: namespace-like object. This class reads
                ``model_name_or_path``, ``tokenizer_name_or_path``,
                ``weight_decay``, ``learning_rate``, ``adam_epsilon``,
                ``warmup_steps``, ``train_batch_size``, ``eval_batch_size``,
                ``num_train_epochs``, ``gradient_accumulation_steps`` and
                ``n_gpu`` from it, and passes it on to ``get_dataset``.
        """
        super(T5FineTuner, self).__init__()
        self.hparams = hparams

        self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)
        self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)

    def is_logger(self):
        """Return whether this process should log; the rank check is disabled, so always True."""
        return True #self.trainer.proc_rank <= 0

    def forward(
            self, input_ids, attention_mask=None, decoder_input_ids=None, decoder_attention_mask=None, lm_labels=None
    ):
        """Delegate to the wrapped T5 model and return its outputs unchanged."""
        return self.model(
            input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            lm_labels=lm_labels,
        )

    def _step(self, batch):
        """Run one forward pass on ``batch`` and return the loss.

        Args:
            batch: mapping with ``source_ids``, ``source_mask``, ``target_ids``
                and ``target_mask`` tensors.

        Returns:
            The first element of the model outputs, used as the loss.
        """
        # Work on a copy: the original code rewrote batch["target_ids"] in place,
        # silently changing the caller's batch.
        lm_labels = batch["target_ids"].clone()
        # Replace padding positions with -100 so they are excluded from the loss
        # (see the T5ForConditionalGeneration ``lm_labels`` documentation).
        lm_labels[lm_labels == self.tokenizer.pad_token_id] = -100

        outputs = self(
            input_ids=batch["source_ids"],
            attention_mask=batch["source_mask"],
            lm_labels=lm_labels,
            decoder_attention_mask=batch['target_mask']
        )

        loss = outputs[0]

        return loss

    def training_step(self, batch, batch_idx):
        """Compute the training loss and expose it under ``train_loss`` for logging."""
        loss = self._step(batch)

        tensorboard_logs = {"train_loss": loss}
        return {"loss": loss, "log": tensorboard_logs}

    def training_epoch_end(self, outputs):
        """Average the per-step ``loss`` values of the epoch into ``avg_train_loss``."""
        avg_train_loss = torch.stack([x["loss"] for x in outputs]).mean()
        tensorboard_logs = {"avg_train_loss": avg_train_loss}
        return {"avg_train_loss": avg_train_loss, "log": tensorboard_logs, 'progress_bar': tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        """Compute the loss for one validation batch."""
        loss = self._step(batch)
        return {"val_loss": loss}

    def validation_epoch_end(self, outputs):
        """Average the per-batch ``val_loss`` values and log the mean as ``val_loss``."""
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        tensorboard_logs = {"val_loss": avg_loss}
        return {"avg_val_loss": avg_loss, "log": tensorboard_logs, 'progress_bar': tensorboard_logs}

    def configure_optimizers(self):
        """Build the AdamW optimizer and keep a reference to it on ``self.opt``.

        Parameters whose name contains ``bias`` or ``LayerNorm.weight`` get a
        weight decay of 0.0; all others use ``hparams.weight_decay``. The
        learning-rate schedule is not created here but in ``train_dataloader``,
        which needs the dataset size.
        """
        model = self.model
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
                "weight_decay": self.hparams.weight_decay,
            },
            {
                "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=self.hparams.learning_rate, eps=self.hparams.adam_epsilon)
        self.opt = optimizer
        return [optimizer]

    def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None, on_tpu=False, using_native_amp=False, using_lbfgs=False):
        """Apply one optimizer update, clear the gradients and advance the LR schedule."""
        if self.trainer.use_tpu:
            # NOTE(review): `xm` is not imported anywhere in the visible notebook, so
            # this branch would raise NameError on a TPU run. It presumably refers to
            # torch_xla.core.xla_model -- confirm and import it before using TPUs.
            xm.optimizer_step(optimizer)
        else:
            optimizer.step()
        optimizer.zero_grad()
        # self.lr_scheduler is created in train_dataloader().
        self.lr_scheduler.step()

    def get_tqdm_dict(self):
        """Return the running average loss and the current learning rate for the progress bar."""
        tqdm_dict = {"loss": "{:.3f}".format(self.trainer.avg_loss), "lr": self.lr_scheduler.get_last_lr()[-1]}

        return tqdm_dict

    def train_dataloader(self):
        """Build the shuffled training DataLoader and the linear LR schedule.

        The schedule is created here because the total number of steps depends
        on the dataset size. It relies on ``self.opt`` having been set by
        ``configure_optimizers``.
        """
        train_dataset = get_dataset(tokenizer=self.tokenizer, type_path="train", args=self.hparams)
        dataloader = DataLoader(train_dataset, batch_size=self.hparams.train_batch_size, drop_last=True, shuffle=True,
                                num_workers=4)
        # Total optimizer updates: batches per epoch (across GPUs), divided by the
        # gradient-accumulation factor, times the number of epochs.
        t_total = (
                (len(dataloader.dataset) // (self.hparams.train_batch_size * max(1, self.hparams.n_gpu)))
                // self.hparams.gradient_accumulation_steps
                * float(self.hparams.num_train_epochs)
        )
        scheduler = get_linear_schedule_with_warmup(
            self.opt, num_warmup_steps=self.hparams.warmup_steps, num_training_steps=t_total
        )
        self.lr_scheduler = scheduler
        return dataloader

    def val_dataloader(self):
        """Build the validation DataLoader from the ``dev`` split (no shuffling)."""
        val_dataset = get_dataset(tokenizer=self.tokenizer, type_path="dev", args=self.hparams)
        return DataLoader(val_dataset, batch_size=self.hparams.eval_batch_size, num_workers=4)

logger = logging.getLogger(__name__)

class LoggingCallback(pl.Callback):
  """Callback that logs the trainer's callback metrics after validation and test runs."""

  @staticmethod
  def _metric_lines(metrics):
    """Yield one "name = value" line per metric in sorted key order, skipping bookkeeping keys."""
    for name in sorted(metrics):
      if name in ["log", "progress_bar"]:
        continue
      yield "{} = {}\n".format(name, str(metrics[name]))

  def on_validation_end(self, trainer, pl_module):
    """Log every callback metric once validation has finished."""
    logger.info("***** Validation results *****")
    if not pl_module.is_logger():
      return
    for line in self._metric_lines(trainer.callback_metrics):
      logger.info(line)

  def on_test_end(self, trainer, pl_module):
    """Log every callback metric and write the same lines to test_results.txt in the output dir."""
    logger.info("***** Test results *****")

    if not pl_module.is_logger():
      return

    results_path = os.path.join(pl_module.hparams.output_dir, "test_results.txt")
    with open(results_path, "w") as writer:
      for line in self._metric_lines(trainer.callback_metrics):
        logger.info(line)
        writer.write(line)

# **Load datasets**

In [None]:
# Load the tab-separated training split with pandas' inferred dtypes and preview
# the first rows.
data_train = pd.read_csv("data/train.tsv", sep="\t")
data_train.head()

Unnamed: 0.1,Unnamed: 0,a,b,c,sentence1,sentence2,e,f,g,h
0,0,S > NP VP .,"S > SBAR , NP VP .",False,Mr. Whetstone is goingto speak to you after I ...,"after I'm done, Mr. Whetstone will be speaking.","(0.3, 0.375, 0.33333333333333326)","(-0.33333333333333337, False)",2,"('en', 'en')"
1,1,S > NP VP .,S > RB S VP .,False,"I guess it's up to me to save this family, then.",so saving my family is on me.,"(0.18181818181818182, 0.2857142857142857, 0.22...","(-1.0, False)",2,"('en', 'en')"
2,2,S > NP VP .,S > NP VP .,True,It'll speed distribution when we get the lines...,we would expedite such distributions when we f...,"(0.2222222222222222, 0.2222222222222222, 0.222...","(-1.0, False)",0,"('en', 'en')"
3,3,S > NP VP .,S > NP VP .,True,I'm less than 15 minutes away from Matobo's ho...,I'll be with Matoby in fifteen minutes.,"(0.1111111111111111, 0.14285714285714285, 0.125)","(-1.0, True)",0,"('en', 'en')"
4,4,"S > SBAR , NP VP .","S > SBAR , NP VP .",True,"If it'd exist, I'd free it.","if he really existed, I would have freed him.","(0.16666666666666666, 0.1111111111111111, 0.13...","(-1.0, True)",0,"('en', 'en')"


In [None]:
# Load the tab-separated validation split with pandas' inferred dtypes.
data_dev = pd.read_csv("data/dev.tsv", sep="\t")

In [None]:
# Report the (rows, columns) shape of each split.
print(f"Training data:  {data_train.shape}")
print(f"Validation data:  {data_dev.shape}")

Training data:  (73062, 10)
Validation data:  (1492, 10)


# **Set arguments**

In [None]:
# Single source of truth for the run configuration; it is turned into an
# argparse.Namespace before training.
args_dict = dict(
    data_dir="data", # path for data files
    output_dir="t5_paraphrase_M2", # path to save the checkpoints
    model_name_or_path='t5-large',
    tokenizer_name_or_path='t5-large',
    max_seq_length=256,
    learning_rate=3e-4,
    weight_decay=0.0,
    adam_epsilon=1e-8,
    warmup_steps=0,
    train_batch_size=1,
    eval_batch_size=1,
    num_train_epochs=3,
    gradient_accumulation_steps=16,
    n_gpu=1,
    early_stop_callback=False,
    fp_16=False, # if you want to enable 16-bit training then install apex and set this to true
    opt_level='O1', # you can find out more on optimisation levels here https://nvidia.github.io/apex/amp.html#opt-levels-and-properties
    max_grad_norm=1.0, # if you enable 16-bit training then set this to a sensible value, 0.5 is a good default
    seed=42,
)

train_path = "data/train.tsv"
val_path = "data/dev.tsv"

# Every column is cast to str here, unlike the dtype-inferred load above.
train = pd.read_csv(train_path, sep="\t").astype(str)
print(train.head())

# Take the tokenizer name from the configuration instead of repeating the
# literal, so changing `tokenizer_name_or_path` also changes this tokenizer.
tokenizer = T5Tokenizer.from_pretrained(args_dict["tokenizer_name_or_path"])

  Unnamed: 0                   a  ...  g             h
0          0         S > NP VP .  ...  2  ('en', 'en')
1          1         S > NP VP .  ...  2  ('en', 'en')
2          2         S > NP VP .  ...  0  ('en', 'en')
3          3         S > NP VP .  ...  0  ('en', 'en')
4          4  S > SBAR , NP VP .  ...  0  ('en', 'en')

[5 rows x 10 columns]


INFO:filelock:Lock 140473723411792 acquired on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock
INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpkj0pio49


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…

INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model in cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f
INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f
INFO:filelock:Lock 140473723411792 released on /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f.lock
INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f




# **ParaphraseDataset()**

In [None]:
class ParaphraseDataset(Dataset):
    """Dataset of (source, target) sentence pairs read from ``<data_dir>/<type_path>.tsv``.

    Every row is tokenized once at construction time. The source text is
    prefixed with "paraphrase: " and both texts get a trailing " </s>".
    """

    def __init__(self, tokenizer, data_dir, type_path, max_len=512):
        """Read the TSV file, cast all columns to str and tokenize every row.

        Args:
            tokenizer: object providing ``batch_encode_plus``.
            data_dir: directory that contains the TSV file.
            type_path: file name without the ``.tsv`` extension.
            max_len: ``max_length`` passed to the tokenizer for both texts.
        """
        self.tokenizer = tokenizer
        self.max_len = max_len

        self.source_column = "sentence1"
        self.target_column = "sentence2"

        self.path = os.path.join(data_dir, type_path + '.tsv')
        self.data = pd.read_csv(self.path, sep="\t").astype(str)

        self.inputs = []
        self.targets = []

        self._build()

    def __len__(self):
        """Number of tokenized examples."""
        return len(self.inputs)

    def __getitem__(self, index):
        """Return the ids and attention masks of example ``index`` with size-1 dims squeezed out."""
        encoded_source = self.inputs[index]
        encoded_target = self.targets[index]

        return {
            "source_ids": encoded_source["input_ids"].squeeze(),
            "source_mask": encoded_source["attention_mask"].squeeze(),
            "target_ids": encoded_target["input_ids"].squeeze(),
            "target_mask": encoded_target["attention_mask"].squeeze(),
        }

    def _encode(self, text):
        """Tokenize a single string with the dataset's ``max_len`` setting, as PyTorch tensors."""
        return self.tokenizer.batch_encode_plus(
            [text], max_length=self.max_len, pad_to_max_length=True, return_tensors="pt", truncation='longest_first'
        )

    def _build(self):
        """Tokenize each row's source and target text and store the encodings in row order."""
        sources = self.data[self.source_column]
        targets = self.data[self.target_column]

        for source_text, target_text in zip(sources, targets):
            prompt = f"paraphrase: {source_text} </s>"
            reference = f"{target_text} </s>"

            # Source first, then target, for every row.
            self.inputs.append(self._encode(prompt))
            self.targets.append(self._encode(reference))

# **Start training**

In [None]:
# Build the run configuration first so the paths and sizes below come from one
# place instead of being repeated as literals. args_dict already holds these
# values, so no extra update() is needed.
args = argparse.Namespace(**args_dict)

# Sanity check: tokenize the dev split and decode one example back to text.
dataset = ParaphraseDataset(tokenizer, args.data_dir, 'dev', args.max_seq_length)
print("Val dataset: ",len(dataset))

data = dataset[61]  # arbitrary example index, used only for this preview
print(tokenizer.decode(data['source_ids']))
print(tokenizer.decode(data['target_ids']))

# exist_ok makes re-running the cell safe and avoids the check-then-create race.
os.makedirs(args.output_dir, exist_ok=True)

print(args_dict)

# Keep the five checkpoints with the lowest validation loss.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    filepath=args.output_dir, prefix="checkpoint", monitor="val_loss", mode="min", save_top_k=5
)

train_params = dict(
    accumulate_grad_batches=args.gradient_accumulation_steps,
    gpus=args.n_gpu,
    max_epochs=args.num_train_epochs,
    precision= 16 if args.fp_16 else 32,
    amp_level=args.opt_level,
    gradient_clip_val=args.max_grad_norm,
    checkpoint_callback=checkpoint_callback,
    callbacks=[LoggingCallback()],
)

def get_dataset(tokenizer, type_path, args):
  """Build the ParaphraseDataset for split `type_path` using `args.data_dir` and `args.max_seq_length`."""
  dataset_kwargs = dict(
      tokenizer=tokenizer,
      data_dir=args.data_dir,
      type_path=type_path,
      max_len=args.max_seq_length,
  )
  return ParaphraseDataset(**dataset_kwargs)

print ("Initialize model")
model = T5FineTuner(args)

trainer = pl.Trainer(**train_params)

print (" Training model")
trainer.fit(model)

print ("training finished")

print ("Saving model")
model.model.save_pretrained('t5_paraphrase_M2')

print ("Model saved")

!cp "/content/t5_paraphrase_M2/" -a "/content/drive/My Drive/"
!cp "/content/lightning_logs/" -a "/content/drive/My Drive/"
print ("Copied the final folder to Google Drive")

Val dataset:  1492
paraphrase: It made a hard, sickening sound when it struck the floor.
when he hit the ground, he made an unpleasant, dull sound.
{'data_dir': 'data', 'output_dir': 't5_paraphrase', 'model_name_or_path': 't5-large', 'tokenizer_name_or_path': 't5-large', 'max_seq_length': 256, 'learning_rate': 0.0003, 'weight_decay': 0.0, 'adam_epsilon': 1e-08, 'warmup_steps': 0, 'train_batch_size': 1, 'eval_batch_size': 1, 'num_train_epochs': 3, 'gradient_accumulation_steps': 16, 'n_gpu': 1, 'early_stop_callback': False, 'fp_16': False, 'opt_level': 'O1', 'max_grad_norm': 1.0, 'seed': 42}
Initialize model


INFO:filelock:Lock 140473702933776 acquired on /root/.cache/torch/transformers/0e9978f992c9b90cd05d080648b1b1c8aabc3f931f62781fa8fcbc281eba168d.ba29edd8b0c069c672abbe0f807c1cd7cac52350f14d193ba0a0ef5cdb9a255e.lock
INFO:transformers.file_utils:https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmpuu7icmcz


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1200.0, style=ProgressStyle(description…

INFO:transformers.file_utils:storing https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json in cache at /root/.cache/torch/transformers/0e9978f992c9b90cd05d080648b1b1c8aabc3f931f62781fa8fcbc281eba168d.ba29edd8b0c069c672abbe0f807c1cd7cac52350f14d193ba0a0ef5cdb9a255e
INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/0e9978f992c9b90cd05d080648b1b1c8aabc3f931f62781fa8fcbc281eba168d.ba29edd8b0c069c672abbe0f807c1cd7cac52350f14d193ba0a0ef5cdb9a255e
INFO:filelock:Lock 140473702933776 released on /root/.cache/torch/transformers/0e9978f992c9b90cd05d080648b1b1c8aabc3f931f62781fa8fcbc281eba168d.ba29edd8b0c069c672abbe0f807c1cd7cac52350f14d193ba0a0ef5cdb9a255e.lock
INFO:transformers.configuration_utils:loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/t5-large-config.json from cache at /root/.cache/torch/transformers/0e9978f992c9b90cd05d080648b1b1c8aabc3f931f62781fa8fcbc281eba168d.ba29edd8b0c069c672abbe0f807c1cd7cac




INFO:filelock:Lock 140473703328016 acquired on /root/.cache/torch/transformers/e47fdf946478fcd76239a89ab1db1545af6261da0f9be758eb538a22de9553fc.f7406fdda08cdd666e1b81685deafd24a40ba2d5579384751f9f7023254ffb5b.lock
INFO:transformers.file_utils:https://cdn.huggingface.co/t5-large-pytorch_model.bin not found in cache or force_download set to True, downloading to /root/.cache/torch/transformers/tmp3v1evqtj


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2950825948.0, style=ProgressStyle(descr…

INFO:transformers.file_utils:storing https://cdn.huggingface.co/t5-large-pytorch_model.bin in cache at /root/.cache/torch/transformers/e47fdf946478fcd76239a89ab1db1545af6261da0f9be758eb538a22de9553fc.f7406fdda08cdd666e1b81685deafd24a40ba2d5579384751f9f7023254ffb5b
INFO:transformers.file_utils:creating metadata file for /root/.cache/torch/transformers/e47fdf946478fcd76239a89ab1db1545af6261da0f9be758eb538a22de9553fc.f7406fdda08cdd666e1b81685deafd24a40ba2d5579384751f9f7023254ffb5b
INFO:filelock:Lock 140473703328016 released on /root/.cache/torch/transformers/e47fdf946478fcd76239a89ab1db1545af6261da0f9be758eb538a22de9553fc.f7406fdda08cdd666e1b81685deafd24a40ba2d5579384751f9f7023254ffb5b.lock
INFO:transformers.modeling_utils:loading weights file https://cdn.huggingface.co/t5-large-pytorch_model.bin from cache at /root/.cache/torch/transformers/e47fdf946478fcd76239a89ab1db1545af6261da0f9be758eb538a22de9553fc.f7406fdda08cdd666e1b81685deafd24a40ba2d5579384751f9f7023254ffb5b





INFO:transformers.modeling_utils:Weights of T5ForConditionalGeneration not initialized from pretrained model: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']
INFO:transformers.tokenization_utils:loading file https://s3.amazonaws.com/models.huggingface.co/bert/t5-spiece.model from cache at /root/.cache/torch/transformers/68f1b8dbca4350743bb54b8c4169fd38cbabaad564f85a9239337a8d0342af9f.9995af32582a1a7062cb3173c118cb7b4636fa03feb967340f20fc37406f021f
INFO:lightning:GPU available: True, used: True
INFO:lightning:CUDA_VISIBLE_DEVICES: [0]


 Training model


INFO:lightning:
     | Name                                                                  | Type                       | Params
-----------------------------------------------------------------------------------------------------------------
0    | model                                                                 | T5ForConditionalGeneration | 737 M 
1    | model.shared                                                          | Embedding                  | 32 M  
2    | model.encoder                                                         | T5Stack                    | 334 M 
3    | model.encoder.block                                                   | ModuleList                 | 302 M 
4    | model.encoder.block.0                                                 | T5Block                    | 12 M  
5    | model.encoder.block.0.layer                                           | ModuleList                 | 12 M  
6    | model.encoder.block.0.layer.0                             

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

INFO:__main__:***** Validation results *****
INFO:__main__:avg_val_loss = tensor(1.3903, device='cuda:0')

INFO:__main__:loss = tensor(1.6659, device='cuda:0')

INFO:__main__:train_loss = tensor(1.6659, device='cuda:0')

INFO:__main__:val_loss = tensor(1.3903, device='cuda:0')



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

INFO:__main__:***** Validation results *****
INFO:__main__:avg_train_loss = tensor(1.5628, device='cuda:0')

INFO:__main__:avg_val_loss = tensor(1.3654, device='cuda:0')

INFO:__main__:epoch = 0

INFO:__main__:loss = tensor(0.7336, device='cuda:0')

INFO:__main__:train_loss = tensor(0.7336, device='cuda:0')

INFO:__main__:val_loss = tensor(1.3654, device='cuda:0')



HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

INFO:__main__:***** Validation results *****
INFO:__main__:avg_train_loss = tensor(1.1044, device='cuda:0')

INFO:__main__:avg_val_loss = tensor(1.4444, device='cuda:0')

INFO:__main__:epoch = 1

INFO:__main__:loss = tensor(0.2213, device='cuda:0')

INFO:__main__:train_loss = tensor(0.2213, device='cuda:0')

INFO:__main__:val_loss = tensor(1.4444, device='cuda:0')

INFO:transformers.configuration_utils:Configuration saved in t5_paraphrase/config.json



training finished
Saving model


INFO:transformers.modeling_utils:Model weights saved in t5_paraphrase/pytorch_model.bin


Model saved
cp: cannot stat '/content/t5_paraphrase/': No such file or directory
cp: cannot stat '/content/lightning_logs/': No such file or directory
Copied the final folder to Google Drive
