참고 url : https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb

In [None]:
!pip install transformers
!pip install sentencepiece==0.1.91

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 15.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 50.7 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 6.1 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 67.7 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 56.2 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Atte

transformers version = '4.12.5'

In [None]:
# Mount Google Drive (Colab only) so that the paths under /content/drive used
# by the following cells can be resolved.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# model.generate(pieces)
from transformers import T5Config, T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader

In [None]:
# Directory on Drive from which both the model and its tokenizer are restored.
model_folder = '/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/pretrained_240000'

# The two loads are independent of each other; both read from `model_folder`.
tokenizer, model = (
    T5Tokenizer.from_pretrained(model_folder),
    T5ForConditionalGeneration.from_pretrained(model_folder),
)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
class CustomDataset:
    """Map-style dataset that tokenizes (source, summary) pairs on access.

    Args:
        dataframe: Table exposing a ``ctext`` column (source documents) and a
            ``text`` column (target summaries) as pandas Series.
        tokenizer: Tokenizer providing ``batch_encode_plus``.
        source_len: Fixed token length every source is padded/truncated to.
        summ_len: Fixed token length every target is padded/truncated to.

    Each item is a dict of 1-D ``torch.long`` tensors with the keys
    ``source_ids``, ``source_mask``, ``target_ids`` and ``target_ids_y``
    (the last two hold the same token ids).
    """

    def __init__(self, dataframe, tokenizer, source_len, summ_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = summ_len
        self.text = self.data.text
        self.ctext = self.data.ctext

    def __len__(self):
        return len(self.text)

    def _encode(self, raw, max_length):
        """Collapse whitespace in ``raw`` and encode it to exactly ``max_length`` tokens."""
        cleaned = ' '.join(str(raw).split())
        # Padding and truncation are requested explicitly: the deprecated
        # `pad_to_max_length` flag plus an implicit truncation fallback gave the
        # same result but emitted a warning on first use.
        return self.tokenizer.batch_encode_plus(
            [cleaned],
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

    def __getitem__(self, index):
        # Positional lookup: a DataLoader hands out positions 0..len-1, which
        # only coincide with index labels when the frame has a default index.
        source = self._encode(self.ctext.iloc[index], self.source_len)
        target = self._encode(self.text.iloc[index], self.summ_len)

        # squeeze(0) drops only the batch axis added by batch_encode_plus, so a
        # max_length of 1 still yields a 1-D tensor.
        source_ids = source['input_ids'].squeeze(0)
        source_mask = source['attention_mask'].squeeze(0)
        target_ids = target['input_ids'].squeeze(0)

        return {
            'source_ids': source_ids.to(dtype=torch.long),
            'source_mask': source_mask.to(dtype=torch.long),
            'target_ids': target_ids.to(dtype=torch.long),
            'target_ids_y': target_ids.to(dtype=torch.long)
        }

In [None]:
def train(epoch, tokenizer, model, device, loader, optimizer):
    """Run one training epoch over ``loader``.

    Args:
        epoch: Epoch number; used only in the progress/log output.
        tokenizer: Supplies ``pad_token_id`` for masking padded label positions.
        model: Seq2seq model called with ``input_ids``, ``attention_mask`` and
            ``labels``; the first element of its output is taken as the loss.
        device: Device the batch tensors are moved to.
        loader: Iterable of batches with the keys ``source_ids``,
            ``source_mask`` and ``target_ids``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        None. The loss is printed every 500 steps.
    """
    model.train()
    # Wrapping the loader (not the enumerate object) lets tqdm see its length.
    for step, data in enumerate(tqdm(loader, desc=f'Epoch {epoch}')):
        ids = data['source_ids'].to(device, dtype=torch.long)
        mask = data['source_mask'].to(device, dtype=torch.long)

        # The full target sequence is the label. Only `labels` is passed so the
        # model derives the decoder inputs itself by shifting them right behind
        # its decoder start token. The previous manual shift
        # (inputs = y[:, :-1], labels = y[:, 1:]) assumed a leading BOS token in
        # the target; without one the first summary token is never predicted
        # and training does not match how generate() starts decoding.
        # clone() keeps the in-place masking from touching the batch tensor.
        labels = data['target_ids'].to(device, dtype=torch.long).clone()
        # -100 is the index ignored by the loss, so padding adds nothing to it.
        labels[labels == tokenizer.pad_token_id] = -100

        outputs = model(input_ids=ids, attention_mask=mask, labels=labels)
        loss = outputs[0]

        if step % 500 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
def validate(epoch, tokenizer, model, device, loader, max_length=150,
             num_beams=2, repetition_penalty=2.5, length_penalty=1.0):
    """Generate a summary for every batch in ``loader`` and decode it.

    Args:
        epoch: Accepted for interface compatibility; not used in the body.
        tokenizer: Provides ``decode`` for turning token ids back into text.
        model: Model exposing ``eval()`` and ``generate(...)``.
        device: Device the batch tensors are moved to.
        loader: Iterable of batches with the keys ``source_ids``,
            ``source_mask`` and ``target_ids``.
        max_length: Forwarded to ``model.generate``.
        num_beams: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
        length_penalty: Forwarded to ``model.generate``.

    Returns:
        ``(predictions, actuals)``: two lists of decoded strings, one entry per
        example, in loader order.
    """
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for step, data in enumerate(loader, 0):
            y = data['target_ids'].to(device, dtype=torch.long)
            ids = data['source_ids'].to(device, dtype=torch.long)
            mask = data['source_mask'].to(device, dtype=torch.long)

            generated_ids = model.generate(
                input_ids=ids,
                attention_mask=mask,
                max_length=max_length,
                num_beams=num_beams,
                repetition_penalty=repetition_penalty,
                length_penalty=length_penalty,
                early_stopping=True,
            )
            preds = [
                tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for g in generated_ids
            ]
            target = [
                tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for t in y
            ]
            if step % 100 == 0:
                print(f'Completed {step}')

            predictions.extend(preds)
            actuals.extend(target)
    return predictions, actuals

In [None]:
# Move the model onto the selected device. Being the last expression of the
# cell, the returned module's repr is what the notebook displays below.
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(45100, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(45100, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedGeluDense(
              (wi_0): Linear(in_features=768, out_features=3072, bias=False)
              (wi_1): Linear(in_features=768, out_features=3072, bias=False)
              (wo)

hyper-parameters

In [None]:
# A T5Config instance is used here purely as an attribute container for the
# run's hyper-parameters; the other cells read these values from `config`.
config = T5Config()
config.MAX_LEN = 1024           # token length each source document is padded/truncated to
config.SUMMARY_LEN = 150        # token length each target summary is padded/truncated to
config.TRAIN_BATCH_SIZE = 2     # batch size of the training DataLoader
config.VALID_BATCH_SIZE = 2     # batch size of the validation DataLoader
config.TRAIN_EPOCHS = 8         # number of passes over the training set
config.VAL_EPOCHS = 1           # number of passes of the validation loop
config.LEARNING_RATE = 1e-4     # learning rate handed to the optimizer
config.SEED = 42                # random seed

# The seed was defined but never applied. Seeding PyTorch here makes runs
# repeatable as far as PyTorch's own random number generator is concerned.
torch.manual_seed(config.SEED)

In [None]:
# Keyword arguments unpacked into the DataLoader constructors: training batches
# are shuffled, validation batches keep the dataset order.
train_params = dict(
    batch_size=config.TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)
val_params = dict(
    batch_size=config.VALID_BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

# Adam over every model parameter, using the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=config.LEARNING_RATE)

dataset

In [None]:
import pandas as pd
# Keep only the `document` and `label` columns and cap the row counts:
# the first 30,000 rows for training and the first 500 for validation.
# NOTE(review): the training frame is read from a file named
# validation_data.csv - confirm this is the intended training split.
train_dataset = (
    pd.read_csv('/content/drive/MyDrive/3차 프로젝트/dataset/extract_data/validation_data.csv')
    [['document', 'label']]
    .head(30000)
)
validation_dataset = (
    pd.read_csv('/content/drive/MyDrive/3차 프로젝트/dataset/valid.csv')
    [['document', 'label']]
    .head(500)
)

train

In [None]:
# Rename the two columns to the names CustomDataset reads:
# `ctext` (source document) and `text` (target summary).
train_dataset.columns = ['ctext', 'text']

# Prepend the task prefix only where it is missing, so that re-running this
# cell does not stack "summarize: summarize: ..." onto the sources.
prefix = 'summarize: '
already_prefixed = train_dataset.ctext.astype(str).str.startswith(prefix)
train_dataset.ctext = train_dataset.ctext.where(already_prefixed, prefix + train_dataset.ctext)

training_set = CustomDataset(train_dataset, tokenizer, config.MAX_LEN, config.SUMMARY_LEN)
training_loader = DataLoader(training_set, **train_params)

# A checkpoint (tokenizer + model) is written after every epoch into a
# sub-directory named after the epoch number.
checkpoint_root = '/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/pretrained_270000'

for epoch in range(config.TRAIN_EPOCHS):
    print(f'Starting epoch {epoch + 1}/{config.TRAIN_EPOCHS}')
    train(epoch, tokenizer, model, device, training_loader, optimizer)
    epoch_dir = '{}/{}'.format(checkpoint_root, epoch)
    tokenizer.save_pretrained(epoch_dir)
    model.save_pretrained(epoch_dir)

1


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Loss:  1.5236687660217285


501it [02:41,  3.11it/s]

Epoch: 0, Loss:  3.332335948944092


1001it [05:22,  3.11it/s]

Epoch: 0, Loss:  2.8985884189605713


1501it [08:03,  3.10it/s]

Epoch: 0, Loss:  4.068318843841553


2001it [10:44,  3.09it/s]

Epoch: 0, Loss:  2.196030378341675


2501it [13:25,  3.10it/s]

Epoch: 0, Loss:  1.263974666595459


3001it [16:06,  3.10it/s]

Epoch: 0, Loss:  1.8085156679153442


3501it [18:47,  3.11it/s]

Epoch: 0, Loss:  2.829256772994995


4001it [21:28,  3.12it/s]

Epoch: 0, Loss:  2.63930606842041


4501it [24:09,  3.08it/s]

Epoch: 0, Loss:  3.6409337520599365


5001it [26:50,  3.11it/s]

Epoch: 0, Loss:  3.360675573348999


5501it [29:30,  3.10it/s]

Epoch: 0, Loss:  2.6464767456054688


6001it [32:11,  3.09it/s]

Epoch: 0, Loss:  2.5452325344085693


6501it [34:52,  3.13it/s]

Epoch: 0, Loss:  2.3160104751586914


7001it [37:32,  3.10it/s]

Epoch: 0, Loss:  1.8138662576675415


7501it [40:13,  3.11it/s]

Epoch: 0, Loss:  1.252880334854126


8001it [42:54,  3.12it/s]

Epoch: 0, Loss:  2.0886170864105225


8501it [45:34,  3.08it/s]

Epoch: 0, Loss:  2.242990732192993


9001it [48:15,  3.11it/s]

Epoch: 0, Loss:  4.015856742858887


9501it [50:56,  3.11it/s]

Epoch: 0, Loss:  1.6141210794448853


10001it [53:37,  3.11it/s]

Epoch: 0, Loss:  1.2503284215927124


10501it [56:18,  3.09it/s]

Epoch: 0, Loss:  1.026455044746399


11001it [59:01,  3.08it/s]

Epoch: 0, Loss:  1.025941252708435


11501it [1:01:43,  3.09it/s]

Epoch: 0, Loss:  1.842098355293274


12001it [1:04:25,  3.09it/s]

Epoch: 0, Loss:  1.5726850032806396


12501it [1:07:08,  3.08it/s]

Epoch: 0, Loss:  1.769033432006836


13001it [1:09:49,  3.08it/s]

Epoch: 0, Loss:  1.9995089769363403


13501it [1:12:31,  3.13it/s]

Epoch: 0, Loss:  2.7896487712860107


14001it [1:15:11,  3.10it/s]

Epoch: 0, Loss:  2.616948366165161


14501it [1:17:52,  3.12it/s]

Epoch: 0, Loss:  0.7009639143943787


15000it [1:20:32,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 1, Loss:  1.7850430011749268


501it [02:41,  3.11it/s]

Epoch: 1, Loss:  0.9382373094558716


1001it [05:22,  3.11it/s]

Epoch: 1, Loss:  1.1279655694961548


1501it [08:03,  3.10it/s]

Epoch: 1, Loss:  1.7322584390640259


2001it [10:43,  3.12it/s]

Epoch: 1, Loss:  0.7277222275733948


2501it [13:24,  3.11it/s]

Epoch: 1, Loss:  1.3674854040145874


3001it [16:05,  3.11it/s]

Epoch: 1, Loss:  1.6099257469177246


3501it [18:46,  3.09it/s]

Epoch: 1, Loss:  0.832453727722168


4001it [21:26,  3.12it/s]

Epoch: 1, Loss:  0.9061333537101746


4501it [24:07,  3.11it/s]

Epoch: 1, Loss:  1.6439059972763062


5001it [26:48,  3.11it/s]

Epoch: 1, Loss:  1.9437239170074463


5501it [29:29,  3.10it/s]

Epoch: 1, Loss:  2.0924484729766846


6001it [32:10,  3.08it/s]

Epoch: 1, Loss:  1.325053334236145


6501it [34:51,  3.11it/s]

Epoch: 1, Loss:  1.4402800798416138


7001it [37:32,  3.10it/s]

Epoch: 1, Loss:  2.0560641288757324


7501it [40:13,  3.12it/s]

Epoch: 1, Loss:  1.0527945756912231


8001it [42:54,  3.11it/s]

Epoch: 1, Loss:  0.5582066774368286


8501it [45:35,  3.11it/s]

Epoch: 1, Loss:  0.9075061082839966


9001it [48:16,  3.12it/s]

Epoch: 1, Loss:  0.9256030321121216


9501it [50:56,  3.11it/s]

Epoch: 1, Loss:  0.8187376260757446


10001it [53:38,  3.09it/s]

Epoch: 1, Loss:  0.6862515807151794


10501it [56:18,  3.11it/s]

Epoch: 1, Loss:  1.5048259496688843


11001it [58:59,  3.08it/s]

Epoch: 1, Loss:  1.540167212486267


11501it [1:01:40,  3.12it/s]

Epoch: 1, Loss:  1.2370625734329224


12001it [1:04:21,  3.12it/s]

Epoch: 1, Loss:  1.4193646907806396


12501it [1:07:02,  3.11it/s]

Epoch: 1, Loss:  1.8024801015853882


13001it [1:09:43,  3.10it/s]

Epoch: 1, Loss:  1.7334750890731812


13501it [1:12:23,  3.10it/s]

Epoch: 1, Loss:  0.8414886593818665


14001it [1:15:04,  3.11it/s]

Epoch: 1, Loss:  1.2741121053695679


14501it [1:17:45,  3.12it/s]

Epoch: 1, Loss:  0.9330969452857971


15000it [1:20:26,  3.11it/s]


1


0it [00:00, ?it/s]

Epoch: 2, Loss:  1.3286571502685547


501it [02:41,  3.08it/s]

Epoch: 2, Loss:  0.7404791712760925


1001it [05:22,  3.07it/s]

Epoch: 2, Loss:  1.1986958980560303


1501it [08:03,  3.11it/s]

Epoch: 2, Loss:  0.5556076765060425


2001it [10:44,  3.11it/s]

Epoch: 2, Loss:  0.934173047542572


2501it [13:24,  3.11it/s]

Epoch: 2, Loss:  0.8761917352676392


3001it [16:05,  3.09it/s]

Epoch: 2, Loss:  0.6737422347068787


3501it [18:45,  3.11it/s]

Epoch: 2, Loss:  0.5489234924316406


4001it [21:26,  3.11it/s]

Epoch: 2, Loss:  1.0253723859786987


4501it [24:07,  3.12it/s]

Epoch: 2, Loss:  1.0508779287338257


5001it [26:48,  3.11it/s]

Epoch: 2, Loss:  0.6380648016929626


5501it [29:29,  3.10it/s]

Epoch: 2, Loss:  1.3644440174102783


6001it [32:10,  3.12it/s]

Epoch: 2, Loss:  1.0832453966140747


6501it [34:51,  3.10it/s]

Epoch: 2, Loss:  0.8236770033836365


7001it [37:31,  3.11it/s]

Epoch: 2, Loss:  0.9290841221809387


7501it [40:12,  3.12it/s]

Epoch: 2, Loss:  1.0024380683898926


8001it [42:53,  3.10it/s]

Epoch: 2, Loss:  0.7929015755653381


8501it [45:34,  3.13it/s]

Epoch: 2, Loss:  0.7764785885810852


9001it [48:14,  3.12it/s]

Epoch: 2, Loss:  0.9977505207061768


9501it [50:55,  3.11it/s]

Epoch: 2, Loss:  1.2823666334152222


10001it [53:36,  3.08it/s]

Epoch: 2, Loss:  1.1720837354660034


10501it [56:17,  3.09it/s]

Epoch: 2, Loss:  1.3028854131698608


11001it [58:58,  3.09it/s]

Epoch: 2, Loss:  0.8985819816589355


11501it [1:01:39,  3.04it/s]

Epoch: 2, Loss:  1.1798566579818726


12001it [1:04:19,  3.12it/s]

Epoch: 2, Loss:  0.9113016128540039


12501it [1:07:01,  3.08it/s]

Epoch: 2, Loss:  1.5774154663085938


13001it [1:09:43,  3.08it/s]

Epoch: 2, Loss:  0.6381633281707764


13501it [1:12:26,  3.07it/s]

Epoch: 2, Loss:  0.9507613778114319


14001it [1:15:08,  3.07it/s]

Epoch: 2, Loss:  1.2943142652511597


14501it [1:17:50,  3.09it/s]

Epoch: 2, Loss:  0.5311521291732788


15000it [1:20:30,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 3, Loss:  0.7157273292541504


501it [02:41,  3.12it/s]

Epoch: 3, Loss:  0.27429378032684326


1001it [05:21,  3.12it/s]

Epoch: 3, Loss:  0.5952837467193604


1501it [08:02,  3.11it/s]

Epoch: 3, Loss:  0.6624022126197815


2001it [10:43,  3.10it/s]

Epoch: 3, Loss:  1.1841483116149902


2501it [13:25,  3.09it/s]

Epoch: 3, Loss:  0.5141281485557556


3001it [16:05,  3.12it/s]

Epoch: 3, Loss:  0.46672677993774414


3501it [18:47,  3.12it/s]

Epoch: 3, Loss:  0.7487919330596924


4001it [21:29,  3.12it/s]

Epoch: 3, Loss:  0.29059597849845886


4501it [24:10,  3.09it/s]

Epoch: 3, Loss:  0.7440437078475952


5001it [26:51,  3.10it/s]

Epoch: 3, Loss:  0.5474783778190613


5501it [29:32,  3.10it/s]

Epoch: 3, Loss:  0.7274226546287537


6001it [32:13,  3.10it/s]

Epoch: 3, Loss:  0.7821522355079651


6501it [34:54,  3.08it/s]

Epoch: 3, Loss:  0.6041014790534973


7001it [37:35,  3.11it/s]

Epoch: 3, Loss:  0.7021522521972656


7501it [40:15,  3.11it/s]

Epoch: 3, Loss:  0.5099661350250244


8001it [42:56,  3.11it/s]

Epoch: 3, Loss:  0.46178415417671204


8501it [45:37,  3.11it/s]

Epoch: 3, Loss:  0.2869635224342346


9001it [48:18,  3.11it/s]

Epoch: 3, Loss:  1.2066675424575806


9501it [50:59,  3.11it/s]

Epoch: 3, Loss:  0.5655546188354492


10001it [53:40,  3.11it/s]

Epoch: 3, Loss:  0.7995823621749878


10501it [56:21,  3.12it/s]

Epoch: 3, Loss:  0.7631953358650208


11001it [59:02,  3.09it/s]

Epoch: 3, Loss:  0.6544004082679749


11501it [1:01:43,  3.12it/s]

Epoch: 3, Loss:  0.5654216408729553


12001it [1:04:24,  3.08it/s]

Epoch: 3, Loss:  0.7241672873497009


12501it [1:07:04,  3.06it/s]

Epoch: 3, Loss:  1.2614123821258545


13001it [1:09:44,  3.12it/s]

Epoch: 3, Loss:  0.6990433931350708


13501it [1:12:25,  3.09it/s]

Epoch: 3, Loss:  0.9008471369743347


14001it [1:15:06,  3.08it/s]

Epoch: 3, Loss:  0.5486735701560974


14501it [1:17:47,  3.08it/s]

Epoch: 3, Loss:  1.1068716049194336


15000it [1:20:28,  3.11it/s]


1


0it [00:00, ?it/s]

Epoch: 4, Loss:  0.553309440612793


501it [02:41,  3.10it/s]

Epoch: 4, Loss:  0.39556920528411865


1001it [05:22,  3.12it/s]

Epoch: 4, Loss:  0.5178959369659424


1501it [08:03,  3.10it/s]

Epoch: 4, Loss:  0.22274990379810333


2001it [10:43,  3.12it/s]

Epoch: 4, Loss:  0.1613885909318924


2501it [13:24,  3.11it/s]

Epoch: 4, Loss:  0.6068313717842102


3001it [16:05,  3.11it/s]

Epoch: 4, Loss:  0.6459349393844604


3501it [18:46,  3.09it/s]

Epoch: 4, Loss:  0.40102049708366394


4001it [21:26,  3.10it/s]

Epoch: 4, Loss:  0.3739315867424011


4501it [24:07,  3.11it/s]

Epoch: 4, Loss:  0.4687759280204773


5001it [26:48,  3.06it/s]

Epoch: 4, Loss:  0.4661426246166229


5501it [29:29,  3.07it/s]

Epoch: 4, Loss:  0.418777197599411


6001it [32:10,  3.11it/s]

Epoch: 4, Loss:  0.35251444578170776


6501it [34:51,  3.09it/s]

Epoch: 4, Loss:  0.3221701979637146


7001it [37:32,  3.10it/s]

Epoch: 4, Loss:  0.4203118085861206


7501it [40:13,  3.08it/s]

Epoch: 4, Loss:  0.3403947055339813


8001it [42:55,  3.10it/s]

Epoch: 4, Loss:  0.41524913907051086


8501it [45:36,  3.12it/s]

Epoch: 4, Loss:  0.4792528748512268


9001it [48:17,  3.11it/s]

Epoch: 4, Loss:  0.6170259118080139


9501it [50:58,  3.11it/s]

Epoch: 4, Loss:  0.7576828598976135


10001it [53:39,  3.10it/s]

Epoch: 4, Loss:  0.30929121375083923


10501it [56:20,  3.08it/s]

Epoch: 4, Loss:  0.3911148011684418


11001it [59:01,  3.12it/s]

Epoch: 4, Loss:  0.47629526257514954


11501it [1:01:42,  3.11it/s]

Epoch: 4, Loss:  0.5125318765640259


12001it [1:04:23,  3.11it/s]

Epoch: 4, Loss:  0.48405182361602783


12501it [1:07:04,  3.08it/s]

Epoch: 4, Loss:  0.2703123390674591


13001it [1:09:45,  3.11it/s]

Epoch: 4, Loss:  0.8215668797492981


13501it [1:12:26,  3.05it/s]

Epoch: 4, Loss:  0.6029293537139893


14001it [1:15:08,  3.07it/s]

Epoch: 4, Loss:  0.6545106172561646


14501it [1:17:51,  3.05it/s]

Epoch: 4, Loss:  0.6633605360984802


15000it [1:20:33,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 5, Loss:  0.22653013467788696


501it [02:43,  3.08it/s]

Epoch: 5, Loss:  0.31699085235595703


1001it [05:25,  3.09it/s]

Epoch: 5, Loss:  0.40170884132385254


1501it [08:07,  3.06it/s]

Epoch: 5, Loss:  0.23482146859169006


2001it [10:48,  3.11it/s]

Epoch: 5, Loss:  0.28319641947746277


2501it [13:29,  3.10it/s]

Epoch: 5, Loss:  0.37629228830337524


3001it [16:11,  3.10it/s]

Epoch: 5, Loss:  0.3280159533023834


3501it [18:52,  3.11it/s]

Epoch: 5, Loss:  0.5974920988082886


4001it [21:33,  3.12it/s]

Epoch: 5, Loss:  0.10247848182916641


4501it [24:14,  3.10it/s]

Epoch: 5, Loss:  0.17368800938129425


5001it [26:55,  3.09it/s]

Epoch: 5, Loss:  0.26269498467445374


5501it [29:36,  3.12it/s]

Epoch: 5, Loss:  0.5866430997848511


6001it [32:17,  3.09it/s]

Epoch: 5, Loss:  0.31949517130851746


6501it [34:58,  3.10it/s]

Epoch: 5, Loss:  0.35064369440078735


7001it [37:39,  3.10it/s]

Epoch: 5, Loss:  0.2615039646625519


7501it [40:20,  3.11it/s]

Epoch: 5, Loss:  0.23280107975006104


8001it [43:01,  3.09it/s]

Epoch: 5, Loss:  0.36035406589508057


8501it [45:42,  3.10it/s]

Epoch: 5, Loss:  0.1710204929113388


9001it [48:23,  3.12it/s]

Epoch: 5, Loss:  0.44056436419487


9501it [51:04,  3.12it/s]

Epoch: 5, Loss:  0.6573999524116516


10001it [53:45,  3.09it/s]

Epoch: 5, Loss:  0.5594220757484436


10501it [56:26,  3.12it/s]

Epoch: 5, Loss:  0.49362656474113464


11001it [59:06,  3.10it/s]

Epoch: 5, Loss:  0.3739760220050812


11501it [1:01:47,  3.12it/s]

Epoch: 5, Loss:  0.43863487243652344


12001it [1:04:28,  3.11it/s]

Epoch: 5, Loss:  0.7076170444488525


12501it [1:07:09,  3.10it/s]

Epoch: 5, Loss:  0.41047629714012146


13001it [1:09:50,  3.10it/s]

Epoch: 5, Loss:  0.19617478549480438


13501it [1:12:31,  3.11it/s]

Epoch: 5, Loss:  0.2886471152305603


14001it [1:15:12,  3.11it/s]

Epoch: 5, Loss:  0.4517686665058136


14501it [1:17:52,  3.11it/s]

Epoch: 5, Loss:  0.2901334762573242


15000it [1:20:33,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 6, Loss:  0.1994551122188568


501it [02:41,  3.11it/s]

Epoch: 6, Loss:  0.1214165911078453


1001it [05:22,  3.11it/s]

Epoch: 6, Loss:  0.3169296383857727


1501it [08:03,  3.09it/s]

Epoch: 6, Loss:  0.2781614661216736


2001it [10:43,  3.11it/s]

Epoch: 6, Loss:  0.3606007397174835


2501it [13:24,  3.12it/s]

Epoch: 6, Loss:  0.18869376182556152


3001it [16:04,  3.12it/s]

Epoch: 6, Loss:  0.4989849030971527


3501it [18:45,  3.12it/s]

Epoch: 6, Loss:  0.13562537729740143


4001it [21:26,  3.11it/s]

Epoch: 6, Loss:  0.2333647459745407


4501it [24:07,  3.11it/s]

Epoch: 6, Loss:  0.793557345867157


5001it [26:48,  3.10it/s]

Epoch: 6, Loss:  0.16532528400421143


5501it [29:29,  3.12it/s]

Epoch: 6, Loss:  0.4117276966571808


6001it [32:09,  3.05it/s]

Epoch: 6, Loss:  0.2587834298610687


6501it [34:50,  3.12it/s]

Epoch: 6, Loss:  0.2946995794773102


7001it [37:30,  3.08it/s]

Epoch: 6, Loss:  0.1564488410949707


7501it [40:11,  3.09it/s]

Epoch: 6, Loss:  0.3530784845352173


8001it [42:52,  3.12it/s]

Epoch: 6, Loss:  0.2209458351135254


8501it [45:32,  3.13it/s]

Epoch: 6, Loss:  0.2905611991882324


9001it [48:13,  3.11it/s]

Epoch: 6, Loss:  0.39572572708129883


9501it [50:53,  3.12it/s]

Epoch: 6, Loss:  0.32308429479599


10001it [53:34,  3.10it/s]

Epoch: 6, Loss:  0.4442180097103119


10501it [56:15,  3.08it/s]

Epoch: 6, Loss:  0.31406375765800476


11001it [58:56,  3.08it/s]

Epoch: 6, Loss:  0.22483348846435547


11501it [1:01:36,  3.08it/s]

Epoch: 6, Loss:  0.31179502606391907


12001it [1:04:17,  3.11it/s]

Epoch: 6, Loss:  0.18244598805904388


12501it [1:06:58,  3.09it/s]

Epoch: 6, Loss:  0.33337900042533875


13001it [1:09:39,  3.10it/s]

Epoch: 6, Loss:  0.27522820234298706


13501it [1:12:20,  3.12it/s]

Epoch: 6, Loss:  0.5756389498710632


14001it [1:15:00,  3.12it/s]

Epoch: 6, Loss:  0.5084565281867981


14501it [1:17:41,  3.09it/s]

Epoch: 6, Loss:  0.1371391862630844


15000it [1:20:22,  3.11it/s]


1


0it [00:00, ?it/s]

Epoch: 7, Loss:  0.20719966292381287


501it [02:41,  3.11it/s]

Epoch: 7, Loss:  0.3060154616832733


1001it [05:22,  3.09it/s]

Epoch: 7, Loss:  0.26412370800971985


1501it [08:04,  3.10it/s]

Epoch: 7, Loss:  0.32056599855422974


2001it [10:46,  3.07it/s]

Epoch: 7, Loss:  0.21488922834396362


2501it [13:28,  3.08it/s]

Epoch: 7, Loss:  0.19973832368850708


3001it [16:10,  3.08it/s]

Epoch: 7, Loss:  0.15044258534908295


3501it [18:51,  3.11it/s]

Epoch: 7, Loss:  0.3923029601573944


4001it [21:31,  3.13it/s]

Epoch: 7, Loss:  0.1746978759765625


4501it [24:11,  3.10it/s]

Epoch: 7, Loss:  0.2906644940376282


5001it [26:52,  3.11it/s]

Epoch: 7, Loss:  0.21682310104370117


5501it [29:33,  3.12it/s]

Epoch: 7, Loss:  0.09197615832090378


6001it [32:14,  3.07it/s]

Epoch: 7, Loss:  0.3062528073787689


6501it [34:56,  3.09it/s]

Epoch: 7, Loss:  0.3518678843975067


7001it [37:37,  3.11it/s]

Epoch: 7, Loss:  0.18053127825260162


7501it [40:18,  3.09it/s]

Epoch: 7, Loss:  0.25624755024909973


8001it [42:59,  3.11it/s]

Epoch: 7, Loss:  0.3156527280807495


8501it [45:39,  3.10it/s]

Epoch: 7, Loss:  0.3190787434577942


9001it [48:21,  3.10it/s]

Epoch: 7, Loss:  0.27617183327674866


9501it [51:02,  3.06it/s]

Epoch: 7, Loss:  0.22753731906414032


10001it [53:43,  3.12it/s]

Epoch: 7, Loss:  0.3326137363910675


10501it [56:24,  3.10it/s]

Epoch: 7, Loss:  0.31630194187164307


11001it [59:05,  3.11it/s]

Epoch: 7, Loss:  0.7295558452606201


11501it [1:01:46,  3.06it/s]

Epoch: 7, Loss:  0.5603418350219727


12001it [1:04:27,  3.09it/s]

Epoch: 7, Loss:  0.3119117319583893


12501it [1:07:08,  3.12it/s]

Epoch: 7, Loss:  0.31219807267189026


13001it [1:09:49,  3.11it/s]

Epoch: 7, Loss:  0.33651384711265564


13501it [1:12:31,  3.11it/s]

Epoch: 7, Loss:  0.5169351696968079


14001it [1:15:12,  3.10it/s]

Epoch: 7, Loss:  0.40531450510025024


14501it [1:17:53,  3.12it/s]

Epoch: 7, Loss:  0.31074434518814087


15000it [1:20:33,  3.10it/s]


In [None]:
# Write the final tokenizer and then the final model into the same directory.
for artifact in (tokenizer, model):
    artifact.save_pretrained('/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/pretrained_270000/')

test

In [None]:
# Rename the two columns to the names CustomDataset reads:
# `ctext` (source document) and `text` (target summary).
validation_dataset.columns = ['ctext', 'text']

# Prepend the task prefix only where it is missing, so that re-running this
# cell does not stack the prefix onto the sources a second time.
prefix = 'summarize: '
already_prefixed = validation_dataset.ctext.astype(str).str.startswith(prefix)
validation_dataset.ctext = validation_dataset.ctext.where(
    already_prefixed, prefix + validation_dataset.ctext
)

val_set = CustomDataset(validation_dataset, tokenizer, config.MAX_LEN, config.SUMMARY_LEN)
val_loader = DataLoader(val_set, **val_params)

# Bound before the loop so that VAL_EPOCHS == 0 is reported below instead of
# failing with a NameError at the to_csv call.
final_df = None
for epoch in range(config.VAL_EPOCHS):
    predictions, actuals = validate(epoch, tokenizer, model, device, val_loader)
    final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})

if final_df is None:
    print('config.VAL_EPOCHS is 0: no predictions were generated, nothing was written.')
else:
    final_df.to_csv('/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/final_df_train270000(8).csv')



Completed 0
Completed 100
Completed 200
