참고 url : https://github.com/abhimishra91/transformers-tutorials/blob/master/transformers_summarization_wandb.ipynb

In [None]:
!pip install transformers
!pip install sentencepiece==0.1.91

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 15.1 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 75.0 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.1.2-py3-none-any.whl (59 kB)
[K     |████████████████████████████████| 59 kB 7.2 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 93.1 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 80.2 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Atte

transformers version = '4.12.5'

In [None]:
# Mount Google Drive at /content/drive; the checkpoint and dataset paths used in later
# cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# model.generate(pieces)
from transformers import T5Config, T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader

In [None]:
# Checkpoint directory on the mounted Drive; both the model and the tokenizer are loaded from it.
model_folder = '/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/pretrained_180000'

# Load the T5 sequence-to-sequence model and its tokenizer from the same directory.
model = T5ForConditionalGeneration.from_pretrained(model_folder)
tokenizer = T5Tokenizer.from_pretrained(model_folder)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
class CustomDataset:
    """Map-style dataset that tokenizes (source document, target summary) pairs on access.

    Args:
        dataframe: pandas DataFrame with a ``ctext`` column (source text) and a
            ``text`` column (target summary).
        tokenizer: tokenizer exposing ``batch_encode_plus``.
        source_len: fixed length the source sequence is padded/truncated to.
        summ_len: fixed length the target sequence is padded/truncated to.
    """

    def __init__(self, dataframe, tokenizer, source_len, summ_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = summ_len
        self.text = self.data.text
        self.ctext = self.data.ctext

    def __len__(self):
        """Return the number of rows (one example per row)."""
        return len(self.text)

    def _encode(self, raw, max_length):
        """Collapse whitespace in ``raw`` and encode it to exactly ``max_length`` tokens."""
        cleaned = ' '.join(str(raw).split())
        return self.tokenizer.batch_encode_plus(
            [cleaned],
            max_length=max_length,
            # Explicit padding/truncation replace the deprecated `pad_to_max_length=True`
            # and silence the "Truncation was not explicitly activated" warning.
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

    def __getitem__(self, index):
        """Return the encoded example at position ``index`` (0-based).

        Returns:
            dict with ``source_ids``, ``source_mask``, ``target_ids`` and
            ``target_ids_y`` (same values as ``target_ids``), all ``torch.long``.
        """
        # Positional lookup: a DataLoader hands out 0..len-1, which only coincides with
        # the frame's index labels when the frame has a fresh 0-based index.
        source = self._encode(self.ctext.iloc[index], self.source_len)
        target = self._encode(self.text.iloc[index], self.summ_len)

        # Drop only the leading batch dimension added by batch_encode_plus.
        source_ids = source['input_ids'].squeeze(0)
        source_mask = source['attention_mask'].squeeze(0)
        target_ids = target['input_ids'].squeeze(0)

        return {
            'source_ids': source_ids.to(dtype=torch.long),
            'source_mask': source_mask.to(dtype=torch.long),
            'target_ids': target_ids.to(dtype=torch.long),
            'target_ids_y': target_ids.to(dtype=torch.long)
        }

In [None]:
def train(epoch, tokenizer, model, device, loader, optimizer):
    """Run one training pass over ``loader`` and update ``model`` in place.

    Args:
        epoch: epoch number, used only in the progress message.
        tokenizer: only its ``pad_token_id`` is read, to mask padding in the labels.
        model: sequence-to-sequence model called with ``input_ids``, ``attention_mask``,
            ``decoder_input_ids`` and ``labels``; its first output is taken as the loss.
        device: device the batch tensors are moved to.
        loader: iterable of batches with ``source_ids``, ``source_mask`` and ``target_ids``.
        optimizer: optimizer stepped once per batch.
    """
    model.train()
    # Wrap the loader itself (not the enumerate object) so tqdm can read its length
    # and show a total and ETA.
    for step, batch in enumerate(tqdm(loader)):
        target_ids = batch['target_ids'].to(device, dtype=torch.long)
        # Teacher forcing: the decoder sees tokens [0, n-1) and is scored on tokens [1, n).
        decoder_input_ids = target_ids[:, :-1].contiguous()
        lm_labels = target_ids[:, 1:].clone().detach()
        # -100 is the ignore index in the Hugging Face `labels` convention (see the
        # T5ForConditionalGeneration docs), so padding positions do not contribute to the loss.
        lm_labels[lm_labels == tokenizer.pad_token_id] = -100
        input_ids = batch['source_ids'].to(device, dtype=torch.long)
        attention_mask = batch['source_mask'].to(device, dtype=torch.long)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            labels=lm_labels,
        )
        loss = outputs[0]

        if step % 500 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
def validate(epoch, tokenizer, model, device, loader, max_length=150, num_beams=2):
    """Generate summaries for every batch in ``loader`` and decode them alongside the references.

    Args:
        epoch: accepted for signature symmetry with ``train``; not used in the body.
        tokenizer: used to decode generated and reference token ids to text.
        model: model exposing ``eval()`` and ``generate(...)``.
        device: device the batch tensors are moved to.
        loader: iterable of batches with ``source_ids``, ``source_mask`` and ``target_ids``.
        max_length: maximum generated length passed to ``generate`` (default 150).
        num_beams: beam width passed to ``generate`` (default 2).

    Returns:
        ``(predictions, actuals)``: two lists of decoded strings, in loader order.
    """
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for step, batch in enumerate(loader):
            reference_ids = batch['target_ids'].to(device, dtype=torch.long)
            input_ids = batch['source_ids'].to(device, dtype=torch.long)
            attention_mask = batch['source_mask'].to(device, dtype=torch.long)

            generated_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=max_length,
                num_beams=num_beams,
                repetition_penalty=2.5,
                length_penalty=1.0,
                early_stopping=True,
            )
            predictions.extend(
                tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for g in generated_ids
            )
            actuals.extend(
                tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)
                for t in reference_ids
            )

            if step % 100 == 0:
                print(f'Completed {step}')
    return predictions, actuals

In [None]:
# Move the model to the selected device. The trailing semicolon keeps the notebook from
# echoing the full module repr as cell output.
model.to(device);

T5ForConditionalGeneration(
  (shared): Embedding(45100, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(45100, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedGeluDense(
              (wi_0): Linear(in_features=768, out_features=3072, bias=False)
              (wi_1): Linear(in_features=768, out_features=3072, bias=False)
              (wo)

hyper-parameters

In [None]:
# Hyper-parameters for this run. The T5Config instance is used here purely as an attribute
# container; in the cells shown it is never handed to the already-loaded model.
config = T5Config()
config.MAX_LEN = 1024          # length source documents are padded/truncated to (CustomDataset source_len)
config.SUMMARY_LEN = 150       # length target summaries are padded/truncated to (CustomDataset summ_len)
config.TRAIN_BATCH_SIZE = 2    # batch size of the training DataLoader
config.VALID_BATCH_SIZE = 2    # batch size of the validation DataLoader
config.TRAIN_EPOCHS = 8        # number of passes over the training slice
config.VAL_EPOCHS = 1
config.LEARNING_RATE = 1e-4    # learning rate given to the Adam optimizer
config.SEED = 42               # random seed

# Actually apply the seed: without this, SEED is never used and the shuffled batch order
# and dropout masks differ on every run.
torch.manual_seed(config.SEED)

In [None]:
# Keyword arguments for the two DataLoaders: training batches are shuffled,
# validation batches keep their original order. Both load in the main process.
train_params = dict(
    batch_size=config.TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)

val_params = dict(
    batch_size=config.VALID_BATCH_SIZE,
    shuffle=False,
    num_workers=0,
)

# Adam over every model parameter, at the configured learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=config.LEARNING_RATE)

dataset

In [None]:
import pandas as pd

# Take rows 180000..239999 of the training file and the first 500 validation rows.
# Reset to a fresh 0-based index so the row labels match the 0..len-1 positions that a
# DataLoader samples; the slice alone would keep labels starting at 180000.
train_dataset = (
    pd.read_csv('/content/drive/MyDrive/3차 프로젝트/dataset/train.csv')[['document', 'label']]
    .iloc[180000:240000]
    .reset_index(drop=True)
)
validation_dataset = (
    pd.read_csv('/content/drive/MyDrive/3차 프로젝트/dataset/valid.csv')[['document', 'label']]
    .iloc[:500]
    .reset_index(drop=True)
)

train

In [None]:
# Rename the columns to the names CustomDataset reads (`ctext` = source, `text` = target)
# and prepend the task prefix to every source document.
# NOTE: this mutates train_dataset in place, so re-running the cell prepends the prefix again.
train_dataset.columns = ['ctext', 'text']
train_dataset.ctext = 'summarize: ' + train_dataset.ctext

training_set = CustomDataset(train_dataset, tokenizer, config.MAX_LEN, config.SUMMARY_LEN)
training_loader = DataLoader(training_set, **train_params)

# Each epoch is saved to its own numbered sub-directory, so earlier checkpoints are kept.
checkpoint_root = '/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/pretrained_240000'

for epoch in range(config.TRAIN_EPOCHS):
    print(f'Starting epoch {epoch}')
    train(epoch, tokenizer, model, device, training_loader, optimizer)
    epoch_dir = '{}/{}'.format(checkpoint_root, epoch)
    tokenizer.save_pretrained(epoch_dir)
    model.save_pretrained(epoch_dir)

1


0it [00:00, ?it/s]Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Epoch: 0, Loss:  2.7956812381744385


501it [02:41,  3.09it/s]

Epoch: 0, Loss:  2.5159385204315186


1001it [05:22,  3.11it/s]

Epoch: 0, Loss:  2.1523854732513428


1501it [08:03,  3.09it/s]

Epoch: 0, Loss:  0.9482724070549011


2001it [10:44,  3.10it/s]

Epoch: 0, Loss:  3.469827175140381


2501it [13:25,  3.10it/s]

Epoch: 0, Loss:  1.435721755027771


3001it [16:06,  3.11it/s]

Epoch: 0, Loss:  1.1680275201797485


3501it [18:47,  3.10it/s]

Epoch: 0, Loss:  3.6756670475006104


4001it [21:27,  3.10it/s]

Epoch: 0, Loss:  2.8124852180480957


4501it [24:08,  3.10it/s]

Epoch: 0, Loss:  3.4577229022979736


5001it [26:49,  3.10it/s]

Epoch: 0, Loss:  2.5418527126312256


5501it [29:30,  3.11it/s]

Epoch: 0, Loss:  1.4582209587097168


6001it [32:11,  3.10it/s]

Epoch: 0, Loss:  1.8036681413650513


6501it [34:52,  3.09it/s]

Epoch: 0, Loss:  1.7646421194076538


7001it [37:33,  3.10it/s]

Epoch: 0, Loss:  2.17376708984375


7501it [40:14,  3.11it/s]

Epoch: 0, Loss:  2.638796806335449


8001it [42:55,  3.11it/s]

Epoch: 0, Loss:  1.0689356327056885


8501it [45:36,  3.11it/s]

Epoch: 0, Loss:  0.929603636264801


9001it [48:17,  3.12it/s]

Epoch: 0, Loss:  1.0394753217697144


9501it [50:57,  3.10it/s]

Epoch: 0, Loss:  1.850478172302246


10001it [53:38,  3.11it/s]

Epoch: 0, Loss:  1.7601763010025024


10501it [56:19,  3.09it/s]

Epoch: 0, Loss:  2.025336742401123


11001it [59:00,  3.11it/s]

Epoch: 0, Loss:  0.8179364204406738


11501it [1:01:41,  3.11it/s]

Epoch: 0, Loss:  1.7285693883895874


12001it [1:04:22,  3.10it/s]

Epoch: 0, Loss:  1.5902973413467407


12501it [1:07:03,  3.10it/s]

Epoch: 0, Loss:  1.7614890336990356


13001it [1:09:44,  3.10it/s]

Epoch: 0, Loss:  1.6550322771072388


13501it [1:12:26,  3.09it/s]

Epoch: 0, Loss:  0.8843523859977722


14001it [1:15:06,  3.10it/s]

Epoch: 0, Loss:  1.305621862411499


14501it [1:17:47,  3.10it/s]

Epoch: 0, Loss:  1.3588950634002686


15001it [1:20:29,  3.10it/s]

Epoch: 0, Loss:  1.2394390106201172


15501it [1:23:10,  3.10it/s]

Epoch: 0, Loss:  1.4224064350128174


16001it [1:25:51,  3.11it/s]

Epoch: 0, Loss:  1.7435628175735474


16501it [1:28:32,  3.11it/s]

Epoch: 0, Loss:  2.292792320251465


17001it [1:31:12,  3.11it/s]

Epoch: 0, Loss:  2.047661542892456


17501it [1:33:53,  3.10it/s]

Epoch: 0, Loss:  1.7957462072372437


18001it [1:36:34,  3.12it/s]

Epoch: 0, Loss:  1.3124027252197266


18501it [1:39:15,  3.10it/s]

Epoch: 0, Loss:  1.784183382987976


19001it [1:41:56,  3.11it/s]

Epoch: 0, Loss:  1.715043306350708


19501it [1:44:36,  3.11it/s]

Epoch: 0, Loss:  1.5449421405792236


20001it [1:47:17,  3.11it/s]

Epoch: 0, Loss:  1.3719090223312378


20501it [1:49:57,  3.11it/s]

Epoch: 0, Loss:  2.2665767669677734


21001it [1:52:38,  3.11it/s]

Epoch: 0, Loss:  1.6124848127365112


21501it [1:55:19,  3.12it/s]

Epoch: 0, Loss:  1.4642587900161743


22001it [1:57:59,  3.11it/s]

Epoch: 0, Loss:  1.6497584581375122


22501it [2:00:40,  3.11it/s]

Epoch: 0, Loss:  1.2816178798675537


23001it [2:03:21,  3.11it/s]

Epoch: 0, Loss:  1.9381119012832642


23501it [2:06:01,  3.10it/s]

Epoch: 0, Loss:  2.50409197807312


24001it [2:08:42,  3.11it/s]

Epoch: 0, Loss:  2.702740430831909


24501it [2:11:23,  3.12it/s]

Epoch: 0, Loss:  1.6957248449325562


25001it [2:14:03,  3.09it/s]

Epoch: 0, Loss:  1.814744472503662


25501it [2:16:44,  3.11it/s]

Epoch: 0, Loss:  1.6157152652740479


26001it [2:19:25,  3.09it/s]

Epoch: 0, Loss:  1.2298920154571533


26501it [2:22:06,  3.10it/s]

Epoch: 0, Loss:  3.3022286891937256


27001it [2:24:47,  3.10it/s]

Epoch: 0, Loss:  1.3379595279693604


27501it [2:27:28,  3.10it/s]

Epoch: 0, Loss:  1.8356289863586426


28001it [2:30:09,  3.10it/s]

Epoch: 0, Loss:  1.2782257795333862


28501it [2:32:50,  3.10it/s]

Epoch: 0, Loss:  1.8927420377731323


29001it [2:35:31,  3.11it/s]

Epoch: 0, Loss:  1.437551498413086


29501it [2:38:12,  3.11it/s]

Epoch: 0, Loss:  1.9553065299987793


30000it [2:40:52,  3.11it/s]


1


0it [00:00, ?it/s]

Epoch: 1, Loss:  1.7787113189697266


501it [02:41,  3.10it/s]

Epoch: 1, Loss:  1.195456624031067


1001it [05:22,  3.11it/s]

Epoch: 1, Loss:  1.2693545818328857


1501it [08:02,  3.12it/s]

Epoch: 1, Loss:  1.315719723701477


2001it [10:43,  3.10it/s]

Epoch: 1, Loss:  0.8551998734474182


2501it [13:24,  3.09it/s]

Epoch: 1, Loss:  1.1148326396942139


3001it [16:05,  3.11it/s]

Epoch: 1, Loss:  1.0523558855056763


3501it [18:46,  3.11it/s]

Epoch: 1, Loss:  1.5478475093841553


4001it [21:26,  3.08it/s]

Epoch: 1, Loss:  0.9146776795387268


4501it [24:07,  3.09it/s]

Epoch: 1, Loss:  0.7540760636329651


5001it [26:48,  3.10it/s]

Epoch: 1, Loss:  1.3508626222610474


5501it [29:29,  3.08it/s]

Epoch: 1, Loss:  1.574434518814087


6001it [32:11,  3.10it/s]

Epoch: 1, Loss:  1.752839207649231


6501it [34:52,  3.10it/s]

Epoch: 1, Loss:  2.801276206970215


7001it [37:33,  3.10it/s]

Epoch: 1, Loss:  0.843514084815979


7501it [40:14,  3.10it/s]

Epoch: 1, Loss:  1.93288254737854


8001it [42:55,  3.10it/s]

Epoch: 1, Loss:  2.0069069862365723


8501it [45:36,  3.10it/s]

Epoch: 1, Loss:  1.799611210823059


9001it [48:16,  3.10it/s]

Epoch: 1, Loss:  1.1077163219451904


9501it [50:57,  3.09it/s]

Epoch: 1, Loss:  0.9967432022094727


10001it [53:39,  3.10it/s]

Epoch: 1, Loss:  1.59481942653656


10501it [56:20,  3.10it/s]

Epoch: 1, Loss:  1.6276819705963135


11001it [59:01,  3.10it/s]

Epoch: 1, Loss:  1.6552518606185913


11501it [1:01:42,  3.10it/s]

Epoch: 1, Loss:  1.1989130973815918


12001it [1:04:23,  3.10it/s]

Epoch: 1, Loss:  0.7232471108436584


12501it [1:07:04,  3.10it/s]

Epoch: 1, Loss:  1.3034449815750122


13001it [1:09:45,  3.10it/s]

Epoch: 1, Loss:  1.2925347089767456


13501it [1:12:26,  3.10it/s]

Epoch: 1, Loss:  1.022601842880249


14001it [1:15:08,  3.09it/s]

Epoch: 1, Loss:  1.0935484170913696


14501it [1:17:49,  3.10it/s]

Epoch: 1, Loss:  1.010350227355957


15001it [1:20:30,  3.10it/s]

Epoch: 1, Loss:  1.3524761199951172


15501it [1:23:11,  3.09it/s]

Epoch: 1, Loss:  1.0596249103546143


16001it [1:25:52,  3.09it/s]

Epoch: 1, Loss:  1.7925654649734497


16501it [1:28:34,  3.09it/s]

Epoch: 1, Loss:  1.522524118423462


17001it [1:31:15,  3.10it/s]

Epoch: 1, Loss:  1.123423457145691


17501it [1:33:56,  3.11it/s]

Epoch: 1, Loss:  0.9173855185508728


18001it [1:36:37,  3.10it/s]

Epoch: 1, Loss:  1.125997543334961


18501it [1:39:18,  3.11it/s]

Epoch: 1, Loss:  1.4081143140792847


19001it [1:42:00,  3.10it/s]

Epoch: 1, Loss:  1.1286871433258057


19501it [1:44:41,  3.09it/s]

Epoch: 1, Loss:  0.6078309416770935


20001it [1:47:22,  3.11it/s]

Epoch: 1, Loss:  1.5993503332138062


20501it [1:50:03,  3.09it/s]

Epoch: 1, Loss:  0.5893189311027527


21001it [1:52:44,  3.10it/s]

Epoch: 1, Loss:  0.8210567235946655


21501it [1:55:25,  3.09it/s]

Epoch: 1, Loss:  0.8613072633743286


22001it [1:58:07,  3.09it/s]

Epoch: 1, Loss:  1.4087737798690796


22501it [2:00:48,  3.10it/s]

Epoch: 1, Loss:  0.9228498935699463


23001it [2:03:29,  3.10it/s]

Epoch: 1, Loss:  2.1876347064971924


23501it [2:06:10,  3.09it/s]

Epoch: 1, Loss:  1.0862922668457031


24001it [2:08:51,  3.10it/s]

Epoch: 1, Loss:  1.0242793560028076


24501it [2:11:32,  3.09it/s]

Epoch: 1, Loss:  1.1197925806045532


25001it [2:14:13,  3.11it/s]

Epoch: 1, Loss:  1.0878312587738037


25501it [2:16:54,  3.11it/s]

Epoch: 1, Loss:  1.1614536046981812


26001it [2:19:35,  3.11it/s]

Epoch: 1, Loss:  1.357168197631836


26501it [2:22:15,  3.11it/s]

Epoch: 1, Loss:  1.1572234630584717


27001it [2:24:56,  3.10it/s]

Epoch: 1, Loss:  1.6152427196502686


27501it [2:27:38,  3.11it/s]

Epoch: 1, Loss:  1.2984040975570679


28001it [2:30:19,  3.10it/s]

Epoch: 1, Loss:  2.7563304901123047


28501it [2:33:00,  3.10it/s]

Epoch: 1, Loss:  2.8338167667388916


29001it [2:35:41,  3.10it/s]

Epoch: 1, Loss:  1.5056662559509277


29501it [2:38:22,  3.09it/s]

Epoch: 1, Loss:  0.8921769261360168


30000it [2:41:03,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 2, Loss:  0.6407203078269958


501it [02:41,  3.10it/s]

Epoch: 2, Loss:  0.6592469215393066


1001it [05:22,  3.10it/s]

Epoch: 2, Loss:  1.000961422920227


1501it [08:04,  3.09it/s]

Epoch: 2, Loss:  0.9397891759872437


2001it [10:45,  3.11it/s]

Epoch: 2, Loss:  1.4391978979110718


2501it [13:27,  3.08it/s]

Epoch: 2, Loss:  1.3253151178359985


3001it [16:08,  3.11it/s]

Epoch: 2, Loss:  1.2288318872451782


3501it [18:49,  3.10it/s]

Epoch: 2, Loss:  1.16517972946167


4001it [21:29,  3.11it/s]

Epoch: 2, Loss:  1.4921547174453735


4501it [24:10,  3.11it/s]

Epoch: 2, Loss:  1.0421066284179688


5001it [26:51,  3.12it/s]

Epoch: 2, Loss:  1.4227975606918335


5501it [29:31,  3.10it/s]

Epoch: 2, Loss:  0.9614306092262268


6001it [32:12,  3.11it/s]

Epoch: 2, Loss:  0.6583174467086792


6501it [34:53,  3.11it/s]

Epoch: 2, Loss:  0.5513280630111694


7001it [37:34,  3.10it/s]

Epoch: 2, Loss:  1.0103631019592285


7501it [40:15,  3.09it/s]

Epoch: 2, Loss:  1.0676441192626953


8001it [42:55,  3.11it/s]

Epoch: 2, Loss:  1.2466368675231934


8501it [45:36,  3.11it/s]

Epoch: 2, Loss:  0.9462432265281677


9001it [48:17,  3.11it/s]

Epoch: 2, Loss:  0.8899595737457275


9501it [50:58,  3.10it/s]

Epoch: 2, Loss:  1.0239107608795166


10001it [53:38,  3.12it/s]

Epoch: 2, Loss:  1.0714155435562134


10501it [56:19,  3.11it/s]

Epoch: 2, Loss:  1.0833818912506104


11001it [58:59,  3.10it/s]

Epoch: 2, Loss:  1.413208246231079


11501it [1:01:40,  3.11it/s]

Epoch: 2, Loss:  1.415056824684143


12001it [1:04:21,  3.10it/s]

Epoch: 2, Loss:  0.9132978916168213


12501it [1:07:02,  3.11it/s]

Epoch: 2, Loss:  1.653555154800415


13001it [1:09:42,  3.12it/s]

Epoch: 2, Loss:  1.3352380990982056


13501it [1:12:23,  3.11it/s]

Epoch: 2, Loss:  0.8358277082443237


14001it [1:15:04,  3.11it/s]

Epoch: 2, Loss:  1.124769926071167


14501it [1:17:44,  3.11it/s]

Epoch: 2, Loss:  0.7014967203140259


15001it [1:20:25,  3.11it/s]

Epoch: 2, Loss:  0.7197559475898743


15501it [1:23:06,  3.12it/s]

Epoch: 2, Loss:  1.1815502643585205


16001it [1:25:47,  3.09it/s]

Epoch: 2, Loss:  0.859079897403717


16501it [1:28:28,  3.11it/s]

Epoch: 2, Loss:  1.7140188217163086


17001it [1:31:09,  3.11it/s]

Epoch: 2, Loss:  1.390525221824646


17501it [1:33:49,  3.11it/s]

Epoch: 2, Loss:  1.2556349039077759


18001it [1:36:30,  3.10it/s]

Epoch: 2, Loss:  1.7018401622772217


18501it [1:39:10,  3.11it/s]

Epoch: 2, Loss:  0.6795973181724548


19001it [1:41:51,  3.11it/s]

Epoch: 2, Loss:  1.2908025979995728


19501it [1:44:32,  3.10it/s]

Epoch: 2, Loss:  1.4856679439544678


20001it [1:47:13,  3.12it/s]

Epoch: 2, Loss:  1.1793917417526245


20501it [1:49:53,  3.12it/s]

Epoch: 2, Loss:  1.0328357219696045


21001it [1:52:34,  3.11it/s]

Epoch: 2, Loss:  1.1298633813858032


21501it [1:55:14,  3.08it/s]

Epoch: 2, Loss:  0.8406699895858765


22001it [1:57:55,  3.11it/s]

Epoch: 2, Loss:  0.8198291659355164


22501it [2:00:36,  3.12it/s]

Epoch: 2, Loss:  0.6370086073875427


23001it [2:03:17,  3.11it/s]

Epoch: 2, Loss:  1.0596539974212646


23501it [2:05:58,  3.11it/s]

Epoch: 2, Loss:  1.1875438690185547


24001it [2:08:38,  3.12it/s]

Epoch: 2, Loss:  0.9825984835624695


24501it [2:11:19,  3.10it/s]

Epoch: 2, Loss:  0.9707031846046448


25001it [2:14:00,  3.10it/s]

Epoch: 2, Loss:  1.1002717018127441


25501it [2:16:41,  3.11it/s]

Epoch: 2, Loss:  2.031580924987793


26001it [2:19:22,  3.10it/s]

Epoch: 2, Loss:  1.6529748439788818


26501it [2:22:03,  3.11it/s]

Epoch: 2, Loss:  1.6995959281921387


27001it [2:24:44,  3.11it/s]

Epoch: 2, Loss:  1.6860651969909668


27501it [2:27:24,  3.11it/s]

Epoch: 2, Loss:  0.8680760264396667


28001it [2:30:05,  3.11it/s]

Epoch: 2, Loss:  1.4628382921218872


28501it [2:32:46,  3.11it/s]

Epoch: 2, Loss:  1.0565340518951416


29001it [2:35:26,  3.10it/s]

Epoch: 2, Loss:  1.0212167501449585


29501it [2:38:07,  3.09it/s]

Epoch: 2, Loss:  1.386762022972107


30000it [2:40:48,  3.11it/s]


1


0it [00:00, ?it/s]

Epoch: 3, Loss:  1.103103518486023


501it [02:41,  3.11it/s]

Epoch: 3, Loss:  0.7113490700721741


1001it [05:22,  3.11it/s]

Epoch: 3, Loss:  0.29521670937538147


1501it [08:02,  3.10it/s]

Epoch: 3, Loss:  0.9040957689285278


2001it [10:43,  3.10it/s]

Epoch: 3, Loss:  1.2755919694900513


2501it [13:24,  3.10it/s]

Epoch: 3, Loss:  0.5269791483879089


3001it [16:05,  3.11it/s]

Epoch: 3, Loss:  0.8538177013397217


3501it [18:46,  3.11it/s]

Epoch: 3, Loss:  0.6699063777923584


4001it [21:27,  3.10it/s]

Epoch: 3, Loss:  0.6688711643218994


4501it [24:07,  3.11it/s]

Epoch: 3, Loss:  0.4627503454685211


5001it [26:48,  3.11it/s]

Epoch: 3, Loss:  0.5613758563995361


5501it [29:29,  3.11it/s]

Epoch: 3, Loss:  0.731299877166748


6001it [32:11,  3.10it/s]

Epoch: 3, Loss:  1.0570367574691772


6501it [34:52,  3.10it/s]

Epoch: 3, Loss:  0.645944356918335


7001it [37:33,  3.10it/s]

Epoch: 3, Loss:  1.4726041555404663


7501it [40:14,  3.09it/s]

Epoch: 3, Loss:  1.264609456062317


8001it [42:56,  3.10it/s]

Epoch: 3, Loss:  0.9232497811317444


8501it [45:37,  3.10it/s]

Epoch: 3, Loss:  0.5454822778701782


9001it [48:18,  3.11it/s]

Epoch: 3, Loss:  0.7747918367385864


9501it [50:59,  3.10it/s]

Epoch: 3, Loss:  1.4762904644012451


10001it [53:40,  3.10it/s]

Epoch: 3, Loss:  1.211963176727295


10501it [56:21,  3.10it/s]

Epoch: 3, Loss:  0.5930004119873047


11001it [59:02,  3.09it/s]

Epoch: 3, Loss:  0.6198540329933167


11501it [1:01:43,  3.10it/s]

Epoch: 3, Loss:  1.0296986103057861


12001it [1:04:25,  3.07it/s]

Epoch: 3, Loss:  0.6665574908256531


12501it [1:07:06,  3.08it/s]

Epoch: 3, Loss:  0.8784165978431702


13001it [1:09:48,  3.11it/s]

Epoch: 3, Loss:  0.7078545093536377


13501it [1:12:29,  3.10it/s]

Epoch: 3, Loss:  0.7262975573539734


14001it [1:15:10,  3.09it/s]

Epoch: 3, Loss:  1.2857481241226196


14501it [1:17:52,  3.09it/s]

Epoch: 3, Loss:  1.4210926294326782


15001it [1:20:34,  3.10it/s]

Epoch: 3, Loss:  0.4901924729347229


15501it [1:23:15,  3.09it/s]

Epoch: 3, Loss:  0.5861795544624329


16001it [1:25:56,  3.09it/s]

Epoch: 3, Loss:  0.9288579821586609


16501it [1:28:37,  3.10it/s]

Epoch: 3, Loss:  0.8219451904296875


17001it [1:31:19,  3.11it/s]

Epoch: 3, Loss:  0.8810670971870422


17501it [1:34:00,  3.11it/s]

Epoch: 3, Loss:  1.4027339220046997


18001it [1:36:42,  3.08it/s]

Epoch: 3, Loss:  0.5091787576675415


18501it [1:39:23,  3.09it/s]

Epoch: 3, Loss:  1.6428630352020264


19001it [1:42:05,  3.10it/s]

Epoch: 3, Loss:  0.6519935727119446


19501it [1:44:46,  3.10it/s]

Epoch: 3, Loss:  0.6122481226921082


20001it [1:47:27,  3.10it/s]

Epoch: 3, Loss:  0.7612443566322327


20501it [1:50:08,  3.09it/s]

Epoch: 3, Loss:  0.9120689630508423


21001it [1:52:49,  3.09it/s]

Epoch: 3, Loss:  1.1391116380691528


21501it [1:55:31,  3.10it/s]

Epoch: 3, Loss:  0.6460642218589783


22001it [1:58:12,  3.10it/s]

Epoch: 3, Loss:  0.6226919293403625


22501it [2:00:53,  3.10it/s]

Epoch: 3, Loss:  0.7865425944328308


23001it [2:03:34,  3.10it/s]

Epoch: 3, Loss:  0.5368874669075012


23501it [2:06:15,  3.09it/s]

Epoch: 3, Loss:  1.318390965461731


24001it [2:08:56,  3.10it/s]

Epoch: 3, Loss:  0.5945297479629517


24501it [2:11:38,  3.09it/s]

Epoch: 3, Loss:  0.9698657393455505


25001it [2:14:19,  3.09it/s]

Epoch: 3, Loss:  0.6783220767974854


25501it [2:17:00,  3.09it/s]

Epoch: 3, Loss:  0.7647081613540649


26001it [2:19:41,  3.11it/s]

Epoch: 3, Loss:  1.2425881624221802


26501it [2:22:23,  3.09it/s]

Epoch: 3, Loss:  1.4469993114471436


27001it [2:25:04,  3.08it/s]

Epoch: 3, Loss:  1.2770271301269531


27501it [2:27:46,  3.09it/s]

Epoch: 3, Loss:  1.3306076526641846


28001it [2:30:27,  3.08it/s]

Epoch: 3, Loss:  0.878879189491272


28501it [2:33:09,  3.08it/s]

Epoch: 3, Loss:  1.2545067071914673


29001it [2:35:50,  3.09it/s]

Epoch: 3, Loss:  0.8957182765007019


29501it [2:38:32,  3.10it/s]

Epoch: 3, Loss:  1.2829580307006836


30000it [2:41:13,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 4, Loss:  0.5335351824760437


501it [02:41,  3.10it/s]

Epoch: 4, Loss:  0.4108329713344574


1001it [05:23,  3.10it/s]

Epoch: 4, Loss:  1.0305262804031372


1501it [08:05,  3.07it/s]

Epoch: 4, Loss:  1.0562090873718262


2001it [10:46,  3.09it/s]

Epoch: 4, Loss:  0.5580390095710754


2501it [13:28,  3.09it/s]

Epoch: 4, Loss:  0.9040917754173279


3001it [16:09,  3.11it/s]

Epoch: 4, Loss:  0.5258870124816895


3501it [18:50,  3.09it/s]

Epoch: 4, Loss:  0.7340328097343445


4001it [21:32,  3.09it/s]

Epoch: 4, Loss:  0.5788702964782715


4501it [24:13,  3.10it/s]

Epoch: 4, Loss:  0.8945354223251343


5001it [26:54,  3.10it/s]

Epoch: 4, Loss:  0.3077186644077301


5501it [29:35,  3.11it/s]

Epoch: 4, Loss:  0.3224318325519562


6001it [32:17,  3.10it/s]

Epoch: 4, Loss:  0.9900875687599182


6501it [34:58,  3.09it/s]

Epoch: 4, Loss:  0.6482405066490173


7001it [37:39,  3.08it/s]

Epoch: 4, Loss:  1.081270456314087


7501it [40:21,  3.09it/s]

Epoch: 4, Loss:  0.7673035264015198


8001it [43:02,  3.09it/s]

Epoch: 4, Loss:  0.7804979085922241


8501it [45:44,  3.10it/s]

Epoch: 4, Loss:  0.8186880350112915


9001it [48:25,  3.09it/s]

Epoch: 4, Loss:  0.685904324054718


9501it [51:06,  3.09it/s]

Epoch: 4, Loss:  0.588789701461792


10001it [53:48,  3.09it/s]

Epoch: 4, Loss:  0.3307465612888336


10501it [56:29,  3.09it/s]

Epoch: 4, Loss:  0.5385408401489258


11001it [59:11,  3.09it/s]

Epoch: 4, Loss:  0.518177330493927


11501it [1:01:52,  3.10it/s]

Epoch: 4, Loss:  0.3971995711326599


12001it [1:04:34,  3.10it/s]

Epoch: 4, Loss:  0.8648342490196228


12501it [1:07:15,  3.11it/s]

Epoch: 4, Loss:  0.5900545120239258


13001it [1:09:56,  3.10it/s]

Epoch: 4, Loss:  0.4708506166934967


13501it [1:12:37,  3.11it/s]

Epoch: 4, Loss:  0.49416476488113403


14001it [1:15:18,  3.10it/s]

Epoch: 4, Loss:  0.5671615600585938


14501it [1:18:00,  3.10it/s]

Epoch: 4, Loss:  0.46230629086494446


15001it [1:20:41,  3.10it/s]

Epoch: 4, Loss:  0.7174120545387268


15501it [1:23:22,  3.09it/s]

Epoch: 4, Loss:  0.9506328105926514


16001it [1:26:03,  3.10it/s]

Epoch: 4, Loss:  0.8139284253120422


16501it [1:28:45,  3.09it/s]

Epoch: 4, Loss:  0.41984713077545166


17001it [1:31:26,  3.09it/s]

Epoch: 4, Loss:  0.8047968149185181


17501it [1:34:08,  3.10it/s]

Epoch: 4, Loss:  0.62107914686203


18001it [1:36:49,  3.09it/s]

Epoch: 4, Loss:  0.8993725776672363


18501it [1:39:31,  3.09it/s]

Epoch: 4, Loss:  0.8556673526763916


19001it [1:42:12,  3.10it/s]

Epoch: 4, Loss:  1.2551060914993286


19501it [1:44:54,  3.10it/s]

Epoch: 4, Loss:  0.574358344078064


20001it [1:47:35,  3.10it/s]

Epoch: 4, Loss:  0.8416591286659241


20501it [1:50:17,  3.10it/s]

Epoch: 4, Loss:  0.6608149409294128


21001it [1:52:58,  3.09it/s]

Epoch: 4, Loss:  0.6265941858291626


21501it [1:55:39,  3.10it/s]

Epoch: 4, Loss:  1.6520112752914429


22001it [1:58:20,  3.10it/s]

Epoch: 4, Loss:  0.31864508986473083


22501it [2:01:01,  3.10it/s]

Epoch: 4, Loss:  1.1738172769546509


23001it [2:03:42,  3.10it/s]

Epoch: 4, Loss:  0.4884195923805237


23501it [2:06:24,  3.10it/s]

Epoch: 4, Loss:  1.319908857345581


24001it [2:09:05,  3.10it/s]

Epoch: 4, Loss:  0.9953625798225403


24501it [2:11:46,  3.10it/s]

Epoch: 4, Loss:  0.45570576190948486


25001it [2:14:27,  3.09it/s]

Epoch: 4, Loss:  0.4811759293079376


25501it [2:17:08,  3.11it/s]

Epoch: 4, Loss:  0.7706590890884399


26001it [2:19:49,  3.11it/s]

Epoch: 4, Loss:  1.169522762298584


26501it [2:22:29,  3.10it/s]

Epoch: 4, Loss:  0.9689112305641174


27001it [2:25:11,  3.11it/s]

Epoch: 4, Loss:  0.6044816970825195


27501it [2:27:52,  3.10it/s]

Epoch: 4, Loss:  0.8283703327178955


28001it [2:30:33,  3.11it/s]

Epoch: 4, Loss:  0.8803883790969849


28501it [2:33:14,  3.09it/s]

Epoch: 4, Loss:  1.115579605102539


29001it [2:35:55,  3.09it/s]

Epoch: 4, Loss:  0.9042210578918457


29501it [2:38:36,  3.07it/s]

Epoch: 4, Loss:  0.7001240253448486


30000it [2:41:17,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 5, Loss:  0.5443477630615234


501it [02:41,  3.10it/s]

Epoch: 5, Loss:  0.6885237693786621


1001it [05:23,  3.10it/s]

Epoch: 5, Loss:  0.2649447023868561


1501it [08:04,  3.10it/s]

Epoch: 5, Loss:  0.6012579798698425


2001it [10:45,  3.10it/s]

Epoch: 5, Loss:  0.3706217408180237


2501it [13:27,  3.10it/s]

Epoch: 5, Loss:  0.7544947266578674


3001it [16:09,  3.09it/s]

Epoch: 5, Loss:  0.6087623834609985


3501it [18:50,  3.10it/s]

Epoch: 5, Loss:  0.35358768701553345


4001it [21:31,  3.09it/s]

Epoch: 5, Loss:  0.7011997103691101


4501it [24:12,  3.08it/s]

Epoch: 5, Loss:  0.2608339190483093


5001it [26:53,  3.10it/s]

Epoch: 5, Loss:  1.1278469562530518


5501it [29:34,  3.11it/s]

Epoch: 5, Loss:  0.8317840695381165


6001it [32:15,  3.10it/s]

Epoch: 5, Loss:  0.8064770698547363


6501it [34:56,  3.10it/s]

Epoch: 5, Loss:  0.6400701999664307


7001it [37:37,  3.11it/s]

Epoch: 5, Loss:  0.32238346338272095


7501it [40:18,  3.11it/s]

Epoch: 5, Loss:  0.8021599650382996


8001it [42:59,  3.10it/s]

Epoch: 5, Loss:  0.9241093397140503


8501it [45:41,  3.09it/s]

Epoch: 5, Loss:  0.721663236618042


9001it [48:22,  3.11it/s]

Epoch: 5, Loss:  0.4436914622783661


9501it [51:03,  3.10it/s]

Epoch: 5, Loss:  0.5718568563461304


10001it [53:44,  3.12it/s]

Epoch: 5, Loss:  0.7033871412277222


10501it [56:25,  3.10it/s]

Epoch: 5, Loss:  0.4189022183418274


11001it [59:06,  3.10it/s]

Epoch: 5, Loss:  0.3770742416381836


11501it [1:01:47,  3.09it/s]

Epoch: 5, Loss:  0.29815998673439026


12001it [1:04:29,  3.10it/s]

Epoch: 5, Loss:  0.3876277804374695


12501it [1:07:10,  3.10it/s]

Epoch: 5, Loss:  0.7876459956169128


13001it [1:09:51,  3.10it/s]

Epoch: 5, Loss:  0.3541468679904938


13501it [1:12:32,  3.10it/s]

Epoch: 5, Loss:  0.7017711400985718


14001it [1:15:13,  3.11it/s]

Epoch: 5, Loss:  0.4374992847442627


14501it [1:17:54,  3.10it/s]

Epoch: 5, Loss:  1.199769377708435


15001it [1:20:35,  3.11it/s]

Epoch: 5, Loss:  0.4833410680294037


15501it [1:23:15,  3.10it/s]

Epoch: 5, Loss:  0.9428569078445435


16001it [1:25:56,  3.11it/s]

Epoch: 5, Loss:  0.8746252655982971


16501it [1:28:37,  3.09it/s]

Epoch: 5, Loss:  0.6346144080162048


17001it [1:31:19,  3.10it/s]

Epoch: 5, Loss:  0.7153002023696899


17501it [1:34:00,  3.10it/s]

Epoch: 5, Loss:  0.3135095536708832


18001it [1:36:41,  3.09it/s]

Epoch: 5, Loss:  0.8894493579864502


18501it [1:39:22,  3.10it/s]

Epoch: 5, Loss:  0.5479254126548767


19001it [1:42:02,  3.11it/s]

Epoch: 5, Loss:  0.5862993597984314


19501it [1:44:43,  3.09it/s]

Epoch: 5, Loss:  0.9825848937034607


20001it [1:47:24,  3.10it/s]

Epoch: 5, Loss:  0.5363293290138245


20501it [1:50:06,  3.09it/s]

Epoch: 5, Loss:  0.7947150468826294


21001it [1:52:47,  3.09it/s]

Epoch: 5, Loss:  0.46033117175102234


21501it [1:55:28,  3.10it/s]

Epoch: 5, Loss:  0.7163848280906677


22001it [1:58:09,  3.11it/s]

Epoch: 5, Loss:  0.4314045310020447


22501it [2:00:50,  3.10it/s]

Epoch: 5, Loss:  1.4520502090454102


23001it [2:03:31,  3.10it/s]

Epoch: 5, Loss:  1.0701978206634521


23501it [2:06:12,  3.09it/s]

Epoch: 5, Loss:  0.5830410718917847


24001it [2:08:53,  3.09it/s]

Epoch: 5, Loss:  0.6180303692817688


24501it [2:11:34,  3.09it/s]

Epoch: 5, Loss:  0.4822272062301636


25001it [2:14:15,  3.09it/s]

Epoch: 5, Loss:  0.615375280380249


25501it [2:16:56,  3.11it/s]

Epoch: 5, Loss:  0.8506978750228882


26001it [2:19:37,  3.08it/s]

Epoch: 5, Loss:  0.477151483297348


26501it [2:22:18,  3.10it/s]

Epoch: 5, Loss:  0.601693868637085


27001it [2:24:59,  3.11it/s]

Epoch: 5, Loss:  0.9039182662963867


27501it [2:27:40,  3.10it/s]

Epoch: 5, Loss:  0.5584840178489685


28001it [2:30:21,  3.10it/s]

Epoch: 5, Loss:  0.4761050343513489


28501it [2:33:02,  3.10it/s]

Epoch: 5, Loss:  0.7160639762878418


29001it [2:35:43,  3.10it/s]

Epoch: 5, Loss:  0.514999270439148


29501it [2:38:25,  3.09it/s]

Epoch: 5, Loss:  0.8733649849891663


30000it [2:41:06,  3.10it/s]


1


0it [00:00, ?it/s]

Epoch: 6, Loss:  0.3346883952617645


501it [02:41,  3.11it/s]

Epoch: 6, Loss:  0.17759643495082855


1001it [05:23,  3.09it/s]

Epoch: 6, Loss:  0.30421948432922363


1501it [08:04,  3.11it/s]

Epoch: 6, Loss:  0.29742592573165894


2001it [10:45,  3.11it/s]

Epoch: 6, Loss:  0.5119505524635315


2501it [13:26,  3.08it/s]

Epoch: 6, Loss:  0.31513237953186035


3001it [16:07,  3.10it/s]

Epoch: 6, Loss:  0.23027434945106506


3501it [18:47,  3.10it/s]

Epoch: 6, Loss:  0.6162341833114624


4001it [21:28,  3.11it/s]

Epoch: 6, Loss:  0.4396574795246124


4501it [24:09,  3.11it/s]

Epoch: 6, Loss:  0.38974565267562866


5001it [26:50,  3.12it/s]

Epoch: 6, Loss:  0.17303483188152313


5501it [29:30,  3.10it/s]

Epoch: 6, Loss:  0.4197474718093872


6001it [32:11,  3.10it/s]

Epoch: 6, Loss:  0.666767418384552


6501it [34:52,  3.10it/s]

Epoch: 6, Loss:  0.27792873978614807


7001it [37:33,  3.10it/s]

Epoch: 6, Loss:  0.4175770580768585


7501it [40:14,  3.12it/s]

Epoch: 6, Loss:  0.4671332836151123


8001it [42:54,  3.11it/s]

Epoch: 6, Loss:  0.8038844466209412


8501it [45:35,  3.10it/s]

Epoch: 6, Loss:  0.36855798959732056


9001it [48:17,  3.10it/s]

Epoch: 6, Loss:  0.6652027368545532


9501it [50:58,  3.11it/s]

Epoch: 6, Loss:  0.4539434015750885


10001it [53:39,  3.10it/s]

Epoch: 6, Loss:  0.7328888177871704


10501it [56:20,  3.10it/s]

Epoch: 6, Loss:  0.6722214818000793


11001it [59:01,  3.11it/s]

Epoch: 6, Loss:  0.2279072403907776


11501it [1:01:42,  3.09it/s]

Epoch: 6, Loss:  0.2896758019924164


12001it [1:04:23,  3.09it/s]

Epoch: 6, Loss:  0.6846069693565369


12501it [1:07:03,  3.12it/s]

Epoch: 6, Loss:  0.6692290902137756


13001it [1:09:44,  3.11it/s]

Epoch: 6, Loss:  0.5062145590782166


13501it [1:12:25,  3.11it/s]

Epoch: 6, Loss:  0.2910391688346863


14001it [1:15:06,  3.10it/s]

Epoch: 6, Loss:  0.38416942954063416


14501it [1:17:47,  3.10it/s]

Epoch: 6, Loss:  0.28862464427948


15001it [1:20:28,  3.09it/s]

Epoch: 6, Loss:  0.531100869178772


15501it [1:23:09,  3.11it/s]

Epoch: 6, Loss:  0.37676092982292175


16001it [1:25:50,  3.11it/s]

Epoch: 6, Loss:  0.639582097530365


16501it [1:28:31,  3.10it/s]

Epoch: 6, Loss:  0.3835926055908203


17001it [1:31:12,  3.10it/s]

Epoch: 6, Loss:  0.9664195775985718


17501it [1:33:53,  3.10it/s]

Epoch: 6, Loss:  0.6001589298248291


18001it [1:36:34,  3.11it/s]

Epoch: 6, Loss:  0.6865969300270081


18501it [1:39:15,  3.11it/s]

Epoch: 6, Loss:  0.2590247094631195


19001it [1:41:56,  3.12it/s]

Epoch: 6, Loss:  0.8324717879295349


19501it [1:44:37,  3.11it/s]

Epoch: 6, Loss:  0.632510781288147


20001it [1:47:18,  3.10it/s]

Epoch: 6, Loss:  0.7275426387786865


20501it [1:49:58,  3.12it/s]

Epoch: 6, Loss:  0.743806004524231


21001it [1:52:39,  3.10it/s]

Epoch: 6, Loss:  0.3629722595214844


21501it [1:55:20,  3.01it/s]

Epoch: 6, Loss:  0.7202169299125671


22001it [1:58:01,  3.11it/s]

Epoch: 6, Loss:  0.48499369621276855


22501it [2:00:42,  3.10it/s]

Epoch: 6, Loss:  1.0275983810424805


23001it [2:03:23,  3.09it/s]

Epoch: 6, Loss:  0.4783518314361572


23501it [2:06:04,  3.09it/s]

Epoch: 6, Loss:  0.8427639603614807


24001it [2:08:45,  3.10it/s]

Epoch: 6, Loss:  0.5024862289428711


24501it [2:11:26,  3.11it/s]

Epoch: 6, Loss:  0.8334118723869324


25001it [2:14:07,  3.11it/s]

Epoch: 6, Loss:  0.744436502456665


25501it [2:16:49,  3.09it/s]

Epoch: 6, Loss:  0.43828505277633667


26001it [2:19:30,  3.10it/s]

Epoch: 6, Loss:  0.7154418230056763


26501it [2:22:11,  3.10it/s]

Epoch: 6, Loss:  1.11789870262146


27001it [2:24:52,  3.10it/s]

Epoch: 6, Loss:  0.7839925289154053


27501it [2:27:33,  3.10it/s]

Epoch: 6, Loss:  1.2710680961608887


28001it [2:30:15,  3.09it/s]

Epoch: 6, Loss:  0.5658734440803528


28501it [2:32:56,  3.11it/s]

Epoch: 6, Loss:  0.5023759603500366


29001it [2:35:37,  3.10it/s]

Epoch: 6, Loss:  0.5953693985939026


29501it [2:38:17,  3.11it/s]

Epoch: 6, Loss:  0.5470110177993774


30000it [2:40:58,  3.11it/s]


1


0it [00:00, ?it/s]

Epoch: 7, Loss:  0.3267504572868347


501it [02:41,  3.09it/s]

Epoch: 7, Loss:  0.22095446288585663


1001it [05:22,  3.10it/s]

Epoch: 7, Loss:  0.46394336223602295


1501it [08:03,  3.11it/s]

Epoch: 7, Loss:  0.3946826756000519


2001it [10:44,  3.10it/s]

Epoch: 7, Loss:  0.21825860440731049


2501it [13:25,  3.11it/s]

Epoch: 7, Loss:  0.4900188148021698


3001it [16:06,  3.11it/s]

Epoch: 7, Loss:  0.2622716426849365


3501it [18:48,  3.09it/s]

Epoch: 7, Loss:  0.350304514169693


4001it [21:28,  3.11it/s]

Epoch: 7, Loss:  0.2397524118423462


4501it [24:09,  3.10it/s]

Epoch: 7, Loss:  0.6626566648483276


5001it [26:50,  3.09it/s]

Epoch: 7, Loss:  0.22366170585155487


5501it [29:31,  3.08it/s]

Epoch: 7, Loss:  0.744671642780304


6001it [32:12,  3.10it/s]

Epoch: 7, Loss:  0.19651710987091064


6501it [34:53,  3.11it/s]

Epoch: 7, Loss:  0.3447667956352234


7001it [37:33,  3.12it/s]

Epoch: 7, Loss:  0.34927403926849365


7501it [40:14,  3.12it/s]

Epoch: 7, Loss:  0.6439229249954224


8001it [42:55,  3.11it/s]

Epoch: 7, Loss:  1.0735141038894653


8501it [45:35,  3.10it/s]

Epoch: 7, Loss:  0.23109345138072968


9001it [48:16,  3.10it/s]

Epoch: 7, Loss:  0.7114962339401245


9501it [50:58,  3.11it/s]

Epoch: 7, Loss:  0.40534543991088867


10001it [53:39,  3.10it/s]

Epoch: 7, Loss:  0.43840667605400085


10501it [56:21,  3.09it/s]

Epoch: 7, Loss:  0.4383693039417267


11001it [59:02,  3.10it/s]

Epoch: 7, Loss:  0.5356674790382385


11501it [1:01:43,  3.10it/s]

Epoch: 7, Loss:  0.47472119331359863


12001it [1:04:25,  3.08it/s]

Epoch: 7, Loss:  0.5687617063522339


12501it [1:07:06,  3.10it/s]

Epoch: 7, Loss:  0.49125537276268005


13001it [1:09:48,  3.09it/s]

Epoch: 7, Loss:  0.557453989982605


13501it [1:12:30,  3.08it/s]

Epoch: 7, Loss:  0.6239665746688843


14001it [1:15:12,  3.09it/s]

Epoch: 7, Loss:  0.18543964624404907


14501it [1:17:53,  3.09it/s]

Epoch: 7, Loss:  0.33443865180015564


15001it [1:20:35,  3.09it/s]

Epoch: 7, Loss:  0.11952280253171921


15501it [1:23:17,  3.10it/s]

Epoch: 7, Loss:  0.47193998098373413


16001it [1:25:58,  3.10it/s]

Epoch: 7, Loss:  0.4428119659423828


16501it [1:28:39,  3.09it/s]

Epoch: 7, Loss:  0.8623582124710083


17001it [1:31:21,  3.10it/s]

Epoch: 7, Loss:  0.43502742052078247


17501it [1:34:02,  3.10it/s]

Epoch: 7, Loss:  0.4342148005962372


18001it [1:36:43,  3.10it/s]

Epoch: 7, Loss:  0.37920817732810974


18501it [1:39:24,  3.10it/s]

Epoch: 7, Loss:  0.5914626717567444


19001it [1:42:06,  3.08it/s]

Epoch: 7, Loss:  0.4203943908214569


19501it [1:44:47,  3.09it/s]

Epoch: 7, Loss:  0.3275440037250519


20001it [1:47:29,  3.09it/s]

Epoch: 7, Loss:  0.39945903420448303


20501it [1:50:10,  3.10it/s]

Epoch: 7, Loss:  0.3450356423854828


21001it [1:52:52,  3.10it/s]

Epoch: 7, Loss:  0.5525482296943665


21501it [1:55:33,  3.09it/s]

Epoch: 7, Loss:  0.6092211604118347


22001it [1:58:14,  3.09it/s]

Epoch: 7, Loss:  0.7558051347732544


22501it [2:00:56,  3.10it/s]

Epoch: 7, Loss:  0.653830349445343


23001it [2:03:37,  3.08it/s]

Epoch: 7, Loss:  0.532883882522583


23501it [2:06:19,  3.10it/s]

Epoch: 7, Loss:  0.532683253288269


24001it [2:09:00,  3.09it/s]

Epoch: 7, Loss:  0.6879466772079468


24501it [2:11:41,  3.10it/s]

Epoch: 7, Loss:  0.4309919476509094


25001it [2:14:23,  3.10it/s]

Epoch: 7, Loss:  0.5670745968818665


25501it [2:17:04,  3.11it/s]

Epoch: 7, Loss:  0.5506840944290161


26001it [2:19:45,  3.09it/s]

Epoch: 7, Loss:  0.4674319326877594


26501it [2:22:26,  3.11it/s]

Epoch: 7, Loss:  0.44468799233436584


27001it [2:25:07,  3.10it/s]

Epoch: 7, Loss:  0.8738105893135071


27501it [2:27:48,  3.11it/s]

Epoch: 7, Loss:  0.9531276822090149


28001it [2:30:29,  3.11it/s]

Epoch: 7, Loss:  0.6350958943367004


28501it [2:33:10,  3.11it/s]

Epoch: 7, Loss:  0.9054493308067322


29001it [2:35:51,  3.10it/s]

Epoch: 7, Loss:  0.6760379076004028


29501it [2:38:32,  3.10it/s]

Epoch: 7, Loss:  0.7100611925125122


30000it [2:41:13,  3.10it/s]


In [None]:
# Save the tokenizer first and then the model into the same checkpoint directory.
checkpoint_dir = '/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/pretrained_240000/'
for artifact in (tokenizer, model):
    artifact.save_pretrained(checkpoint_dir)

test

In [None]:
# Build the validation loader, run generation over it, and save the generated
# summaries next to their reference texts as a CSV on Drive.
summarize_prefix = 'summarize: '

validation_dataset.columns = ['ctext','text']
# Add the task prefix only to rows that do not already carry it, so re-running
# this cell does not feed 'summarize: summarize: ...' inputs to the model.
# On a first run every row gets the prefix, exactly as before.
source_text = validation_dataset['ctext']
has_prefix = source_text.str.startswith(summarize_prefix, na=False)
validation_dataset['ctext'] = source_text.where(has_prefix, summarize_prefix + source_text)

val_set = CustomDataset(validation_dataset, tokenizer, config.MAX_LEN, config.SUMMARY_LEN)

val_loader = DataLoader(val_set, **val_params)

# Each pass rebuilds final_df, so only the last epoch's predictions are the
# ones written to disk below.
for epoch in range(config.VAL_EPOCHS):
    predictions, actuals = validate(epoch, tokenizer, model, device, val_loader)
    final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})

final_df.to_csv('/content/drive/MyDrive/3차프로젝트_현정/eT5_epoch8/final_df_train240000(8).csv')



Completed 0
Completed 100
Completed 200
