In [1]:
import argparse
import os

#from bs4 import BeautifulSoup
#from googlesearch import search
import numpy as np
import requests
from transformers import GPT2Config, GPT2LMHeadModel
import torch
from tqdm import tnrange, tqdm_notebook

from dataset import GPT21024Dataset, GPT21024Dataset_new
from utils import add_special_tokens, beam_search, generate_beam_sample, generate_sample, sample_seq, generate_sample_all

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run configuration. Change the defaults below if needed.

parser = argparse.ArgumentParser()

parser.add_argument("--seed", default=42, type=int, help="seed to replicate results")
parser.add_argument("--num_workers", default=4, type=int, help="num of cpus available")
# Use CUDA when it is available and fall back to CPU otherwise, so the
# notebook still runs on a machine without a GPU. `type=torch.device` turns a
# value given as a string (e.g. "cpu", "cuda:1") into a torch.device as well.
parser.add_argument(
    "--device",
    default=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    type=torch.device,
    help="torch.device object",
)
parser.add_argument("--output_dir", default='./output', type=str, help="path to save evaluation results")
parser.add_argument("--model_dir", default='./weights', type=str, help="path to save trained model")
parser.add_argument("--root_dir", default='./gpt2_1024_data', type=str, help="location of json dataset.")
parser.add_argument("--ids_file", default='./ids_1024_less.json', type=str, help="location of train, valid and test file indexes")

# Parse an empty argument list so that only the defaults above are used.
args = parser.parse_args([])

# Actually apply the seed: the generation cells sample with
# temperature/top_k/top_p, so without seeding the random number generators
# the results differ from run to run.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

print(args)

Namespace(seed=42, num_workers=4, device=device(type='cuda'), output_dir='./output', model_dir='./weights', root_dir='./gpt2_1024_data', ids_file='./ids_1024_less.json')


In [3]:
# Build the tokenizer and load the dataset stored in test_dataset.csv; the
# generation cells below run on `test_data`.
tokenizer = add_special_tokens()

# Loaders for the train / validation splits, kept for reference only:
#   train_data = GPT21024Dataset(args.root_dir, args.ids_file, mode='train', length=3000)
#   valid_data = GPT21024Dataset(args.root_dir, args.ids_file, mode='valid', length=500)
test_csv = 'test_dataset.csv'
test_data = GPT21024Dataset_new(test_csv)


In [4]:
# Load the fine-tuned GPT-2 model. `model_file` (weights) and `config_file`
# (model configuration) must point at your own checkpoint; change the file
# names below to match your files.
#
# For reference, the commented-out code this cell used to carry built the
# names from the training run as
#   'model_data{N}_trained_after_{E}_epochs_only_sum_loss_ignr_pad.bin'
#   'config_data{N}_trained_after_{E}_epochs_only_sum_loss_ignr_pad.json'
# with N = len(train_data) and E = args.num_train_epochs.

# Build the paths with os.path.join under args.model_dir instead of a
# hard-coded "weights\..." literal: the backslash form only works on Windows
# and contains invalid escape sequences ("\m", "\c").
model_file = os.path.join(
    args.model_dir,
    "model_O0_data4096_trained_after_3_epochs_only_sum_loss_ignr_pad_1210.bin",
)
config_file = os.path.join(
    args.model_dir,
    "config_O0_data4096_trained_after_3_epochs_only_sum_loss_ignr_pad_1210.json",
)

config = GPT2Config.from_json_file(config_file)
model = GPT2LMHeadModel(config)

# map_location remaps the stored tensors onto the configured device, so a
# checkpoint saved from a GPU can also be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you
# trust, and consider passing weights_only=True if your PyTorch supports it.
state_dict = torch.load(model_file, map_location=args.device)
model.load_state_dict(state_dict)

# Switch to evaluation mode (disables dropout) before generating.
model.eval()
# Last expression of the cell: moves the model to the device and displays it.
model.to(args.device)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50259, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50259, bias=False)
)

In [5]:
# Quick run with num=30 and length=100; the generated text goes to
# 'generate_30_100token.txt'. In the recorded output this call prints average
# ROUGE-1/2/L and BERTScore values, and the value displayed for the cell is
# the array of average BERTScore precision, recall and F1.
generate_sample_all(
    test_data,
    tokenizer,
    model,
    num=30,
    length=100,
    save_path='generate_30_100token.txt',
    temperature=0.8,
    top_k=30,
    top_p=0.75,
    device=args.device,
)

sample len -->: 653
sample len -->: 1024


  0%|          | 0/100 [00:00<?, ?it/s]

100%|██████████| 100/100 [00:08<00:00, 12.28it/s]


sample len -->: 314
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 38.76it/s]


sample len -->: 689
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.52it/s]


sample len -->: 872
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.28it/s]


sample len -->: 864
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.46it/s]


sample len -->: 687
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.41it/s]


sample len -->: 585
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.71it/s]


sample len -->: 646
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.80it/s]


sample len -->: 900
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.20it/s]


sample len -->: 922
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.43it/s]


sample len -->: 825
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.28it/s]


sample len -->: 717
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.96it/s]


sample len -->: 627
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.31it/s]


sample len -->: 777
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.46it/s]


sample len -->: 815
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.65it/s]


sample len -->: 575
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.09it/s]


sample len -->: 440
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.60it/s]


sample len -->: 475
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.38it/s]


sample len -->: 912
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.86it/s]


sample len -->: 627
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.70it/s]


sample len -->: 803
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.14it/s]


sample len -->: 884
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.18it/s]


sample len -->: 572
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.91it/s]


sample len -->: 571
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.07it/s]


sample len -->: 901
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.01it/s]


sample len -->: 567
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.42it/s]


sample len -->: 757
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.35it/s]


sample len -->: 593
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.60it/s]


sample len -->: 489
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.74it/s]


sample len -->: 758
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.08it/s]


Average ROUGE-1: F1=0.26864736269285205, P=0.2546135273267724, R=0.31956812501646936
Average ROUGE-2: F1=0.05455298611690605, P=0.04851476980286271, R=0.07142554047690627
Average ROUGE-L: F1=0.245849925088637, P=0.23409125820206494, R=0.2908428462489599
Average BERTScore - Precision: 0.50322511891524, Recall: 0.3181892603635788, F1: 0.3865427354971568


array([0.50322512, 0.31818926, 0.38654274])

In [7]:
# Larger run with num=470 and the same sampling settings as the 30-sample run
# (length=100, temperature=0.8, top_k=30, top_p=0.75). The generated text goes
# to 'generate_test_token100_new.txt' and the returned value is kept in
# `result` instead of being displayed.
result = generate_sample_all(
    test_data,
    tokenizer,
    model,
    num=470,
    length=100,
    save_path='generate_test_token100_new.txt',
    temperature=0.8,
    top_k=30,
    top_p=0.75,
    device=args.device,
)

sample len -->: 653
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.92it/s]


sample len -->: 314
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 37.53it/s]


sample len -->: 689
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.56it/s]


sample len -->: 872
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.69it/s]


sample len -->: 864
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.50it/s]


sample len -->: 687
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.44it/s]


sample len -->: 585
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.73it/s]


sample len -->: 646
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.79it/s]


sample len -->: 900
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 11.06it/s]


sample len -->: 922
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.42it/s]


sample len -->: 825
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.25it/s]


sample len -->: 717
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.92it/s]


sample len -->: 627
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.14it/s]


sample len -->: 777
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.37it/s]


sample len -->: 815
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.57it/s]


sample len -->: 575
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.28it/s]


sample len -->: 440
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.84it/s]


sample len -->: 475
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.65it/s]


sample len -->: 912
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.59it/s]


sample len -->: 627
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.30it/s]


sample len -->: 803
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.12it/s]


sample len -->: 884
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.55it/s]


sample len -->: 572
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.71it/s]


sample len -->: 571
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.54it/s]


sample len -->: 901
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.21it/s]


sample len -->: 567
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.97it/s]


sample len -->: 757
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.11it/s]


sample len -->: 593
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.42it/s]


sample len -->: 489
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.99it/s]


sample len -->: 758
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.02it/s]


sample len -->: 755
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.80it/s]


sample len -->: 395
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.36it/s]


sample len -->: 847
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.95it/s]


sample len -->: 676
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.61it/s]


sample len -->: 814
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.44it/s]


sample len -->: 513
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.44it/s]


sample len -->: 202
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 49.39it/s]


sample len -->: 812
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.34it/s]


sample len -->: 779
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.33it/s]


sample len -->: 624
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.22it/s]


sample len -->: 822
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.16it/s]


sample len -->: 694
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.96it/s]


sample len -->: 887
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.09it/s]


sample len -->: 376
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.59it/s]


sample len -->: 650
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.07it/s]


sample len -->: 363
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.10it/s]


sample len -->: 458
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.01it/s]


sample len -->: 721
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.11it/s]


sample len -->: 733
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.78it/s]


sample len -->: 509
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.38it/s]


sample len -->: 541
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.71it/s]


sample len -->: 393
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.73it/s]


sample len -->: 711
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.29it/s]


sample len -->: 685
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.06it/s]


sample len -->: 792
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.98it/s]


sample len -->: 828
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.05it/s]


sample len -->: 660
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.99it/s]


sample len -->: 430
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.07it/s]


sample len -->: 717
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.82it/s]


sample len -->: 524
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.79it/s]


sample len -->: 802
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.92it/s]


sample len -->: 872
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.12it/s]


sample len -->: 658
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.35it/s]


sample len -->: 831
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.17it/s]


sample len -->: 545
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.99it/s]


sample len -->: 897
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.31it/s]


sample len -->: 915
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.73it/s]


sample len -->: 833
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.59it/s]


sample len -->: 725
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.57it/s]


sample len -->: 586
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.31it/s]


sample len -->: 554
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.56it/s]


sample len -->: 809
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.80it/s]


sample len -->: 309
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 37.74it/s]


sample len -->: 486
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.42it/s]


sample len -->: 783
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.31it/s]


sample len -->: 874
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.82it/s]


sample len -->: 540
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.19it/s]


sample len -->: 955
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.12it/s]


sample len -->: 591
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.54it/s]


sample len -->: 665
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.26it/s]


sample len -->: 415
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.87it/s]


sample len -->: 600
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.66it/s]


sample len -->: 750
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.86it/s]


sample len -->: 828
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.03it/s]


sample len -->: 669
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.17it/s]


sample len -->: 650
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.18it/s]


sample len -->: 909
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.75it/s]


sample len -->: 604
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.11it/s]


sample len -->: 471
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.48it/s]


sample len -->: 838
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.08it/s]


sample len -->: 820
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.00it/s]


sample len -->: 932
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.52it/s]


sample len -->: 594
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.00it/s]


sample len -->: 488
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.15it/s]


sample len -->: 809
sample len -->: 1024


100%|██████████| 100/100 [01:08<00:00,  1.45it/s]


sample len -->: 712
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.00it/s]


sample len -->: 586
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.25it/s]


sample len -->: 520
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.09it/s]


sample len -->: 752
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.05it/s]


sample len -->: 889
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.18it/s]


sample len -->: 651
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.95it/s]


sample len -->: 453
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.13it/s]


sample len -->: 497
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.00it/s]


sample len -->: 702
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.89it/s]


sample len -->: 777
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.72it/s]


sample len -->: 817
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.87it/s]


sample len -->: 597
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.77it/s]


sample len -->: 419
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 32.91it/s]


sample len -->: 871
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.13it/s]


sample len -->: 748
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.91it/s]


sample len -->: 768
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.41it/s]


sample len -->: 676
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.62it/s]


sample len -->: 637
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.49it/s]


sample len -->: 595
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.13it/s]


sample len -->: 407
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.43it/s]


sample len -->: 757
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.71it/s]


sample len -->: 518
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.45it/s]


sample len -->: 470
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.20it/s]


sample len -->: 830
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.49it/s]


sample len -->: 488
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.12it/s]


sample len -->: 474
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.38it/s]


sample len -->: 589
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.59it/s]


sample len -->: 559
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.50it/s]


sample len -->: 601
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.69it/s]


sample len -->: 505
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.45it/s]


sample len -->: 840
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.02it/s]


sample len -->: 796
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.37it/s]


sample len -->: 633
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.77it/s]


sample len -->: 444
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.28it/s]


sample len -->: 865
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.98it/s]


sample len -->: 619
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.03it/s]


sample len -->: 629
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.20it/s]


sample len -->: 570
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.59it/s]


sample len -->: 781
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.29it/s]


sample len -->: 490
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.75it/s]


sample len -->: 854
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.28it/s]


sample len -->: 712
sample len -->: 1024


100%|██████████| 100/100 [00:11<00:00,  8.84it/s]


sample len -->: 605
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.22it/s]


sample len -->: 527
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.37it/s]


sample len -->: 386
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.06it/s]


sample len -->: 646
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.55it/s]


sample len -->: 636
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.90it/s]


sample len -->: 866
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.53it/s]


sample len -->: 480
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.18it/s]


sample len -->: 217
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 39.17it/s]


sample len -->: 911
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.86it/s]


sample len -->: 840
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.13it/s]


sample len -->: 339
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.54it/s]


sample len -->: 666
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.88it/s]


sample len -->: 772
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.89it/s]


sample len -->: 912
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.59it/s]


sample len -->: 809
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.37it/s]


sample len -->: 560
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.71it/s]


sample len -->: 670
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.76it/s]


sample len -->: 576
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.72it/s]


sample len -->: 478
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.83it/s]


sample len -->: 694
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.35it/s]


sample len -->: 707
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.86it/s]


sample len -->: 446
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.89it/s]


sample len -->: 569
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.22it/s]


sample len -->: 772
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.29it/s]


sample len -->: 790
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.07it/s]


sample len -->: 841
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.47it/s]


sample len -->: 473
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.95it/s]


sample len -->: 570
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.43it/s]


sample len -->: 906
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.68it/s]


sample len -->: 649
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.47it/s]


sample len -->: 743
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.88it/s]


sample len -->: 629
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.10it/s]


sample len -->: 785
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.70it/s]


sample len -->: 514
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.14it/s]


sample len -->: 619
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.61it/s]


sample len -->: 832
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.06it/s]


sample len -->: 716
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.11it/s]


sample len -->: 678
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.06it/s]


sample len -->: 824
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.22it/s]


sample len -->: 729
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.04it/s]


sample len -->: 733
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.61it/s]


sample len -->: 841
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.18it/s]


sample len -->: 371
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.92it/s]


sample len -->: 504
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.53it/s]


sample len -->: 734
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.35it/s]


sample len -->: 478
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.77it/s]


sample len -->: 620
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.24it/s]


sample len -->: 932
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.07it/s]


sample len -->: 535
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.41it/s]


sample len -->: 709
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.38it/s]


sample len -->: 750
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.64it/s]


sample len -->: 875
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.65it/s]


sample len -->: 681
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.39it/s]


sample len -->: 827
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.66it/s]


sample len -->: 632
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.92it/s]


sample len -->: 586
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.07it/s]


sample len -->: 357
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.26it/s]


sample len -->: 867
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 11.03it/s]


sample len -->: 692
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.59it/s]


sample len -->: 804
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.70it/s]


sample len -->: 865
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 11.01it/s]


sample len -->: 677
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.83it/s]


sample len -->: 619
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.00it/s]


sample len -->: 692
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.67it/s]


sample len -->: 744
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.00it/s]


sample len -->: 601
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.91it/s]


sample len -->: 414
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.60it/s]


sample len -->: 527
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.45it/s]


sample len -->: 564
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.13it/s]


sample len -->: 331
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.59it/s]


sample len -->: 713
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.60it/s]


sample len -->: 489
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.83it/s]


sample len -->: 495
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.53it/s]


sample len -->: 851
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.77it/s]


sample len -->: 499
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.25it/s]


sample len -->: 557
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.68it/s]


sample len -->: 487
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.24it/s]


sample len -->: 755
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.62it/s]


sample len -->: 485
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.58it/s]


sample len -->: 511
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.48it/s]


sample len -->: 696
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.10it/s]


sample len -->: 633
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.74it/s]


sample len -->: 869
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 11.03it/s]


sample len -->: 734
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.62it/s]


sample len -->: 851
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.34it/s]


sample len -->: 946
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.28it/s]


sample len -->: 756
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.89it/s]


sample len -->: 861
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.56it/s]


sample len -->: 926
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.21it/s]


sample len -->: 912
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.69it/s]


sample len -->: 444
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.59it/s]


sample len -->: 553
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.75it/s]


sample len -->: 663
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.19it/s]


sample len -->: 798
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.12it/s]


sample len -->: 606
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.62it/s]


sample len -->: 539
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.84it/s]


sample len -->: 698
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.41it/s]


sample len -->: 905
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 11.03it/s]


sample len -->: 496
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.03it/s]


sample len -->: 678
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.78it/s]


sample len -->: 750
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.47it/s]


sample len -->: 763
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.16it/s]


sample len -->: 447
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.00it/s]


sample len -->: 605
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.24it/s]


sample len -->: 608
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.29it/s]


sample len -->: 785
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.28it/s]


sample len -->: 642
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.17it/s]


sample len -->: 910
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 10.42it/s]


sample len -->: 791
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.92it/s]


sample len -->: 343
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.71it/s]


sample len -->: 840
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.81it/s]


sample len -->: 878
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.18it/s]


sample len -->: 348
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.78it/s]


sample len -->: 681
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.03it/s]


sample len -->: 923
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 11.82it/s]


sample len -->: 450
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.02it/s]


sample len -->: 914
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.67it/s]


sample len -->: 707
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.68it/s]


sample len -->: 753
sample len -->: 1024


100%|██████████| 100/100 [03:00<00:00,  1.80s/it]


sample len -->: 655
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 20.00it/s]


sample len -->: 688
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.62it/s]


sample len -->: 914
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.60it/s]


sample len -->: 754
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.94it/s]


sample len -->: 863
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.33it/s]


sample len -->: 533
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.07it/s]


sample len -->: 548
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.63it/s]


sample len -->: 842
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.75it/s]


sample len -->: 807
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.47it/s]


sample len -->: 631
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.00it/s]


sample len -->: 574
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.35it/s]


sample len -->: 424
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.83it/s]


sample len -->: 604
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.65it/s]


sample len -->: 731
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.63it/s]


sample len -->: 737
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.75it/s]


sample len -->: 697
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.64it/s]


sample len -->: 798
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.98it/s]


sample len -->: 828
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.79it/s]


sample len -->: 642
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.21it/s]


sample len -->: 666
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.99it/s]


sample len -->: 545
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.57it/s]


sample len -->: 475
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.18it/s]


sample len -->: 687
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.58it/s]


sample len -->: 414
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.28it/s]


sample len -->: 534
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.92it/s]


sample len -->: 812
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.66it/s]


sample len -->: 322
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 38.83it/s]


sample len -->: 735
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.48it/s]


sample len -->: 617
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.17it/s]


sample len -->: 513
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.52it/s]


sample len -->: 374
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 35.64it/s]


sample len -->: 822
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.12it/s]


sample len -->: 535
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.25it/s]


sample len -->: 545
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.57it/s]


sample len -->: 431
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.80it/s]


sample len -->: 665
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.81it/s]


sample len -->: 775
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.92it/s]


sample len -->: 658
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.16it/s]


sample len -->: 429
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.39it/s]


sample len -->: 790
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.49it/s]


sample len -->: 372
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 32.97it/s]


sample len -->: 952
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.78it/s]


sample len -->: 727
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.38it/s]


sample len -->: 950
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.11it/s]


sample len -->: 492
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.05it/s]


sample len -->: 660
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.10it/s]


sample len -->: 712
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.08it/s]


sample len -->: 696
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.61it/s]


sample len -->: 255
sample len -->: 1024


100%|██████████| 100/100 [00:01<00:00, 51.60it/s]


sample len -->: 630
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.46it/s]


sample len -->: 757
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.04it/s]


sample len -->: 480
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.40it/s]


sample len -->: 679
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.73it/s]


sample len -->: 803
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.71it/s]


sample len -->: 668
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.40it/s]


sample len -->: 880
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.44it/s]


sample len -->: 592
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.28it/s]


sample len -->: 933
sample len -->: 1024


100%|██████████| 100/100 [00:08<00:00, 12.32it/s]


sample len -->: 731
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.54it/s]


sample len -->: 735
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.23it/s]


sample len -->: 907
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.53it/s]


sample len -->: 969
sample len -->: 1024


100%|██████████| 100/100 [00:09<00:00, 11.01it/s]


sample len -->: 847
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.30it/s]


sample len -->: 445
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.99it/s]


sample len -->: 551
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.87it/s]


sample len -->: 690
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.80it/s]


sample len -->: 345
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 37.08it/s]


sample len -->: 996
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.15it/s]


sample len -->: 439
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.49it/s]


sample len -->: 954
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.69it/s]


sample len -->: 595
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.70it/s]


sample len -->: 280
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 43.27it/s]


sample len -->: 472
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.14it/s]


sample len -->: 858
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.44it/s]


sample len -->: 890
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.63it/s]


sample len -->: 918
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.71it/s]


sample len -->: 628
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.85it/s]


sample len -->: 702
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.03it/s]


sample len -->: 804
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.68it/s]


sample len -->: 808
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.59it/s]


sample len -->: 758
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.99it/s]


sample len -->: 607
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.85it/s]


sample len -->: 910
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.82it/s]


sample len -->: 971
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.24it/s]


sample len -->: 470
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.33it/s]


sample len -->: 611
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.71it/s]


sample len -->: 623
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.47it/s]


sample len -->: 623
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.02it/s]


sample len -->: 438
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.61it/s]


sample len -->: 860
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.71it/s]


sample len -->: 771
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.59it/s]


sample len -->: 821
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.99it/s]


sample len -->: 483
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.40it/s]


sample len -->: 411
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 29.85it/s]


sample len -->: 732
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.26it/s]


sample len -->: 405
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 32.21it/s]


sample len -->: 977
sample len -->: 1024


100%|██████████| 100/100 [01:28<00:00,  1.13it/s]


sample len -->: 631
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.37it/s]


sample len -->: 289
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 38.71it/s]


sample len -->: 695
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.94it/s]


sample len -->: 898
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.62it/s]


sample len -->: 281
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 43.53it/s]


sample len -->: 902
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.88it/s]


sample len -->: 695
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.23it/s]


sample len -->: 782
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.21it/s]


sample len -->: 546
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.05it/s]


sample len -->: 540
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.58it/s]


sample len -->: 468
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.65it/s]


sample len -->: 842
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.25it/s]


sample len -->: 887
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.61it/s]


sample len -->: 527
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.13it/s]


sample len -->: 683
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.26it/s]


sample len -->: 387
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.62it/s]


sample len -->: 1012
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.04it/s]


sample len -->: 899
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.72it/s]


sample len -->: 570
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.60it/s]


sample len -->: 915
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.95it/s]


sample len -->: 574
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.49it/s]


sample len -->: 748
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.55it/s]


sample len -->: 436
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.97it/s]


sample len -->: 385
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.62it/s]


sample len -->: 882
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.12it/s]


sample len -->: 720
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.37it/s]


sample len -->: 850
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.74it/s]


sample len -->: 1016
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 12.87it/s]


sample len -->: 499
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.76it/s]


sample len -->: 554
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.84it/s]


sample len -->: 929
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.75it/s]


sample len -->: 822
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.73it/s]


sample len -->: 430
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 31.75it/s]


sample len -->: 841
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.03it/s]


sample len -->: 403
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 32.92it/s]


sample len -->: 505
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 26.89it/s]


sample len -->: 752
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.91it/s]


sample len -->: 430
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.72it/s]


sample len -->: 653
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.26it/s]


sample len -->: 716
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.76it/s]


sample len -->: 586
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 23.04it/s]


sample len -->: 422
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.62it/s]


sample len -->: 726
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.56it/s]


sample len -->: 899
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.41it/s]


sample len -->: 593
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.43it/s]


sample len -->: 830
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.99it/s]


sample len -->: 928
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.55it/s]


sample len -->: 801
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.62it/s]


sample len -->: 610
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.19it/s]


sample len -->: 817
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.72it/s]


sample len -->: 870
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.73it/s]


sample len -->: 641
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.32it/s]


sample len -->: 780
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.43it/s]


sample len -->: 837
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.79it/s]


sample len -->: 197
sample len -->: 1024


100%|██████████| 100/100 [00:01<00:00, 52.72it/s]


sample len -->: 866
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.14it/s]


sample len -->: 839
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.90it/s]


sample len -->: 761
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.38it/s]


sample len -->: 467
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.19it/s]


sample len -->: 522
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.24it/s]


sample len -->: 640
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.60it/s]


sample len -->: 496
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.30it/s]


sample len -->: 484
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 27.17it/s]


sample len -->: 306
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 39.78it/s]


sample len -->: 436
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 28.20it/s]


sample len -->: 870
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.28it/s]


sample len -->: 620
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.64it/s]


sample len -->: 705
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.09it/s]


sample len -->: 571
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.90it/s]


sample len -->: 598
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.62it/s]


sample len -->: 770
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.04it/s]


sample len -->: 741
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.57it/s]


sample len -->: 803
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.08it/s]


sample len -->: 508
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.40it/s]


sample len -->: 812
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.88it/s]


sample len -->: 832
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.59it/s]


sample len -->: 802
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.58it/s]


sample len -->: 733
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.86it/s]


sample len -->: 635
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.94it/s]


sample len -->: 676
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.21it/s]


sample len -->: 386
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 35.25it/s]


sample len -->: 590
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.12it/s]


sample len -->: 652
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.32it/s]


sample len -->: 425
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 30.51it/s]


sample len -->: 777
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.49it/s]


sample len -->: 824
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.63it/s]


sample len -->: 755
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.10it/s]


sample len -->: 620
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.64it/s]


sample len -->: 837
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.21it/s]


sample len -->: 278
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 38.83it/s]


sample len -->: 726
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.26it/s]


sample len -->: 581
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.77it/s]


sample len -->: 613
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.95it/s]


sample len -->: 274
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 39.60it/s]


sample len -->: 707
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.57it/s]


sample len -->: 642
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.53it/s]


sample len -->: 717
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.13it/s]


sample len -->: 676
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 17.62it/s]


sample len -->: 838
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.75it/s]


sample len -->: 701
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 18.43it/s]


sample len -->: 624
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.17it/s]


sample len -->: 836
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.82it/s]


sample len -->: 822
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 14.53it/s]


sample len -->: 654
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 19.03it/s]


sample len -->: 780
sample len -->: 1024


100%|██████████| 100/100 [00:05<00:00, 16.80it/s]


sample len -->: 592
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.30it/s]


sample len -->: 821
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 14.25it/s]


sample len -->: 740
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 16.31it/s]


sample len -->: 616
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 20.29it/s]


sample len -->: 308
sample len -->: 1024


100%|██████████| 100/100 [00:02<00:00, 40.08it/s]


sample len -->: 474
sample len -->: 1024


100%|██████████| 100/100 [00:03<00:00, 25.86it/s]


sample len -->: 574
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.88it/s]


sample len -->: 577
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 21.55it/s]


sample len -->: 593
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 22.02it/s]


sample len -->: 847
sample len -->: 1024


100%|██████████| 100/100 [00:07<00:00, 13.84it/s]


sample len -->: 525
sample len -->: 1024


100%|██████████| 100/100 [00:04<00:00, 24.64it/s]


sample len -->: 780
sample len -->: 1024


100%|██████████| 100/100 [00:06<00:00, 15.11it/s]


Average ROUGE-1: {'f': 0.2787949566197231, 'p': 0.27657954905797616, 'r': 0.310382498461751}
Average ROUGE-2: {'f': 0.07843665145642449, 'p': 0.07367977602769231, 'r': 0.09584257984742381}
Average ROUGE-L: {'f': 0.25991309251042977, 'p': 0.25810740435673035, 'r': 0.28945470489940095}
Average BERTScore - Precision: 0.5191473780794347, Recall: 0.3300098029856986, F1: 0.39993160322625587


In [8]:
# Show the aggregated evaluation metrics held in `result`. In the output below,
# the 'rouge' entry carries the rouge-1 / rouge-2 / rouge-l f, p, r values and
# the three-element 'bert' array matches the BERTScore precision, recall and F1
# averages printed at the end of the evaluation log above.
print(result)

{'rouge': {'rouge-1': {'f': 0.2787949566197231, 'p': 0.27657954905797616, 'r': 0.310382498461751}, 'rouge-2': {'f': 0.07843665145642449, 'p': 0.07367977602769231, 'r': 0.09584257984742381}, 'rouge-l': {'f': 0.25991309251042977, 'p': 0.25810740435673035, 'r': 0.28945470489940095}}, 'bert': array([0.51914738, 0.3300098 , 0.3999316 ])}


In [7]:
# Sampling-based generation on the test set, one argument per line for easy tuning.
# NOTE(review): `num` and `length` presumably set how many articles are sampled
# and how many tokens are generated for each -- confirm against utils.generate_sample.
generate_sample(
    test_data,
    tokenizer,
    model,
    num=2,
    length=50,
    temperature=0.8,
    top_k=30,
    top_p=0.75,
    device=args.device,
    br_eval=True,
)

sample len -->: 554
sample len -->: 1024


100%|██████████| 50/50 [00:02<00:00, 22.13it/s]


new_article

new york -lrb- cnn -rrb- -- mayor bill de blasio bit into some unexpected publicity recently when he was photographed politely eating pizza -- with a knife and fork. at a staten island pizzeria, no less. now, the act that sent the internet into a frenzy with chatter about what most new yorkers scorn as a serious food flub portends to deliver some dough -- as in money -- to charity. goodfella's pizzeria co-owner marc cosentino says he will auction off the infamous fork that de blasio used in a charity fundraiser. after the mayor and his entourage went their way following a friday stop at goodfella's, cosentino decided it would be a good idea to hold on to the notorious fork. a retired nypd sergeant, cosentino baked up the idea to store the fork in an evidence bag and put it on display at the bar. `` you can't imagine all the people that come in that want to look at the fork and take pictures of it, '' cosentino told cnn on wednesday. dubbed by some with tongues firmly in ch

100%|██████████| 50/50 [00:03<00:00, 14.56it/s]


new_article

do you know your child's favorite nursery rhyme, family game or tv character? do you know what makes them really laugh? as research reveals that one in six fathers don't know how to play with their children a leading national nursery is offering parents lessons on how to have fun with their own children. busy bees day nursery group, who have multiple playgroups across the uk, are offering the coaching for parents to boost their playtime skills when children go home at the end of the day. how to play : aleeza haider, three, and binuthi kannangara, three, with chef robert england are seen learning vital skills at busy bees nursery in rotherham, south yorkshire. the research, conducted by toy company playmobil, states that most parents of young children worry that they do not spend enough time playing with them, while more than six out of ten parents add that they play with their children only occasionally. a third of dads say they simply do not have time to commit to playing

In [6]:
# Beam-search generation on the test set, one argument per line for easy tuning.
# NOTE(review): `beam_size` is presumably the number of beams kept per step --
# confirm against utils.generate_beam_sample.
generate_beam_sample(
    test_data,
    tokenizer,
    model,
    num=1,
    length=50,
    beam_size=3,
    device=args.device,
    br_eval=True,
)

sample len -->: 306
sample len -->: 1024


  next_token_probs = F.softmax(next_token_logits)
  next_token_probs = F.softmax(next_token_logits)
100%|██████████| 2/2 [00:00<00:00, 14.35it/s]
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


new_article

-lrb- cnn -rrb- -- a body has been discovered in the charred remains of a three-story apartment building in minneapolis where an explosion and fire injured at least 14 people, fire officials said thursday. firefighters discovered the body, which has not been publicly identified, while removing debris from wednesday's fire, officials said. the body was turned over to the hennepin county medical examiner's office, they said. `` the cause of the fire is still under investigation. while a gas leak could be one potential cause, it could be weeks until a cause is determined and may ultimately be undetermined, '' according to a statement released by the minneapolis fire department. most of the injuries suffered were burns or came from falls or jumps from windows, robert ball, a spokesman for hennepin county emergency medical services, said wednesday. officials don't know whether people jumped out, fell or were pushed out by the explosion. the explosion was reported about 8:15 a.m

## Download An Article Given A Query

In [7]:
def sentences_from_query(query):
    """Download a web page for ``query`` and return the text of its ``<p>`` tags.

    Args:
        query: Either a URL (any string starting with ``"http"``), which is
            fetched directly, or a free-text search query, in which case the
            first search result is fetched.

    Returns:
        One string: the whitespace-stripped text of every ``<p>`` tag on the
        page, joined with single spaces.

    Raises:
        IndexError: If the search yields no result for a free-text query.
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    # The notebook-level imports of these two packages are commented out in the
    # first cell, so import them here to keep the function usable on a fresh
    # kernel (Restart & Run All).
    from bs4 import BeautifulSoup
    from googlesearch import search

    if query.startswith("http"):
        url = query
    else:
        # Depending on the installed googlesearch version, `search` may return
        # a list or a lazy generator; next(iter(...)) works for both, whereas
        # indexing with [0] fails on a generator.
        url = next(iter(search(query, num_results=1)), None)
        if url is None:
            raise IndexError("no search results for query: {!r}".format(query))
    print(url)

    # A timeout prevents the cell from hanging forever on an unresponsive host;
    # raise_for_status() stops an HTTP error page from being summarized as if
    # it were the article.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = BeautifulSoup(response.text, "html.parser")

    # Collect the text of every <p> tag, stripped of surrounding whitespace.
    paragraphs = soup.find_all("p")
    return " ".join(tag.get_text().strip() for tag in paragraphs)

In [8]:
# Fetch the page text for the query, tokenize it, and keep the first 900 ids.
# Intermediate values get their own names so `article` is only ever bound to
# the final list of token ids used by the following cells.
# NOTE(review): 900 presumably leaves room for the 50 generated tokens inside
# the 1024-token limit reported by the tokenizer warning below -- confirm.
article_text = sentences_from_query("neural embedding")
article_ids = tokenizer.encode(article_text)
article = article_ids[:900]

https://towardsdatascience.com/neural-network-embeddings-explained-4d028e6f0526


Token indices sequence length is longer than the specified maximum sequence length for this model (1957 > 1024). Running this sequence through the model will result in indexing errors


In [9]:
# Sample a continuation of the article, then decode only the newly generated part.
sampled = sample_seq(
    model,
    article,
    50,
    args.device,
    temperature=1,
    top_k=10,
    top_p=0.5,
)
# Row 0 of the result, from position len(article) onward, i.e. skipping the
# prompt ids (assumes the prompt is echoed at the start of the row -- TODO confirm).
generated_text = sampled[0, len(article):].tolist()
# Two-step decode: ids -> tokens (special tokens dropped) -> plain string.
summary_tokens = tokenizer.convert_ids_to_tokens(generated_text, skip_special_tokens=True)
text = tokenizer.convert_tokens_to_string(summary_tokens)

HBox(children=(IntProgress(value=0, max=50), HTML(value='')))




In [10]:
# Print the decoded (truncated) article followed by the generated summary.
# A single print with a newline separator emits the same text as printing each
# piece on its own.
print(
    "Article: \n",
    tokenizer.decode(article),
    "------------------------------------------------------------ \n",
    "Generated Summary: \n",
    text,
    sep="\n",
)

Article: 

Applications of neural networks have expanded significantly in recent years from image segmentation to natural language processing to time-series forecasting. One notably successful use of deep learning is embedding, a method used to represent discrete variables as continuous vectors. This technique has found practical applications with word embeddings for machine translation and entity embeddings for categorical variables. In this article, I’ll explain what neural network embeddings are, why we want to use them, and how they are learned. We’ll go through these concepts in the context of a real problem I’m working on: representing all the books on Wikipedia as vectors to create a book recommendation system. An embedding is a mapping of a discrete — categorical — variable to a vector of continuous numbers. In the context of neural networks, embeddings are low-dimensional, learned continuous vector representations of discrete variables. Neural network embeddings are useful bec