## Set up

In [None]:
!pip install git+https://github.com/jameschamberlain/bigbird.git -q

[K     |████████████████████████████████| 1.2MB 13.0MB/s 
[K     |████████████████████████████████| 3.4MB 47.8MB/s 
[K     |████████████████████████████████| 1.5MB 52.7MB/s 
[K     |████████████████████████████████| 3.9MB 53.9MB/s 
[K     |████████████████████████████████| 983kB 52.0MB/s 
[K     |████████████████████████████████| 358kB 53.6MB/s 
[K     |████████████████████████████████| 5.6MB 45.4MB/s 
[K     |████████████████████████████████| 368kB 54.3MB/s 
[K     |████████████████████████████████| 194kB 54.3MB/s 
[K     |████████████████████████████████| 378kB 48.8MB/s 
[K     |████████████████████████████████| 706kB 48.1MB/s 
[K     |████████████████████████████████| 655kB 51.4MB/s 
[K     |████████████████████████████████| 368kB 47.4MB/s 
[K     |████████████████████████████████| 256kB 52.3MB/s 
[?25h  Building wheel for bigbird (setup.py) ... [?25l[?25hdone
  Building wheel for gunicorn (setup.py) ... [?25l[?25hdone
  Building wheel for bz2file (setup.py) ... 

In [None]:
from bigbird.core import flags
from bigbird.core import modeling
from bigbird.core import utils
from bigbird.question_generation import run_question_generation
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
import tensorflow_text as tft
from tqdm import tqdm
import sys

# Flag registry exposed by bigbird.core.flags; options are set on it further down.
FLAGS = flags.FLAGS
# Register a dummy string flag "f" (guarded so the cell can be re-run without
# defining it twice).
# NOTE(review): presumably this absorbs the `-f <connection file>` argument the
# notebook kernel is started with, so that parsing sys.argv does not fail on an
# unknown flag -- confirm.
if not hasattr(FLAGS, "f"): flags.DEFINE_string("f", "", "")
# Parse the process arguments into FLAGS.
FLAGS(sys.argv)

# Enable TensorFlow 2.x behavior (tf is imported from tensorflow.compat.v2).
tf.enable_v2_behavior()

## Set options

In [None]:
# Run configuration written onto FLAGS. Later cells read these values directly
# (e.g. data_dir and the encoder/decoder lengths for the input pipeline,
# learning_rate and num_train_steps for the training loop).
FLAGS.data_dir = "tfds://sqa_qg_dataset"
# NOTE(review): "original_full" presumably selects dense rather than sparse
# attention -- confirm against bigbird.core.flags.
FLAGS.attention_type = "original_full"
FLAGS.couple_encoder_decoder = True
FLAGS.max_encoder_length = 256  # kept small: free Colab only offers lower-memory GPUs such as the T4
FLAGS.max_decoder_length = 128
FLAGS.block_size = 64
FLAGS.learning_rate = 1e-5
FLAGS.num_train_steps = 10000  # number of batches taken from the dataset by the training loop
FLAGS.attention_probs_dropout_prob = 0.0
FLAGS.hidden_dropout_prob = 0.0
FLAGS.use_gradient_checkpointing = True
FLAGS.vocab_model_file = "gpt2"

In [None]:
# Configuration object handed to the model constructor; fwd_bwd also indexes it
# by key ("label_smoothing", "vocab_size").
# NOTE(review): presumably a dict snapshot of the current flag values, so it
# should be re-created after changing any FLAGS option -- confirm.
transformer_config = flags.as_dictionary()

## Define question generation model

In [None]:
# NOTE(review): EagerVariableStore comes from a private TensorFlow module
# (tensorflow.python...), so this import may break across TensorFlow versions.
from tensorflow.python.ops.variable_scope import EagerVariableStore
# Variable store that is made the default while the model is constructed
# (see the `with container.as_default():` cell).
container = EagerVariableStore()

In [None]:
# Construct the Transformer model from the flag-derived configuration, with
# `container` installed as the default variable store during construction.
with container.as_default():
  model = modeling.TransformerModel(transformer_config)

In [None]:
@tf.function(experimental_compile=True)
def fwd_bwd(features, labels):
  """Runs one forward and backward pass of the notebook-global `model`.

  The model is called with `training=True`, the padded cross-entropy loss is
  computed from its logits, and the loss is differentiated with respect to
  `model.trainable_weights`. No weights are updated here.

  Args:
    features: Model inputs, forwarded unchanged to `model`.
    labels: Targets; passed to the model as `target_ids` and to the loss.

  Returns:
    A tuple `(loss, llh, logits, pred_ids, grads)` where `llh`, `logits` and
    `pred_ids` are the model outputs and `grads` holds one gradient per entry
    of `model.trainable_weights`.
  """
  with tf.GradientTape() as tape:
    # The second element returned by the model is not used here.
    model_outputs, _ = model(features, target_ids=labels, training=True)
    llh, logits, pred_ids = model_outputs
    loss = run_question_generation.padded_cross_entropy_loss(
        logits,
        labels,
        transformer_config["label_smoothing"],
        transformer_config["vocab_size"])
  grads = tape.gradient(loss, model.trainable_weights)
  return loss, llh, logits, pred_ids, grads

## Dataset pipeline

In [None]:
# Build the training input function from the flag values set above.
# NOTE(review): `tmp_dir` is a hard-coded GCS bucket; running this cell
# presumably requires access to it -- consider making it configurable.
train_input_fn = run_question_generation.input_fn_builder(
        data_dir=FLAGS.data_dir,
        vocab_model_file=FLAGS.vocab_model_file,
        max_encoder_length=FLAGS.max_encoder_length,
        max_decoder_length=FLAGS.max_decoder_length,
        substitute_newline=FLAGS.substitute_newline,
        is_training=True,
        tmp_dir='gs://q-gen/tensorflow_datasets')
# Instantiate the dataset; the batch size (8) is passed through the params dict.
dataset = train_input_fn({'batch_size': 8})

  deterministic=is_training)


## Check outputs

In [None]:
# Fetch one batch explicitly so this cell does not depend on an `ex` variable
# left behind by another cell (it would otherwise fail on Restart & Run All).
# A fresh iterator is used, so the training loop's own iteration is unaffected.
ex = next(iter(dataset))
# Each dataset element is indexed as (features, labels).
loss, llh, logits, pred_ids, grads = fwd_bwd(ex[0], ex[1])
print('Loss: ', loss)



Loss:  tf.Tensor(45.25094, shape=(), dtype=float32)


## (Optionally) Load pretrained model

In [None]:
# For training from scratch use
# ckpt_path = 'gs://bigbird-transformer/pretrain/bigbr_base/model.ckpt-0'
# For quick check continue from trained checkpoint
ckpt_path = 'gs://bigbird-transformer/summarization/pubmed/roberta/model.ckpt-300000'
ckpt_reader = tf.compat.v1.train.NewCheckpointReader(ckpt_path)
loaded_weights = []
missing_names = []  # variables that could not be read from the checkpoint
for v in tqdm(model.trainable_weights, position=0):
  tensor_name = v.name[:-2]  # drop the trailing ":0" from the variable name
  try:
    val = ckpt_reader.get_tensor(tensor_name)
  except Exception:  # best-effort restore, but let KeyboardInterrupt/SystemExit through
    # Keep the variable's current value when the checkpoint cannot supply it.
    missing_names.append(tensor_name)
    val = v.numpy()
  loaded_weights.append(val)

model.set_weights(loaded_weights)

# Make the silent fallback visible: report how many variables kept their
# current values instead of being restored.
if missing_names:
  print('{} of {} variables were not read from the checkpoint and keep their '
        'current values, e.g. {}'.format(
            len(missing_names), len(loaded_weights), missing_names[:5]))

100%|██████████| 316/316 [00:00<00:00, 417.43it/s]


## Train

In [None]:
# Adam optimizer at the configured learning rate, plus a running mean over all
# per-step losses seen so far (it is never reset, so the printed value is the
# average since the start of training, not the latest batch loss).
opt = tf.keras.optimizers.Adam(learning_rate=FLAGS.learning_rate)
train_loss = tf.keras.metrics.Mean(name='train_loss')

# One optimizer step per batch, for at most FLAGS.num_train_steps batches.
for i, ex in enumerate(tqdm(dataset.take(FLAGS.num_train_steps), position=0)):
  loss, llh, logits, pred_ids, grads = fwd_bwd(ex[0], ex[1])
  opt.apply_gradients(zip(grads, model.trainable_weights))
  train_loss.update_state(loss)
  # Report the running mean every 10 steps.
  if i % 10 == 0:
    print('Loss = {} '.format(train_loss.result().numpy()))

  0%|          | 1/10000 [00:03<9:08:06,  3.29s/it]

Loss = 52.736576080322266 


  0%|          | 11/10000 [00:05<50:18,  3.31it/s]

Loss = 22.46729278564453 


  0%|          | 21/10000 [00:07<36:07,  4.60it/s]

Loss = 16.794607162475586 


  0%|          | 31/10000 [00:09<36:04,  4.60it/s]

Loss = 14.59433364868164 


  0%|          | 41/10000 [00:11<36:05,  4.60it/s]

Loss = 13.353221893310547 


  1%|          | 51/10000 [00:14<35:53,  4.62it/s]

Loss = 12.527636528015137 


  1%|          | 61/10000 [00:16<36:13,  4.57it/s]

Loss = 11.910737037658691 


  1%|          | 71/10000 [00:18<36:14,  4.57it/s]

Loss = 11.43585205078125 


  1%|          | 81/10000 [00:20<36:18,  4.55it/s]

Loss = 11.073097229003906 


  1%|          | 91/10000 [00:22<36:29,  4.53it/s]

Loss = 10.729941368103027 


  1%|          | 101/10000 [00:25<36:39,  4.50it/s]

Loss = 10.464337348937988 


  1%|          | 111/10000 [00:27<36:48,  4.48it/s]

Loss = 10.230423927307129 


  1%|          | 121/10000 [00:29<36:49,  4.47it/s]

Loss = 10.022096633911133 


  1%|▏         | 131/10000 [00:31<36:51,  4.46it/s]

Loss = 9.848771095275879 


  1%|▏         | 141/10000 [00:34<36:48,  4.46it/s]

Loss = 9.687811851501465 


  2%|▏         | 151/10000 [00:36<36:43,  4.47it/s]

Loss = 9.541717529296875 


  2%|▏         | 161/10000 [00:38<36:43,  4.46it/s]

Loss = 9.397281646728516 


  2%|▏         | 171/10000 [00:40<36:37,  4.47it/s]

Loss = 9.265467643737793 


  2%|▏         | 181/10000 [00:43<36:36,  4.47it/s]

Loss = 9.14140510559082 


  2%|▏         | 191/10000 [00:45<36:15,  4.51it/s]

Loss = 9.043268203735352 


  2%|▏         | 201/10000 [00:47<36:08,  4.52it/s]

Loss = 8.940035820007324 


  2%|▏         | 211/10000 [00:49<36:19,  4.49it/s]

Loss = 8.836051940917969 


  2%|▏         | 221/10000 [00:51<36:00,  4.53it/s]

Loss = 8.756492614746094 


  2%|▏         | 231/10000 [00:54<35:37,  4.57it/s]

Loss = 8.669888496398926 


  2%|▏         | 241/10000 [00:56<35:36,  4.57it/s]

Loss = 8.594752311706543 


  3%|▎         | 251/10000 [00:58<35:31,  4.57it/s]

Loss = 8.519044876098633 


  3%|▎         | 261/10000 [01:00<35:24,  4.58it/s]

Loss = 8.44279670715332 


  3%|▎         | 271/10000 [01:02<35:31,  4.56it/s]

Loss = 8.384714126586914 


  3%|▎         | 281/10000 [01:04<35:20,  4.58it/s]

Loss = 8.30813980102539 


  3%|▎         | 291/10000 [01:07<35:18,  4.58it/s]

Loss = 8.253623008728027 


  3%|▎         | 301/10000 [01:09<35:10,  4.60it/s]

Loss = 8.192765235900879 


  3%|▎         | 311/10000 [01:11<34:59,  4.61it/s]

Loss = 8.134328842163086 


  3%|▎         | 321/10000 [01:13<35:04,  4.60it/s]

Loss = 8.081398010253906 


  3%|▎         | 331/10000 [01:15<35:07,  4.59it/s]

Loss = 8.014141082763672 


  3%|▎         | 341/10000 [01:18<34:55,  4.61it/s]

Loss = 7.963667392730713 


  4%|▎         | 351/10000 [01:20<34:55,  4.60it/s]

Loss = 7.914650917053223 


  4%|▎         | 361/10000 [01:22<34:53,  4.60it/s]

Loss = 7.870625972747803 


  4%|▎         | 371/10000 [01:24<34:59,  4.59it/s]

Loss = 7.823163986206055 


  4%|▍         | 381/10000 [01:26<35:10,  4.56it/s]

Loss = 7.7859625816345215 


  4%|▍         | 391/10000 [01:28<34:44,  4.61it/s]

Loss = 7.749709129333496 


  4%|▍         | 401/10000 [01:31<34:59,  4.57it/s]

Loss = 7.706432819366455 


  4%|▍         | 411/10000 [01:33<34:46,  4.60it/s]

Loss = 7.66203498840332 


  4%|▍         | 421/10000 [01:35<34:54,  4.57it/s]

Loss = 7.625426769256592 


  4%|▍         | 431/10000 [01:37<34:50,  4.58it/s]

Loss = 7.586068153381348 


  4%|▍         | 441/10000 [01:39<34:53,  4.57it/s]

Loss = 7.549753189086914 


  5%|▍         | 451/10000 [01:41<34:52,  4.56it/s]

Loss = 7.515993595123291 


  5%|▍         | 461/10000 [01:44<34:34,  4.60it/s]

Loss = 7.474462509155273 


  5%|▍         | 471/10000 [01:46<34:39,  4.58it/s]

Loss = 7.444622039794922 


  5%|▍         | 481/10000 [01:48<34:52,  4.55it/s]

Loss = 7.419692516326904 


  5%|▍         | 491/10000 [01:50<34:36,  4.58it/s]

Loss = 7.387444019317627 


  5%|▌         | 501/10000 [01:52<34:42,  4.56it/s]

Loss = 7.353725910186768 


  5%|▌         | 511/10000 [01:55<34:29,  4.58it/s]

Loss = 7.3264241218566895 


  5%|▌         | 521/10000 [01:57<34:48,  4.54it/s]

Loss = 7.300089359283447 


  5%|▌         | 531/10000 [01:59<34:43,  4.54it/s]

Loss = 7.271378993988037 


  5%|▌         | 541/10000 [02:01<34:28,  4.57it/s]

Loss = 7.24373722076416 


  6%|▌         | 551/10000 [02:03<34:26,  4.57it/s]

Loss = 7.219072341918945 


  6%|▌         | 561/10000 [02:06<34:29,  4.56it/s]

Loss = 7.1912841796875 


  6%|▌         | 571/10000 [02:08<34:53,  4.50it/s]

Loss = 7.1646504402160645 


  6%|▌         | 581/10000 [02:10<34:40,  4.53it/s]

Loss = 7.138629913330078 


  6%|▌         | 591/10000 [02:12<34:28,  4.55it/s]

Loss = 7.1133623123168945 


  6%|▌         | 601/10000 [02:14<34:17,  4.57it/s]

Loss = 7.086764335632324 


  6%|▌         | 611/10000 [02:17<34:17,  4.56it/s]

Loss = 7.064182281494141 


  6%|▌         | 621/10000 [02:19<34:08,  4.58it/s]

Loss = 7.036494731903076 


  6%|▋         | 631/10000 [02:21<34:05,  4.58it/s]

Loss = 7.012925148010254 


  6%|▋         | 641/10000 [02:23<34:13,  4.56it/s]

Loss = 6.992189884185791 


  7%|▋         | 651/10000 [02:25<34:16,  4.55it/s]

Loss = 6.967654228210449 


  7%|▋         | 661/10000 [02:28<34:01,  4.57it/s]

Loss = 6.947710990905762 


  7%|▋         | 671/10000 [02:30<34:06,  4.56it/s]

Loss = 6.927056312561035 


  7%|▋         | 681/10000 [02:32<33:56,  4.58it/s]

Loss = 6.905586242675781 


  7%|▋         | 691/10000 [02:34<33:53,  4.58it/s]

Loss = 6.884620189666748 


  7%|▋         | 701/10000 [02:36<33:50,  4.58it/s]

Loss = 6.866889953613281 


  7%|▋         | 711/10000 [02:38<33:40,  4.60it/s]

Loss = 6.847347736358643 


  7%|▋         | 721/10000 [02:41<33:56,  4.56it/s]

Loss = 6.829951286315918 


  7%|▋         | 731/10000 [02:43<33:53,  4.56it/s]

Loss = 6.80812406539917 


  7%|▋         | 741/10000 [02:45<33:35,  4.59it/s]

Loss = 6.787790775299072 


  8%|▊         | 751/10000 [02:47<33:50,  4.56it/s]

Loss = 6.7682061195373535 


  8%|▊         | 761/10000 [02:49<33:34,  4.59it/s]

Loss = 6.748281002044678 


  8%|▊         | 771/10000 [02:52<33:33,  4.58it/s]

Loss = 6.73040246963501 


  8%|▊         | 781/10000 [02:54<33:22,  4.60it/s]

Loss = 6.710592269897461 


  8%|▊         | 791/10000 [02:56<33:43,  4.55it/s]

Loss = 6.693044662475586 


  8%|▊         | 801/10000 [02:58<33:30,  4.57it/s]

Loss = 6.675908088684082 


  8%|▊         | 811/10000 [03:00<33:20,  4.59it/s]

Loss = 6.65803337097168 


  8%|▊         | 821/10000 [03:02<33:26,  4.57it/s]

Loss = 6.641064643859863 


  8%|▊         | 831/10000 [03:05<33:26,  4.57it/s]

Loss = 6.625613212585449 


  8%|▊         | 841/10000 [03:07<33:19,  4.58it/s]

Loss = 6.60920524597168 


  9%|▊         | 851/10000 [03:09<33:18,  4.58it/s]

Loss = 6.594986915588379 


  9%|▊         | 861/10000 [03:11<33:03,  4.61it/s]

Loss = 6.5795063972473145 


  9%|▊         | 871/10000 [03:13<33:36,  4.53it/s]

Loss = 6.560555458068848 


  9%|▉         | 881/10000 [03:16<33:03,  4.60it/s]

Loss = 6.542651653289795 


  9%|▉         | 891/10000 [03:18<33:09,  4.58it/s]

Loss = 6.530422687530518 


  9%|▉         | 901/10000 [03:20<33:08,  4.58it/s]

Loss = 6.517165184020996 


  9%|▉         | 911/10000 [03:22<32:55,  4.60it/s]

Loss = 6.500670433044434 


  9%|▉         | 921/10000 [03:24<32:57,  4.59it/s]

Loss = 6.488458156585693 


  9%|▉         | 931/10000 [03:26<33:03,  4.57it/s]

Loss = 6.47642183303833 


  9%|▉         | 941/10000 [03:29<33:07,  4.56it/s]

Loss = 6.465020179748535 


 10%|▉         | 951/10000 [03:31<32:59,  4.57it/s]

Loss = 6.452162265777588 


 10%|▉         | 961/10000 [03:33<33:03,  4.56it/s]

Loss = 6.438125133514404 


 10%|▉         | 971/10000 [03:35<33:01,  4.56it/s]

Loss = 6.423699378967285 


 10%|▉         | 981/10000 [03:37<32:44,  4.59it/s]

Loss = 6.409327983856201 


 10%|▉         | 991/10000 [03:40<32:54,  4.56it/s]

Loss = 6.398726940155029 


 10%|█         | 1001/10000 [03:42<32:48,  4.57it/s]

Loss = 6.384840488433838 


 10%|█         | 1011/10000 [03:44<32:43,  4.58it/s]

Loss = 6.373270511627197 


 10%|█         | 1021/10000 [03:46<32:48,  4.56it/s]

Loss = 6.365612030029297 


 10%|█         | 1031/10000 [03:48<32:41,  4.57it/s]

Loss = 6.35240364074707 


 10%|█         | 1041/10000 [03:51<32:46,  4.56it/s]

Loss = 6.34354305267334 


 11%|█         | 1051/10000 [03:53<32:42,  4.56it/s]

Loss = 6.333762168884277 


 11%|█         | 1061/10000 [03:55<32:39,  4.56it/s]

Loss = 6.3213090896606445 


 11%|█         | 1071/10000 [03:57<32:32,  4.57it/s]

Loss = 6.308765411376953 


 11%|█         | 1081/10000 [03:59<32:51,  4.52it/s]

Loss = 6.299354076385498 


 11%|█         | 1091/10000 [04:01<32:40,  4.54it/s]

Loss = 6.289885520935059 


 11%|█         | 1101/10000 [04:04<32:26,  4.57it/s]

Loss = 6.277713775634766 


 11%|█         | 1111/10000 [04:06<32:38,  4.54it/s]

Loss = 6.2671613693237305 


 11%|█         | 1121/10000 [04:08<32:26,  4.56it/s]

Loss = 6.25751256942749 


 11%|█▏        | 1131/10000 [04:10<32:31,  4.54it/s]

Loss = 6.246871471405029 


 11%|█▏        | 1141/10000 [04:12<32:32,  4.54it/s]

Loss = 6.236933708190918 


 12%|█▏        | 1151/10000 [04:15<32:25,  4.55it/s]

Loss = 6.225213527679443 


 12%|█▏        | 1161/10000 [04:17<32:22,  4.55it/s]

Loss = 6.213526725769043 


 12%|█▏        | 1171/10000 [04:19<32:28,  4.53it/s]

Loss = 6.202805995941162 


 12%|█▏        | 1181/10000 [04:21<32:22,  4.54it/s]

Loss = 6.192044258117676 


 12%|█▏        | 1191/10000 [04:23<32:10,  4.56it/s]

Loss = 6.183605194091797 


 12%|█▏        | 1201/10000 [04:26<32:08,  4.56it/s]

Loss = 6.174224853515625 


 12%|█▏        | 1211/10000 [04:28<32:08,  4.56it/s]

Loss = 6.165327548980713 


 12%|█▏        | 1221/10000 [04:30<32:07,  4.55it/s]

Loss = 6.15668249130249 


 12%|█▏        | 1231/10000 [04:32<32:02,  4.56it/s]

Loss = 6.1457109451293945 


 12%|█▏        | 1241/10000 [04:34<32:11,  4.53it/s]

Loss = 6.135770320892334 


 13%|█▎        | 1251/10000 [04:37<32:10,  4.53it/s]

Loss = 6.1280131340026855 


 13%|█▎        | 1261/10000 [04:39<31:48,  4.58it/s]

Loss = 6.1176018714904785 


 13%|█▎        | 1271/10000 [04:41<32:02,  4.54it/s]

Loss = 6.108392238616943 


 13%|█▎        | 1281/10000 [04:43<31:55,  4.55it/s]

Loss = 6.100151062011719 


 13%|█▎        | 1291/10000 [04:45<31:36,  4.59it/s]

Loss = 6.091867446899414 


 13%|█▎        | 1301/10000 [04:48<31:43,  4.57it/s]

Loss = 6.083589553833008 


 13%|█▎        | 1311/10000 [04:50<31:36,  4.58it/s]

Loss = 6.075460433959961 


 13%|█▎        | 1321/10000 [04:52<31:45,  4.55it/s]

Loss = 6.065572261810303 


 13%|█▎        | 1331/10000 [04:54<31:28,  4.59it/s]

Loss = 6.056328296661377 


 13%|█▎        | 1341/10000 [04:56<31:26,  4.59it/s]

Loss = 6.0469255447387695 


 14%|█▎        | 1351/10000 [04:58<31:42,  4.55it/s]

Loss = 6.040163040161133 


 14%|█▎        | 1361/10000 [05:01<31:19,  4.60it/s]

Loss = 6.031347751617432 


 14%|█▎        | 1371/10000 [05:03<31:38,  4.54it/s]

Loss = 6.021304130554199 


 14%|█▍        | 1381/10000 [05:05<31:29,  4.56it/s]

Loss = 6.014432907104492 


 14%|█▍        | 1391/10000 [05:07<31:28,  4.56it/s]

Loss = 6.00700044631958 


 14%|█▍        | 1401/10000 [05:09<31:26,  4.56it/s]

Loss = 5.997910022735596 


 14%|█▍        | 1411/10000 [05:12<31:21,  4.56it/s]

Loss = 5.98999547958374 


 14%|█▍        | 1421/10000 [05:14<31:24,  4.55it/s]

Loss = 5.983123779296875 


 14%|█▍        | 1431/10000 [05:16<31:28,  4.54it/s]

Loss = 5.973885536193848 


 14%|█▍        | 1441/10000 [05:18<31:11,  4.57it/s]

Loss = 5.967387676239014 


 15%|█▍        | 1451/10000 [05:20<31:14,  4.56it/s]

Loss = 5.960798263549805 


 15%|█▍        | 1461/10000 [05:23<31:20,  4.54it/s]

Loss = 5.9537200927734375 


 15%|█▍        | 1471/10000 [05:25<31:07,  4.57it/s]

Loss = 5.9452667236328125 


 15%|█▍        | 1481/10000 [05:27<30:51,  4.60it/s]

Loss = 5.939493179321289 


 15%|█▍        | 1491/10000 [05:29<31:00,  4.57it/s]

Loss = 5.930835723876953 


 15%|█▌        | 1501/10000 [05:31<31:05,  4.56it/s]

Loss = 5.925205230712891 


 15%|█▌        | 1511/10000 [05:34<31:09,  4.54it/s]

Loss = 5.918008804321289 


 15%|█▌        | 1521/10000 [05:36<31:02,  4.55it/s]

Loss = 5.911158084869385 


 15%|█▌        | 1531/10000 [05:38<31:05,  4.54it/s]

Loss = 5.905515670776367 


 15%|█▌        | 1541/10000 [05:40<30:53,  4.56it/s]

Loss = 5.89772891998291 


 16%|█▌        | 1551/10000 [05:42<30:54,  4.56it/s]

Loss = 5.889886379241943 


 16%|█▌        | 1561/10000 [05:44<30:59,  4.54it/s]

Loss = 5.884154319763184 


 16%|█▌        | 1571/10000 [05:47<30:50,  4.56it/s]

Loss = 5.879263877868652 


 16%|█▌        | 1581/10000 [05:49<30:37,  4.58it/s]

Loss = 5.873293876647949 


 16%|█▌        | 1591/10000 [05:51<30:42,  4.56it/s]

Loss = 5.866476058959961 


 16%|█▌        | 1601/10000 [05:53<30:40,  4.56it/s]

Loss = 5.860424041748047 


 16%|█▌        | 1611/10000 [05:55<30:31,  4.58it/s]

Loss = 5.854400157928467 


 16%|█▌        | 1621/10000 [05:58<30:40,  4.55it/s]

Loss = 5.848694324493408 


 16%|█▋        | 1631/10000 [06:00<30:28,  4.58it/s]

Loss = 5.841918468475342 


 16%|█▋        | 1641/10000 [06:02<30:30,  4.57it/s]

Loss = 5.835277080535889 


 17%|█▋        | 1651/10000 [06:04<30:15,  4.60it/s]

Loss = 5.829071998596191 


 17%|█▋        | 1661/10000 [06:06<30:29,  4.56it/s]

Loss = 5.8238043785095215 


 17%|█▋        | 1671/10000 [06:09<30:25,  4.56it/s]

Loss = 5.819206714630127 


 17%|█▋        | 1681/10000 [06:11<30:16,  4.58it/s]

Loss = 5.813426494598389 


 17%|█▋        | 1691/10000 [06:13<30:19,  4.57it/s]

Loss = 5.807836532592773 


 17%|█▋        | 1701/10000 [06:15<30:05,  4.60it/s]

Loss = 5.802810192108154 


 17%|█▋        | 1711/10000 [06:17<30:17,  4.56it/s]

Loss = 5.79758882522583 


 17%|█▋        | 1721/10000 [06:19<30:28,  4.53it/s]

Loss = 5.791553497314453 


 17%|█▋        | 1731/10000 [06:22<30:15,  4.55it/s]

Loss = 5.7865824699401855 


 17%|█▋        | 1741/10000 [06:24<30:00,  4.59it/s]

Loss = 5.780537128448486 


 18%|█▊        | 1751/10000 [06:26<29:56,  4.59it/s]

Loss = 5.776525020599365 


 18%|█▊        | 1761/10000 [06:28<29:48,  4.61it/s]

Loss = 5.772078990936279 


 18%|█▊        | 1771/10000 [06:30<29:54,  4.58it/s]

Loss = 5.768070697784424 


 18%|█▊        | 1781/10000 [06:33<30:03,  4.56it/s]

Loss = 5.761931896209717 


 18%|█▊        | 1791/10000 [06:35<30:02,  4.55it/s]

Loss = 5.7566046714782715 


 18%|█▊        | 1801/10000 [06:37<29:40,  4.60it/s]

Loss = 5.750307559967041 


 18%|█▊        | 1811/10000 [06:39<29:42,  4.59it/s]

Loss = 5.745161533355713 


 18%|█▊        | 1821/10000 [06:41<29:47,  4.57it/s]

Loss = 5.739904403686523 


 18%|█▊        | 1831/10000 [06:43<29:46,  4.57it/s]

Loss = 5.73382043838501 


 18%|█▊        | 1841/10000 [06:46<29:52,  4.55it/s]

Loss = 5.726577281951904 


 19%|█▊        | 1851/10000 [06:48<29:41,  4.58it/s]

Loss = 5.721297264099121 


 19%|█▊        | 1861/10000 [06:50<29:34,  4.59it/s]

Loss = 5.717531204223633 


 19%|█▊        | 1871/10000 [06:52<29:34,  4.58it/s]

Loss = 5.712557792663574 


 19%|█▉        | 1881/10000 [06:54<29:46,  4.54it/s]

Loss = 5.70759916305542 


 19%|█▉        | 1891/10000 [06:57<29:40,  4.55it/s]

Loss = 5.703399181365967 


 19%|█▉        | 1901/10000 [06:59<29:24,  4.59it/s]

Loss = 5.698348522186279 


 19%|█▉        | 1911/10000 [07:01<29:29,  4.57it/s]

Loss = 5.694998741149902 


 19%|█▉        | 1921/10000 [07:03<29:20,  4.59it/s]

Loss = 5.689696311950684 


 19%|█▉        | 1931/10000 [07:05<29:34,  4.55it/s]

Loss = 5.685455322265625 


 19%|█▉        | 1941/10000 [07:08<29:32,  4.55it/s]

Loss = 5.680147171020508 


 20%|█▉        | 1951/10000 [07:10<29:17,  4.58it/s]

Loss = 5.675939559936523 


 20%|█▉        | 1961/10000 [07:12<29:22,  4.56it/s]

Loss = 5.6705732345581055 


 20%|█▉        | 1971/10000 [07:14<29:18,  4.57it/s]

Loss = 5.666901111602783 


 20%|█▉        | 1981/10000 [07:16<29:12,  4.58it/s]

Loss = 5.662960052490234 


 20%|█▉        | 1991/10000 [07:18<29:09,  4.58it/s]

Loss = 5.659557342529297 


 20%|██        | 2001/10000 [07:21<29:11,  4.57it/s]

Loss = 5.654543876647949 


 20%|██        | 2011/10000 [07:23<29:08,  4.57it/s]

Loss = 5.64970588684082 


 20%|██        | 2021/10000 [07:25<29:05,  4.57it/s]

Loss = 5.64528226852417 


 20%|██        | 2031/10000 [07:27<28:51,  4.60it/s]

Loss = 5.6392035484313965 


 20%|██        | 2041/10000 [07:29<29:05,  4.56it/s]

Loss = 5.634724140167236 


 21%|██        | 2051/10000 [07:32<28:58,  4.57it/s]

Loss = 5.630283355712891 


 21%|██        | 2061/10000 [07:34<28:58,  4.57it/s]

Loss = 5.62599515914917 


 21%|██        | 2071/10000 [07:36<28:53,  4.57it/s]

Loss = 5.621557235717773 


 21%|██        | 2081/10000 [07:38<29:04,  4.54it/s]

Loss = 5.618046760559082 


 21%|██        | 2091/10000 [07:40<28:49,  4.57it/s]

Loss = 5.613148212432861 


 21%|██        | 2101/10000 [07:43<28:56,  4.55it/s]

Loss = 5.610307216644287 


 21%|██        | 2111/10000 [07:45<28:36,  4.60it/s]

Loss = 5.60606050491333 


 21%|██        | 2121/10000 [07:47<28:48,  4.56it/s]

Loss = 5.601419925689697 


 21%|██▏       | 2131/10000 [07:49<28:44,  4.56it/s]

Loss = 5.596693515777588 


 21%|██▏       | 2141/10000 [07:51<28:59,  4.52it/s]

Loss = 5.592906951904297 


 22%|██▏       | 2151/10000 [07:53<28:43,  4.56it/s]

Loss = 5.589014053344727 


 22%|██▏       | 2161/10000 [07:56<28:55,  4.52it/s]

Loss = 5.585808753967285 


 22%|██▏       | 2171/10000 [07:58<28:46,  4.53it/s]

Loss = 5.582623481750488 


 22%|██▏       | 2181/10000 [08:00<28:32,  4.57it/s]

Loss = 5.578815937042236 


 22%|██▏       | 2191/10000 [08:02<28:42,  4.53it/s]

Loss = 5.5752482414245605 


 22%|██▏       | 2201/10000 [08:04<28:27,  4.57it/s]

Loss = 5.571435451507568 


 22%|██▏       | 2211/10000 [08:07<28:34,  4.54it/s]

Loss = 5.567859649658203 


 22%|██▏       | 2221/10000 [08:09<28:29,  4.55it/s]

Loss = 5.564184665679932 


 22%|██▏       | 2231/10000 [08:11<28:20,  4.57it/s]

Loss = 5.560856819152832 


 22%|██▏       | 2241/10000 [08:13<28:34,  4.53it/s]

Loss = 5.556576728820801 


 23%|██▎       | 2251/10000 [08:15<28:33,  4.52it/s]

Loss = 5.554994583129883 


 23%|██▎       | 2261/10000 [08:18<28:25,  4.54it/s]

Loss = 5.552646160125732 


 23%|██▎       | 2271/10000 [08:20<28:08,  4.58it/s]

Loss = 5.54795503616333 


 23%|██▎       | 2281/10000 [08:22<28:20,  4.54it/s]

Loss = 5.543639183044434 


 23%|██▎       | 2291/10000 [08:24<28:23,  4.53it/s]

Loss = 5.539955139160156 


 23%|██▎       | 2301/10000 [08:26<28:31,  4.50it/s]

Loss = 5.537336349487305 


 23%|██▎       | 2311/10000 [08:29<28:11,  4.54it/s]

Loss = 5.534654140472412 


 23%|██▎       | 2321/10000 [08:31<28:13,  4.53it/s]

Loss = 5.531638145446777 


 23%|██▎       | 2331/10000 [08:33<28:02,  4.56it/s]

Loss = 5.528280258178711 


 23%|██▎       | 2341/10000 [08:35<27:58,  4.56it/s]

Loss = 5.525909900665283 


 24%|██▎       | 2351/10000 [08:37<27:56,  4.56it/s]

Loss = 5.522731781005859 


 24%|██▎       | 2361/10000 [08:40<28:05,  4.53it/s]

Loss = 5.5192437171936035 


 24%|██▎       | 2371/10000 [08:42<28:08,  4.52it/s]

Loss = 5.514747142791748 


 24%|██▍       | 2381/10000 [08:44<27:47,  4.57it/s]

Loss = 5.510494232177734 


 24%|██▍       | 2391/10000 [08:46<28:01,  4.53it/s]

Loss = 5.5072102546691895 


 24%|██▍       | 2401/10000 [08:48<27:52,  4.54it/s]

Loss = 5.503507137298584 


 24%|██▍       | 2411/10000 [08:51<27:45,  4.56it/s]

Loss = 5.4998321533203125 


 24%|██▍       | 2421/10000 [08:53<27:35,  4.58it/s]

Loss = 5.496710300445557 


 24%|██▍       | 2431/10000 [08:55<27:40,  4.56it/s]

Loss = 5.494089603424072 


 24%|██▍       | 2441/10000 [08:57<27:47,  4.53it/s]

Loss = 5.490393161773682 


 25%|██▍       | 2451/10000 [08:59<27:35,  4.56it/s]

Loss = 5.4868364334106445 


 25%|██▍       | 2461/10000 [09:02<27:45,  4.53it/s]

Loss = 5.4834723472595215 


 25%|██▍       | 2471/10000 [09:04<27:29,  4.56it/s]

Loss = 5.481298923492432 


 25%|██▍       | 2481/10000 [09:06<27:25,  4.57it/s]

Loss = 5.477900505065918 


 25%|██▍       | 2491/10000 [09:08<27:31,  4.55it/s]

Loss = 5.474756240844727 


 25%|██▌       | 2501/10000 [09:10<27:29,  4.55it/s]

Loss = 5.471161365509033 


 25%|██▌       | 2511/10000 [09:13<27:28,  4.54it/s]

Loss = 5.4672136306762695 


 25%|██▌       | 2521/10000 [09:15<27:21,  4.56it/s]

Loss = 5.464524745941162 


 25%|██▌       | 2531/10000 [09:17<27:16,  4.56it/s]

Loss = 5.462796211242676 


 25%|██▌       | 2541/10000 [09:19<27:29,  4.52it/s]

Loss = 5.459138870239258 


 26%|██▌       | 2551/10000 [09:21<27:13,  4.56it/s]

Loss = 5.4558539390563965 


 26%|██▌       | 2561/10000 [09:24<27:10,  4.56it/s]

Loss = 5.452724933624268 


 26%|██▌       | 2571/10000 [09:26<27:10,  4.56it/s]

Loss = 5.4485650062561035 


 26%|██▌       | 2581/10000 [09:28<27:09,  4.55it/s]

Loss = 5.445416450500488 


 26%|██▌       | 2591/10000 [09:30<27:11,  4.54it/s]

Loss = 5.443419456481934 


 26%|██▌       | 2601/10000 [09:32<27:05,  4.55it/s]

Loss = 5.440961837768555 


 26%|██▌       | 2611/10000 [09:35<27:08,  4.54it/s]

Loss = 5.439181327819824 


 26%|██▌       | 2621/10000 [09:37<26:58,  4.56it/s]

Loss = 5.437203884124756 


 26%|██▋       | 2631/10000 [09:39<27:08,  4.52it/s]

Loss = 5.433376789093018 


 26%|██▋       | 2641/10000 [09:41<26:49,  4.57it/s]

Loss = 5.429962635040283 


 27%|██▋       | 2651/10000 [09:43<26:56,  4.55it/s]

Loss = 5.4272308349609375 


 27%|██▋       | 2661/10000 [09:45<26:50,  4.56it/s]

Loss = 5.42440128326416 


 27%|██▋       | 2671/10000 [09:48<26:47,  4.56it/s]

Loss = 5.421075820922852 


 27%|██▋       | 2681/10000 [09:50<26:49,  4.55it/s]

Loss = 5.41987943649292 


 27%|██▋       | 2691/10000 [09:52<26:40,  4.57it/s]

Loss = 5.416906356811523 


 27%|██▋       | 2701/10000 [09:54<26:47,  4.54it/s]

Loss = 5.414026260375977 


 27%|██▋       | 2711/10000 [09:56<26:33,  4.58it/s]

Loss = 5.410425186157227 


 27%|██▋       | 2721/10000 [09:59<26:42,  4.54it/s]

Loss = 5.408514499664307 


 27%|██▋       | 2731/10000 [10:01<26:45,  4.53it/s]

Loss = 5.405612945556641 


 27%|██▋       | 2741/10000 [10:03<26:44,  4.53it/s]

Loss = 5.402566909790039 


 28%|██▊       | 2751/10000 [10:05<26:21,  4.58it/s]

Loss = 5.401711940765381 


 28%|██▊       | 2761/10000 [10:07<26:33,  4.54it/s]

Loss = 5.39923095703125 


 28%|██▊       | 2771/10000 [10:10<26:20,  4.57it/s]

Loss = 5.395716190338135 


 28%|██▊       | 2781/10000 [10:12<26:30,  4.54it/s]

Loss = 5.3927788734436035 


 28%|██▊       | 2791/10000 [10:14<26:33,  4.53it/s]

Loss = 5.390652179718018 


 28%|██▊       | 2801/10000 [10:16<26:13,  4.57it/s]

Loss = 5.387973308563232 


 28%|██▊       | 2811/10000 [10:18<26:09,  4.58it/s]

Loss = 5.3856024742126465 


 28%|██▊       | 2821/10000 [10:21<26:18,  4.55it/s]

Loss = 5.382744789123535 


 28%|██▊       | 2831/10000 [10:23<26:20,  4.54it/s]

Loss = 5.3803911209106445 


 28%|██▊       | 2841/10000 [10:25<26:13,  4.55it/s]

Loss = 5.377869129180908 


 29%|██▊       | 2851/10000 [10:27<26:11,  4.55it/s]

Loss = 5.374785423278809 


 29%|██▊       | 2861/10000 [10:29<26:22,  4.51it/s]

Loss = 5.372771263122559 


 29%|██▊       | 2871/10000 [10:32<26:12,  4.53it/s]

Loss = 5.370580673217773 


 29%|██▉       | 2881/10000 [10:34<25:50,  4.59it/s]

Loss = 5.367427349090576 


 29%|██▉       | 2891/10000 [10:36<25:51,  4.58it/s]

Loss = 5.3645806312561035 


 29%|██▉       | 2901/10000 [10:38<25:52,  4.57it/s]

Loss = 5.3621931076049805 


 29%|██▉       | 2911/10000 [10:40<26:08,  4.52it/s]

Loss = 5.3586626052856445 


 29%|██▉       | 2921/10000 [10:43<25:59,  4.54it/s]

Loss = 5.355928897857666 


 29%|██▉       | 2931/10000 [10:45<26:03,  4.52it/s]

Loss = 5.353679656982422 


 29%|██▉       | 2941/10000 [10:47<25:41,  4.58it/s]

Loss = 5.350194931030273 


 30%|██▉       | 2951/10000 [10:49<25:39,  4.58it/s]

Loss = 5.34680700302124 


 30%|██▉       | 2961/10000 [10:51<25:45,  4.55it/s]

Loss = 5.343864440917969 


 30%|██▉       | 2971/10000 [10:54<25:49,  4.54it/s]

Loss = 5.340777397155762 


 30%|██▉       | 2981/10000 [10:56<25:56,  4.51it/s]

Loss = 5.338773250579834 


 30%|██▉       | 2991/10000 [10:58<25:34,  4.57it/s]

Loss = 5.3359575271606445 


 30%|███       | 3001/10000 [11:00<25:32,  4.57it/s]

Loss = 5.334033012390137 


 30%|███       | 3011/10000 [11:02<25:30,  4.57it/s]

Loss = 5.3317742347717285 


 30%|███       | 3021/10000 [11:04<25:23,  4.58it/s]

Loss = 5.3296051025390625 


 30%|███       | 3031/10000 [11:07<25:20,  4.58it/s]

Loss = 5.328638076782227 


 30%|███       | 3041/10000 [11:09<25:25,  4.56it/s]

Loss = 5.326551914215088 


 31%|███       | 3051/10000 [11:11<25:23,  4.56it/s]

Loss = 5.3243513107299805 


 31%|███       | 3061/10000 [11:13<25:03,  4.62it/s]

Loss = 5.321778774261475 


 31%|███       | 3071/10000 [11:15<25:16,  4.57it/s]

Loss = 5.318640232086182 


 31%|███       | 3081/10000 [11:18<25:14,  4.57it/s]

Loss = 5.316112518310547 


 31%|███       | 3091/10000 [11:20<25:15,  4.56it/s]

Loss = 5.313693046569824 


 31%|███       | 3101/10000 [11:22<25:03,  4.59it/s]

Loss = 5.3120646476745605 


 31%|███       | 3111/10000 [11:24<25:08,  4.57it/s]

Loss = 5.310388088226318 


 31%|███       | 3121/10000 [11:26<25:10,  4.55it/s]

Loss = 5.308485984802246 


 31%|███▏      | 3131/10000 [11:29<25:04,  4.57it/s]

Loss = 5.3066086769104 


 31%|███▏      | 3141/10000 [11:31<24:55,  4.59it/s]

Loss = 5.30439567565918 


 32%|███▏      | 3151/10000 [11:33<25:05,  4.55it/s]

Loss = 5.3023834228515625 


 32%|███▏      | 3161/10000 [11:35<25:07,  4.54it/s]

Loss = 5.3000569343566895 


 32%|███▏      | 3171/10000 [11:37<25:09,  4.53it/s]

Loss = 5.298115253448486 


 32%|███▏      | 3181/10000 [11:39<24:58,  4.55it/s]

Loss = 5.296143531799316 


 32%|███▏      | 3191/10000 [11:42<24:53,  4.56it/s]

Loss = 5.294142246246338 


 32%|███▏      | 3201/10000 [11:44<24:43,  4.58it/s]

Loss = 5.2925543785095215 


 32%|███▏      | 3211/10000 [11:46<24:41,  4.58it/s]

Loss = 5.291048526763916 


 32%|███▏      | 3221/10000 [11:48<24:45,  4.56it/s]

Loss = 5.28902530670166 


 32%|███▏      | 3231/10000 [11:50<24:48,  4.55it/s]

Loss = 5.286411285400391 


 32%|███▏      | 3241/10000 [11:53<24:45,  4.55it/s]

Loss = 5.284515857696533 


 33%|███▎      | 3251/10000 [11:55<24:41,  4.56it/s]

Loss = 5.282541275024414 


 33%|███▎      | 3261/10000 [11:57<24:47,  4.53it/s]

Loss = 5.280516147613525 


 33%|███▎      | 3271/10000 [11:59<24:21,  4.61it/s]

Loss = 5.278392791748047 


 33%|███▎      | 3281/10000 [12:01<24:21,  4.60it/s]

Loss = 5.276714324951172 


 33%|███▎      | 3291/10000 [12:04<24:29,  4.57it/s]

Loss = 5.275309085845947 


 33%|███▎      | 3301/10000 [12:06<24:18,  4.59it/s]

Loss = 5.272854804992676 


 33%|███▎      | 3311/10000 [12:08<24:22,  4.57it/s]

Loss = 5.270794868469238 


 33%|███▎      | 3321/10000 [12:10<24:15,  4.59it/s]

Loss = 5.268465995788574 


 33%|███▎      | 3331/10000 [12:12<24:19,  4.57it/s]

Loss = 5.26753044128418 


 33%|███▎      | 3341/10000 [12:14<24:30,  4.53it/s]

Loss = 5.26591682434082 


 34%|███▎      | 3351/10000 [12:17<24:47,  4.47it/s]

Loss = 5.263192176818848 


 34%|███▎      | 3361/10000 [12:19<24:20,  4.55it/s]

Loss = 5.2611236572265625 


 34%|███▎      | 3371/10000 [12:21<24:02,  4.59it/s]

Loss = 5.259396553039551 


 34%|███▍      | 3381/10000 [12:23<24:09,  4.57it/s]

Loss = 5.257719993591309 


 34%|███▍      | 3391/10000 [12:25<23:59,  4.59it/s]

Loss = 5.255524635314941 


 34%|███▍      | 3401/10000 [12:28<24:06,  4.56it/s]

Loss = 5.252787113189697 


 34%|███▍      | 3411/10000 [12:30<24:10,  4.54it/s]

Loss = 5.251556396484375 


 34%|███▍      | 3421/10000 [12:32<24:07,  4.55it/s]

Loss = 5.249644756317139 


 34%|███▍      | 3431/10000 [12:34<23:53,  4.58it/s]

Loss = 5.247086524963379 


 34%|███▍      | 3441/10000 [12:36<24:04,  4.54it/s]

Loss = 5.244742393493652 


 35%|███▍      | 3451/10000 [12:39<23:54,  4.57it/s]

Loss = 5.2425055503845215 


 35%|███▍      | 3461/10000 [12:41<23:49,  4.57it/s]

Loss = 5.2402567863464355 


 35%|███▍      | 3471/10000 [12:43<23:49,  4.57it/s]

Loss = 5.238053321838379 


 35%|███▍      | 3481/10000 [12:45<23:39,  4.59it/s]

Loss = 5.236938953399658 


 35%|███▍      | 3491/10000 [12:47<23:43,  4.57it/s]

Loss = 5.234972953796387 


 35%|███▌      | 3501/10000 [12:50<23:42,  4.57it/s]

Loss = 5.233062744140625 


 35%|███▌      | 3511/10000 [12:52<23:55,  4.52it/s]

Loss = 5.230371952056885 


 35%|███▌      | 3521/10000 [12:54<23:41,  4.56it/s]

Loss = 5.229005336761475 


 35%|███▌      | 3531/10000 [12:56<23:42,  4.55it/s]

Loss = 5.227262020111084 


 35%|███▌      | 3541/10000 [12:58<23:37,  4.56it/s]

Loss = 5.225603103637695 


 36%|███▌      | 3551/10000 [13:01<23:38,  4.55it/s]

Loss = 5.223877906799316 


 36%|███▌      | 3561/10000 [13:03<23:30,  4.57it/s]

Loss = 5.221440315246582 


 36%|███▌      | 3571/10000 [13:05<23:17,  4.60it/s]

Loss = 5.2191667556762695 


 36%|███▌      | 3581/10000 [13:07<23:37,  4.53it/s]

Loss = 5.218267440795898 


 36%|███▌      | 3591/10000 [13:09<23:21,  4.57it/s]

Loss = 5.2163004875183105 


 36%|███▌      | 3601/10000 [13:11<23:18,  4.58it/s]

Loss = 5.213987350463867 


 36%|███▌      | 3611/10000 [13:14<23:23,  4.55it/s]

Loss = 5.212166786193848 


 36%|███▌      | 3621/10000 [13:16<23:22,  4.55it/s]

Loss = 5.210383415222168 


 36%|███▋      | 3631/10000 [13:18<23:28,  4.52it/s]

Loss = 5.20867395401001 


 36%|███▋      | 3641/10000 [13:20<23:30,  4.51it/s]

Loss = 5.207402229309082 


 37%|███▋      | 3651/10000 [13:22<23:08,  4.57it/s]

Loss = 5.205286026000977 


 37%|███▋      | 3661/10000 [13:25<23:14,  4.54it/s]

Loss = 5.2038068771362305 


 37%|███▋      | 3671/10000 [13:27<23:17,  4.53it/s]

Loss = 5.202411651611328 


 37%|███▋      | 3681/10000 [13:29<23:15,  4.53it/s]

Loss = 5.201204299926758 


 37%|███▋      | 3691/10000 [13:31<23:10,  4.54it/s]

Loss = 5.200689315795898 


 37%|███▋      | 3701/10000 [13:33<23:03,  4.55it/s]

Loss = 5.198604106903076 


 37%|███▋      | 3711/10000 [13:36<22:59,  4.56it/s]

Loss = 5.197197914123535 


 37%|███▋      | 3721/10000 [13:38<23:11,  4.51it/s]

Loss = 5.195952415466309 


 37%|███▋      | 3731/10000 [13:40<22:56,  4.55it/s]

Loss = 5.19484281539917 


 37%|███▋      | 3741/10000 [13:42<22:43,  4.59it/s]

Loss = 5.193106174468994 


 38%|███▊      | 3751/10000 [13:44<22:48,  4.57it/s]

Loss = 5.19126558303833 


 38%|███▊      | 3761/10000 [13:47<22:52,  4.55it/s]

Loss = 5.190311908721924 


 38%|███▊      | 3771/10000 [13:49<22:53,  4.54it/s]

Loss = 5.18871545791626 


 38%|███▊      | 3781/10000 [13:51<22:48,  4.54it/s]

Loss = 5.187763214111328 


 38%|███▊      | 3791/10000 [13:53<22:39,  4.57it/s]

Loss = 5.186006546020508 


 38%|███▊      | 3801/10000 [13:55<22:46,  4.54it/s]

Loss = 5.184170246124268 


 38%|███▊      | 3811/10000 [13:58<22:41,  4.54it/s]

Loss = 5.182880878448486 


 38%|███▊      | 3821/10000 [14:00<22:31,  4.57it/s]

Loss = 5.181607723236084 


 38%|███▊      | 3831/10000 [14:02<22:45,  4.52it/s]

Loss = 5.180338382720947 


 38%|███▊      | 3841/10000 [14:04<22:30,  4.56it/s]

Loss = 5.178346633911133 


 39%|███▊      | 3851/10000 [14:06<22:20,  4.59it/s]

Loss = 5.176916122436523 


 39%|███▊      | 3861/10000 [14:09<22:43,  4.50it/s]

Loss = 5.175903797149658 


 39%|███▊      | 3871/10000 [14:11<22:31,  4.54it/s]

Loss = 5.174464702606201 


 39%|███▉      | 3881/10000 [14:13<22:31,  4.53it/s]

Loss = 5.172300815582275 


 39%|███▉      | 3891/10000 [14:15<22:24,  4.54it/s]

Loss = 5.170863151550293 


 39%|███▉      | 3901/10000 [14:17<22:14,  4.57it/s]

Loss = 5.169257640838623 


 39%|███▉      | 3911/10000 [14:20<22:15,  4.56it/s]

Loss = 5.1682024002075195 


 39%|███▉      | 3921/10000 [14:22<22:26,  4.52it/s]

Loss = 5.1665143966674805 


 39%|███▉      | 3931/10000 [14:24<22:09,  4.56it/s]

Loss = 5.165305137634277 


 39%|███▉      | 3941/10000 [14:26<22:14,  4.54it/s]

Loss = 5.164187908172607 


 40%|███▉      | 3951/10000 [14:28<22:11,  4.54it/s]

Loss = 5.163222312927246 


 40%|███▉      | 3961/10000 [14:31<21:59,  4.58it/s]

Loss = 5.162126064300537 


 40%|███▉      | 3971/10000 [14:33<22:08,  4.54it/s]

Loss = 5.160621643066406 


 40%|███▉      | 3981/10000 [14:35<21:55,  4.58it/s]

Loss = 5.159519672393799 


 40%|███▉      | 3991/10000 [14:37<21:59,  4.56it/s]

Loss = 5.157245635986328 


 40%|████      | 4001/10000 [14:39<22:08,  4.52it/s]

Loss = 5.155534267425537 


 40%|████      | 4011/10000 [14:42<21:51,  4.57it/s]

Loss = 5.153913497924805 


 40%|████      | 4021/10000 [14:44<22:02,  4.52it/s]

Loss = 5.15176248550415 


 40%|████      | 4031/10000 [14:46<21:58,  4.53it/s]

Loss = 5.150091171264648 


 40%|████      | 4041/10000 [14:48<21:53,  4.54it/s]

Loss = 5.148581027984619 


 41%|████      | 4051/10000 [14:50<21:52,  4.53it/s]

Loss = 5.14745569229126 


 41%|████      | 4061/10000 [14:53<21:47,  4.54it/s]

Loss = 5.145635604858398 


 41%|████      | 4071/10000 [14:55<21:43,  4.55it/s]

Loss = 5.1434502601623535 


 41%|████      | 4081/10000 [14:57<22:00,  4.48it/s]

Loss = 5.140827655792236 


 41%|████      | 4091/10000 [14:59<21:45,  4.52it/s]

Loss = 5.140312194824219 


 41%|████      | 4101/10000 [15:01<21:35,  4.55it/s]

Loss = 5.139273643493652 


 41%|████      | 4111/10000 [15:04<21:43,  4.52it/s]

Loss = 5.137964725494385 


 41%|████      | 4121/10000 [15:06<21:29,  4.56it/s]

Loss = 5.137121200561523 


 41%|████▏     | 4131/10000 [15:08<21:17,  4.59it/s]

Loss = 5.135422706604004 


 41%|████▏     | 4141/10000 [15:10<21:19,  4.58it/s]

Loss = 5.133537292480469 


 42%|████▏     | 4151/10000 [15:12<21:25,  4.55it/s]

Loss = 5.132266044616699 


 42%|████▏     | 4161/10000 [15:15<21:21,  4.56it/s]

Loss = 5.130296230316162 


 42%|████▏     | 4171/10000 [15:17<21:16,  4.57it/s]

Loss = 5.128427505493164 


 42%|████▏     | 4181/10000 [15:19<21:13,  4.57it/s]

Loss = 5.126267910003662 


 42%|████▏     | 4191/10000 [15:21<21:18,  4.54it/s]

Loss = 5.124467849731445 


 42%|████▏     | 4201/10000 [15:23<21:22,  4.52it/s]

Loss = 5.123074054718018 


 42%|████▏     | 4211/10000 [15:25<21:06,  4.57it/s]

Loss = 5.121518611907959 


 42%|████▏     | 4221/10000 [15:28<21:03,  4.57it/s]

Loss = 5.120241165161133 


 42%|████▏     | 4231/10000 [15:30<21:06,  4.56it/s]

Loss = 5.118853569030762 


 42%|████▏     | 4241/10000 [15:32<20:55,  4.59it/s]

Loss = 5.117820739746094 


 43%|████▎     | 4251/10000 [15:34<21:10,  4.52it/s]

Loss = 5.115879058837891 


 43%|████▎     | 4261/10000 [15:36<20:56,  4.57it/s]

Loss = 5.114416122436523 


 43%|████▎     | 4271/10000 [15:39<21:06,  4.52it/s]

Loss = 5.112615585327148 


 43%|████▎     | 4281/10000 [15:41<20:58,  4.54it/s]

Loss = 5.11112642288208 


 43%|████▎     | 4291/10000 [15:43<20:58,  4.54it/s]

Loss = 5.109564304351807 


 43%|████▎     | 4301/10000 [15:45<20:50,  4.56it/s]

Loss = 5.107741355895996 


 43%|████▎     | 4311/10000 [15:47<21:02,  4.51it/s]

Loss = 5.1053619384765625 


 43%|████▎     | 4321/10000 [15:50<20:38,  4.59it/s]

Loss = 5.103771209716797 


 43%|████▎     | 4331/10000 [15:52<20:53,  4.52it/s]

Loss = 5.1024861335754395 


 43%|████▎     | 4341/10000 [15:54<20:43,  4.55it/s]

Loss = 5.101335525512695 


 44%|████▎     | 4351/10000 [15:56<20:37,  4.56it/s]

Loss = 5.099644184112549 


 44%|████▎     | 4361/10000 [15:58<20:31,  4.58it/s]

Loss = 5.09827184677124 


 44%|████▎     | 4371/10000 [16:01<20:53,  4.49it/s]

Loss = 5.096774101257324 


 44%|████▍     | 4381/10000 [16:03<20:34,  4.55it/s]

Loss = 5.095620632171631 


 44%|████▍     | 4391/10000 [16:05<20:40,  4.52it/s]

Loss = 5.094653606414795 


 44%|████▍     | 4401/10000 [16:07<20:23,  4.58it/s]

Loss = 5.092733860015869 


 44%|████▍     | 4411/10000 [16:09<20:44,  4.49it/s]

Loss = 5.091233730316162 


 44%|████▍     | 4421/10000 [16:12<20:30,  4.53it/s]

Loss = 5.0902628898620605 


 44%|████▍     | 4431/10000 [16:14<20:26,  4.54it/s]

Loss = 5.08925724029541 


 44%|████▍     | 4441/10000 [16:16<20:19,  4.56it/s]

Loss = 5.088516712188721 


 45%|████▍     | 4451/10000 [16:18<20:17,  4.56it/s]

Loss = 5.087253570556641 


 45%|████▍     | 4461/10000 [16:20<20:17,  4.55it/s]

Loss = 5.086306571960449 


 45%|████▍     | 4471/10000 [16:23<20:13,  4.56it/s]

Loss = 5.084722518920898 


 45%|████▍     | 4481/10000 [16:25<20:11,  4.55it/s]

Loss = 5.0840744972229 


 45%|████▍     | 4491/10000 [16:27<20:13,  4.54it/s]

Loss = 5.082483768463135 


 45%|████▌     | 4501/10000 [16:29<20:11,  4.54it/s]

Loss = 5.080832004547119 


 45%|████▌     | 4511/10000 [16:31<20:09,  4.54it/s]

Loss = 5.079497814178467 


 45%|████▌     | 4521/10000 [16:34<20:08,  4.53it/s]

Loss = 5.078064441680908 


 45%|████▌     | 4531/10000 [16:36<19:59,  4.56it/s]

Loss = 5.076587677001953 


 45%|████▌     | 4541/10000 [16:38<20:00,  4.55it/s]

Loss = 5.075723171234131 


 46%|████▌     | 4551/10000 [16:40<19:57,  4.55it/s]

Loss = 5.074442386627197 


 46%|████▌     | 4561/10000 [16:42<20:05,  4.51it/s]

Loss = 5.072693347930908 


 46%|████▌     | 4571/10000 [16:45<20:01,  4.52it/s]

Loss = 5.0719122886657715 


 46%|████▌     | 4581/10000 [16:47<19:40,  4.59it/s]

Loss = 5.070898532867432 


 46%|████▌     | 4591/10000 [16:49<19:45,  4.56it/s]

Loss = 5.069762229919434 


 46%|████▌     | 4601/10000 [16:51<19:31,  4.61it/s]

Loss = 5.067990779876709 


 46%|████▌     | 4611/10000 [16:53<19:39,  4.57it/s]

Loss = 5.0668721199035645 


 46%|████▌     | 4621/10000 [16:56<19:35,  4.58it/s]

Loss = 5.0657758712768555 


 46%|████▋     | 4631/10000 [16:58<19:30,  4.59it/s]

Loss = 5.064631462097168 


 46%|████▋     | 4641/10000 [17:00<19:39,  4.54it/s]

Loss = 5.063816547393799 


 47%|████▋     | 4651/10000 [17:02<19:30,  4.57it/s]

Loss = 5.062513828277588 


 47%|████▋     | 4661/10000 [17:04<19:25,  4.58it/s]

Loss = 5.060494422912598 


 47%|████▋     | 4671/10000 [17:07<19:28,  4.56it/s]

Loss = 5.058751106262207 


 47%|████▋     | 4681/10000 [17:09<19:32,  4.54it/s]

Loss = 5.058828830718994 


 47%|████▋     | 4691/10000 [17:11<19:29,  4.54it/s]

Loss = 5.057459831237793 


 47%|████▋     | 4701/10000 [17:13<19:15,  4.58it/s]

Loss = 5.055976867675781 


 47%|████▋     | 4711/10000 [17:15<19:24,  4.54it/s]

Loss = 5.055141448974609 


 47%|████▋     | 4721/10000 [17:17<19:09,  4.59it/s]

Loss = 5.053828239440918 


 47%|████▋     | 4731/10000 [17:20<19:14,  4.56it/s]

Loss = 5.0528693199157715 


 47%|████▋     | 4741/10000 [17:22<19:12,  4.56it/s]

Loss = 5.05165433883667 


 48%|████▊     | 4751/10000 [17:24<19:17,  4.54it/s]

Loss = 5.050700664520264 


 48%|████▊     | 4761/10000 [17:26<19:03,  4.58it/s]

Loss = 5.0493083000183105 


 48%|████▊     | 4771/10000 [17:28<19:26,  4.48it/s]

Loss = 5.048076152801514 


 48%|████▊     | 4781/10000 [17:31<19:04,  4.56it/s]

Loss = 5.0466203689575195 


 48%|████▊     | 4791/10000 [17:33<19:06,  4.54it/s]

Loss = 5.045642375946045 


 48%|████▊     | 4801/10000 [17:35<18:49,  4.60it/s]

Loss = 5.0437846183776855 


 48%|████▊     | 4811/10000 [17:37<19:02,  4.54it/s]

Loss = 5.0431952476501465 


 48%|████▊     | 4821/10000 [17:39<18:59,  4.54it/s]

Loss = 5.041621208190918 


 48%|████▊     | 4831/10000 [17:42<18:58,  4.54it/s]

Loss = 5.03988790512085 


 48%|████▊     | 4841/10000 [17:44<18:56,  4.54it/s]

Loss = 5.0380754470825195 


 49%|████▊     | 4851/10000 [17:46<19:01,  4.51it/s]

Loss = 5.036991596221924 


 49%|████▊     | 4861/10000 [17:48<18:46,  4.56it/s]

Loss = 5.036533832550049 


 49%|████▊     | 4871/10000 [17:50<18:53,  4.53it/s]

Loss = 5.035274028778076 


 49%|████▉     | 4881/10000 [17:53<18:48,  4.54it/s]

Loss = 5.034075736999512 


 49%|████▉     | 4891/10000 [17:55<18:46,  4.54it/s]

Loss = 5.03342866897583 


 49%|████▉     | 4901/10000 [17:57<18:47,  4.52it/s]

Loss = 5.032391548156738 


 49%|████▉     | 4911/10000 [17:59<18:42,  4.53it/s]

Loss = 5.031179428100586 


 49%|████▉     | 4921/10000 [18:01<18:24,  4.60it/s]

Loss = 5.030834674835205 


 49%|████▉     | 4931/10000 [18:04<18:36,  4.54it/s]

Loss = 5.030355453491211 


 49%|████▉     | 4941/10000 [18:06<18:31,  4.55it/s]

Loss = 5.029193878173828 


 50%|████▉     | 4951/10000 [18:08<18:20,  4.59it/s]

Loss = 5.028075695037842 


 50%|████▉     | 4961/10000 [18:10<18:30,  4.54it/s]

Loss = 5.02648401260376 


 50%|████▉     | 4971/10000 [18:12<18:21,  4.57it/s]

Loss = 5.025996208190918 


 50%|████▉     | 4981/10000 [18:15<18:24,  4.54it/s]

Loss = 5.024697303771973 


 50%|████▉     | 4991/10000 [18:17<18:21,  4.55it/s]

Loss = 5.02454137802124 


 50%|█████     | 5001/10000 [18:19<18:11,  4.58it/s]

Loss = 5.023970603942871 


 50%|█████     | 5011/10000 [18:21<18:07,  4.59it/s]

Loss = 5.0229902267456055 


 50%|█████     | 5021/10000 [18:23<18:17,  4.54it/s]

Loss = 5.021795749664307 


 50%|█████     | 5031/10000 [18:26<18:09,  4.56it/s]

Loss = 5.0207414627075195 


 50%|█████     | 5041/10000 [18:28<18:12,  4.54it/s]

Loss = 5.019992351531982 


 51%|█████     | 5051/10000 [18:30<18:05,  4.56it/s]

Loss = 5.019262313842773 


 51%|█████     | 5061/10000 [18:32<17:58,  4.58it/s]

Loss = 5.0184221267700195 


 51%|█████     | 5071/10000 [18:34<17:56,  4.58it/s]

Loss = 5.0171308517456055 


 51%|█████     | 5081/10000 [18:36<17:55,  4.57it/s]

Loss = 5.015659809112549 


 51%|█████     | 5091/10000 [18:39<18:03,  4.53it/s]

Loss = 5.014814853668213 


 51%|█████     | 5101/10000 [18:41<17:56,  4.55it/s]

Loss = 5.013648509979248 


 51%|█████     | 5111/10000 [18:43<18:02,  4.52it/s]

Loss = 5.012292861938477 


 51%|█████     | 5121/10000 [18:45<17:48,  4.57it/s]

Loss = 5.011506080627441 


 51%|█████▏    | 5131/10000 [18:47<17:47,  4.56it/s]

Loss = 5.0104241371154785 


 51%|█████▏    | 5141/10000 [18:50<17:35,  4.61it/s]

Loss = 5.008877754211426 


 52%|█████▏    | 5151/10000 [18:52<17:42,  4.56it/s]

Loss = 5.008283615112305 


 52%|█████▏    | 5161/10000 [18:54<17:46,  4.54it/s]

Loss = 5.007526874542236 


 52%|█████▏    | 5171/10000 [18:56<17:37,  4.57it/s]

Loss = 5.006557941436768 


 52%|█████▏    | 5181/10000 [18:58<17:47,  4.51it/s]

Loss = 5.005647659301758 


 52%|█████▏    | 5191/10000 [19:01<17:32,  4.57it/s]

Loss = 5.004522323608398 


 52%|█████▏    | 5201/10000 [19:03<17:31,  4.56it/s]

Loss = 5.003332614898682 


 52%|█████▏    | 5211/10000 [19:05<17:21,  4.60it/s]

Loss = 5.002195835113525 


 52%|█████▏    | 5221/10000 [19:07<17:24,  4.57it/s]

Loss = 5.000864505767822 


 52%|█████▏    | 5231/10000 [19:09<17:22,  4.57it/s]

Loss = 5.000313758850098 


 52%|█████▏    | 5241/10000 [19:12<17:32,  4.52it/s]

Loss = 4.999443054199219 


 53%|█████▎    | 5251/10000 [19:14<17:33,  4.51it/s]

Loss = 4.998175621032715 


 53%|█████▎    | 5261/10000 [19:16<17:23,  4.54it/s]

Loss = 4.996619701385498 


 53%|█████▎    | 5271/10000 [19:18<17:14,  4.57it/s]

Loss = 4.995439529418945 


 53%|█████▎    | 5281/10000 [19:20<17:17,  4.55it/s]

Loss = 4.993768215179443 


 53%|█████▎    | 5291/10000 [19:23<17:17,  4.54it/s]

Loss = 4.992551326751709 


 53%|█████▎    | 5301/10000 [19:25<17:24,  4.50it/s]

Loss = 4.990977764129639 


 53%|█████▎    | 5311/10000 [19:27<17:14,  4.53it/s]

Loss = 4.990396499633789 


 53%|█████▎    | 5321/10000 [19:29<17:05,  4.56it/s]

Loss = 4.989332675933838 


 53%|█████▎    | 5331/10000 [19:31<17:05,  4.55it/s]

Loss = 4.988289833068848 


 53%|█████▎    | 5341/10000 [19:34<17:14,  4.50it/s]

Loss = 4.986852169036865 


 54%|█████▎    | 5351/10000 [19:36<17:00,  4.56it/s]

Loss = 4.98606014251709 


 54%|█████▎    | 5361/10000 [19:38<16:57,  4.56it/s]

Loss = 4.9847517013549805 


 54%|█████▎    | 5371/10000 [19:40<16:52,  4.57it/s]

Loss = 4.983822822570801 


 54%|█████▍    | 5381/10000 [19:42<16:57,  4.54it/s]

Loss = 4.982970714569092 


 54%|█████▍    | 5391/10000 [19:45<16:39,  4.61it/s]

Loss = 4.982123374938965 


 54%|█████▍    | 5401/10000 [19:47<16:48,  4.56it/s]

Loss = 4.9812912940979 


 54%|█████▍    | 5411/10000 [19:49<16:47,  4.56it/s]

Loss = 4.97980260848999 


 54%|█████▍    | 5421/10000 [19:51<16:49,  4.53it/s]

Loss = 4.979273319244385 


 54%|█████▍    | 5431/10000 [19:53<16:40,  4.57it/s]

Loss = 4.977999210357666 


 54%|█████▍    | 5441/10000 [19:56<16:46,  4.53it/s]

Loss = 4.976898193359375 


 55%|█████▍    | 5451/10000 [19:58<16:50,  4.50it/s]

Loss = 4.975780010223389 


 55%|█████▍    | 5461/10000 [20:00<16:34,  4.56it/s]

Loss = 4.975028038024902 


 55%|█████▍    | 5471/10000 [20:02<16:38,  4.54it/s]

Loss = 4.974620819091797 


 55%|█████▍    | 5481/10000 [20:04<16:33,  4.55it/s]

Loss = 4.973482608795166 


 55%|█████▍    | 5491/10000 [20:07<16:29,  4.56it/s]

Loss = 4.972908973693848 


 55%|█████▌    | 5501/10000 [20:09<16:34,  4.53it/s]

Loss = 4.972310543060303 


 55%|█████▌    | 5511/10000 [20:11<16:30,  4.53it/s]

Loss = 4.9709320068359375 


 55%|█████▌    | 5521/10000 [20:13<16:24,  4.55it/s]

Loss = 4.969948768615723 


 55%|█████▌    | 5531/10000 [20:15<16:16,  4.58it/s]

Loss = 4.969326019287109 


 55%|█████▌    | 5541/10000 [20:18<16:22,  4.54it/s]

Loss = 4.968173503875732 


 56%|█████▌    | 5551/10000 [20:20<16:24,  4.52it/s]

Loss = 4.967130184173584 


 56%|█████▌    | 5561/10000 [20:22<16:17,  4.54it/s]

Loss = 4.965829849243164 


 56%|█████▌    | 5571/10000 [20:24<16:14,  4.55it/s]

Loss = 4.964728832244873 


 56%|█████▌    | 5581/10000 [20:26<16:07,  4.57it/s]

Loss = 4.964118480682373 


 56%|█████▌    | 5591/10000 [20:28<16:08,  4.55it/s]

Loss = 4.963119029998779 


 56%|█████▌    | 5601/10000 [20:31<16:16,  4.51it/s]

Loss = 4.96173620223999 


 56%|█████▌    | 5611/10000 [20:33<16:05,  4.55it/s]

Loss = 4.960203170776367 


 56%|█████▌    | 5621/10000 [20:35<16:03,  4.55it/s]

Loss = 4.95849084854126 


 56%|█████▋    | 5631/10000 [20:37<16:03,  4.53it/s]

Loss = 4.957186698913574 


 56%|█████▋    | 5641/10000 [20:39<15:46,  4.61it/s]

Loss = 4.956528663635254 


 57%|█████▋    | 5651/10000 [20:42<15:47,  4.59it/s]

Loss = 4.955964088439941 


 57%|█████▋    | 5661/10000 [20:44<15:48,  4.57it/s]

Loss = 4.955429553985596 


 57%|█████▋    | 5671/10000 [20:46<15:53,  4.54it/s]

Loss = 4.954843044281006 


 57%|█████▋    | 5681/10000 [20:48<15:35,  4.62it/s]

Loss = 4.953624725341797 


 57%|█████▋    | 5691/10000 [20:50<15:39,  4.58it/s]

Loss = 4.952195644378662 


 57%|█████▋    | 5701/10000 [20:53<15:39,  4.58it/s]

Loss = 4.9516520500183105 


 57%|█████▋    | 5711/10000 [20:55<15:37,  4.58it/s]

Loss = 4.9505295753479 


 57%|█████▋    | 5721/10000 [20:57<15:39,  4.55it/s]

Loss = 4.949497222900391 


 57%|█████▋    | 5731/10000 [20:59<15:32,  4.58it/s]

Loss = 4.948403358459473 


 57%|█████▋    | 5741/10000 [21:01<15:34,  4.56it/s]

Loss = 4.94746208190918 


 58%|█████▊    | 5751/10000 [21:04<15:35,  4.54it/s]

Loss = 4.946728229522705 


 58%|█████▊    | 5761/10000 [21:06<15:26,  4.57it/s]

Loss = 4.945600509643555 


 58%|█████▊    | 5771/10000 [21:08<15:31,  4.54it/s]

Loss = 4.944406986236572 


 58%|█████▊    | 5781/10000 [21:10<15:21,  4.58it/s]

Loss = 4.943837642669678 


 58%|█████▊    | 5791/10000 [21:12<15:35,  4.50it/s]

Loss = 4.943081378936768 


 58%|█████▊    | 5801/10000 [21:14<15:17,  4.57it/s]

Loss = 4.941984176635742 


 58%|█████▊    | 5811/10000 [21:17<15:17,  4.56it/s]

Loss = 4.940952301025391 


 58%|█████▊    | 5821/10000 [21:19<15:10,  4.59it/s]

Loss = 4.940427303314209 


 58%|█████▊    | 5831/10000 [21:21<15:17,  4.54it/s]

Loss = 4.940155982971191 


 58%|█████▊    | 5841/10000 [21:23<15:04,  4.60it/s]

Loss = 4.938732624053955 


 59%|█████▊    | 5851/10000 [21:25<15:17,  4.52it/s]

Loss = 4.937535285949707 


 59%|█████▊    | 5861/10000 [21:28<15:24,  4.48it/s]

Loss = 4.936454772949219 


 59%|█████▊    | 5871/10000 [21:30<15:05,  4.56it/s]

Loss = 4.9354963302612305 


 59%|█████▉    | 5881/10000 [21:32<15:10,  4.52it/s]

Loss = 4.934629440307617 


 59%|█████▉    | 5891/10000 [21:34<15:04,  4.54it/s]

Loss = 4.934504985809326 


 59%|█████▉    | 5901/10000 [21:36<14:58,  4.56it/s]

Loss = 4.933993339538574 


 59%|█████▉    | 5911/10000 [21:39<14:55,  4.57it/s]

Loss = 4.932920455932617 


 59%|█████▉    | 5921/10000 [21:41<15:02,  4.52it/s]

Loss = 4.932559013366699 


 59%|█████▉    | 5931/10000 [21:43<14:51,  4.56it/s]

Loss = 4.931269645690918 


 59%|█████▉    | 5941/10000 [21:45<14:53,  4.54it/s]

Loss = 4.930149555206299 


 60%|█████▉    | 5951/10000 [21:47<14:44,  4.58it/s]

Loss = 4.928995132446289 


 60%|█████▉    | 5961/10000 [21:50<14:47,  4.55it/s]

Loss = 4.928438186645508 


 60%|█████▉    | 5971/10000 [21:52<14:41,  4.57it/s]

Loss = 4.927677631378174 


 60%|█████▉    | 5981/10000 [21:54<14:54,  4.49it/s]

Loss = 4.926584720611572 


 60%|█████▉    | 5991/10000 [21:56<14:40,  4.55it/s]

Loss = 4.925932884216309 


 60%|██████    | 6001/10000 [21:58<14:42,  4.53it/s]

Loss = 4.925279140472412 


 60%|██████    | 6011/10000 [22:01<14:33,  4.57it/s]

Loss = 4.924295902252197 


 60%|██████    | 6021/10000 [22:03<14:41,  4.52it/s]

Loss = 4.923573970794678 


 60%|██████    | 6031/10000 [22:05<14:33,  4.55it/s]

Loss = 4.922752857208252 


 60%|██████    | 6041/10000 [22:07<14:39,  4.50it/s]

Loss = 4.921807765960693 


 61%|██████    | 6051/10000 [22:09<14:27,  4.55it/s]

Loss = 4.920269966125488 


 61%|██████    | 6061/10000 [22:12<14:26,  4.55it/s]

Loss = 4.9198408126831055 


 61%|██████    | 6071/10000 [22:14<14:19,  4.57it/s]

Loss = 4.9189958572387695 


 61%|██████    | 6081/10000 [22:16<14:20,  4.56it/s]

Loss = 4.917957782745361 


 61%|██████    | 6091/10000 [22:18<14:16,  4.56it/s]

Loss = 4.916330814361572 


 61%|██████    | 6101/10000 [22:20<14:13,  4.57it/s]

Loss = 4.914528846740723 


 61%|██████    | 6111/10000 [22:22<14:09,  4.58it/s]

Loss = 4.913725852966309 


 61%|██████    | 6121/10000 [22:25<14:24,  4.49it/s]

Loss = 4.912804126739502 


 61%|██████▏   | 6131/10000 [22:27<14:14,  4.53it/s]

Loss = 4.911949157714844 


 61%|██████▏   | 6141/10000 [22:29<14:15,  4.51it/s]

Loss = 4.910678386688232 


 62%|██████▏   | 6151/10000 [22:31<14:00,  4.58it/s]

Loss = 4.910098075866699 


 62%|██████▏   | 6161/10000 [22:33<14:04,  4.54it/s]

Loss = 4.908902645111084 


 62%|██████▏   | 6171/10000 [22:36<13:53,  4.60it/s]

Loss = 4.908331394195557 


 62%|██████▏   | 6181/10000 [22:38<14:03,  4.53it/s]

Loss = 4.907464981079102 


 62%|██████▏   | 6191/10000 [22:40<13:56,  4.55it/s]

Loss = 4.9072747230529785 


 62%|██████▏   | 6201/10000 [22:42<13:55,  4.55it/s]

Loss = 4.906364440917969 


 62%|██████▏   | 6211/10000 [22:45<13:49,  4.57it/s]

Loss = 4.9056010246276855 


 62%|██████▏   | 6221/10000 [22:47<13:47,  4.57it/s]

Loss = 4.904797554016113 


 62%|██████▏   | 6231/10000 [22:49<13:49,  4.54it/s]

Loss = 4.9039459228515625 


 62%|██████▏   | 6241/10000 [22:51<13:46,  4.55it/s]

Loss = 4.902830600738525 


 63%|██████▎   | 6251/10000 [22:53<13:44,  4.55it/s]

Loss = 4.902354717254639 


 63%|██████▎   | 6261/10000 [22:55<13:38,  4.57it/s]

Loss = 4.901711940765381 


 63%|██████▎   | 6271/10000 [22:58<13:51,  4.48it/s]

Loss = 4.900733470916748 


 63%|██████▎   | 6281/10000 [23:00<13:35,  4.56it/s]

Loss = 4.900198936462402 


 63%|██████▎   | 6291/10000 [23:02<13:30,  4.57it/s]

Loss = 4.899123191833496 


 63%|██████▎   | 6301/10000 [23:04<13:27,  4.58it/s]

Loss = 4.898066997528076 


 63%|██████▎   | 6311/10000 [23:06<13:25,  4.58it/s]

Loss = 4.8969597816467285 


 63%|██████▎   | 6321/10000 [23:09<13:27,  4.56it/s]

Loss = 4.896164894104004 


 63%|██████▎   | 6331/10000 [23:11<13:36,  4.49it/s]

Loss = 4.895646095275879 


 63%|██████▎   | 6341/10000 [23:13<13:27,  4.53it/s]

Loss = 4.894620418548584 


 64%|██████▎   | 6351/10000 [23:15<13:18,  4.57it/s]

Loss = 4.8934454917907715 


 64%|██████▎   | 6361/10000 [23:17<13:20,  4.55it/s]

Loss = 4.892212390899658 


 64%|██████▎   | 6371/10000 [23:20<13:14,  4.57it/s]

Loss = 4.891505241394043 


 64%|██████▍   | 6381/10000 [23:22<13:18,  4.53it/s]

Loss = 4.890934944152832 


 64%|██████▍   | 6391/10000 [23:24<13:14,  4.54it/s]

Loss = 4.890190601348877 


 64%|██████▍   | 6401/10000 [23:26<13:17,  4.51it/s]

Loss = 4.889158248901367 


 64%|██████▍   | 6411/10000 [23:28<13:07,  4.56it/s]

Loss = 4.888134002685547 


 64%|██████▍   | 6421/10000 [23:31<13:02,  4.57it/s]

Loss = 4.887341499328613 


 64%|██████▍   | 6431/10000 [23:33<13:00,  4.57it/s]

Loss = 4.886359691619873 


 64%|██████▍   | 6441/10000 [23:35<12:58,  4.57it/s]

Loss = 4.885660648345947 


 65%|██████▍   | 6451/10000 [23:37<13:03,  4.53it/s]

Loss = 4.884820938110352 


 65%|██████▍   | 6461/10000 [23:39<12:59,  4.54it/s]

Loss = 4.884195804595947 


 65%|██████▍   | 6471/10000 [23:42<12:56,  4.54it/s]

Loss = 4.883625507354736 


 65%|██████▍   | 6481/10000 [23:44<12:54,  4.54it/s]

Loss = 4.883237361907959 


 65%|██████▍   | 6491/10000 [23:46<12:51,  4.55it/s]

Loss = 4.882867336273193 


 65%|██████▌   | 6501/10000 [23:48<12:55,  4.51it/s]

Loss = 4.881959438323975 


 65%|██████▌   | 6511/10000 [23:50<12:50,  4.53it/s]

Loss = 4.880982875823975 


 65%|██████▌   | 6521/10000 [23:53<12:39,  4.58it/s]

Loss = 4.8797173500061035 


 65%|██████▌   | 6531/10000 [23:55<12:44,  4.54it/s]

Loss = 4.879096984863281 


 65%|██████▌   | 6541/10000 [23:57<12:40,  4.55it/s]

Loss = 4.878259658813477 


 66%|██████▌   | 6551/10000 [23:59<12:45,  4.50it/s]

Loss = 4.877674579620361 


 66%|██████▌   | 6561/10000 [24:01<12:44,  4.50it/s]

Loss = 4.876806735992432 


 66%|██████▌   | 6571/10000 [24:04<12:39,  4.51it/s]

Loss = 4.8758225440979 


 66%|██████▌   | 6581/10000 [24:06<12:26,  4.58it/s]

Loss = 4.87470006942749 


 66%|██████▌   | 6591/10000 [24:08<12:28,  4.55it/s]

Loss = 4.874032974243164 


 66%|██████▌   | 6601/10000 [24:10<12:26,  4.55it/s]

Loss = 4.873370170593262 


 66%|██████▌   | 6611/10000 [24:12<12:20,  4.57it/s]

Loss = 4.872413635253906 


 66%|██████▌   | 6621/10000 [24:15<12:21,  4.56it/s]

Loss = 4.872139930725098 


 66%|██████▋   | 6631/10000 [24:17<12:20,  4.55it/s]

Loss = 4.8711957931518555 


 66%|██████▋   | 6641/10000 [24:19<12:14,  4.57it/s]

Loss = 4.870752811431885 


 67%|██████▋   | 6651/10000 [24:21<12:23,  4.50it/s]

Loss = 4.870570659637451 


 67%|██████▋   | 6661/10000 [24:23<12:19,  4.52it/s]

Loss = 4.870129585266113 


 67%|██████▋   | 6671/10000 [24:26<12:09,  4.56it/s]

Loss = 4.869396686553955 


 67%|██████▋   | 6681/10000 [24:28<12:11,  4.54it/s]

Loss = 4.868886947631836 


 67%|██████▋   | 6691/10000 [24:30<12:03,  4.57it/s]

Loss = 4.868298053741455 


 67%|██████▋   | 6701/10000 [24:32<12:08,  4.53it/s]

Loss = 4.867595195770264 


 67%|██████▋   | 6711/10000 [24:34<12:00,  4.56it/s]

Loss = 4.866552829742432 


 67%|██████▋   | 6721/10000 [24:37<12:05,  4.52it/s]

Loss = 4.8659467697143555 


 67%|██████▋   | 6731/10000 [24:39<11:56,  4.56it/s]

Loss = 4.865072250366211 


 67%|██████▋   | 6741/10000 [24:41<11:53,  4.56it/s]

Loss = 4.864444732666016 


 68%|██████▊   | 6751/10000 [24:43<11:51,  4.57it/s]

Loss = 4.86393928527832 


 68%|██████▊   | 6761/10000 [24:45<11:50,  4.56it/s]

Loss = 4.863218784332275 


 68%|██████▊   | 6771/10000 [24:48<11:52,  4.53it/s]

Loss = 4.862569332122803 


 68%|██████▊   | 6781/10000 [24:50<11:41,  4.59it/s]

Loss = 4.862284183502197 


 68%|██████▊   | 6791/10000 [24:52<11:49,  4.52it/s]

Loss = 4.861839771270752 


 68%|██████▊   | 6801/10000 [24:54<11:45,  4.54it/s]

Loss = 4.86131477355957 


 68%|██████▊   | 6811/10000 [24:56<11:41,  4.55it/s]

Loss = 4.86057186126709 


 68%|██████▊   | 6821/10000 [24:59<11:37,  4.56it/s]

Loss = 4.860281467437744 


 68%|██████▊   | 6831/10000 [25:01<11:43,  4.51it/s]

Loss = 4.859432220458984 


 68%|██████▊   | 6841/10000 [25:03<11:32,  4.56it/s]

Loss = 4.858307361602783 


 69%|██████▊   | 6851/10000 [25:05<11:23,  4.60it/s]

Loss = 4.8582377433776855 


 69%|██████▊   | 6861/10000 [25:07<11:35,  4.51it/s]

Loss = 4.857758045196533 


 69%|██████▊   | 6871/10000 [25:10<11:23,  4.58it/s]

Loss = 4.857304573059082 


 69%|██████▉   | 6881/10000 [25:12<11:20,  4.58it/s]

Loss = 4.856240749359131 


 69%|██████▉   | 6891/10000 [25:14<11:26,  4.53it/s]

Loss = 4.855111598968506 


 69%|██████▉   | 6901/10000 [25:16<11:17,  4.58it/s]

Loss = 4.8542704582214355 


 69%|██████▉   | 6911/10000 [25:18<11:22,  4.52it/s]

Loss = 4.853418827056885 


 69%|██████▉   | 6921/10000 [25:21<11:23,  4.50it/s]

Loss = 4.852807521820068 


 69%|██████▉   | 6931/10000 [25:23<11:18,  4.52it/s]

Loss = 4.852019786834717 


 69%|██████▉   | 6941/10000 [25:25<11:12,  4.55it/s]

Loss = 4.8514533042907715 


 70%|██████▉   | 6951/10000 [25:27<11:07,  4.57it/s]

Loss = 4.851053237915039 


 70%|██████▉   | 6961/10000 [25:29<11:02,  4.59it/s]

Loss = 4.8502678871154785 


 70%|██████▉   | 6971/10000 [25:32<11:08,  4.53it/s]

Loss = 4.849320888519287 


 70%|██████▉   | 6981/10000 [25:34<11:00,  4.57it/s]

Loss = 4.848795413970947 


 70%|██████▉   | 6991/10000 [25:36<11:00,  4.56it/s]

Loss = 4.847846031188965 


 70%|███████   | 7001/10000 [25:38<10:56,  4.57it/s]

Loss = 4.847333908081055 


 70%|███████   | 7011/10000 [25:40<10:54,  4.56it/s]

Loss = 4.846355438232422 


 70%|███████   | 7021/10000 [25:43<10:48,  4.60it/s]

Loss = 4.845279216766357 


 70%|███████   | 7031/10000 [25:45<10:45,  4.60it/s]

Loss = 4.844364643096924 


 70%|███████   | 7041/10000 [25:47<10:48,  4.56it/s]

Loss = 4.843799591064453 


 71%|███████   | 7051/10000 [25:49<10:46,  4.56it/s]

Loss = 4.843124866485596 


 71%|███████   | 7061/10000 [25:51<10:47,  4.54it/s]

Loss = 4.842686176300049 


 71%|███████   | 7071/10000 [25:53<10:45,  4.54it/s]

Loss = 4.841457843780518 


 71%|███████   | 7081/10000 [25:56<10:40,  4.56it/s]

Loss = 4.840451717376709 


 71%|███████   | 7091/10000 [25:58<10:39,  4.55it/s]

Loss = 4.840006351470947 


 71%|███████   | 7101/10000 [26:00<10:36,  4.56it/s]

Loss = 4.839173316955566 


 71%|███████   | 7111/10000 [26:02<10:37,  4.54it/s]

Loss = 4.838260173797607 


 71%|███████   | 7121/10000 [26:04<10:31,  4.56it/s]

Loss = 4.837716102600098 


 71%|███████▏  | 7131/10000 [26:07<10:31,  4.54it/s]

Loss = 4.836969375610352 


 71%|███████▏  | 7141/10000 [26:09<10:25,  4.57it/s]

Loss = 4.836360454559326 


 72%|███████▏  | 7151/10000 [26:11<10:24,  4.56it/s]

Loss = 4.835166931152344 


 72%|███████▏  | 7161/10000 [26:13<10:29,  4.51it/s]

Loss = 4.834516525268555 


 72%|███████▏  | 7171/10000 [26:15<10:20,  4.56it/s]

Loss = 4.8340044021606445 


 72%|███████▏  | 7181/10000 [26:18<10:20,  4.54it/s]

Loss = 4.833102703094482 


 72%|███████▏  | 7191/10000 [26:20<10:15,  4.56it/s]

Loss = 4.83217716217041 


 72%|███████▏  | 7201/10000 [26:22<10:17,  4.54it/s]

Loss = 4.831556797027588 


 72%|███████▏  | 7211/10000 [26:24<10:17,  4.52it/s]

Loss = 4.830536842346191 


 72%|███████▏  | 7221/10000 [26:26<10:09,  4.56it/s]

Loss = 4.829697608947754 


 72%|███████▏  | 7231/10000 [26:29<10:13,  4.51it/s]

Loss = 4.828987121582031 


 72%|███████▏  | 7241/10000 [26:31<10:05,  4.56it/s]

Loss = 4.827984809875488 


 73%|███████▎  | 7251/10000 [26:33<10:04,  4.54it/s]

Loss = 4.827377796173096 


 73%|███████▎  | 7261/10000 [26:35<10:03,  4.54it/s]

Loss = 4.826240539550781 


 73%|███████▎  | 7271/10000 [26:37<10:02,  4.53it/s]

Loss = 4.825403690338135 


 73%|███████▎  | 7281/10000 [26:40<09:53,  4.58it/s]

Loss = 4.8245062828063965 


 73%|███████▎  | 7291/10000 [26:42<09:55,  4.55it/s]

Loss = 4.823652267456055 


 73%|███████▎  | 7301/10000 [26:44<09:55,  4.53it/s]

Loss = 4.822935104370117 


 73%|███████▎  | 7311/10000 [26:46<09:53,  4.53it/s]

Loss = 4.821990966796875 


 73%|███████▎  | 7321/10000 [26:48<09:52,  4.52it/s]

Loss = 4.821317672729492 


 73%|███████▎  | 7331/10000 [26:51<09:45,  4.56it/s]

Loss = 4.820793628692627 


 73%|███████▎  | 7341/10000 [26:53<09:43,  4.56it/s]

Loss = 4.820145606994629 


 74%|███████▎  | 7351/10000 [26:55<09:41,  4.56it/s]

Loss = 4.818984031677246 


 74%|███████▎  | 7361/10000 [26:57<09:42,  4.53it/s]

Loss = 4.818603038787842 


 74%|███████▎  | 7371/10000 [26:59<09:40,  4.53it/s]

Loss = 4.817605018615723 


 74%|███████▍  | 7381/10000 [27:02<09:38,  4.52it/s]

Loss = 4.816927433013916 


 74%|███████▍  | 7391/10000 [27:04<09:40,  4.50it/s]

Loss = 4.816496849060059 


 74%|███████▍  | 7401/10000 [27:06<09:34,  4.53it/s]

Loss = 4.815667629241943 


 74%|███████▍  | 7411/10000 [27:08<09:30,  4.54it/s]

Loss = 4.814913749694824 


 74%|███████▍  | 7421/10000 [27:10<09:27,  4.55it/s]

Loss = 4.813861846923828 


 74%|███████▍  | 7431/10000 [27:13<09:30,  4.51it/s]

Loss = 4.8131513595581055 


 74%|███████▍  | 7441/10000 [27:15<09:23,  4.54it/s]

Loss = 4.812220096588135 


 75%|███████▍  | 7451/10000 [27:17<09:27,  4.49it/s]

Loss = 4.811223030090332 


 75%|███████▍  | 7461/10000 [27:19<09:16,  4.56it/s]

Loss = 4.810845375061035 


 75%|███████▍  | 7471/10000 [27:21<09:13,  4.57it/s]

Loss = 4.810547351837158 


 75%|███████▍  | 7481/10000 [27:24<09:15,  4.53it/s]

Loss = 4.809240341186523 


 75%|███████▍  | 7491/10000 [27:26<09:08,  4.57it/s]

Loss = 4.8084259033203125 


 75%|███████▌  | 7501/10000 [27:28<09:08,  4.56it/s]

Loss = 4.80771541595459 


 75%|███████▌  | 7511/10000 [27:30<09:03,  4.58it/s]

Loss = 4.807442665100098 


 75%|███████▌  | 7521/10000 [27:32<09:04,  4.55it/s]

Loss = 4.80659818649292 


 75%|███████▌  | 7531/10000 [27:35<09:01,  4.56it/s]

Loss = 4.80576229095459 


 75%|███████▌  | 7541/10000 [27:37<08:59,  4.55it/s]

Loss = 4.805142402648926 


 76%|███████▌  | 7551/10000 [27:39<08:56,  4.57it/s]

Loss = 4.804417610168457 


 76%|███████▌  | 7561/10000 [27:41<08:59,  4.52it/s]

Loss = 4.803775310516357 


 76%|███████▌  | 7571/10000 [27:43<08:50,  4.58it/s]

Loss = 4.80276346206665 


 76%|███████▌  | 7581/10000 [27:46<08:47,  4.58it/s]

Loss = 4.802076816558838 


 76%|███████▌  | 7591/10000 [27:48<08:52,  4.52it/s]

Loss = 4.801328182220459 


 76%|███████▌  | 7601/10000 [27:50<08:41,  4.60it/s]

Loss = 4.800230026245117 


 76%|███████▌  | 7611/10000 [27:52<08:41,  4.58it/s]

Loss = 4.799612045288086 


 76%|███████▌  | 7621/10000 [27:54<08:44,  4.54it/s]

Loss = 4.799227714538574 


 76%|███████▋  | 7631/10000 [27:57<08:45,  4.51it/s]

Loss = 4.7986531257629395 


 76%|███████▋  | 7641/10000 [27:59<08:44,  4.50it/s]

Loss = 4.797801971435547 


 77%|███████▋  | 7651/10000 [28:01<08:39,  4.52it/s]

Loss = 4.796757698059082 


 77%|███████▋  | 7661/10000 [28:03<08:29,  4.60it/s]

Loss = 4.796051502227783 


 77%|███████▋  | 7671/10000 [28:05<08:29,  4.57it/s]

Loss = 4.794925212860107 


 77%|███████▋  | 7681/10000 [28:08<08:31,  4.53it/s]

Loss = 4.794432163238525 


 77%|███████▋  | 7691/10000 [28:10<08:26,  4.56it/s]

Loss = 4.793690204620361 


 77%|███████▋  | 7701/10000 [28:12<08:25,  4.55it/s]

Loss = 4.793272972106934 


 77%|███████▋  | 7711/10000 [28:14<08:20,  4.57it/s]

Loss = 4.793161869049072 


 77%|███████▋  | 7721/10000 [28:16<08:24,  4.52it/s]

Loss = 4.7922563552856445 


 77%|███████▋  | 7731/10000 [28:18<08:20,  4.53it/s]

Loss = 4.791476249694824 


 77%|███████▋  | 7741/10000 [28:21<08:23,  4.48it/s]

Loss = 4.790887355804443 


 78%|███████▊  | 7751/10000 [28:23<08:12,  4.56it/s]

Loss = 4.7899298667907715 


 78%|███████▊  | 7761/10000 [28:25<08:12,  4.55it/s]

Loss = 4.788846969604492 


 78%|███████▊  | 7771/10000 [28:27<08:11,  4.53it/s]

Loss = 4.788125514984131 


 78%|███████▊  | 7781/10000 [28:29<08:04,  4.58it/s]

Loss = 4.787415504455566 


 78%|███████▊  | 7791/10000 [28:32<08:06,  4.54it/s]

Loss = 4.786647319793701 


 78%|███████▊  | 7801/10000 [28:34<08:07,  4.51it/s]

Loss = 4.786111354827881 


 78%|███████▊  | 7811/10000 [28:36<07:58,  4.57it/s]

Loss = 4.785305976867676 


 78%|███████▊  | 7821/10000 [28:38<07:55,  4.58it/s]

Loss = 4.7844767570495605 


 78%|███████▊  | 7831/10000 [28:40<07:59,  4.52it/s]

Loss = 4.78333044052124 


 78%|███████▊  | 7841/10000 [28:43<08:01,  4.49it/s]

Loss = 4.7823991775512695 


 79%|███████▊  | 7851/10000 [28:45<07:53,  4.54it/s]

Loss = 4.781798839569092 


 79%|███████▊  | 7861/10000 [28:47<07:50,  4.54it/s]

Loss = 4.7810773849487305 


 79%|███████▊  | 7871/10000 [28:49<07:45,  4.57it/s]

Loss = 4.780229568481445 


 79%|███████▉  | 7881/10000 [28:51<07:41,  4.59it/s]

Loss = 4.779588222503662 


 79%|███████▉  | 7891/10000 [28:54<07:47,  4.51it/s]

Loss = 4.778871536254883 


 79%|███████▉  | 7901/10000 [28:56<07:43,  4.53it/s]

Loss = 4.778392791748047 


 79%|███████▉  | 7911/10000 [28:58<07:36,  4.58it/s]

Loss = 4.778110980987549 


 79%|███████▉  | 7921/10000 [29:00<07:34,  4.57it/s]

Loss = 4.777498245239258 


 79%|███████▉  | 7931/10000 [29:02<07:31,  4.59it/s]

Loss = 4.777122974395752 


 79%|███████▉  | 7941/10000 [29:05<07:28,  4.59it/s]

Loss = 4.776271820068359 


 80%|███████▉  | 7951/10000 [29:07<07:28,  4.57it/s]

Loss = 4.7753987312316895 


 80%|███████▉  | 7961/10000 [29:09<07:26,  4.57it/s]

Loss = 4.775045394897461 


 80%|███████▉  | 7971/10000 [29:11<07:27,  4.54it/s]

Loss = 4.774240970611572 


 80%|███████▉  | 7981/10000 [29:13<07:25,  4.53it/s]

Loss = 4.77347993850708 


 80%|███████▉  | 7991/10000 [29:16<07:20,  4.56it/s]

Loss = 4.7729668617248535 


 80%|████████  | 8001/10000 [29:18<07:15,  4.59it/s]

Loss = 4.772311687469482 


 80%|████████  | 8011/10000 [29:20<07:17,  4.55it/s]

Loss = 4.771655559539795 


 80%|████████  | 8021/10000 [29:22<07:13,  4.56it/s]

Loss = 4.771124839782715 


 80%|████████  | 8031/10000 [29:24<07:11,  4.57it/s]

Loss = 4.7703423500061035 


 80%|████████  | 8041/10000 [29:27<07:09,  4.56it/s]

Loss = 4.769948959350586 


 81%|████████  | 8051/10000 [29:29<07:12,  4.51it/s]

Loss = 4.769524574279785 


 81%|████████  | 8061/10000 [29:31<07:03,  4.58it/s]

Loss = 4.769050598144531 


 81%|████████  | 8071/10000 [29:33<07:02,  4.56it/s]

Loss = 4.768494606018066 


 81%|████████  | 8081/10000 [29:35<07:05,  4.51it/s]

Loss = 4.768306732177734 


 81%|████████  | 8091/10000 [29:38<07:00,  4.54it/s]

Loss = 4.7675604820251465 


 81%|████████  | 8101/10000 [29:40<07:00,  4.52it/s]

Loss = 4.767309665679932 


 81%|████████  | 8111/10000 [29:42<07:02,  4.47it/s]

Loss = 4.766894817352295 


 81%|████████  | 8121/10000 [29:44<06:55,  4.52it/s]

Loss = 4.766165733337402 


 81%|████████▏ | 8131/10000 [29:46<06:52,  4.53it/s]

Loss = 4.764847278594971 


 81%|████████▏ | 8141/10000 [29:49<06:49,  4.54it/s]

Loss = 4.764203071594238 


 82%|████████▏ | 8151/10000 [29:51<06:44,  4.57it/s]

Loss = 4.7635979652404785 


 82%|████████▏ | 8161/10000 [29:53<06:43,  4.55it/s]

Loss = 4.763041019439697 


 82%|████████▏ | 8171/10000 [29:55<06:39,  4.58it/s]

Loss = 4.762524604797363 


 82%|████████▏ | 8181/10000 [29:57<06:42,  4.51it/s]

Loss = 4.761791229248047 


 82%|████████▏ | 8191/10000 [30:00<06:40,  4.52it/s]

Loss = 4.761053562164307 


 82%|████████▏ | 8201/10000 [30:02<06:34,  4.56it/s]

Loss = 4.760330677032471 


 82%|████████▏ | 8211/10000 [30:04<06:31,  4.57it/s]

Loss = 4.759706020355225 


 82%|████████▏ | 8221/10000 [30:06<06:30,  4.55it/s]

Loss = 4.759031295776367 


 82%|████████▏ | 8231/10000 [30:08<06:27,  4.57it/s]

Loss = 4.758554935455322 


 82%|████████▏ | 8241/10000 [30:11<06:27,  4.54it/s]

Loss = 4.757718563079834 


 83%|████████▎ | 8251/10000 [30:13<06:25,  4.54it/s]

Loss = 4.75698184967041 


 83%|████████▎ | 8261/10000 [30:15<06:22,  4.55it/s]

Loss = 4.756595611572266 


 83%|████████▎ | 8271/10000 [30:17<06:21,  4.54it/s]

Loss = 4.7556962966918945 


 83%|████████▎ | 8281/10000 [30:19<06:16,  4.56it/s]

Loss = 4.754906177520752 


 83%|████████▎ | 8291/10000 [30:22<06:12,  4.59it/s]

Loss = 4.754627227783203 


 83%|████████▎ | 8301/10000 [30:24<06:11,  4.58it/s]

Loss = 4.754359722137451 


 83%|████████▎ | 8311/10000 [30:26<06:11,  4.55it/s]

Loss = 4.754153251647949 


 83%|████████▎ | 8321/10000 [30:28<06:07,  4.57it/s]

Loss = 4.753762722015381 


 83%|████████▎ | 8331/10000 [30:30<06:05,  4.56it/s]

Loss = 4.752986431121826 


 83%|████████▎ | 8341/10000 [30:33<06:07,  4.51it/s]

Loss = 4.752439498901367 


 84%|████████▎ | 8351/10000 [30:35<06:01,  4.56it/s]

Loss = 4.752066135406494 


 84%|████████▎ | 8361/10000 [30:37<06:02,  4.52it/s]

Loss = 4.751678943634033 


 84%|████████▎ | 8371/10000 [30:39<06:02,  4.49it/s]

Loss = 4.751293659210205 


 84%|████████▍ | 8381/10000 [30:41<05:54,  4.57it/s]

Loss = 4.7509260177612305 


 84%|████████▍ | 8391/10000 [30:44<05:53,  4.55it/s]

Loss = 4.750321388244629 


 84%|████████▍ | 8401/10000 [30:46<05:50,  4.56it/s]

Loss = 4.749862194061279 


 84%|████████▍ | 8411/10000 [30:48<05:46,  4.59it/s]

Loss = 4.749698638916016 


 84%|████████▍ | 8421/10000 [30:50<05:45,  4.57it/s]

Loss = 4.748845100402832 


 84%|████████▍ | 8431/10000 [30:52<05:42,  4.58it/s]

Loss = 4.748612880706787 


 84%|████████▍ | 8441/10000 [30:54<05:41,  4.57it/s]

Loss = 4.748260021209717 


 85%|████████▍ | 8451/10000 [30:57<05:38,  4.57it/s]

Loss = 4.74761962890625 


 85%|████████▍ | 8461/10000 [30:59<05:37,  4.56it/s]

Loss = 4.746628761291504 


 85%|████████▍ | 8471/10000 [31:01<05:36,  4.54it/s]

Loss = 4.745641708374023 


 85%|████████▍ | 8481/10000 [31:03<05:31,  4.58it/s]

Loss = 4.745100498199463 


 85%|████████▍ | 8491/10000 [31:05<05:30,  4.56it/s]

Loss = 4.7444868087768555 


 85%|████████▌ | 8501/10000 [31:08<05:28,  4.56it/s]

Loss = 4.7439284324646 


 85%|████████▌ | 8511/10000 [31:10<05:26,  4.56it/s]

Loss = 4.743106365203857 


 85%|████████▌ | 8521/10000 [31:12<05:23,  4.57it/s]

Loss = 4.74259090423584 


 85%|████████▌ | 8531/10000 [31:14<05:25,  4.52it/s]

Loss = 4.742191314697266 


 85%|████████▌ | 8541/10000 [31:16<05:20,  4.56it/s]

Loss = 4.741730690002441 


 86%|████████▌ | 8551/10000 [31:19<05:16,  4.57it/s]

Loss = 4.741303443908691 


 86%|████████▌ | 8561/10000 [31:21<05:13,  4.59it/s]

Loss = 4.740975856781006 


 86%|████████▌ | 8571/10000 [31:23<05:12,  4.57it/s]

Loss = 4.7404351234436035 


 86%|████████▌ | 8581/10000 [31:25<05:13,  4.52it/s]

Loss = 4.739755630493164 


 86%|████████▌ | 8591/10000 [31:27<05:08,  4.56it/s]

Loss = 4.739397048950195 


 86%|████████▌ | 8601/10000 [31:29<05:11,  4.50it/s]

Loss = 4.738956928253174 


 86%|████████▌ | 8611/10000 [31:32<05:04,  4.55it/s]

Loss = 4.738300323486328 


 86%|████████▌ | 8621/10000 [31:34<05:06,  4.50it/s]

Loss = 4.737828254699707 


 86%|████████▋ | 8631/10000 [31:36<05:01,  4.54it/s]

Loss = 4.736810207366943 


 86%|████████▋ | 8641/10000 [31:38<05:02,  4.50it/s]

Loss = 4.736485481262207 


 87%|████████▋ | 8651/10000 [31:40<04:58,  4.52it/s]

Loss = 4.735694408416748 


 87%|████████▋ | 8661/10000 [31:43<04:52,  4.59it/s]

Loss = 4.73500919342041 


 87%|████████▋ | 8671/10000 [31:45<04:50,  4.58it/s]

Loss = 4.734739780426025 


 87%|████████▋ | 8681/10000 [31:47<04:46,  4.60it/s]

Loss = 4.734179496765137 


 87%|████████▋ | 8691/10000 [31:49<04:45,  4.59it/s]

Loss = 4.733743190765381 


 87%|████████▋ | 8701/10000 [31:51<04:46,  4.53it/s]

Loss = 4.732928276062012 


 87%|████████▋ | 8711/10000 [31:54<04:44,  4.53it/s]

Loss = 4.732578754425049 


 87%|████████▋ | 8721/10000 [31:56<04:44,  4.50it/s]

Loss = 4.732186317443848 


 87%|████████▋ | 8731/10000 [31:58<04:38,  4.56it/s]

Loss = 4.731863021850586 


 87%|████████▋ | 8741/10000 [32:00<04:37,  4.54it/s]

Loss = 4.731485366821289 


 88%|████████▊ | 8751/10000 [32:02<04:32,  4.59it/s]

Loss = 4.730752944946289 


 88%|████████▊ | 8761/10000 [32:05<04:30,  4.58it/s]

Loss = 4.730513572692871 


 88%|████████▊ | 8771/10000 [32:07<04:27,  4.59it/s]

Loss = 4.730042457580566 


 88%|████████▊ | 8781/10000 [32:09<04:31,  4.50it/s]

Loss = 4.729496479034424 


 88%|████████▊ | 8791/10000 [32:11<04:27,  4.52it/s]

Loss = 4.729171276092529 


 88%|████████▊ | 8801/10000 [32:13<04:23,  4.56it/s]

Loss = 4.728697776794434 


 88%|████████▊ | 8811/10000 [32:16<04:20,  4.57it/s]

Loss = 4.72821044921875 


 88%|████████▊ | 8821/10000 [32:18<04:20,  4.53it/s]

Loss = 4.727756977081299 


 88%|████████▊ | 8831/10000 [32:20<04:18,  4.52it/s]

Loss = 4.72728157043457 


 88%|████████▊ | 8841/10000 [32:22<04:13,  4.57it/s]

Loss = 4.726787090301514 


 89%|████████▊ | 8851/10000 [32:24<04:11,  4.56it/s]

Loss = 4.726292610168457 


 89%|████████▊ | 8861/10000 [32:27<04:08,  4.58it/s]

Loss = 4.725843906402588 


 89%|████████▊ | 8871/10000 [32:29<04:09,  4.52it/s]

Loss = 4.725146770477295 


 89%|████████▉ | 8881/10000 [32:31<04:02,  4.61it/s]

Loss = 4.724570274353027 


 89%|████████▉ | 8891/10000 [32:33<04:03,  4.56it/s]

Loss = 4.723731517791748 


 89%|████████▉ | 8901/10000 [32:35<04:01,  4.54it/s]

Loss = 4.723371505737305 


 89%|████████▉ | 8911/10000 [32:38<03:59,  4.55it/s]

Loss = 4.722951412200928 


 89%|████████▉ | 8921/10000 [32:40<03:58,  4.53it/s]

Loss = 4.722158908843994 


 89%|████████▉ | 8931/10000 [32:42<03:55,  4.54it/s]

Loss = 4.7217326164245605 


 89%|████████▉ | 8941/10000 [32:44<03:52,  4.55it/s]

Loss = 4.721190452575684 


 90%|████████▉ | 8951/10000 [32:46<03:51,  4.53it/s]

Loss = 4.72061824798584 


 90%|████████▉ | 8961/10000 [32:49<03:49,  4.52it/s]

Loss = 4.720144271850586 


 90%|████████▉ | 8971/10000 [32:51<03:46,  4.53it/s]

Loss = 4.719696044921875 


 90%|████████▉ | 8981/10000 [32:53<03:44,  4.54it/s]

Loss = 4.7192158699035645 


 90%|████████▉ | 8991/10000 [32:55<03:42,  4.53it/s]

Loss = 4.7187957763671875 


 90%|█████████ | 9001/10000 [32:57<03:42,  4.48it/s]

Loss = 4.7183756828308105 


 90%|█████████ | 9011/10000 [33:00<03:36,  4.58it/s]

Loss = 4.717887878417969 


 90%|█████████ | 9021/10000 [33:02<03:34,  4.56it/s]

Loss = 4.717267036437988 


 90%|█████████ | 9031/10000 [33:04<03:31,  4.57it/s]

Loss = 4.717029571533203 


 90%|█████████ | 9041/10000 [33:06<03:32,  4.51it/s]

Loss = 4.71643590927124 


 91%|█████████ | 9051/10000 [33:08<03:27,  4.56it/s]

Loss = 4.716083526611328 


 91%|█████████ | 9061/10000 [33:11<03:25,  4.57it/s]

Loss = 4.715398788452148 


 91%|█████████ | 9071/10000 [33:13<03:25,  4.52it/s]

Loss = 4.7150468826293945 


 91%|█████████ | 9081/10000 [33:15<03:22,  4.54it/s]

Loss = 4.714837074279785 


 91%|█████████ | 9091/10000 [33:17<03:19,  4.57it/s]

Loss = 4.714177131652832 


 91%|█████████ | 9101/10000 [33:19<03:15,  4.60it/s]

Loss = 4.71358585357666 


 91%|█████████ | 9111/10000 [33:22<03:17,  4.51it/s]

Loss = 4.712965488433838 


 91%|█████████ | 9121/10000 [33:24<03:12,  4.57it/s]

Loss = 4.7124104499816895 


 91%|█████████▏| 9131/10000 [33:26<03:11,  4.54it/s]

Loss = 4.712052822113037 


 91%|█████████▏| 9141/10000 [33:28<03:08,  4.56it/s]

Loss = 4.7115936279296875 


 92%|█████████▏| 9151/10000 [33:30<03:06,  4.55it/s]

Loss = 4.711119651794434 


 92%|█████████▏| 9161/10000 [33:33<03:06,  4.50it/s]

Loss = 4.710729598999023 


 92%|█████████▏| 9171/10000 [33:35<03:02,  4.54it/s]

Loss = 4.710569858551025 


 92%|█████████▏| 9181/10000 [33:37<02:58,  4.59it/s]

Loss = 4.710233688354492 


 92%|█████████▏| 9191/10000 [33:39<02:57,  4.57it/s]

Loss = 4.70947265625 


 92%|█████████▏| 9201/10000 [33:41<02:54,  4.59it/s]

Loss = 4.709084510803223 


 92%|█████████▏| 9211/10000 [33:44<02:53,  4.54it/s]

Loss = 4.708795547485352 


 92%|█████████▏| 9221/10000 [33:46<02:49,  4.59it/s]

Loss = 4.708473205566406 


 92%|█████████▏| 9231/10000 [33:48<02:49,  4.55it/s]

Loss = 4.708104133605957 


 92%|█████████▏| 9241/10000 [33:50<02:45,  4.58it/s]

Loss = 4.707719326019287 


 93%|█████████▎| 9251/10000 [33:52<02:44,  4.56it/s]

Loss = 4.707149028778076 


 93%|█████████▎| 9261/10000 [33:54<02:41,  4.57it/s]

Loss = 4.70684289932251 


 93%|█████████▎| 9271/10000 [33:57<02:42,  4.48it/s]

Loss = 4.706538677215576 


 93%|█████████▎| 9281/10000 [33:59<02:38,  4.54it/s]

Loss = 4.706095218658447 


 93%|█████████▎| 9291/10000 [34:01<02:37,  4.50it/s]

Loss = 4.70524263381958 


 93%|█████████▎| 9301/10000 [34:03<02:34,  4.52it/s]

Loss = 4.7046613693237305 


 93%|█████████▎| 9311/10000 [34:06<02:33,  4.50it/s]

Loss = 4.704199314117432 


 93%|█████████▎| 9321/10000 [34:08<02:28,  4.58it/s]

Loss = 4.703608989715576 


 93%|█████████▎| 9331/10000 [34:10<02:26,  4.58it/s]

Loss = 4.703244686126709 


 93%|█████████▎| 9341/10000 [34:12<02:25,  4.54it/s]

Loss = 4.702597618103027 


 94%|█████████▎| 9351/10000 [34:14<02:23,  4.53it/s]

Loss = 4.701900959014893 


 94%|█████████▎| 9361/10000 [34:16<02:19,  4.60it/s]

Loss = 4.701544284820557 


 94%|█████████▎| 9371/10000 [34:19<02:18,  4.53it/s]

Loss = 4.701131343841553 


 94%|█████████▍| 9381/10000 [34:21<02:17,  4.52it/s]

Loss = 4.700512409210205 


 94%|█████████▍| 9391/10000 [34:23<02:13,  4.56it/s]

Loss = 4.699796199798584 


 94%|█████████▍| 9401/10000 [34:25<02:12,  4.53it/s]

Loss = 4.698978900909424 


 94%|█████████▍| 9411/10000 [34:28<02:11,  4.50it/s]

Loss = 4.698215484619141 


 94%|█████████▍| 9421/10000 [34:30<02:06,  4.56it/s]

Loss = 4.698213577270508 


 94%|█████████▍| 9431/10000 [34:32<02:04,  4.56it/s]

Loss = 4.697316646575928 


 94%|█████████▍| 9441/10000 [34:34<02:03,  4.54it/s]

Loss = 4.6968183517456055 


 95%|█████████▍| 9451/10000 [34:36<02:01,  4.53it/s]

Loss = 4.6962571144104 


 95%|█████████▍| 9461/10000 [34:38<01:57,  4.58it/s]

Loss = 4.695634365081787 


 95%|█████████▍| 9471/10000 [34:41<01:56,  4.56it/s]

Loss = 4.694983005523682 


 95%|█████████▍| 9481/10000 [34:43<01:53,  4.56it/s]

Loss = 4.694269180297852 


 95%|█████████▍| 9491/10000 [34:45<01:51,  4.58it/s]

Loss = 4.693422317504883 


 95%|█████████▌| 9501/10000 [34:47<01:49,  4.57it/s]

Loss = 4.692767143249512 


 95%|█████████▌| 9511/10000 [34:49<01:47,  4.56it/s]

Loss = 4.692511081695557 


 95%|█████████▌| 9521/10000 [34:52<01:44,  4.60it/s]

Loss = 4.692142963409424 


 95%|█████████▌| 9531/10000 [34:54<01:43,  4.55it/s]

Loss = 4.691925525665283 


 95%|█████████▌| 9541/10000 [34:56<01:41,  4.53it/s]

Loss = 4.691151142120361 


 96%|█████████▌| 9551/10000 [34:58<01:38,  4.58it/s]

Loss = 4.690586090087891 


 96%|█████████▌| 9561/10000 [35:00<01:35,  4.58it/s]

Loss = 4.690032482147217 


 96%|█████████▌| 9571/10000 [35:03<01:34,  4.55it/s]

Loss = 4.689687252044678 


 96%|█████████▌| 9581/10000 [35:05<01:31,  4.58it/s]

Loss = 4.6893205642700195 


 96%|█████████▌| 9591/10000 [35:07<01:29,  4.57it/s]

Loss = 4.688940525054932 


 96%|█████████▌| 9601/10000 [35:09<01:26,  4.60it/s]

Loss = 4.68840217590332 


 96%|█████████▌| 9611/10000 [35:11<01:25,  4.53it/s]

Loss = 4.688103675842285 


 96%|█████████▌| 9621/10000 [35:13<01:23,  4.54it/s]

Loss = 4.687514781951904 


 96%|█████████▋| 9631/10000 [35:16<01:20,  4.57it/s]

Loss = 4.687051296234131 


 96%|█████████▋| 9641/10000 [35:18<01:18,  4.58it/s]

Loss = 4.686521530151367 


 97%|█████████▋| 9651/10000 [35:20<01:16,  4.57it/s]

Loss = 4.686038494110107 


 97%|█████████▋| 9661/10000 [35:22<01:13,  4.59it/s]

Loss = 4.685308933258057 


 97%|█████████▋| 9671/10000 [35:24<01:11,  4.57it/s]

Loss = 4.684472560882568 


 97%|█████████▋| 9681/10000 [35:27<01:10,  4.55it/s]

Loss = 4.684016227722168 


 97%|█████████▋| 9691/10000 [35:29<01:08,  4.52it/s]

Loss = 4.683469295501709 


 97%|█████████▋| 9701/10000 [35:31<01:05,  4.57it/s]

Loss = 4.68289852142334 


 97%|█████████▋| 9711/10000 [35:33<01:03,  4.54it/s]

Loss = 4.682492733001709 


 97%|█████████▋| 9721/10000 [35:35<01:01,  4.54it/s]

Loss = 4.682231903076172 


 97%|█████████▋| 9731/10000 [35:38<00:58,  4.60it/s]

Loss = 4.682032108306885 


 97%|█████████▋| 9741/10000 [35:40<00:56,  4.58it/s]

Loss = 4.681825160980225 


 98%|█████████▊| 9751/10000 [35:42<00:54,  4.56it/s]

Loss = 4.681325912475586 


 98%|█████████▊| 9761/10000 [35:44<00:52,  4.55it/s]

Loss = 4.68062162399292 


 98%|█████████▊| 9771/10000 [35:46<00:49,  4.59it/s]

Loss = 4.680141925811768 


 98%|█████████▊| 9781/10000 [35:49<00:48,  4.49it/s]

Loss = 4.679914951324463 


 98%|█████████▊| 9791/10000 [35:51<00:46,  4.50it/s]

Loss = 4.67963981628418 


 98%|█████████▊| 9801/10000 [35:53<00:43,  4.60it/s]

Loss = 4.6794939041137695 


 98%|█████████▊| 9811/10000 [35:55<00:42,  4.48it/s]

Loss = 4.679078102111816 


 98%|█████████▊| 9821/10000 [35:57<00:39,  4.57it/s]

Loss = 4.678713798522949 


 98%|█████████▊| 9831/10000 [36:00<00:37,  4.55it/s]

Loss = 4.678168296813965 


 98%|█████████▊| 9841/10000 [36:02<00:34,  4.56it/s]

Loss = 4.677452564239502 


 99%|█████████▊| 9851/10000 [36:04<00:32,  4.55it/s]

Loss = 4.67694091796875 


 99%|█████████▊| 9861/10000 [36:06<00:30,  4.56it/s]

Loss = 4.676894664764404 


 99%|█████████▊| 9871/10000 [36:08<00:28,  4.50it/s]

Loss = 4.676753044128418 


 99%|█████████▉| 9881/10000 [36:11<00:26,  4.55it/s]

Loss = 4.676355838775635 


 99%|█████████▉| 9891/10000 [36:13<00:23,  4.55it/s]

Loss = 4.675568103790283 


 99%|█████████▉| 9901/10000 [36:15<00:21,  4.57it/s]

Loss = 4.674842357635498 


 99%|█████████▉| 9911/10000 [36:17<00:19,  4.58it/s]

Loss = 4.6743621826171875 


 99%|█████████▉| 9921/10000 [36:19<00:17,  4.52it/s]

Loss = 4.673769474029541 


 99%|█████████▉| 9931/10000 [36:21<00:15,  4.53it/s]

Loss = 4.673242092132568 


 99%|█████████▉| 9941/10000 [36:24<00:12,  4.58it/s]

Loss = 4.672764778137207 


100%|█████████▉| 9951/10000 [36:26<00:10,  4.53it/s]

Loss = 4.672128200531006 


100%|█████████▉| 9961/10000 [36:28<00:08,  4.54it/s]

Loss = 4.671752452850342 


100%|█████████▉| 9971/10000 [36:30<00:06,  4.53it/s]

Loss = 4.671277046203613 


100%|█████████▉| 9981/10000 [36:32<00:04,  4.55it/s]

Loss = 4.670989036560059 


100%|█████████▉| 9991/10000 [36:35<00:01,  4.54it/s]

Loss = 4.670675754547119 


100%|██████████| 10000/10000 [36:37<00:00,  4.55it/s]


## Evaluation

In [None]:
# NOTE(review): newer TensorFlow releases may spell this flag `jit_compile`;
# confirm against the installed version before changing it.
@tf.function(experimental_compile=True)
def fwd_only(features, labels):
  """Run a forward pass of the global `model` without training.

  Args:
    features: Model inputs, passed positionally to `model`.
    labels: Targets, passed to `model` as `target_ids`.

  Returns:
    A `(llh, logits, pred_ids)` tuple taken from the first element of the
    model's output; the second element of the output is discarded.
  """
  outputs, _ = model(features, target_ids=labels, training=False)
  llh, logits, pred_ids = outputs
  return llh, logits, pred_ids

In [None]:
# Build the evaluation input pipeline (is_training=False) from the notebook's
# FLAGS. `run_question_generation` is the run_* module imported at the top of
# this notebook, so the builder is taken from there.
# NOTE(review): assumes run_question_generation.input_fn_builder accepts these
# keyword arguments and that FLAGS.substitute_newline is defined — confirm
# against the module.
eval_input_fn = run_question_generation.input_fn_builder(
    data_dir=FLAGS.data_dir,
    vocab_model_file=FLAGS.vocab_model_file,
    max_encoder_length=FLAGS.max_encoder_length,
    max_decoder_length=FLAGS.max_decoder_length,
    substitute_newline=FLAGS.substitute_newline,
    is_training=False)

# The returned function is called with a params dict; only the batch size is
# supplied here.
eval_dataset = eval_input_fn({'batch_size': 8})

DatasetNotFoundError: ignored

In [None]:
# Running mean of the first value returned by fwd_only for each batch.
eval_llh = tf.keras.metrics.Mean(name='eval_llh')

# NOTE: the loop variable `ex` stays at notebook scope on purpose; the
# "Print predictions" cells below reuse the last batch through it.
for ex in tqdm(eval_dataset, position=0):
  features, labels = ex[0], ex[1]
  llh, logits, pred_ids = fwd_only(features, labels)
  eval_llh.update_state(llh)

mean_llh = eval_llh.result().numpy()
print('Log Likelihood = {}'.format(mean_llh))

### Print predictions

In [None]:
# Load the serialized SentencePiece model and build the tokenizer that the
# cells below use to turn token ids back into text.
# The file is read inside a `with` block so the handle is closed immediately
# instead of being left open for the rest of the session.
# NOTE(review): the options cell sets FLAGS.vocab_model_file = "gpt2"; this
# read needs a real file path, so presumably that shortcut has been resolved
# earlier in the notebook — confirm.
with tf.io.gfile.GFile(FLAGS.vocab_model_file, "rb") as vocab_file:
  sentencepiece_model = vocab_file.read()
tokenizer = tft.SentencepieceTokenizer(model=sentencepiece_model)

In [None]:
# Re-run the forward pass on `ex`, the last batch left over from the
# evaluation loop, keeping only the predicted ids. The two unused results get
# explicit throwaway names rather than `_`, because assigning to `_` in a
# notebook shadows IPython's "last output" variable.
_unused_llh, _unused_logits, pred_ids = fwd_only(ex[0], ex[1])



Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))


In [None]:
# Decode the last evaluation batch (`ex`) and the model's predictions back to
# text, then print them together for a quick qualitative check.
context_text = tokenizer.detokenize(ex[0])
predicted_text = tokenizer.detokenize(pred_ids)
reference_text = tokenizer.detokenize(ex[1])

report_template = 'Context:\n {}\n\n Predicted question:\n {}\n\n Ground truth question:\n {}\n\n'
print(report_template.format(context_text, predicted_text, reference_text))

Context:
 [b'it regards its target countries as the middle east but adopts the convention of calling them the near east to be in conformance with the practices of the state department .'
 b'the football team has won 13 sec championships and 25 bowls , including four sugar bowls , three cotton bowls , an orange bowl and a fiesta bowl .']

 Predicted question:
 [b'what is the name of the ?' b'what is the name of the ?']

 Ground truth question:
 [b'what is wineps target countries as ?'
 b'how many college football bowl championships have the tennessee volunteers won ?']


