In [20]:
import torch
from torch import nn
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import logging, os, argparse

import t5_model, t5_dataset
from copy import deepcopy
from transformers import AdamW

import datasets

In [2]:
class ResMLP(torch.nn.Module):
    """Bottleneck MLP with an optional residual (skip) connection.

    Args:
        bottleneck_size: Width of the hidden layer(s).
        module_type: 'MLP1' builds Linear -> Tanh -> Linear;
            'MLP2' builds Linear -> ReLU -> Linear -> Tanh -> Linear.
        emb_dimension: Feature size of both the input and the output.
        residual: If True, ``forward`` adds the input to the module output.
        layer_norm: If True, 'MLP1' ends with a LayerNorm. 'MLP2' always ends
            with a LayerNorm regardless of this flag (existing behaviour,
            kept for backward compatibility).

    Raises:
        ValueError: If ``module_type`` is neither 'MLP1' nor 'MLP2'.
    """

    def __init__(self, bottleneck_size,
                 module_type='MLP1',
                 emb_dimension=512,
                 residual=True,
                 layer_norm=True):
        super().__init__()
        if module_type == 'MLP1':
            layers = [
                nn.Linear(emb_dimension, bottleneck_size),
                nn.Tanh(),
                nn.Linear(bottleneck_size, emb_dimension),
            ]
            if layer_norm:
                layers.append(nn.LayerNorm(emb_dimension))
        elif module_type == 'MLP2':
            layers = [
                nn.Linear(emb_dimension, bottleneck_size),
                nn.ReLU(),
                nn.Linear(bottleneck_size, bottleneck_size),
                nn.Tanh(),
                nn.Linear(bottleneck_size, emb_dimension),
                nn.LayerNorm(emb_dimension),
            ]
        else:
            # Previously an unknown type left ``self.module`` undefined and the
            # error only surfaced later, inside ``forward``.
            raise ValueError(
                "Unsupported module_type %r; expected 'MLP1' or 'MLP2'" % (module_type,)
            )

        # Layer order (and therefore state-dict keys) matches the original
        # nn.Sequential definitions.
        self.module = nn.Sequential(*layers)
        self.residual = residual
        if self.residual:
            print('Using skip connection in MLP')

    def forward(self, inputs):
        """Apply the MLP to ``inputs``, adding ``inputs`` back when residual is on."""
        outputs = self.module(inputs)
        if self.residual:
            return outputs + inputs
        return outputs


In [3]:
def train_step_lester(trainer, batch, prompt, embed_prompt=False, max_seq_len=512):
    """Run one forward pass with a soft prompt prepended to the input and return the loss.

    Args:
        trainer: Object exposing ``model``, ``tokenizer`` and ``device``.
        batch: Mapping with tensors under "source_ids", "source_mask",
            "target_ids" and "target_mask".
        prompt: Soft-prompt embeddings; repeated once per batch row and
            concatenated in front of the token embeddings.
        embed_prompt: If True, ``prompt`` is first passed through ``model.mlp``.
        max_seq_len: Embeddings and attention mask are truncated to this many
            positions after the prompt is prepended (default 512, the value
            that used to be hard-coded).

    Returns:
        The first element of the model output (the loss).
    """
    model = trainer.model
    tokenizer = trainer.tokenizer

    batch = {k: batch[k].to(trainer.device) for k in batch}

    # Clone before masking: ``.to`` hands back the same tensor when it is
    # already on the target device, so an in-place write would overwrite the
    # caller's target_ids.
    lm_labels = batch["target_ids"].clone()
    lm_labels[lm_labels == tokenizer.pad_token_id] = -100

    inputs_embeds = model.encoder.embed_tokens(batch["source_ids"])
    batch_len = inputs_embeds.shape[0]

    if embed_prompt:
        prompt = model.mlp(prompt)
    # Number of prompt positions; shape[-2] works for (P, D) and (1, P, D).
    prompt_len = prompt.shape[-2]

    inputs_embeds = torch.cat([prompt.repeat(batch_len, 1, 1),
                               inputs_embeds], dim=1)[:, :max_seq_len]

    # Every prompt position is attended to. The mask width follows the actual
    # prompt length instead of the previously hard-coded 5.
    source_mask = batch["source_mask"]
    prompt_mask = torch.ones((batch_len, prompt_len),
                             dtype=source_mask.dtype,
                             device=source_mask.device)
    source_mask_updated = torch.cat((prompt_mask, source_mask),
                                    dim=1)[:, :max_seq_len]

    encoder_outputs = model.encoder(
        attention_mask=source_mask_updated,
        inputs_embeds=inputs_embeds,
        head_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    )

    outputs = model(
        input_ids=batch["source_ids"],
        attention_mask=source_mask_updated,
        labels=lm_labels,
        decoder_attention_mask=batch['target_mask'],
        encoder_outputs=encoder_outputs,
    )
    loss = outputs[0]

    return loss



def validate_lester(trainer, dataloader_val, task,
                    class_keys=['equivalent', 'different'],
                    max_length=2, print_outpust=False,
                    embed_prompt=False, print_outputs=False,
                    max_seq_len=512):
    """Evaluate the prompt-tuned model on a validation dataloader.

    Args:
        trainer: Object exposing ``model``, ``tokenizer``, ``device`` and
            ``process_str``.
        dataloader_val: Iterable of batches with tensors under "source_ids",
            "source_mask" and "target_ids".
        task: Name passed to ``datasets.load_metric('glue', task)``.
        class_keys: Label strings; a decoded text counts as class 1 when it
            contains ``class_keys[1]`` and as class 0 otherwise.
        max_length: ``max_length`` forwarded to ``model.generate``.
        print_outpust: Misspelt flag kept for backward compatibility; when
            true, predictions and targets of the first 10 batches are printed.
        embed_prompt: If True, ``model.prompt`` is passed through ``model.mlp``.
        print_outputs: Correctly spelt alias of ``print_outpust``.
        max_seq_len: Embeddings and attention mask are truncated to this many
            positions after the prompt is prepended (default 512, the value
            that used to be hard-coded).

    Returns:
        Tuple ``(accuracy, metric_result)``: the fraction of predictions whose
        ``trainer.process_str`` form equals that of the target, and the result
        of ``metric.compute()``.
    """
    model = trainer.model
    tokenizer = trainer.tokenizer
    model.eval()

    show_outputs = print_outpust or print_outputs

    corr, total = 0, 0
    try:
        metric = datasets.load_metric('glue', task)
    except Exception:
        # Fall back to plain accuracy when the GLUE metric cannot be loaded
        # for this task. ``Exception`` (not a bare except) so Ctrl-C still
        # interrupts the run.
        metric = datasets.load_metric('accuracy')

    # Inference only: no autograd graph is needed for the encoder pass.
    with torch.no_grad():
        prompt = model.prompt
        if embed_prompt:
            # Transform the prompt ONCE. Doing this inside the batch loop
            # re-fed the already-transformed prompt through the MLP on every
            # batch, so each batch saw a different prompt.
            prompt = model.mlp(prompt)
        # Number of prompt positions; shape[-2] works for (P, D) and (1, P, D).
        prompt_len = prompt.shape[-2]

        for i, batch in enumerate(tqdm(dataloader_val)):
            batch = {k: batch[k].to(trainer.device) for k in batch}

            inputs_embeds = model.encoder.embed_tokens(batch["source_ids"])
            batch_len = inputs_embeds.shape[0]

            inputs_embeds = torch.cat([prompt.repeat(batch_len, 1, 1),
                                       inputs_embeds], dim=1)[:, :max_seq_len]

            # Mask width follows the actual prompt length instead of the
            # previously hard-coded 5.
            source_mask = batch["source_mask"]
            prompt_mask = torch.ones((batch_len, prompt_len),
                                     dtype=source_mask.dtype,
                                     device=source_mask.device)
            source_mask_updated = torch.cat((prompt_mask, source_mask),
                                            dim=1)[:, :max_seq_len]

            encoder_outputs = model.encoder(
                attention_mask=source_mask_updated,
                inputs_embeds=inputs_embeds,
                head_mask=None,
                output_attentions=None,
                output_hidden_states=None,
                return_dict=None,
            )

            outs = model.generate(
                input_ids=batch["source_ids"],
                attention_mask=source_mask_updated,
                encoder_outputs=encoder_outputs,
                max_length=max_length,
            )
            dec = [tokenizer.decode(ids) for ids in outs]
            targets = [tokenizer.decode(ids) for ids in batch['target_ids']]

            corr += np.sum([trainer.process_str(x) == trainer.process_str(y)
                            for x, y in zip(dec, targets)])
            total += batch['source_ids'].shape[0]

            if i < 10 and show_outputs:
                print(dec)
                print(targets)

            # CHANGE FOR MULTI CLASS!!!
            metric.add_batch(predictions=[1 if class_keys[1] in x else 0 for x in dec],
                             references=[1 if class_keys[1] in x else 0 for x in targets])

    return corr/total, metric.compute()

In [4]:
def get_prompt(trainer, prompt_len):
    """Build initial prompt weights from randomly chosen token embeddings.

    Draws ``prompt_len`` row indices with ``np.random.randint`` (one call per
    row, with replacement) from the encoder's token-embedding matrix and
    returns copies of those rows as a single NumPy array.
    """
    embedding = trainer.model.encoder.embed_tokens.weight
    vocab_size = embedding.shape[0]

    with torch.no_grad():
        sampled_rows = [
            embedding[np.random.randint(vocab_size)].detach().cpu().numpy().copy()
            for _ in range(prompt_len)
        ]
    return np.array(sampled_rows)


In [5]:
# save_path = os.path.join(args.save_dir, args.save_name)
#     if not os.path.exists(save_path):
#         os.mkdir(save_path)

# Build the t5-small prompt-tuning trainer. The optional MLP settings
# (mlp_bottleneck, weight_decay_mlp, mlp_lr, mlp_layer_norm) and the optimizer
# choice (opt) are left unset here.
TrainerT5 = t5_model.PromptModelT5(
    model_name='t5-small',
    prefix_len=0,
    freeze_weights=True,
    freeze_except='xxxshared',  # freeze all weights
    lr=0.3,
    weight_decay=0.00,
    prompt_name='PRE',
    prefix_MLP='None',  # using custom prefix MLP
    early_stopping=False,
)

Freezing weights
Using AdamW




In [6]:
# Initialise a 5-row soft prompt from randomly sampled token embeddings and
# attach it to the model as a trainable parameter.
prompt_weigths = get_prompt(TrainerT5, prompt_len=5)
TrainerT5.model.prompt = nn.Parameter(
    torch.tensor(prompt_weigths),
    requires_grad=True,
)
print('created prompt: ', prompt_weigths.shape)

created prompt:  (5, 512)


In [7]:
# Display the prompt parameter that was attached to the model.
TrainerT5.model.prompt

Parameter containing:
tensor([[ -4.9688,  -3.1250,  17.8750,  ...,   3.6562,  -7.2812,  32.0000],
        [ -7.9375,  -6.5000, -59.5000,  ..., -66.5000,  24.2500,  17.0000],
        [-52.0000, -15.5000,  18.5000,  ..., -35.0000,  20.6250,   6.6250],
        [ 15.0000, -34.7500,  15.4375,  ..., -30.3750,  56.5000,  16.3750],
        [ 15.1875,  -5.1562, -33.2500,  ...,   8.2500, -58.7500,  28.2500]],
       requires_grad=True)

In [8]:
# Display the NumPy array returned by get_prompt, for comparison with the parameter.
prompt_weigths

array([[ -4.96875,  -3.125  ,  17.875  , ...,   3.65625,  -7.28125,
         32.     ],
       [ -7.9375 ,  -6.5    , -59.5    , ..., -66.5    ,  24.25   ,
         17.     ],
       [-52.     , -15.5    ,  18.5    , ..., -35.     ,  20.625  ,
          6.625  ],
       [ 15.     , -34.75   ,  15.4375 , ..., -30.375  ,  56.5    ,
         16.375  ],
       [ 15.1875 ,  -5.15625, -33.25   , ...,   8.25   , -58.75   ,
         28.25   ]], dtype=float32)

In [15]:
# Replace the trainer's optimizer with one AdamW parameter group that covers
# every model parameter.
optimizer_grouped_parameters = [
    dict(
        params=list(TrainerT5.model.parameters()),
        weight_decay=1e-5,
        lr=0.3,
    )
]
TrainerT5.optimizer = AdamW(optimizer_grouped_parameters, eps=1e-8)
TrainerT5.optimizer

AdamW (
Parameter Group 0
    betas: (0.9, 0.999)
    correct_bias: True
    eps: 1e-08
    lr: 0.3
    weight_decay: 1e-05
)

In [16]:
# for name, p in TrainerT5.model.named_parameters():
#     if p.requires_grad:
#         print(p, name)

In [17]:
# Total number of elements across parameters that have requires_grad set.
pytorch_total_params = sum(
    param.numel()
    for param in TrainerT5.model.parameters()
    if param.requires_grad
)
pytorch_total_params

2560

In [18]:
task = 'boolq'
# rte and mrpc use a target length of 5; every other task uses 2.
target_len = 5 if task in ('rte', 'mrpc') else 2

ds2 = t5_dataset.T5Dataset(TrainerT5.tokenizer, task)
dataloader_train = ds2.get_final_ds(
    task, 'train',
    batch_size=8, k=-1,
    target_len=target_len, prefix_list=[],
)

# k=-1 for validation (the per-class subsampling expression is disabled).
k_val = -1
dataloader_val = ds2.get_final_ds(
    task, 'validation',
    batch_size=8, k=k_val, return_test=False,
    target_len=target_len, prefix_list=[],
)

class_keys = ds2.task_to_labels[task]

Reusing dataset super_glue (/data/home/arazdai/.cache/huggingface/datasets/super_glue/boolq/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7)
Loading cached shuffled indices for dataset at /data/home/arazdai/.cache/huggingface/datasets/super_glue/boolq/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7/cache-20bba0ff54488a0d.arrow


  0%|          | 0/9427 [00:00<?, ?ex/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Reusing dataset super_glue (/data/home/arazdai/.cache/huggingface/datasets/super_glue/boolq/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7)
Loading cached shuffled indices for dataset at /data/home/arazdai/.cache/huggingface/datasets/super_glue/boolq/1.0.2/d040c658e2ddef6934fdd97deb45c777b6ff50c524781ea434e7219b56a428a7/cache-56923150e227aa8a.arrow


  0%|          | 0/3270 [00:00<?, ?ex/s]

In [22]:
# previous training script
batch_size = 8

print('task = ', task)
model = TrainerT5.model
model.to('cuda')

embed_prompt = False

for epoch in range(50):
    # validate_lester switches the model to eval mode, so re-enable training
    # mode at the start of every epoch.
    model.train()

    for i, batch in enumerate(tqdm(dataloader_train)):
        batch = {key: batch[key].to('cuda') for key in batch}
        loss = train_step_lester(
            TrainerT5,
            batch,
            TrainerT5.model.prompt,
            embed_prompt=embed_prompt,
        )
        loss.backward()

        TrainerT5.optimizer.step()
        TrainerT5.optimizer.zero_grad()

    class_keys = ds2.task_to_labels[task]

    # Second return value is the metric result from validate_lester.
    val_acc, val_f1 = validate_lester(
        TrainerT5, dataloader_val, task,
        embed_prompt=embed_prompt,
        class_keys=class_keys,
        max_length=target_len,
    )  # prompt tuning 5

    print(epoch, '->', val_acc, val_f1)
    

task =  boolq


  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

0 -> 0.6253822629969419 {'accuracy': 0.6253822629969419}


  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

1 -> 0.6275229357798165 {'accuracy': 0.6275229357798165}


  0%|          | 0/1179 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [21]:
# One-off validation pass with the current prompt (prompt tuning, 5 rows).
val_acc, val_f1 = validate_lester(
    TrainerT5, dataloader_val, task,
    embed_prompt=embed_prompt,
    class_keys=class_keys,
    max_length=target_len,
)
val_acc, val_f1

  0%|          | 0/409 [00:00<?, ?it/s]

(0.6269113149847095, {'accuracy': 0.6269113149847095})

In [17]:
# after changing prompt2

task =  boolq


  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 'true<

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 'tr

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> false', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> false', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 't

  0%|          | 0/1179 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [16]:
# Display the most recently computed validation results.
val_acc, val_f1

(0.6275229357798165, 0.33297685)

In [9]:
print('task = ', task)
model = TrainerT5.model
model.to('cuda')

embed_prompt = False # NO PREFIX MLP
print("Using MLP? ", embed_prompt)

results_dict = {'acc': [], 'loss': []}

for epoch in range(20):
    print(epoch)
    # validate_lester switches the model to eval mode, so re-enable training
    # mode at the start of every epoch.
    model.train()

    for i, batch in enumerate(tqdm(dataloader_train)):
        batch = {k:batch[k].to('cuda') for k in batch}
        loss = train_step_lester(TrainerT5, batch, TrainerT5.model.prompt, embed_prompt=embed_prompt)
        loss.backward()

        TrainerT5.optimizer.step()
        TrainerT5.optimizer.zero_grad()

    # embed_prompt must be passed by keyword: the 4th positional parameter of
    # validate_lester is class_keys, so passing it positionally collided with
    # the class_keys keyword (TypeError). The flag name `print_outpust` (sic)
    # matches the spelling in validate_lester's signature.
    # NOTE: validate_lester returns (accuracy, metric result); the name `loss`
    # is kept because a later cell prints `acc` and `loss`.
    acc, loss = validate_lester(TrainerT5, dataloader_val, task,
                                embed_prompt=embed_prompt,
                                class_keys=class_keys,
                                max_length=target_len,
                                print_outpust=True) # prompt tuning 5
    results_dict['acc'].append(acc)
    results_dict['loss'].append(loss)
    print(epoch, '->', acc, loss)

#     if save_path!=None and epoch%5==0:
#         np.save(os.path.join(save_path, 'results_dict.npy'), results_dict)

task =  boolq
Using MLP?  False
0


  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> false', '<pad> true', '<pad> true', '<pad> false', '<pad> false', '<pad> true', '<pad> true', '<pad> false']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> false']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> false', '<pad> false', '<pad> true', '<pad> false', '<pad> false', '<pad> true', '<pad> false']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> false', '<pad> false', '<pad> false', '<pad> true', '<pad> false', '<pad> false', '<pad> false']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> false', '<pad> true', '<pad> true', '<pad> false', '<pad> false', '<pad> true', '<pad> false', '<pad> true']
['false</s>'

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 'true<

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> false', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> false', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 't

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 'true<

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> false', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', '

KeyboardInterrupt: 

In [11]:
# with bug (prompt copied to prompt2)

task =  boolq
Using MLP?  False
0


  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 'true</

  0%|          | 0/1179 [00:00<?, ?it/s]

  0%|          | 0/409 [00:00<?, ?it/s]

['<pad> true', '<pad> true', '<pad> true', '<pad> false', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'false</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'true</s>', 'false</s>', 'true</s>', 'true</s>', 'true</s>', 'true</s>', 'false</s>']
['<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true', '<pad> true']
['false</s>', 'false</s>', 'true<

  0%|          | 0/1179 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [10]:
# Re-print the values left over from the last completed validation pass.
print(epoch, '->', acc, loss)

0 -> 0.6220183486238532 0.33131248
