In [1]:
import os
# os.chdir('/home/s3/hyeryung/mucoco')
os.chdir('/data/hyeryung/mucoco')

In [2]:
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

In [3]:
import torch
import torch.nn.functional as F
from new_module.losses import BaseLoss, register_loss

In [5]:
device='cuda:7'

# GPT2

In [5]:
# model=AutoModel.from_pretrained('gpt2-large',cache_dir='/shared/s3/lab07/hyeryung/hf_cache')
model=AutoModelForCausalLM.from_pretrained('gpt2-large',cache_dir='/data/hyeryung/hf_cache')

In [6]:
model=model.to(device)
model=model.eval()

In [7]:
tokenizer=AutoTokenizer.from_pretrained('gpt2-large')
tokenizer.pad_token_id =tokenizer.eos_token_id

In [8]:
prompt='abc'
gens=['dsxe','sdvbfe']

In [9]:
num_samples=len(gens); print(num_samples)

2


In [10]:
prompt_enc=tokenizer.encode_plus(prompt,add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(device)

In [11]:
prompt_enc['input_ids']=prompt_enc['input_ids'].expand(num_samples,-1)

In [12]:
prompt_enc['attention_mask']=prompt_enc['attention_mask'].expand(num_samples,-1)

In [13]:
prompt_enc

{'input_ids': tensor([[39305],
        [39305]], device='cuda:1'), 'attention_mask': tensor([[1],
        [1]], device='cuda:1')}

In [14]:
gens_enc=tokenizer.batch_encode_plus(gens, add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(device)

In [15]:
gens_enc

{'input_ids': tensor([[ 9310, 27705, 50256, 50256],
        [21282,    85,    65,  5036]], device='cuda:1'), 'attention_mask': tensor([[1, 1, 0, 0],
        [1, 1, 1, 1]], device='cuda:1')}

In [16]:
input_tokens = torch.cat([prompt_enc.input_ids, gens_enc.input_ids], dim=1)

In [17]:
attention_masks = torch.cat([prompt_enc.attention_mask, gens_enc.attention_mask], dim=1)

In [18]:
model_output = model(input_ids=input_tokens,
                    attention_mask=attention_masks)

In [19]:
model_output.keys()

odict_keys(['logits', 'past_key_values'])

In [20]:
lm_logits = model_output[0][:, prompt_enc.input_ids.size(1)-1:-1, :]
lm_logprobs = F.log_softmax(lm_logits, dim=-1)

In [21]:
lm_logprobs.shape

torch.Size([2, 4, 50257])

In [22]:
gens_enc.input_ids.shape

torch.Size([2, 4])

In [23]:
loss = F.nll_loss(lm_logprobs.permute(0,2,1), gens_enc.input_ids, reduction="none")

In [24]:
loss = loss * gens_enc.attention_mask

In [25]:
loss.shape

torch.Size([2, 4])

In [26]:
loss = loss.sum(dim=-1)

In [27]:
loss

tensor([19.280, 26.730], device='cuda:1', grad_fn=<SumBackward1>)

In [28]:
loss /= gens_enc.attention_mask.sum(dim=-1) ## Not entirely sure yet whether normalizing this way is the right thing to do.

In [29]:
loss

tensor([9.640, 6.683], device='cuda:1', grad_fn=<DivBackward0>)

In [45]:
from typing import List

class GPT2Loss(BaseLoss):
    """Scores continuations of a prompt with a causal language model.

    ``compute_gold_loss`` returns the (optionally length-normalised) negative
    log-likelihood of each continuation; ``generate`` samples continuations
    from the same model.
    """

    def __init__(self, model, tokenizer, args):
        """Store the model/tokenizer pair and configure padding.

        Args:
            model: causal LM; called with ``input_ids``/``attention_mask`` and
                used through ``.generate``, ``.device`` and ``.config``.
            tokenizer: tokenizer matching ``model``. Its ``pad_token`` is
                overwritten with its EOS token (the tokenizer object is mutated).
            args: options object. ``length_normalize`` is read by
                ``compute_gold_loss``; ``AR_temperature``, ``AR_top_k``,
                ``AR_top_p`` and (optionally) ``max_output_length`` are read
                when generating.
        """
        super().__init__()

        self.model = model
        self.tokenizer = tokenizer
        self.args = args
        self.device = model.device

        self.eos_token_id = self.tokenizer.eos_token_id
        # Reuse EOS as the padding token so that `padding=True` works when
        # batch-encoding predictions of different lengths.
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.config.pad_token_id = self.model.config.eos_token_id # to remove the warning

    def compute_gold_loss(self, prompt:str, predictions:List[str], **kwargs):
        """Negative log-likelihood of each discrete prediction given ``prompt``.

        Args:
            prompt: a single prompt string shared by all predictions. An empty
                prompt is replaced by the tokenizer's BOS token so that there
                is always at least one position to condition on.
            predictions: list of N continuation strings.
            **kwargs: accepted for interface compatibility; ignored.

        Returns:
            Tensor of shape (N,): per-prediction NLL summed over the real
            (non-padding) tokens, divided by the number of real tokens when
            ``self.args.length_normalize`` is true.
        """
        if prompt == "":
            # With zero prompt tokens the logits slice below would start at -1
            # and come out empty, so fall back to BOS.
            prompt = self.tokenizer.bos_token

        num_samples = len(predictions)
        prompt_enc = self.tokenizer.encode_plus(prompt, add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(self.device)
        # The same prompt is paired with every prediction.
        prompt_enc['input_ids'] = prompt_enc['input_ids'].expand(num_samples, -1)
        prompt_enc['attention_mask'] = prompt_enc['attention_mask'].expand(num_samples, -1)

        predictions_enc = self.tokenizer.batch_encode_plus(predictions, add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(self.device)

        input_tokens = torch.cat([prompt_enc.input_ids, predictions_enc.input_ids], dim=1)
        attention_masks = torch.cat([prompt_enc.attention_mask, predictions_enc.attention_mask], dim=1)
        with torch.no_grad():
            model_output = self.model(input_ids=input_tokens,
                                      attention_mask=attention_masks)

        # The logits at position t score the token at position t+1, so start at
        # the last prompt position and drop the final position: the slice then
        # lines up one-to-one with the prediction tokens.
        prompt_len = prompt_enc.input_ids.size(1)
        lm_logits = model_output[0][:, prompt_len - 1:-1, :]
        lm_logprobs = F.log_softmax(lm_logits, dim=-1)

        # input dimensions : (N, C, d1), (N, d1)
        loss = F.nll_loss(lm_logprobs.permute(0, 2, 1), predictions_enc.input_ids, reduction="none")
        loss = loss * predictions_enc.attention_mask # make losses for pad tokens 0.

        loss = loss.sum(dim=-1)
        if self.args.length_normalize:
            # A prediction with no real tokens has a zero count; clamp so that
            # its (zero) loss stays 0 instead of becoming NaN.
            token_counts = predictions_enc.attention_mask.sum(dim=-1).clamp(min=1)
            loss = loss / token_counts
        return loss # dimensions: (N)

    def generate(self, input_ids, **kwargs):
        """Sample continuations of ``input_ids`` and return only the new tokens."""
        prepared_input = self._prepare_input_for_generation(input_ids, **kwargs)
        output = self.model.generate(**prepared_input)

        return self._postprocess_output(prepared_input, output)

    def _prepare_input_for_generation(self, input_ids, **kwargs):
        """Build the keyword arguments passed to ``self.model.generate``.

        ``max_output_length`` defaults to 10 when ``self.args`` does not define
        it; ``num_return_sequences`` is taken from ``kwargs`` (default 1).
        """
        max_output_length = getattr(self.args, "max_output_length", 10)
        #batch size is 1, padding and stuff needs to be modified for this to work for larger batches

        return_object = {'input_ids': input_ids,
                'max_length': input_ids.size(1) + max_output_length,
                'do_sample': True,
                'temperature': self.args.AR_temperature,
                'top_k': self.args.AR_top_k,
                'top_p': self.args.AR_top_p,
                'num_return_sequences': kwargs.get('num_return_sequences', 1)}

        return return_object

    def _postprocess_output(self, prepared_input, output_ids):
        """Drop the leading prompt columns from the generated ids."""
        prompt_len = prepared_input['input_ids'].size(1)
        return output_ids[:, prompt_len:]

In [46]:
# Minimal stand-in for the `args` object handed to GPT2Loss. It defines only
# `length_normalize`, the attribute GPT2Loss.compute_gold_loss reads; the
# AR_temperature / AR_top_k / AR_top_p attributes that
# GPT2Loss._prepare_input_for_generation reads are not defined here.
class Args:
    length_normalize = False

gpt2_loss = GPT2Loss(model,tokenizer,Args())

In [47]:
gpt2_loss.compute_gold_loss(prompt, gens)

tensor([19.280, 26.730], device='cuda:1')

# Classification no prefix

In [49]:
from transformers import AutoModelForSequenceClassification

In [58]:
ckpt_path = '/data/hyeryung/loc_edit/models/roberta-base-jigsaw-toxicity-classifier-energy-training/step_1000_best_checkpoint/'
model = AutoModelForSequenceClassification.from_pretrained(ckpt_path)
model = model.eval()
model = model.to(device)

In [51]:
tokenizer = AutoTokenizer.from_pretrained(ckpt_path)

In [60]:
prompt='abc'
prediction=['dsxe','sdvbfe']

In [54]:
eos_token_id = tokenizer.eos_token_id

In [55]:
label_id = 0

In [61]:
# Encode into a separate name: overwriting `prediction` (a list of strings)
# with the encoded batch would make this cell fail when it is run a second time.
prediction_enc = tokenizer.batch_encode_plus(prediction, add_special_tokens=True, return_tensors="pt", padding=True, truncation=True).to(device)

model_output = model(**prediction_enc)
lm_logits = model_output[0]
lm_logprobs = F.log_softmax(lm_logits, dim=-1)
# Negative log-probability of class `label_id` for each input text.
loss = -lm_logprobs[:, label_id]

In [66]:
loss

tensor([0.142, 0.013], device='cuda:1', grad_fn=<NegBackward0>)

In [67]:
class ClassificationLogProbLoss(BaseLoss):
    """Negative log-probability of a target class under a sequence classifier."""

    def __init__(self, model, tokenizer, args):
        """Store the classifier, its tokenizer and the options object.

        Args:
            model: classifier called as ``model(**encoded_batch)``; element 0
                of its output is used as the class logits.
            tokenizer: tokenizer matching ``model``.
            args: options object; stored but not read by this class.
        """
        super().__init__()

        self.model = model
        self.tokenizer = tokenizer
        self.args = args
        self.device = model.device

        self.bos_token_id = self.tokenizer.bos_token_id
        self.eos_token_id = self.tokenizer.eos_token_id

    def compute_gold_loss(self, prompt:str, prediction:List[str], label_id, **kwargs):
        """Score discrete texts against a target class.

        Args:
            prompt: unused; kept so the signature matches the other losses.
            prediction: list of N texts to classify.
            label_id: index of the class whose log-probability is scored.
            **kwargs: accepted for interface compatibility; ignored.

        Returns:
            Tensor of shape (N,) holding ``-log p(label_id | text)``.
        """
        encoded = self.tokenizer.batch_encode_plus(prediction, add_special_tokens=True, return_tensors="pt", padding=True, truncation=True).to(self.device)
        # The inputs are discrete tokens, so no gradient is needed; this also
        # matches GPT2Loss.compute_gold_loss, which runs under no_grad.
        with torch.no_grad():
            class_logits = self.model(**encoded)[0]
        class_logprobs = F.log_softmax(class_logits, dim=-1)
        return -class_logprobs[:, label_id]



In [68]:
clsf_loss = ClassificationLogProbLoss(model, tokenizer, {})

In [69]:
clsf_loss.compute_gold_loss(prompt, gens, 0)

tensor([0.142, 0.013], device='cuda:1', grad_fn=<NegBackward0>)

: 

# Check for discrepancy

In [2]:
#!/usr/bin/env python
# coding: utf-8

from itertools import chain
import math
import argparse
import json
import logging
import os
import time
os.chdir('/data/hyeryung/mucoco')
import numpy as np
import pandas as pd
import torch
import transformers
import torch.nn.functional as F
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer

import new_module.losses as lossbuilder
import new_module.losses_old as lossbuilder_old
import wandb
# from new_module.decode_utils import (
#     beam_rerank_v0,
#     beam_rerank_v1,
#     beam_rerank_v2,
#     combi_rerank,
# )
# from new_module.new_decode_utils import get_beam_hypotheses, get_combi_hypotheses, final_reranking
from new_module.evaluate_wandb import evaluate_main
from new_module.locate.new_locate_utils import LocateMachine
from new_module.utils.robertacustom import RobertaCustomForSequenceClassification

logging.basicConfig(level=logging.DEBUG, format="%(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get("LOGGING_LEVEL", logging.DEBUG))
import joblib
config = joblib.load('config.pkl')

In [3]:
from typing import Tuple, List

In [4]:
class dummyArgs:
    """Attribute bag: each keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        for attr_name in kwargs:
            setattr(self, attr_name, kwargs[attr_name])

build_loss_args = dummyArgs(**config["build_loss_dict"])

## load data
if config["task"] in ("toxicity", "sentiment"):
    # Read and parse the JSONL file once, inside a context manager, so the
    # handle is closed and each line is decoded a single time.
    with open(config["source_data"], "r") as f:
        records = [json.loads(line) for line in f]
    source_dataset = [
        record[config["jsonl_primary_key"]][config["jsonl_secondary_key"]]
        for record in records
    ]
    generation_dataset = [record["generations"] for record in records]
elif config["task"] in ("formality", "sentiment-lewis-compr"):
    # Plain-text input: one generation per line, no source prompt.
    with open(config["source_data"], "r") as f:
        generation_dataset = [line.rstrip('\n') for line in f]
    source_dataset = ["" for _ in generation_dataset]

## load tokenizer, models, define losses
name2tokenizer = {}
name2model = {}
name2config = {}
loss2tokenizer = {}
embed_luts = []

for i, model_path in enumerate(config["model_paths"]):
    if (
        model_path not in name2model
    ):  # making sure we are not loading the model twice in case some constraints use the same model.
        tokenizer_path = config["tokenizer_paths"][i]
        try:
            name2tokenizer[tokenizer_path] = AutoTokenizer.from_pretrained(
                tokenizer_path,
                cache_dir=config["cache_dir"],
                use_fast=True,
            )
        except Exception:
            # Fall back to the slow tokenizer, but say so instead of hiding the
            # failure; `except Exception` also lets KeyboardInterrupt through.
            logger.warning(
                "Fast tokenizer failed to load for %s; retrying with use_fast=False",
                tokenizer_path,
                exc_info=True,
            )
            name2tokenizer[tokenizer_path] = AutoTokenizer.from_pretrained(
                tokenizer_path,
                cache_dir=config["cache_dir"],
                use_fast=False,
            )

        name2config[model_path] = AutoConfig.from_pretrained(
            model_path, cache_dir=config["cache_dir"]
        )

        # The custom RoBERTa classifier lives in this project; every other
        # model type is looked up by name in `transformers`.
        if config["model_types"][i] == "RobertaCustomForSequenceClassification":
            model_cls = RobertaCustomForSequenceClassification
        else:
            model_cls = getattr(transformers, config["model_types"][i])
        name2model[model_path] = lossbuilder.ModelWrapper(
            model_cls.from_pretrained(
                model_path,
                config=name2config[model_path],
                cache_dir=config["cache_dir"],
            )
        )
        name2model[model_path].eval()
        name2model[model_path].to(config['device'])

    input_embeds = name2model[model_path].get_input_embeddings()
    if isinstance(input_embeds, torch.nn.Sequential):
        input_embeds = input_embeds[0]
    embed_luts.append(input_embeds)

    if config["target_type"] == "embeds":
        # Assigning `.requires_grad = False` on a module only creates a plain
        # attribute; requires_grad_() is what actually freezes the parameters.
        embed_luts[-1].requires_grad_(False)

mlm_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# The masked LM is skipped for the "mlm-beamsearch-v2" method, leaving `mlm` as None.
mlm = None if config["method"] == "mlm-beamsearch-v2" else AutoModelForMaskedLM.from_pretrained("roberta-base").to(config['device'])


Starting new HTTPS connection (1): huggingface.co:443


https://huggingface.co:443 "HEAD /gpt2-large/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /gpt2-large/resolve/main/config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /gpt2-large/resolve/main/generation_config.json HTTP/1.1" 200 0


50265


https://huggingface.co:443 "HEAD /roberta-base/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /roberta-base/resolve/main/config.json HTTP/1.1" 200 0


In [5]:

# Build one loss object per configured loss, in the order given by config["losses"].
lossfns = []
for i, loss in enumerate(config["losses"]):
    built_loss = lossbuilder.build_loss(
        loss,
        name2model[config["model_paths"][i]],
        name2tokenizer[config["tokenizer_paths"][i]],
        build_loss_args,
    )
    lossfns.append(built_loss)
    # Register the masked-LM tokenizer's mask token with this loss's tokenizer.
    built_loss.tokenizer.add_special_tokens({"mask_token": mlm_tokenizer.mask_token})
    loss2tokenizer[loss] = built_loss.tokenizer


In [6]:
# Two weights that sum to 1: the first loss gets 1 - closs_weight, the second closs_weight.
loss_weights = [1 - config['closs_weight'], config['closs_weight']]

# A single hard-coded sample is inspected instead of looping over the dataset.
text_id = 3
source_text = source_dataset[text_id]
if source_text == "":
    # No prompt available: condition on the first loss's BOS token instead.
    source_text = lossfns[0].tokenizer.bos_token

if config["task"] in ("toxicity", "sentiment"):
    # Each entry of the generation list is a dict; only its "text" field is used.
    AR_prediction_all = [x["text"] for x in generation_dataset[text_id]]
elif config["task"] in ("formality", "sentiment-lewis-compr"):
    # One generation per source line, wrapped in a list.
    AR_prediction_all = [generation_dataset[text_id]]

curr_num_samples = len(AR_prediction_all)

In [7]:
# Same construction as `lossfns`, but using the builder from new_module.losses_old
# so the two implementations can be compared on identical models and tokenizers.
lossfns_old = []
loss2tokenizer_old = {}
for i, loss in enumerate(config["losses"]):
    built_loss_old = lossbuilder_old.build_loss(
        loss,
        name2model[config["model_paths"][i]],
        name2tokenizer[config["tokenizer_paths"][i]],
        build_loss_args,
    )
    lossfns_old.append(built_loss_old)
    built_loss_old.tokenizer.add_special_tokens({"mask_token": mlm_tokenizer.mask_token})
    loss2tokenizer_old[loss] = built_loss_old.tokenizer

In [8]:
# Weighted total loss per sample, and one column per configured loss for logging.
# The column count follows config["losses"] rather than a hard-coded 2.
curr_loss = torch.zeros(len(AR_prediction_all), device=config['device'])
logging_loss = torch.zeros((len(AR_prediction_all), len(config["losses"])), device=config['device'])
edit_yn = torch.ones(len(AR_prediction_all), dtype=torch.bool, device=config['device'])

for lossid, lossname in enumerate(config["losses"]):
    with torch.no_grad():
        # New implementation: all samples are scored in one batched call.
        lossvalue = lossfns[lossid].compute_gold_loss(
            source_text, AR_prediction_all,
            label_id=config['target_label_ids'][lossid],
        )
        torch.cuda.empty_cache()
    curr_loss += loss_weights[lossid] * lossvalue
    logging_loss[:, lossid] = lossvalue

In [10]:
# Reference values from the old loss implementation, computed with one
# compute_gold_loss call per sample and per loss.
weighted_totals = []
per_loss_rows = []

for sample_text in AR_prediction_all:
    sample_total = 0.0
    sample_values = []
    for lossid, lossname in enumerate(config["losses"]):
        with torch.no_grad():
            lossvalue = lossfns_old[lossid].compute_gold_loss(
                source_text, sample_text,
                label_id=config['target_label_ids'][lossid],
            )
            torch.cuda.empty_cache()
        scalar_value = lossvalue.item()
        sample_total += loss_weights[lossid] * scalar_value
        sample_values.append(scalar_value)
    weighted_totals.append(sample_total)
    per_loss_rows.append(sample_values)

# Same layout as the batched results: (num_samples,) and (num_samples, num_losses).
curr_loss_old = torch.Tensor(weighted_totals).float()
logging_loss_old = torch.Tensor(per_loss_rows).float()

In [11]:
curr_loss.tolist()

[6.429828643798828, 17.49304962158203]

In [12]:
logging_loss.tolist()

[[62.96187210083008, 0.1484900563955307],
 [163.65499877929688, 1.2528338432312012]]

In [13]:
curr_loss_old.tolist()

[6.429826259613037, 17.493051528930664]

In [14]:
logging_loss_old.tolist()

[[62.96185302734375, 0.1484900563955307],
 [163.65501403808594, 1.2528332471847534]]

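The batched and per-sample values differ very slightly, so I debugged it. It turns out that processing the inputs as a batch already changes the logit values themselves by about 0.00001 or less.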

gpt2

In [111]:
# Batched run: score every sample in AR_prediction_all in one forward pass and
# keep the per-token losses for comparison with the single-sample run.
# NOTE(review): this cell sits under the "gpt2" heading but runs `mlm` /
# `mlm_tokenizer`, which an earlier cell creates from "roberta-base" (and `mlm`
# is None when config["method"] == "mlm-beamsearch-v2") - confirm that this is
# the model that was meant to be inspected.
predictions = AR_prediction_all #[AR_prediction_all[0]]
prompt = source_text

num_samples = len(predictions)
prompt_enc=mlm_tokenizer.encode_plus(prompt,add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(config['device'])
# Repeat the single prompt row once per prediction.
prompt_enc['input_ids']=prompt_enc['input_ids'].expand(num_samples,-1)
prompt_enc['attention_mask']=prompt_enc['attention_mask'].expand(num_samples,-1)


predictions_enc=mlm_tokenizer.batch_encode_plus(predictions, add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(config['device'])
input_tokens = torch.cat([prompt_enc.input_ids, predictions_enc.input_ids], dim=1)
attention_masks = torch.cat([prompt_enc.attention_mask, predictions_enc.attention_mask], dim=1)

with torch.no_grad():
    model_output = mlm(input_ids=input_tokens,
                        attention_mask=attention_masks)
# Keep the positions from the last prompt token up to (not including) the final
# token, so the slice has one row of logits per prediction token.
lm_logits = model_output[0][:, prompt_enc.input_ids.size(1)-1:-1, :]
lm_logprobs = F.log_softmax(lm_logits, dim=-1)
# input dimensions : (N, C, d1), (N, d1)
loss = F.nll_loss(lm_logprobs.permute(0,2,1), predictions_enc.input_ids, reduction="none")
# `loss_old` keeps the per-token losses before the padding mask is applied.
loss_old = loss.clone()
loss = loss * predictions_enc.attention_mask # make losses for pad tokens 0.

In [104]:
loss.tolist()[0][:19]

[30.612892150878906,
 26.147663116455078,
 13.221147537231445,
 20.680118560791016,
 20.39809226989746,
 26.927724838256836,
 27.40665054321289,
 30.52690315246582,
 25.79997444152832,
 28.905277252197266,
 31.513980865478516,
 24.071300506591797,
 14.508750915527344,
 16.083837509155273,
 27.41469955444336,
 21.93108558654785,
 21.88882827758789,
 22.033344268798828,
 21.780370712280273]

In [None]:
lm_logprobs[0][:19].tolist()[0][:10]

[-35.62112808227539,
 -35.705074310302734,
 -18.23155403137207,
 -35.45712661743164,
 -23.16324234008789,
 -20.84800148010254,
 -19.243375778198242,
 -21.327049255371094,
 -20.555192947387695,
 -19.96302032470703]

In [105]:
# Single-sample run: same computation as the batched cell, but only for the
# first prediction, so no padding is involved.
# NOTE(review): like the batched cell, this uses `mlm` / `mlm_tokenizer`
# (created from "roberta-base" in an earlier cell) although the section is
# headed "gpt2" - confirm that this is the intended model.
predictions = [AR_prediction_all[0]]
prompt = source_text

num_samples = len(predictions)
prompt_enc=mlm_tokenizer.encode_plus(prompt,add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(config['device'])
prompt_enc['input_ids']=prompt_enc['input_ids'].expand(num_samples,-1)
prompt_enc['attention_mask']=prompt_enc['attention_mask'].expand(num_samples,-1)


predictions_enc=mlm_tokenizer.batch_encode_plus(predictions, add_special_tokens=False, return_tensors="pt", padding=True, truncation=True).to(config['device'])
input_tokens = torch.cat([prompt_enc.input_ids, predictions_enc.input_ids], dim=1)
attention_masks = torch.cat([prompt_enc.attention_mask, predictions_enc.attention_mask], dim=1)

with torch.no_grad():
    model_output = mlm(input_ids=input_tokens,
                        attention_mask=attention_masks)
# One row of logits per prediction token (last prompt position through the
# second-to-last position of the concatenated input).
lm_logits = model_output[0][:, prompt_enc.input_ids.size(1)-1:-1, :]
lm_logprobs = F.log_softmax(lm_logits, dim=-1)
# input dimensions : (N, C, d1), (N, d1)
loss = F.nll_loss(lm_logprobs.permute(0,2,1), predictions_enc.input_ids, reduction="none")
# `loss_old` keeps the per-token losses before the padding mask is applied.
loss_old = loss.clone()
loss = loss * predictions_enc.attention_mask # make losses for pad tokens 0.

In [None]:
loss.tolist()

[[30.612899780273438,
  26.147655487060547,
  13.221145629882812,
  20.68010139465332,
  20.39809799194336,
  26.9277400970459,
  27.406665802001953,
  30.526931762695312,
  25.799951553344727,
  28.905282974243164,
  31.51397705078125,
  24.071271896362305,
  14.508733749389648,
  16.083799362182617,
  27.414684295654297,
  21.931074142456055,
  21.88882064819336,
  22.03339195251465,
  21.78038787841797]]

In [110]:
lm_logprobs_no_batch = lm_logprobs.detach().clone()

In [None]:
lm_logprobs_no_batch.tolist()[0][0][:10]

[-35.62112045288086,
 -35.7050666809082,
 -18.2315616607666,
 -35.457122802734375,
 -23.163236618041992,
 -20.847986221313477,
 -19.243366241455078,
 -21.32706069946289,
 -20.555192947387695,
 -19.963027954101562]

In [116]:
lm_logprobs[0][:19] == lm_logprobs_no_batch

tensor([[[False, False, False,  ..., False, False, False],
         [ True, False, False,  ..., False, False,  True],
         [False,  True,  True,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')

classification_no_prefix

In [139]:
# Batched classifier run over whatever `predictions` currently holds.
prediction = predictions
# Append the EOS token string twice to every text before tokenizing.
prediction = [x + lossfns[1].tokenizer.eos_token + lossfns[1].tokenizer.eos_token for x in prediction]
prediction = lossfns[1].tokenizer.batch_encode_plus(prediction, add_special_tokens=True, return_tensors="pt", padding=True, truncation=True).to(config['device'])
# Note: this forward pass is not wrapped in torch.no_grad().
model_output = lossfns[1].model(**prediction)
lm_logits = model_output[0]
lm_logprobs = F.log_softmax(lm_logits, dim=-1)
# Negative log-probability of the target label configured for the second loss.
loss = -lm_logprobs[:, config['target_label_ids'][1]]

In [134]:
model_output[0].tolist()

[[1.2920401096343994, -1.2640713453292847],
 [0.5390777587890625, -0.5673075914382935]]

In [141]:
lm_logprobs.tolist()

[[-0.1484900563955307, -1.980563759803772],
 [-1.2528338432312012, -0.33644387125968933]]

In [142]:
# Single-sample classifier run: only the first entry of `predictions`.
prediction = [predictions[0]]
# Append the EOS token string twice to the text before tokenizing.
prediction = [x + lossfns[1].tokenizer.eos_token + lossfns[1].tokenizer.eos_token for x in prediction]
prediction = lossfns[1].tokenizer.batch_encode_plus(prediction, add_special_tokens=True, return_tensors="pt", padding=True, truncation=True).to(config['device'])
# Note: this forward pass is not wrapped in torch.no_grad().
model_output = lossfns[1].model(**prediction)
lm_logits = model_output[0]
lm_logprobs = F.log_softmax(lm_logits, dim=-1)
# Negative log-probability of the target label configured for the second loss.
loss = -lm_logprobs[:, config['target_label_ids'][1]]

In [143]:
model_output[0].tolist()

[[0.9087677001953125, -0.9233061075210571]]

In [144]:
lm_logprobs.tolist()

[[-0.1484900563955307, -1.9805638790130615]]