In [1]:
import torch
import torch.nn.functional as F
import os
import openai
import numpy as np
import random
import time
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForMaskedLM, pipeline

In [2]:
def generate_step(out, gen_idx, temperature=None, top_k=0, sample=False, return_list=False):
    """ Generate a word from out[gen_idx]

    args:
        - out (torch.Tensor): tensor of logits of size batch_size x seq_len x vocab_size
        - gen_idx (int): location for which to generate for
        - temperature (float or None): if given, logits are divided by it before
          a token is chosen; must be strictly positive
        - top_k (int): if >0, only sample from the top k most probable words
          (capped at the vocabulary size)
        - sample (Bool): if True, sample from full distribution. Overridden by top_k
        - return_list (Bool): if True, return ``idx.tolist()`` instead of the tensor

    returns:
        the chosen token id(s) for position ``gen_idx``, as a tensor, or as the
        result of ``tolist()`` when ``return_list`` is True

    raises:
        ValueError: if ``temperature`` is given and is not > 0
    """
    logits = out[:, gen_idx]
    if temperature is not None:
        # Zero would produce inf/NaN logits and a negative value would invert
        # the ranking, so reject both up front.
        if temperature <= 0:
            raise ValueError(f"temperature must be > 0, got {temperature}")
        logits = logits / temperature
    if top_k > 0:
        # topk() raises if k is larger than the dimension, so cap it.
        k = min(top_k, logits.size(-1))
        kth_vals, kth_idx = logits.topk(k, dim=-1)
        dist = torch.distributions.categorical.Categorical(logits=kth_vals)
        # Map the sampled rank (0..k-1) back to the real vocabulary id.
        idx = kth_idx.gather(dim=1, index=dist.sample().unsqueeze(-1)).squeeze(-1)
    elif sample:
        dist = torch.distributions.categorical.Categorical(logits=logits)
        # NOTE: with batch_size == 1 this squeeze collapses the result to a
        # 0-dim tensor; kept as-is because callers rely on it.
        idx = dist.sample().squeeze(-1)
    else:
        idx = torch.argmax(logits, dim=-1)
    return idx.tolist() if return_list else idx

In [3]:
# Masked language model and its tokenizer, both loaded from the same checkpoint name.
model_name = "roberta-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForMaskedLM.from_pretrained(model_name)
# Switch to evaluation mode (disables the Dropout layers); as the last
# expression of the cell this also displays the module structure.
model.eval()

RobertaForMaskedLM(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (

In [4]:
def get_init_text(text, tokenizer, model):
    """
    Mask every token before the first '?' in `text` and fill those positions
    by sampling from the masked language model.

    text: a string of text with a question and answer
    tokenizer: a HuggingFace tokenizer
    model: a HuggingFace model

    returns: a tuple (filled_toks, filled_text, question_mark_index) where
        - filled_toks is the list of tokens after infilling (the first and
          last ids of the encoded sequence are dropped)
        - filled_text is filled_toks converted back to a string
        - question_mark_index is the index of the first '?' token in
          tokenizer.tokenize(text)

    raises: ValueError if the tokenized text contains no '?' token
    """
    init_tokens = tokenizer.tokenize(text)
    if '?' not in init_tokens:
        raise ValueError("text must contain a '?' token separating the question from the answer")
    question_mark_index = init_tokens.index('?')

    # Replace the whole question (everything before '?') with mask tokens.
    masked_tokens = [tokenizer.mask_token] * question_mark_index + init_tokens[question_mark_index:]
    masked_init_text = tokenizer.convert_tokens_to_string(masked_tokens)

    input_ids = tokenizer(masked_init_text, return_tensors='pt')["input_ids"]
    # Inference only: no need to build an autograd graph.
    with torch.no_grad():
        output_logits = model(input_ids).logits

    pred_ids = input_ids[0].tolist()

    # Token i is read at id position i+1 -- assumes the tokenizer call
    # prepends exactly one special token; TODO confirm for other tokenizers.
    for i in range(question_mark_index):
        # int() keeps pred_ids a plain list of Python ints instead of mixing
        # ints and tensors.
        pred_ids[i+1] = int(generate_step(output_logits, i+1, sample=True))

    filled_toks = tokenizer.convert_ids_to_tokens(pred_ids[1:-1])
    filled_text = tokenizer.convert_tokens_to_string(filled_toks)

    return filled_toks, filled_text, question_mark_index

In [5]:
# Example question/answer pair; the part before the first '?' is what gets masked and re-sampled.
text = "Do the medicine people charge too much money for the special medicine that saves lives? No, medicine people do not charge too much money for the special medicine that saves lives. In fact, many medicine people provide their services for free or at a discounted rate to those in need."
# get_init_text returns (tokens, string, index of the first '?' token); all three are reused by the MH cell.
filled_toks, filled_text, question_mark_index = get_init_text(text, tokenizer, model)

print("Original Text:", text)
print("Infilled Text:", filled_text)

Original Text: Do the medicine people charge too much money for the special medicine that saves lives? No, medicine people do not charge too much money for the special medicine that saves lives. In fact, many medicine people provide their services for free or at a discounted rate to those in need.
Infilled Text: Do that someone don questions tend those that medicine and health other and? medicine? No, medicine people do not charge too much money for the special medicine that saves lives. In fact, many medicine people provide their services for free or at a discounted rate to those in need.


In [5]:
# def get_seq_log_probability(text):
#     """
#     text: a string of text 

#     returns: the sum of the log probabilities of each token in the text
#     """

#     tokens = tokenizer.tokenize(text)
#     # prod = 1
#     sum = 0

#     for i, token in enumerate(tokens):
#         masked_tokens = tokens.copy()
#         masked_tokens[i] = tokenizer.mask_token
#         masked_text = tokenizer.convert_tokens_to_string(masked_tokens)
#         # print(masked_text)
#         inputs = tokenizer(masked_text, return_tensors="pt")['input_ids'] 
#         # print(inputs)

#         masked_index = torch.nonzero(inputs == tokenizer.mask_token_id)[0][-1]
#         outputs = model(inputs)

#         logits = outputs.logits[0, masked_index.item(), :]
#         probs = F.log_softmax(logits, dim=-1)
#         token_id = tokenizer.convert_tokens_to_ids(token)
#         token_prob = probs[token_id]

#         # prod *= token_prob.item()
#         sum += token_prob.item()

#     return sum

In [6]:
# init_prob = get_seq_log_probability(filled_text)
# init_prob

In [7]:
# def get_gpt_logprobs(tokens):
#     """
#     Arguments:
#       - tokens: tokens of the sequence
#       - question_mark_index: index of '?' in the token list that separates the question and answer tokens

#     Returns: 
#       - the sum of token logprobs from GPT
#     """
#     # prefix = tokenizer.convert_tokens_to_string(tokens[:question_mark_index+1])
#     # completion = tokenizer.convert_tokens_to_string(tokens[question_mark_index+1:])
#     sequence = tokenizer.convert_tokens_to_string(tokens)

#     response = openai.Completion.create(model='text-davinci-003',
#                                         prompt=sequence,
#                                         max_tokens=0,
#                                         logprobs=1,
#                                         echo=True
#                                         )
#     # print(response.choices[0].logprobs.token_logprobs) 
#     return sum(filter(None, response.choices[0].logprobs.token_logprobs))

In [31]:
# prompt = "what is going on?"

# response = openai.Completion.create(model='gpt-3.5-turbo-instruct', prompt=prompt, max_tokens=10, logprobs=1)


# # print(np.exp(sum(response.choices[0].logprobs.token_logprobs)))



In [6]:
from transformers import AutoModelForCausalLM
import torch.nn.functional as F
import numpy as np

# Location of the LLaMA-2 checkpoint. Defaults to the original cluster path;
# set the LLAMA_MODEL_PATH environment variable to point somewhere else.
LLAMA_MODEL_PATH = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/vast/work/public/ml-datasets/llama-2/Llama-2-7b-hf",
)

# Causal LM (and matching tokenizer) used to score candidate sequences.
model_llama = AutoModelForCausalLM.from_pretrained(LLAMA_MODEL_PATH)
tokenizer_llama = AutoTokenizer.from_pretrained(LLAMA_MODEL_PATH)

# prompt = "Hey what's up? What happened to the case?"

# encoded = tokenizer_llama(prompt, return_tensors="pt")
# input_ids = ["input_ids"]

# output = model_llama(input_ids=input_ids, max_length=10, do_sample=True, temperature=1.0, output_scores=True)

# neglecting the first token, since we make no prediction about it
# output_llama = model_llama.generate(input_ids, max_length=100, do_sample=True, top_k=50, top_p=0.95, temperature=1.0, output_scores=True, return_dict_in_generate=True)

# # transition_scores = model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True)

# output_length = inputs.input_ids.shape[1] + np.sum(transition_scores.numpy() < 0, axis=1)
# # length_penalty = model.generation_config.length_penalty

# probabilities = torch.exp(transition_scores.sum(axis=1))

# print(tokenizer.batch_decode(output_llama.sequences[0]))
# print(probabilities)


: 

In [None]:
def logprobs_from_prompt(prompt, tokenizer, model):
    """Return the total log-probability a causal LM assigns to `prompt`.

    The prompt is tokenized, run through the model once, and the
    log-probability of each token given the tokens before it is summed.
    The first token is skipped because the model makes no prediction for it.

    args:
        - prompt (str): text to score
        - tokenizer: HuggingFace tokenizer matching `model`
        - model: HuggingFace causal language model (its output must expose `.logits`)

    returns:
        float: sum of the per-token log-probabilities (0.0 if the encoded
        prompt has a single token)
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded["input_ids"]

    # Run on whatever device the model lives on (CPU in the original notebook).
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # Scoring only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_ids=input_ids).logits

    # Logits at position i predict token i+1, so drop the last logit row and
    # the first label to line them up.
    shift_logits = logits[0, :-1, :]
    shift_labels = input_ids[0, 1:]

    token_logprobs = F.log_softmax(shift_logits.float(), dim=-1)
    picked = token_logprobs.gather(-1, shift_labels.unsqueeze(-1)).squeeze(-1)
    return float(picked.sum())

# Smoke test of the scorer. `prompt` was only defined in commented-out code,
# so define it here to keep the cell runnable on a fresh kernel.
prompt = "Hey what's up? What happened to the case?"
print(logprobs_from_prompt(prompt, tokenizer_llama, model_llama))

In [1]:
# MH algorithm
# Metropolis-Hastings over the question tokens: each position before the '?'
# is masked in turn, a replacement is proposed by the masked LM, and the move
# is accepted or rejected using the causal-LM score of the whole sequence.
# Requires the earlier cells (filled_toks, filled_text, question_mark_index,
# tokenizer/model, tokenizer_llama/model_llama) to have been run first.
import numpy as np

num_iters = 10  # renamed from `iter`, which shadowed the built-in iter()
top_k = 5  # NOTE: currently unused -- the generate_step call below does not pass top_k
sample = True

# delta
# max_patience

accepted = 0

current_tokens = filled_toks.copy()
current_seq = filled_text
# Defined up front so the per-iteration print works even if no position is visited.
proposal_seq = current_seq

# Score the starting sequence once; it is updated on every accepted move, so
# there is no need to re-run the causal LM at the top of each iteration.
curr_seq_energy = logprobs_from_prompt(current_seq, tokenizer_llama, model_llama)

for i in range(num_iters):
    print("Iteration:", i)
    print("Current Sequence: ", current_seq)

    for t in range(question_mark_index):
        old_token = current_tokens[t]
        # Mask a copy: a rejected proposal must leave current_tokens untouched
        # (masking in place left a stray mask token behind after a rejection).
        masked_tokens = current_tokens.copy()
        masked_tokens[t] = tokenizer.mask_token

        ids = tokenizer(tokenizer.convert_tokens_to_string(masked_tokens), return_tensors='pt')["input_ids"]
        with torch.no_grad():
            logits = model(ids).logits
        # Token t is read at id position t+1 -- assumes the tokenizer prepends
        # exactly one special token and that re-tokenizing the string keeps
        # the same alignment; TODO confirm.
        position_logits = logits[0, t+1, :]
        probs = position_logits.softmax(dim=0)
        log_probs = position_logits.log_softmax(dim=0)

        old_token_id = tokenizer.convert_tokens_to_ids(old_token)
        new_token_id = generate_step(logits, t+1, temperature=1.0, sample=sample).item()
        print(f"Old token: {old_token}, New token: {tokenizer.convert_ids_to_tokens(new_token_id)}")
        old_token_prob = probs[old_token_id].item()
        new_token_prob = probs[new_token_id].item()

        ids[0][t+1] = new_token_id

        proposal_tokens = tokenizer.convert_ids_to_tokens(ids[0].tolist()[1:-1])
        proposal_seq = tokenizer.convert_tokens_to_string(proposal_tokens)
        proposal_seq_energy = logprobs_from_prompt(proposal_seq, tokenizer_llama, model_llama)

        print(f"Curr seq logprobs: {curr_seq_energy}, Proposal seq logprobs: {proposal_seq_energy}, New token prob: {new_token_prob}, Old token prob: {old_token_prob}")

        u = np.random.uniform(0, 1)
        # Acceptance ratio in log space: target ratio plus the proposal
        # correction q(old)/q(new). Clamping at 0 before exp() gives
        # min(1, ratio) without overflowing.
        log_alpha = (proposal_seq_energy - curr_seq_energy) + (
            log_probs[old_token_id].item() - log_probs[new_token_id].item()
        )
        alpha = np.exp(min(0.0, log_alpha))
        if u <= alpha:
            current_seq = proposal_seq
            current_tokens = proposal_tokens.copy()
            curr_seq_energy = proposal_seq_energy
            accepted += 1
    print("Proposal Sequence: ", proposal_seq)

# Report the chain's current state, not the last (possibly rejected) proposal.
print("Final sequence: ", current_seq)
total_proposals = num_iters * question_mark_index
acceptance_pct = accepted / total_proposals * 100 if total_proposals else 0.0
print("Acceptance rate:", acceptance_pct, "%")

NameError: name 'filled_toks' is not defined