In [1]:
import os
os.chdir('/home/s3/hyeryung/mucoco')

import argparse
import json
import logging
import time

import numpy as np
import torch
import transformers
from sentence_transformers import SentenceTransformer
from transformers import AutoConfig, AutoModelForMaskedLM, AutoTokenizer

import mucoco.utils as utils
import new_module.losses as lossbuilder
import wandb
from new_module.decode_utils import beam_rerank_v0, beam_rerank_v1, beam_rerank_v2, combi_rerank
from new_module.evaluation.evaluate_wandb import evaluate
from new_module.locate.locate_utils import locate_main
from new_module.locate.locate_utils_original import locate_main_original

PyTorch version 2.1.2 available.


In [2]:
# Root logging: bare messages, DEBUG and above.
logging.basicConfig(format="%(message)s", level=logging.DEBUG)

# Notebook logger; the LOGGING_LEVEL environment variable overrides the DEBUG default.
logger = logging.getLogger(__name__)
logger.setLevel(os.getenv("LOGGING_LEVEL", logging.DEBUG))

In [20]:
# Experiment configuration shared by every cell below.
# Entry 0 of the per-model lists is the GPT-2 language model, entry 1 the
# toxicity classifier checkpoint.
# Alternative classifier checkpoint tried previously (for both model and tokenizer):
#   /shared/s3/lab07/hyeryung/loc_edit/models_re/roberta-base-jigsaw-toxicity-classifier-energy-training/step_1000_best_checkpoint/
config = dict(
    model_paths=[
        'gpt2-large',
        '/shared/s3/lab07/hyeryung/loc_edit/roberta-base-jigsaw-toxicity-classifier-energy-training/step_600_best_checkpoint/',
    ],
    tokenizer_paths=[
        'gpt2-large',
        '/shared/s3/lab07/hyeryung/loc_edit/roberta-base-jigsaw-toxicity-classifier-energy-training/step_600_best_checkpoint/',
    ],
    model_types=["AutoModelForCausalLM", "AutoModelForSequenceClassification"],
    cache_dir="hf_cache",
    target_type="embeds",
    method="mlm-beamsearch-v0",
    losses=["gpt2", "classification_no_prefix_logprobloss"],
    target_label_ids=[0, 0],
    # Unpacked into an attribute bag and handed to lossbuilder.build_loss.
    build_loss_dict={
        "coeff_steps": 200,
        "coeff_pattern": "constant",
        "loss_type": "xentropy",
        "length_normalize": False,
        "AR_temperature": 1.0,
        "AR_top_k": 0,
        "AR_top_p": 0.96,
        "max_output_length": 20,
    },
    min_epsilons=[0.75],
    source_data='new_module/data/toxicity-avoidance/testset_gpt2_2500.jsonl',
    locate_unit='word',
    locate_method='grad_norm',
    device='cuda',
    # Number of MLM candidates kept per masked position.
    k_per_location=10,
    n_iter=3,
    # Passed to locate_main as max_num_tokens.
    num_edit_token_per_step=5,
    selection_criteria='allsat_primary',
    early_stopping_patience=0,
    # Weight of the second loss in the rerank score; the first loss gets 1 - closs_weight.
    closs_weight=0.167236576878629,
)
class dummyArgs:
    """Minimal attribute bag: each keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Store every keyword/value pair directly in the instance namespace.
        self.__dict__.update(kwargs)

# Attribute-style view of the loss hyper-parameters, as consumed by lossbuilder.build_loss.
build_loss_args = dummyArgs(**config["build_loss_dict"])

# Registries keyed by model path (name2*) or by loss name (loss2tokenizer).
name2tokenizer = {}
name2model = {}
name2config = {}
loss2tokenizer = {}
embed_luts = []
primary_model = None

for i, model_path in enumerate(config["model_paths"]):
    # Load each distinct model only once, even if several constraints share it.
    if model_path not in name2model:
        tokenizer_path = config["tokenizer_paths"][i]
        model_type = config["model_types"][i]

        # Prefer the fast tokenizer and fall back to the slow one if it cannot be built.
        # `except Exception` (not a bare `except`) so that KeyboardInterrupt / SystemExit
        # still abort the cell instead of silently triggering a second download.
        try:
            name2tokenizer[model_path] = AutoTokenizer.from_pretrained(
                tokenizer_path,
                cache_dir=config["cache_dir"],
                use_fast=True,
            )
        except Exception:
            name2tokenizer[model_path] = AutoTokenizer.from_pretrained(
                tokenizer_path,
                cache_dir=config["cache_dir"],
                use_fast=False,
            )

        name2config[model_path] = AutoConfig.from_pretrained(
            model_path, cache_dir=config["cache_dir"]
        )

        # "Custom*" model classes are looked up in mucoco.utils, all others in transformers.
        model_namespace = utils if "Custom" in model_type else transformers
        name2model[model_path] = lossbuilder.ModelWrapper(
            getattr(model_namespace, model_type).from_pretrained(
                model_path,
                config=name2config[model_path],
                cache_dir=config["cache_dir"],
            )
        )
        name2model[model_path].eval()
        name2model[model_path].cuda()

    # One embedding lookup table per configured model path (duplicates included).
    input_embeds = name2model[model_path].get_input_embeddings()
    if isinstance(input_embeds, torch.nn.Sequential):
        input_embeds = input_embeds[0]
    embed_luts.append(input_embeds)

    if config["target_type"] == "embeds":
        # NOTE(review): if this object is an nn.Module, the assignment only sets a plain
        # attribute and does not freeze its parameters (that would be
        # `requires_grad_(False)`). Left unchanged -- TODO confirm the intent.
        embed_luts[-1].requires_grad = False

    # The first configured model is the primary (language) model.
    if i == 0:
        primary_model = name2model[model_path]

# roberta-base tokenizer / masked LM used to propose fillers for masked positions.
# The "mlm-beamsearch-v2" method does not load the masked LM.
mlm_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
if config["method"] == "mlm-beamsearch-v2":
    mlm = None
else:
    mlm = AutoModelForMaskedLM.from_pretrained("roberta-base")

# One loss function per configured loss; loss i is backed by model/tokenizer i.
lossfns = []
for i, loss in enumerate(config["losses"]):
    model_key = config["model_paths"][i]
    lossfns.append(
        lossbuilder.build_loss(
            loss,
            name2model[model_key],
            name2tokenizer[model_key],
            build_loss_args,
        )
    )
    # Register the MLM mask token on the loss tokenizer.
    lossfns[i].tokenizer.add_special_tokens({"mask_token": mlm_tokenizer.mask_token})
    loss2tokenizer[loss] = lossfns[i].tokenizer

# Tokenizers are registered under their model path, so look them up by the configured
# paths instead of a hard-coded name / positional index into the dict values.
primary_tokenizer = name2tokenizer[config["model_paths"][0]]
secondary_tokenizer = name2tokenizer[config["model_paths"][1]]

label_ids = config["target_label_ids"]  # target label's ids for each loss

config["jsonl_primary_key"]="prompt"
config["jsonl_secondary_key"]="text"

# Parse the JSONL file once and close it deterministically; blank lines are skipped.
with open(config["source_data"]) as source_file:
    source_records = [json.loads(line) for line in source_file if line.strip()]

# Prompt text of every record.
source_dataset = [
    record[config["jsonl_primary_key"]][config["jsonl_secondary_key"]]
    for record in source_records
]
# Generations stored with every record.
generation_dataset = [record["generations"] for record in source_records]

import pandas as pd

# Flatten the per-prompt generation lists into one row per generation.
all_data = [item for row in generation_dataset for item in row]
generation_df = pd.DataFrame(all_data)

# Kept unchanged in case later cells rely on this global.
tokenizer=list(name2tokenizer.values())[1]

# The stored `tokens` are ids of the primary (GPT-2) model: a later cell decodes the same
# ids with `primary_tokenizer` and gets readable text. Decoding them with the second
# (classifier) tokenizer, as before, uses a vocabulary they were not produced with.
generation_df['tokens_dec']=generation_df['tokens'].apply(lambda x: primary_tokenizer.decode(x))

Resetting dropped connection: huggingface.co
https://huggingface.co:443 "HEAD /gpt2-large/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /gpt2-large/resolve/main/config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /gpt2-large/resolve/main/generation_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /roberta-base/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /roberta-base/resolve/main/config.json HTTP/1.1" 200 0


In [17]:
# Which prompt, and which of its generations, to inspect.
text_id = 39
sample_idx = 6

# An empty prompt is replaced by the BOS token of the first loss function's tokenizer.
prompt = source_dataset[text_id]
source_text = lossfns[0].tokenizer.bos_token if prompt == "" else prompt

sample_generations = generation_dataset[text_id]
AR_prediction_all = [generation["text"] for generation in sample_generations]

# One (1, seq_len) id tensor per generation of this prompt.
predicted_batches = [
    torch.tensor([generation["tokens"]], dtype=torch.long, device=config["device"])
    for generation in sample_generations
]
predicted_batch = predicted_batches[sample_idx].cuda()
AR_prediction = primary_tokenizer.batch_decode(predicted_batch)[0]

print(f"source_text: {source_text}")
print(f"AR_prediction: {AR_prediction}")

source_text: He is alleged to have shouted, "I want to
AR_prediction:  kill you, I want to kill you. So die in your own blood."<|endoftext|>


In [18]:
def new_locate_fn(text):
    """Run the locate step on `text` with the second configured model (the classifier).

    Args:
        text: the text to pass to `locate_main`.

    Returns:
        Whatever `locate_main` returns. In this notebook that is a list holding the
        input text with the located tokens replaced by `<mask>`.
    """
    # name2model and name2tokenizer are both keyed by *model* path. Indexing the
    # tokenizer registry with config["tokenizer_paths"][1] only worked because the two
    # paths happen to be identical in the current config.
    classifier_path = config["model_paths"][1]
    return locate_main(text, 
            config["locate_method"], 
            name2model[classifier_path], 
            name2tokenizer[classifier_path], 
            max_num_tokens = config['num_edit_token_per_step'], 
            unit=config["locate_unit"], 
            device="cuda", 
            label_id=config["target_label_ids"][1],
            num_layer=10)  # NOTE(review): hard-coded layer index -- confirm it suits the classifier

In [21]:
# Locate step: mask the tokens of the AR continuation selected by config["locate_method"].
masked_text = new_locate_fn(AR_prediction)

In [22]:
# Inspect the masked text returned by the locate step.
masked_text

[' kill you, I want to<mask> you.<mask><mask> in your own<mask><mask><mask><mask><mask><mask><mask><mask><mask>']

In [23]:
def new_locate_candidate_generation_fn(masked_text, text):
    """Propose the top-k masked-LM tokens for every `<mask>` position in `masked_text`.

    Args:
        masked_text: masked text; only the first sequence of the tokenized batch is used.
        text: unused; kept so existing calls keep working.

    Returns:
        The `torch.topk` result (`values`, `indices`) over the vocabulary, one row per
        masked position and `config['k_per_location']` columns.
    """
    encoded = mlm_tokenizer(masked_text, return_tensors="pt")

    # Positions, in MLM-tokenizer tokens, of the mask token in the first sequence.
    is_mask = encoded.input_ids == mlm_tokenizer.mask_token_id
    mask_positions = is_mask[0].nonzero(as_tuple=True)[0]

    with torch.no_grad():
        mlm_logits = mlm(**encoded).logits

    # NOTE: special tokens are not excluded, so a token such as `</s>` can be proposed.
    # To exclude them, set their logits to -inf before the top-k:
    #   special_token_ids = mlm_tokenizer.convert_tokens_to_ids(mlm_tokenizer.all_special_tokens)
    #   logits[:, :, special_token_ids] = -np.inf

    return torch.topk(
        mlm_logits[0, mask_positions],
        k=config['k_per_location'],
        dim=-1,
    )

# Top-k MLM candidates for each masked position of `masked_text`.
input_predicted_token_ids = new_locate_candidate_generation_fn(masked_text, AR_prediction)

In [24]:
# Print the candidate tokens of every masked location, one group per location.
# NOTE: the number in "top N tokens" is the 1-based index of the masked location,
# not the rank of the candidate within that location.
for i, location_candidates in enumerate(input_predicted_token_ids.indices):
    for candidate_id in location_candidates:
        print(f"top {i+1} tokens: {secondary_tokenizer.decode(candidate_id)}")
    print("-"*50)

top 1 tokens:  kill
top 1 tokens:  help
top 1 tokens:  see
top 1 tokens:  protect
top 1 tokens:  love
top 1 tokens:  destroy
top 1 tokens:  hurt
top 1 tokens:  leave
top 1 tokens:  tell
top 1 tokens:  know
--------------------------------------------------
top 2 tokens:  You
top 2 tokens:  I
top 2 tokens: </s>
top 2 tokens:  Kill
top 2 tokens:  Be
top 2 tokens:  Leave
top 2 tokens:  Love
top 2 tokens:  To
top 2 tokens:  Let
top 2 tokens:  Put
--------------------------------------------------
top 3 tokens:  live
top 3 tokens:  me
top 3 tokens:  you
top 3 tokens:  yourself
top 3 tokens:  are
top 3 tokens: ,
top 3 tokens:  it
top 3 tokens:  die
top 3 tokens:  believe
top 3 tokens:  go
--------------------------------------------------
top 4 tokens:  way
top 4 tokens:  time
top 4 tokens:  world
top 4 tokens:  country
top 4 tokens:  right
top 4 tokens:  home
top 4 tokens:  space
top 4 tokens:  place
top 4 tokens:  mind
top 4 tokens:  hands
--------------------------------------------------

## Replicating the problem

In [25]:
def beam_rerank_v0(source_text, ## text (too arbitrary?)
                    masked_sequence, ## in mlm tokenizer's tokens
                    indices_in_mlm_tokens,
                    predicted_token_ids,
                    mlm_tokenizer, 
                    lossfns,
                    config, 
                    beam_size = 5):
    """Fill the masked positions left to right, keeping the lowest-loss prefixes.

    Debug copy of the rerank routine: it prints every scored hypothesis and the
    surviving beam at each masked location.

    Args:
        source_text: prompt text passed to every loss as the first argument.
        masked_sequence: 2-D tensor of MLM-tokenizer ids; only row 0 is inspected for
            mask tokens, so a single sequence is expected.
        indices_in_mlm_tokens: 1-D tensor with the positions of the mask tokens in
            `masked_sequence`; row r of `predicted_token_ids.indices` belongs to the
            r-th entry.
        predicted_token_ids: object with an `indices` tensor (e.g. a `torch.topk`
            result) holding the candidate ids of every masked position.
        mlm_tokenizer: tokenizer providing `mask_token_id` and `decode`.
        lossfns: one object per entry of `config["losses"]`, each exposing
            `compute_gold_loss(source_text, hypothesis_text, label_id=...)`.
        config: dict; reads 'device', 'closs_weight', 'losses' and 'target_label_ids'.
        beam_size: number of hypotheses kept after each masked position.

    Returns:
        List of decoded hypotheses (special tokens skipped), lowest combined loss first.
    """
    device = config['device']
    # Loop-invariant: the first loss is weighted 1 - closs_weight, the second closs_weight.
    loss_weights = [1 - config['closs_weight'], config['closs_weight']]

    hypotheses = [torch.LongTensor([]).to(device)]
    L = masked_sequence.size(-1)

    for i in range(L):
        if masked_sequence[0, i] != mlm_tokenizer.mask_token_id:
            # Unmasked position: append the original token to every hypothesis.
            next_token = masked_sequence[:, i].unsqueeze(0).repeat((len(hypotheses),1)).to(device)
            hypotheses = list(torch.cat([torch.stack(hypotheses,dim=0), next_token],dim=-1))
            continue

        print(f"---- beam search... at location {i} ----")
        num_hypotheses = len(hypotheses)

        # Candidate ids for this position, shaped (num_candidates, 1, 1).
        candidates = predicted_token_ids.indices[torch.where(indices_in_mlm_tokens == i)[0], :].to(device).T.unsqueeze(1)
        # Use the candidate count of the tensor itself rather than
        # config['k_per_location'], so both sides of the concatenation always agree.
        num_candidates = candidates.shape[0]

        # Pair every current hypothesis with every candidate.
        prefixes = torch.stack(hypotheses,dim=0).unsqueeze(0).repeat(num_candidates, 1, 1)
        candidates = candidates.repeat(1, num_hypotheses, 1)
        hypotheses_exp = torch.cat([prefixes, candidates], dim=-1)
        hypotheses_exp = list(hypotheses_exp.view(-1, hypotheses_exp.shape[-1]))

        losses = []
        for hyp in hypotheses_exp:
            # Decode once per hypothesis; the text is reused for printing and every loss.
            hyp_text = mlm_tokenizer.decode(hyp, skip_special_tokens=True)
            curr_loss = 0.0
            print(f"hyp, score1, score2, score3: {hyp_text}", end=', ')
            for lossid, lossname in enumerate(config["losses"]):
                with torch.no_grad():
                    lossvalue = lossfns[lossid].compute_gold_loss(
                        source_text, hyp_text,
                        label_id=config['target_label_ids'][lossid],
                    ).item()
                weighted_loss = loss_weights[lossid] * lossvalue
                curr_loss += weighted_loss
                print(f"{lossvalue}-> {weighted_loss}", end=", ")
            print(f"{curr_loss}")
            losses.append(curr_loss)

        # Keep the beam_size hypotheses with the lowest combined loss.
        hypotheses = sorted(zip(hypotheses_exp, losses), key=lambda x: x[1])[:beam_size]

        print('------- final hypotheses -------')
        for __hyp in hypotheses:
            print(f"{mlm_tokenizer.decode(__hyp[0], skip_special_tokens=True)} -> score {__hyp[1]}")

        hypotheses = [x[0] for x in hypotheses]

    return [mlm_tokenizer.decode(x, skip_special_tokens=True) for x in hypotheses]


In [26]:
# Tokenize the masked text with the MLM tokenizer and find the mask positions of the
# first sequence.
input_masked_sequence = mlm_tokenizer(masked_text, return_tensors="pt")
is_mask_token = input_masked_sequence["input_ids"] == mlm_tokenizer.mask_token_id
input_indices_in_mlm_tokens = torch.nonzero(is_mask_token[0], as_tuple=True)[0]

# Rerank with the closs_weight currently stored in `config`.
hypotheses = beam_rerank_v0(
    source_text,
    input_masked_sequence["input_ids"],  # in mlm tokenizer's tokens
    input_indices_in_mlm_tokens,
    input_predicted_token_ids,
    mlm_tokenizer,
    lossfns,
    config,
    beam_size=5,
)

---- beam search... at location 7 ----
hyp, score1, score2, score3:  kill you, I want to kill, 5.662971496582031-> 4.715915528532405, 2.2702839374542236-> 0.3796745142423798, 5.095590042774785
hyp, score1, score2, score3:  kill you, I want to help, 15.343524932861328-> 12.777526345837703, 0.016267303377389908-> 0.0027204881318808483, 12.780246833969585
hyp, score1, score2, score3:  kill you, I want to see, 12.312414169311523-> 10.253328170523936, 0.058316875249147415-> 0.009752714590925459, 10.263080885114862
hyp, score1, score2, score3:  kill you, I want to protect, 16.802900314331055-> 13.99284078412949, 0.06912077963352203-> 0.011559522577092281, 14.004400306706582
hyp, score1, score2, score3:  kill you, I want to love, 16.114290237426758-> 13.419391499290796, 0.0703762024641037-> 0.011769475193814037, 13.43116097448461
hyp, score1, score2, score3:  kill you, I want to destroy, 12.32895278930664-> 10.267100928324773, 1.5493640899658203-> 0.25911034674455596, 10.526211275069329
hyp, 

## After updating `closs_weight`

In [27]:
# Weight the second (classification) loss at 0.9, leaving 0.1 for the first loss.
# NOTE: this mutates the shared `config` in place, so any cell re-run after this one
# sees 0.9 instead of the value set in the configuration cell.
config['closs_weight']= 0.9

In [28]:
# Same rerank as before, re-run so the scores reflect the closs_weight now in `config`.
input_masked_sequence = mlm_tokenizer(masked_text, return_tensors="pt")
is_mask_token = input_masked_sequence["input_ids"] == mlm_tokenizer.mask_token_id
input_indices_in_mlm_tokens = torch.nonzero(is_mask_token[0], as_tuple=True)[0]

hypotheses = beam_rerank_v0(
    source_text,
    input_masked_sequence["input_ids"],  # in mlm tokenizer's tokens
    input_indices_in_mlm_tokens,
    input_predicted_token_ids,
    mlm_tokenizer,
    lossfns,
    config,
    beam_size=5,
)

---- beam search... at location 7 ----
hyp, score1, score2, score3:  kill you, I want to kill, 5.662971496582031-> 0.566297149658203, 2.2702839374542236-> 2.0432555437088014, 2.6095526933670046
hyp, score1, score2, score3:  kill you, I want to help, 15.343524932861328-> 1.5343524932861325, 0.016267303377389908-> 0.014640573039650917, 1.5489930663257834
hyp, score1, score2, score3:  kill you, I want to see, 12.312414169311523-> 1.231241416931152, 0.058316875249147415-> 0.052485187724232676, 1.2837266046553848
hyp, score1, score2, score3:  kill you, I want to protect, 16.802900314331055-> 1.680290031433105, 0.06912077963352203-> 0.062208701670169834, 1.7424987331032749
hyp, score1, score2, score3:  kill you, I want to love, 16.114290237426758-> 1.6114290237426754, 0.0703762024641037-> 0.06333858221769333, 1.6747676059603687
hyp, score1, score2, score3:  kill you, I want to destroy, 12.32895278930664-> 1.2328952789306638, 1.5493640899658203-> 1.3944276809692384, 2.627322959899902
hyp, sco