In [1]:
import gc

import numpy as np
import torch
import torch.nn as nn
from torch.optim import AdamW
from einops import rearrange

from transformers import GPTNeoXForCausalLM, AutoModelForCausalLM, AutoTokenizer
from transformers.generation.logits_process import LogitsProcessor, LogitsProcessorList


In [2]:
model = GPTNeoXForCausalLM.from_pretrained(
  "EleutherAI/pythia-1.4b-deduped",
  revision="step3000",
  cache_dir="./pythia-160m-deduped/step3000",
  device_map="auto",
  torch_dtype=torch.bfloat16
)

tokenizer = AutoTokenizer.from_pretrained(
  "EleutherAI/pythia-1.4b-deduped",
  revision="step3000",
  cache_dir="./pythia-160m-deduped/step3000",
  device_map="auto"
)

reverse_model = GPTNeoXForCausalLM.from_pretrained(
    "afterless/reverse-pythia-160m"
).cuda()

In [3]:
from prompt_optimizer import PromptOptimizer
from utils import reverse_generate

expected_output = " should never be president"
precursor_length = 25


Let's use GCG to generate a prompt that results in the language model outputting the phrase " should not be president"

In [4]:
initial = reverse_generate(reverse_model, tokenizer, expected_output, precursor_length)[0]
initial = initial[:initial.index(expected_output)]

prompt_opt = PromptOptimizer(model, tokenizer)
output = prompt_opt.optimize(initial, expected_output)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


To verify this is the case, let's put it back through the LM to check if it outputs the correct thing

In [5]:
output = output[:output.index(expected_output)]
print("Input to LM:", output)
lm_out = tokenizer.decode(
    model.generate(
        tokenizer.encode(output, return_tensors="pt").cuda(),
        num_beams=64,
        max_length=29,
        early_stopping=True
    )[0]
)
print("Output of LM:", lm_out)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Input to LM: growthneededshould is Nancy rightly altogether concernsishing----------------------------------------------------------------------------------------------------------------Make 337 NEVER presidentComment highlights incorrect aneurysmPM Setting him forever reasons that he
Output of LM: growthneededshould is Nancy rightly altogether concernsishing----------------------------------------------------------------------------------------------------------------Make 337 NEVER presidentComment highlights incorrect aneurysmPM Setting him forever reasons that he should never be president


The problem with this method is that the outputs are unnatural...