In [1]:
def refresh_repo():
    """Replace /kaggle/working/hotflip with a fresh clone of the hotflip repository.

    Relies on IPython magics (``%cd``, ``%rm``) and shell escapes (``!git``), so it
    only works inside an IPython/Jupyter kernel. The ``%cd`` calls change the kernel's
    working directory, which is left at /kaggle/working/hotflip/ afterwards.
    """
    %cd /kaggle/working
    # Drop any previous checkout so the clone below does not collide with it.
    %rm -rf hotflip
    !git clone https://github.com/jefri021/hotflip.git
    %cd /kaggle/working/hotflip/
    # NOTE(review): this runs right after a fresh clone, so it is presumably a no-op — confirm before removing.
    !git pull origin main

refresh_repo()

/kaggle/working
Cloning into 'hotflip'...
remote: Enumerating objects: 198, done.[K
remote: Counting objects: 100% (198/198), done.[K
remote: Compressing objects: 100% (125/125), done.[K
remote: Total 198 (delta 90), reused 162 (delta 54), pack-reused 0 (from 0)[K
Receiving objects: 100% (198/198), 9.00 MiB | 4.02 MiB/s, done.
Resolving deltas: 100% (90/90), done.
/kaggle/working/hotflip
From https://github.com/jefri021/hotflip
 * branch            main       -> FETCH_HEAD
Already up to date.


In [2]:
import torch
import json
import os
import logging
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch.nn.functional as F

def load_model(model_filepath: str, torch_dtype:torch.dtype=torch.float16):
    """Load a causal LM and its tokenizer from a model directory.

    The directory must contain ``reduced-config.json``. Its ``use_lora`` entry selects
    the layout that is read:
      - truthy: base weights from ``base-model/`` plus an adapter from ``fine-tuned-model/``
      - falsy:  a full checkpoint from ``fine-tuned-model/``
    The tokenizer is always read from ``tokenizer/``.

    Args:
        model_filepath: str - Path to where the model is stored
        torch_dtype: torch.dtype - dtype forwarded to ``from_pretrained`` (default fp16)

    Returns:
        model, tokenizer - the model (switched to eval mode) and its tokenizer

    Raises:
        FileNotFoundError: if ``reduced-config.json`` is missing.
        KeyError: if the config has no ``use_lora`` entry.
    """

    conf_filepath = os.path.join(model_filepath, 'reduced-config.json')
    logging.info("Loading config file from: {}".format(conf_filepath))
    with open(conf_filepath, 'r') as fh:
        round_config = json.load(fh)

    logging.info("Loading model from filepath: {}".format(model_filepath))
    # Everything is loaded from local files only:
    # https://huggingface.co/docs/transformers/installation#offline-mode
    load_kwargs = dict(
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch_dtype,
        local_files_only=True,
    )
    # Both layouts keep the fine-tuned artefact (adapter or full checkpoint) here.
    fine_tuned_model_filepath = os.path.join(model_filepath, 'fine-tuned-model')

    if round_config['use_lora']:
        base_model_filepath = os.path.join(model_filepath, 'base-model')
        logging.info("loading the base model (before LORA) from {}".format(base_model_filepath))
        model = AutoModelForCausalLM.from_pretrained(base_model_filepath, **load_kwargs)

        logging.info("loading the LORA adapter onto the base model from {}".format(fine_tuned_model_filepath))
        model.load_adapter(fine_tuned_model_filepath)
    else:
        # Placement is decided by device_map="auto", so the message no longer claims "cpu".
        logging.info("Loading full fine tune checkpoint from {}".format(fine_tuned_model_filepath))
        model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_filepath, **load_kwargs)

    model.eval()

    tokenizer_filepath = os.path.join(model_filepath, 'tokenizer')
    # local_files_only keeps the tokenizer load consistent with the offline model loads.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_filepath, local_files_only=True)

    return model, tokenizer


def _two_gpu_max_memory(headroom_gb=2, total_gb=16):
    """
    Build a per-GPU ``max_memory`` mapping that leaves some headroom on every device.

    Reserving headroom is meant to make HF sharding split the model across the GPUs
    instead of filling one of them completely.

    Args:
        headroom_gb: GiB to leave unused on each GPU.
        total_gb: nominal capacity of each GPU in GiB. The default of 16 matches the
            16GB T4s this notebook was written for.

    Returns:
        None when CUDA is unavailable, otherwise a dict mapping every visible CUDA
        device index to the same cap string, e.g. ``{0: "14GiB", 1: "14GiB"}``.
    """
    if not torch.cuda.is_available():
        return None
    cap = f"{total_gb - headroom_gb}GiB"  # e.g., "14GiB"
    return {i: cap for i in range(torch.cuda.device_count())}

def _common_from_pretrained_kwargs():
    """
    Keyword arguments shared by every ``from_pretrained`` call in this notebook.

    The settings aim to reduce both CPU and GPU peak memory and to use a lean
    attention implementation. Device placement is chosen from the visible hardware:
      - more than one CUDA device: ``device_map="auto"`` with a per-GPU ``max_memory`` cap
      - exactly one CUDA device:   everything on device 0
      - no CUDA device:            everything on the CPU (previously this case also
        pinned to device 0, which cannot work without a GPU)

    Returns:
        dict of keyword arguments to splat into ``from_pretrained``.
    """
    kw = dict(
        trust_remote_code=True,
        local_files_only=True,
        torch_dtype=torch.float16,     # T4 → FP16
        low_cpu_mem_usage=True,        # streaming load
        offload_state_dict=True,       # avoid CPU spikes
        attn_implementation="sdpa",    # available by default on Kaggle
    )
    mm = _two_gpu_max_memory(headroom_gb=2)
    if mm and torch.cuda.device_count() > 1:
        kw["device_map"] = "auto"
        kw["max_memory"] = mm
        # Optional if host RAM is tight:
        # kw["offload_folder"] = "/kaggle/working/offload"
    elif torch.cuda.is_available():
        kw["device_map"] = {"": 0}
    else:
        # No GPU at all: keep the loader usable instead of targeting a missing device.
        kw["device_map"] = {"": "cpu"}
    return kw

def load_model_and_tokenizer(model_dir: str, merge_lora: bool = True):
    """
    Robust loader for full fine-tunes or LoRA adapters stored under `model_dir`.
    Expects:
      - reduced-config.json with {"use_lora": <bool>, ...}
      - For LoRA: base-model/, fine-tuned-model/
      - For full FT: fine-tuned-model/
      - tokenizer/ with tokenizer files

    Args:
        model_dir: directory with the layout described above.
        merge_lora: accepted for API compatibility; this function does not read it,
            so no adapter merging happens here.

    Returns: (model, tokenizer)
      - model is in eval mode, with `config.use_cache` set to False when that
        attribute exists
      - tokenizer pads on the right and reuses EOS as the pad token when it has
        no pad token of its own

    Side effects: prints the model's `hf_device_map` (or "no device map").
    """
    conf_path = os.path.join(model_dir, "reduced-config.json")
    logging.info(f"Loading config: {conf_path}")
    with open(conf_path, "r") as fh:
        cfg = json.load(fh)

    kw = _common_from_pretrained_kwargs()

    if cfg.get("use_lora", False):
        base_dir = os.path.join(model_dir, "base-model")
        lora_dir = os.path.join(model_dir, "fine-tuned-model")

        logging.info(f"Loading base model: {base_dir}")
        model = AutoModelForCausalLM.from_pretrained(base_dir, **kw)
        logging.info(f"Attaching LoRA adapter: {lora_dir}")
        # Best effort: prefer PeftModel when that name is available in the kernel,
        # otherwise fall back to the model's own load_adapter. The fallback used to be
        # silent; it is logged now so a missing `PeftModel` import (NameError) or a
        # genuine PEFT failure is visible instead of hidden.
        try:
            model = PeftModel.from_pretrained(model, lora_dir, is_trainable=False)  # type: ignore
        except Exception as exc:
            logging.warning(
                "PeftModel path unavailable (%s: %s); falling back to model.load_adapter",
                type(exc).__name__,
                exc,
            )
            model.load_adapter(lora_dir)

    else:
        ft_dir = os.path.join(model_dir, "fine-tuned-model")
        logging.info(f"Loading full fine-tuned model: {ft_dir}")
        model = AutoModelForCausalLM.from_pretrained(ft_dir, **kw)

    # Tokenizer hygiene
    tok_dir = os.path.join(model_dir, "tokenizer")
    tokenizer = AutoTokenizer.from_pretrained(tok_dir, use_fast=True, local_files_only=True)
    if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"  # better for causal LMs with dynamic padding

    # Runtime memory knobs for your gradient-based rollout
    model.eval()
    if hasattr(model.config, "use_cache"):
        model.config.use_cache = False  # reduce KV/activation memory during your search

    # Quick sanity check of sharding; getattr's default already covers a missing map.
    print(getattr(model, "hf_device_map", "no device map"))

    return model, tokenizer

# Load the model and tokenizer stored under the id-00000001 directory; the loader also
# prints the resulting device map.
model, tokenizer = load_model_and_tokenizer(
    model_dir="/kaggle/input/trojai-rev2-00000001/id-00000001"
)

2025-11-20 14:02:45.194325: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763647365.392497      48 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763647365.444028      48 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

{'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10': 0, 'model.layers.11': 0, 'model.layers.12': 0, 'model.layers.13': 0, 'model.layers.14': 0, 'model.layers.15': 0, 'model.layers.16': 1, 'model.layers.17': 1, 'model.layers.18': 1, 'model.layers.19': 1, 'model.layers.20': 1, 'model.layers.21': 1, 'model.layers.22': 1, 'model.layers.23': 1, 'model.layers.24': 1, 'model.layers.25': 1, 'model.layers.26': 1, 'model.layers.27': 1, 'model.layers.28': 1, 'model.layers.29': 1, 'model.layers.30': 1, 'model.layers.31': 1, 'model.norm': 1, 'model.rotary_emb': 1, 'lm_head': 1}


In [3]:
def get_emb_layer(model):
    """Return the model's input-embedding module.

    Before returning, the model is switched to eval mode and, when its config
    exposes a `use_cache` attribute, that attribute is set to False.
    """
    model.eval()
    has_cache_flag = hasattr(model.config, "use_cache")
    if has_cache_flag:
        setattr(model.config, "use_cache", False)
    embedding_module = model.get_input_embeddings()
    return embedding_module

# Input-embedding module of the loaded model; later cells pass it to the suffix helpers.
emb_layer = get_emb_layer(model)

In [4]:
def project_suffix_to_tokens_and_diagnostics(
    suffix_z,
    emb_layer,
    tokenizer,
):
    """
    Snap every continuous suffix embedding to its closest vocabulary token and
    print diagnostics about how far the projection moved it.

    suffix_z: (Ls, E) - optimized continuous suffix embeddings
    emb_layer: model.get_input_embeddings()
    tokenizer: used to turn the chosen ids into tokens / text for printing

    "Closest" means highest cosine similarity against the embedding matrix, computed
    in float32 on the embedding layer's device. Prints min/max/mean of the L2
    distance and of the cosine similarity to the chosen tokens, then the ids, tokens
    and decoded text.

    Returns: (Ls,) tensor of chosen token ids, on the CPU.
    """
    def _print_stats(values):
        # Shared min/max/mean report for a 1-D tensor of per-position values.
        print(f"  min:  {values.min().item():.6f}")
        print(f"  max:  {values.max().item():.6f}")
        print(f"  mean: {values.mean().item():.6f}")

    with torch.no_grad():
        weight = emb_layer.weight                      # (V, E)
        target_device = weight.device
        _vocab_size, _emb_dim = weight.shape           # also asserts a 2-D matrix

        # Work in float32 on the embedding device to avoid dtype/device mismatches.
        suffix_f32 = suffix_z.to(target_device).float()   # (Ls, E)
        weight_f32 = weight.float()                        # (V, E)

        # Cosine similarity of every vocabulary row against every suffix position.
        similarity = torch.matmul(
            F.normalize(weight_f32, dim=-1),
            F.normalize(suffix_f32, dim=-1).T,
        )                                               # (V, Ls)

        # Best-matching token per suffix position.
        best_token_ids = similarity.argmax(dim=0)      # (Ls,)

        # How far each optimized vector is from the embedding it was snapped to.
        offsets = suffix_f32 - weight_f32[best_token_ids]
        print("L2 distance between optimized embeddings and nearest token embeddings:")
        _print_stats(offsets.norm(dim=-1))

        print("Cosine similarity of optimized embeddings to nearest tokens:")
        _print_stats(similarity.max(dim=0).values)

        suffix_token_ids = best_token_ids.cpu()
        id_list = suffix_token_ids.tolist()
        suffix_tokens = tokenizer.convert_ids_to_tokens(id_list)
        suffix_text = tokenizer.decode(id_list, skip_special_tokens=False)

        print("\nProjected discrete suffix token IDs:", id_list)
        print("Projected discrete suffix tokens:", suffix_tokens)
        print("Projected suffix as text:", repr(suffix_text))

        return suffix_token_ids


In [6]:
def read_suffix_pt(filepath: str) -> torch.Tensor:
    """
    Read suffix embeddings from a .pt file.

    The object is always materialised on the CPU, so a file saved from a GPU run can
    be read on any runtime; the consumers in this notebook move the tensor to the
    embedding layer's device themselves.

    Args:
        filepath: path to a file written with ``torch.save``.

    Returns:
        The stored object (expected to be the suffix embedding tensor), on the CPU.
    """
    # SECURITY: torch.load unpickles the file. Only read files you produced yourself
    # or otherwise trust.
    suffix_z = torch.load(filepath, map_location="cpu")
    return suffix_z

In [8]:
from torch.nn.utils.rnn import pad_sequence
from torch import amp

def entropy_loss(batch_logits):
    """
    Mean Shannon entropy (in nats) of the softmax distributions given by the logits.

    batch_logits: (B, V) logits for the token of interest.
    Returns a scalar tensor: the per-row entropies averaged over the batch.
    """
    logp = F.log_softmax(batch_logits, dim=-1)
    # H = -sum_v p_v * log p_v, evaluated per row -> (B,)
    per_row = (logp.exp() * logp).sum(dim=-1).neg()
    return per_row.mean()

def compute_rollout_entropy_loss_for_suffix(
    model,
    emb_layer,
    batch,
    suffix_z,           # (Ls, E) nn.Parameter
    n_tokens=10,
    amp_dtype=torch.float16,
):
    """
    Entropy of the model's next-token distribution after a greedy rollout that
    follows [prompt][suffix_z].

    - For each example, build [prompt][suffix_z] in embedding space.
    - LEFT-pad all rows to the same length -> [PAD][prompt][suffix]. Padding on the
      left keeps the last column a real token for every row, so `logits[:, -1, :]`
      is always the prediction that follows the suffix (or the latest generated
      token), and generated tokens are appended directly after real content. With
      right padding, shorter rows were read at a PAD position. Results are unchanged
      when all rows have the same length (e.g. B == 1).
    - Roll out n_tokens-1 greedy tokens under inference_mode.
    - Final forward WITH grad gives entropy loss on last generated token.
    - Gradients flow into suffix_z only (prompts are detached).

    Args:
        model: callable accepting `inputs_embeds` / `attention_mask` keyword
            arguments and returning an object with `.logits` of shape (B, T, V).
        emb_layer: the model's input-embedding module.
        batch: dict whose "input_ids" entry is a list of 1D LongTensors (Li,).
        suffix_z: (Ls, E) suffix embeddings.
        n_tokens: total number of predicted positions; n_tokens-1 are rolled out.
        amp_dtype: dtype passed to CUDA autocast for every forward pass.

    Returns:
        Scalar tensor: mean entropy over the batch.
    """
    prompts = batch["input_ids"]   # list of 1D LongTensors (Li,)
    dev = emb_layer.weight.device
    suffix_z = suffix_z.to(dev)    # (Ls, E)

    B = len(prompts)
    _, E = suffix_z.shape

    # --- Build per-example [prompt][suffix] in embedding space ---
    base_embs = []   # each: (Li+Ls, E)
    for p_ids in prompts:
        p_emb = emb_layer(p_ids.to(dev)).detach()   # (Li, E), prompts are constants
        base_embs.append(torch.cat([p_emb, suffix_z], dim=0))

    seq_lens = [seq.size(0) for seq in base_embs]
    max_len = max(seq_lens)

    # Left-pad with zero vectors to [PAD...][prompt][suffix] across the batch.
    base = torch.stack(
        [
            torch.cat([seq.new_zeros((max_len - seq.size(0), E)), seq], dim=0)
            for seq in base_embs
        ],
        dim=0,
    )                                                        # (B, max_len, E)
    base_lens = torch.tensor(seq_lens, device=dev)           # (B,)

    # Attention mask: 1 for real tokens, 0 for the leading pads.
    arange = torch.arange(max_len, device=dev).unsqueeze(0)  # (1, max_len)
    pad_counts = (max_len - base_lens).unsqueeze(1)          # (B, 1)
    base_mask = (arange >= pad_counts).long()                # (B, max_len)

    def _one_step_logits(e, m):
        # One full forward pass (no KV cache); returns logits of the last position.
        with amp.autocast("cuda", dtype=amp_dtype):
            out = model(
                inputs_embeds=e,
                attention_mask=m,
                use_cache=False,
                output_attentions=False,
                output_hidden_states=False,
                return_dict=True,
            )
        return out.logits[:, -1, :]  # (B, V)

    # ---------- Rollout under no grad (from detached base) ----------
    work_e = base.detach()  # rollout uses constants
    work_m = base_mask
    added_embs = []         # list of (B, E) constants

    T = max(0, n_tokens - 1)
    with torch.inference_mode():
        for _ in range(T):
            logits_t = _one_step_logits(work_e, work_m)
            probs_t = torch.softmax(logits_t, dim=-1)
            next_ids = torch.argmax(probs_t, dim=-1)        # (B,) greedy choice

            next_emb = emb_layer(next_ids.to(dev)).detach() # (B, E)
            added_embs.append(next_emb)

            work_e = torch.cat([work_e, next_emb.unsqueeze(1)], dim=1)
            work_m = torch.cat(
                [work_m, torch.ones((B, 1), dtype=work_m.dtype, device=dev)],
                dim=1,
            )

    # ---------- Final inputs: [PAD][prompt][suffix] + generated tokens ----------
    if len(added_embs) > 0:
        added = torch.stack(added_embs, dim=1)              # (B, T, E)
        # Re-attach the grad-carrying base; the generated embeddings stay constants.
        final_emb = torch.cat([base, added], dim=1)         # (B, max_len+T, E)
        # mask: base_mask for original, ones for generated
        gen_mask = torch.ones((B, T), dtype=base_mask.dtype, device=dev)
        final_mask = torch.cat([base_mask, gen_mask], dim=1)
    else:
        final_emb = base
        final_mask = base_mask

    # ---------- Final step WITH grad (depends on suffix_z) ----------
    logits_last = _one_step_logits(final_emb, final_mask)
    loss = entropy_loss(logits_last)

    return loss


In [16]:
# Compare, for every saved suffix file suffix_r{i}_e{j}.pt, the rollout entropy loss of
# the optimized continuous suffix with the loss of its nearest-token projection.
# The prompt is a single EOS token and is identical for every evaluation.
eval_batch = {"input_ids": [torch.tensor([tokenizer.eos_token_id])]}
emb_device = emb_layer.weight.device

for i in range(5):
    for j in range(3):
        suffix_name = f"suffix_r{i}_e{j}"
        suffix_z = read_suffix_pt(f"/kaggle/working/hotflip/rounds/{suffix_name}.pt")
        print(f"Read {suffix_name} successfully.")
        suffix_token_ids = project_suffix_to_tokens_and_diagnostics(suffix_z, emb_layer, tokenizer)

        # Pure evaluation: no gradients are needed, so skip building the autograd graph.
        with torch.no_grad():
            loss_before = compute_rollout_entropy_loss_for_suffix(
                model, emb_layer, eval_batch, suffix_z, n_tokens=10
            )
            # The projected ids come back on the CPU; move them to the embedding device
            # explicitly instead of relying on device hooks attached to the module.
            projected_z = emb_layer(suffix_token_ids.to(emb_device))
            loss_after = compute_rollout_entropy_loss_for_suffix(
                model, emb_layer, eval_batch, projected_z, n_tokens=10
            )

        # .item() prints the plain scalar instead of a tensor repr.
        print(f"suffix loss (before projection): {loss_before.item()}")
        print(f"suffix loss (after projection): {loss_after.item()}\n\n")

Read suffix0 successfully.
L2 distance between optimized embeddings and nearest token embeddings:
  min:  5.095870
  max:  8.697512
  mean: 6.556546
Cosine similarity of optimized embeddings to nearest tokens:
  min:  0.062417
  max:  0.118511
  mean: 0.085406

Projected discrete suffix token IDs: [4949, 7128, 19466, 20462, 1252, 431, 29914, 20542, 7032, 12583]
Projected discrete suffix tokens: ['▁/*', '▁appropri', 'Collections', '▁Operation', 'Ex', 'ub', '/', '▁zones', 'Process', '▁Luis']
Projected suffix as text: '/* appropriCollections OperationExub/ zonesProcess Luis'
suffix loss (before projection): 0.88427734375
suffix loss (after projection): 2.70703125


Read suffix1 successfully.
L2 distance between optimized embeddings and nearest token embeddings:
  min:  4.421392
  max:  10.633087
  mean: 8.085752
Cosine similarity of optimized embeddings to nearest tokens:
  min:  0.063286
  max:  0.176885
  mean: 0.106763

Projected discrete suffix token IDs: [28574, 29899, 1576, 15851, 2