In [None]:
%ls /kaggle/input/trojai-rev2-00000001/id-00000001

[0m[01;34mclean-example-data[0m/         mmlu_results.json       [01;34mtokenizer[0m/
eval_generative_stats.json  [01;34mpoisoned-example-data[0m/  training_args.bin
[01;34mfine-tuned-model[0m/           reduced-config.json     training_args.json
ground_truth.csv            round_config.json
log.txt                     stats.json


In [None]:
import torch
from typing import List, Union
import json
import os
import logging
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model(model_filepath: str, torch_dtype: torch.dtype = torch.float16):
    """Load a fine-tuned causal LM and its tokenizer from a model directory.

    Args:
        model_filepath: Directory containing ``reduced-config.json``,
            ``tokenizer/``, ``fine-tuned-model/`` and, for LoRA models,
            ``base-model/``.
        torch_dtype: Weight dtype forwarded to ``from_pretrained``.

    Returns:
        tuple: ``(model, tokenizer)``; the model has been switched to eval mode.

    Raises:
        FileNotFoundError: If ``reduced-config.json`` does not exist.
    """
    conf_filepath = os.path.join(model_filepath, 'reduced-config.json')
    logging.info("Loading config file from: %s", conf_filepath)
    with open(conf_filepath, 'r') as fh:
        round_config = json.load(fh)

    logging.info("Loading model from filepath: %s", model_filepath)
    # Everything is read from local disk only, see
    # https://huggingface.co/docs/transformers/installation#offline-mode
    load_kwargs = dict(
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch_dtype,
        local_files_only=True,
    )
    fine_tuned_model_filepath = os.path.join(model_filepath, 'fine-tuned-model')

    # A config without the key is treated as a full fine-tune, matching
    # load_model_and_tokenizer.
    if round_config.get('use_lora', False):
        base_model_filepath = os.path.join(model_filepath, 'base-model')
        logging.info("loading the base model (before LORA) from %s", base_model_filepath)
        model = AutoModelForCausalLM.from_pretrained(base_model_filepath, **load_kwargs)

        logging.info("loading the LORA adapter onto the base model from %s", fine_tuned_model_filepath)
        model.load_adapter(fine_tuned_model_filepath)
    else:
        # device_map="auto" decides the placement, so the message no longer
        # claims the checkpoint is loaded "into cpu".
        logging.info("Loading full fine tune checkpoint from %s", fine_tuned_model_filepath)
        model = AutoModelForCausalLM.from_pretrained(fine_tuned_model_filepath, **load_kwargs)

    model.eval()

    tokenizer_filepath = os.path.join(model_filepath, 'tokenizer')
    # Same offline guarantee as the model weights.
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_filepath, local_files_only=True)

    return model, tokenizer

import os, json, logging, torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def _two_gpu_max_memory(headroom_gb=2):
    """Build a per-GPU ``max_memory`` budget, or return ``None`` without CUDA.

    Every visible CUDA device gets the same cap of ``16 - headroom_gb`` GiB.
    The 16 is hard-coded (the original note targets 16GB T4 cards), so the
    budget is only meaningful on cards of that size.

    Args:
        headroom_gb: GiB subtracted from the assumed 16 GiB per card.

    Returns:
        dict mapping device index -> "<N>GiB", or ``None`` when CUDA is
        unavailable.
    """
    if torch.cuda.is_available():
        per_gpu_budget = f"{16 - headroom_gb}GiB"  # e.g., "14GiB"
        return dict.fromkeys(range(torch.cuda.device_count()), per_gpu_budget)
    return None

def _common_from_pretrained_kwargs():
    """Build the keyword arguments shared by the ``from_pretrained`` calls.

    The fixed options load FP16 weights from local files only, using the
    low-memory loading path and SDPA attention. Device placement depends on
    the hardware that is visible:

      - no CUDA device: everything on the CPU,
      - exactly one GPU: everything on GPU 0,
      - several GPUs:    ``device_map="auto"`` with a per-GPU ``max_memory``
                         cap from ``_two_gpu_max_memory``.

    Returns:
        dict: kwargs to splat into ``AutoModelForCausalLM.from_pretrained``.
    """
    kw = dict(
        trust_remote_code=True,
        local_files_only=True,
        torch_dtype=torch.float16,     # T4 → FP16
        low_cpu_mem_usage=True,        # streaming load
        offload_state_dict=True,       # avoid CPU spikes
        attn_implementation="sdpa",    # available by default on Kaggle
    )

    n_gpus = torch.cuda.device_count() if torch.cuda.is_available() else 0
    if n_gpus == 0:
        # Pinning to GPU index 0 cannot work without CUDA; fall back to CPU.
        kw["device_map"] = {"": "cpu"}
    elif n_gpus == 1:
        kw["device_map"] = {"": 0}
    else:
        kw["device_map"] = "auto"
        kw["max_memory"] = _two_gpu_max_memory(headroom_gb=2)
        # Optional if host RAM is tight:
        # kw["offload_folder"] = "/kaggle/working/offload"
    return kw

def load_model_and_tokenizer(model_dir: str, merge_lora: bool = True):
    """Load a full fine-tune or a LoRA-adapted causal LM plus its tokenizer.

    Expected layout under ``model_dir``:
      - reduced-config.json with {"use_lora": <bool>, ...}
      - For LoRA: base-model/ and fine-tuned-model/ (the adapter)
      - For a full fine-tune: fine-tuned-model/
      - tokenizer/ with the tokenizer files

    Args:
        model_dir: Directory with the layout above.
        merge_lora: Only relevant for LoRA checkpoints attached through
            ``PeftModel``: when true, the adapter weights are folded into the
            base model via ``merge_and_unload()``. Adapters attached with the
            ``load_adapter`` fallback are left unmerged.

    Returns:
        tuple: ``(model, tokenizer)``. The model is in eval mode with
        ``config.use_cache`` disabled when that attribute exists.

    Raises:
        FileNotFoundError: If ``reduced-config.json`` does not exist.
    """
    conf_path = os.path.join(model_dir, "reduced-config.json")
    logging.info("Loading config: %s", conf_path)
    with open(conf_path, "r") as fh:
        cfg = json.load(fh)

    kw = _common_from_pretrained_kwargs()

    if cfg.get("use_lora", False):
        base_dir = os.path.join(model_dir, "base-model")
        lora_dir = os.path.join(model_dir, "fine-tuned-model")

        logging.info("Loading base model: %s", base_dir)
        model = AutoModelForCausalLM.from_pretrained(base_dir, **kw)
        logging.info("Attaching LoRA adapter: %s", lora_dir)

        # PeftModel is optional: it is used only if the session has imported
        # it. Guard just the name lookup so that genuine adapter-loading
        # errors propagate instead of being silently retried.
        try:
            peft_model_cls = PeftModel  # type: ignore[name-defined]  # noqa: F821
        except NameError:
            peft_model_cls = None

        if peft_model_cls is not None:
            model = peft_model_cls.from_pretrained(model, lora_dir, is_trainable=False)
            merge = getattr(model, "merge_and_unload", None)
            if merge_lora and merge is not None:
                logging.info("Merging LoRA weights into the base model")
                model = merge()
        else:
            model.load_adapter(lora_dir)
            if merge_lora:
                logging.info("PeftModel not available; adapter attached without merging")

    else:
        ft_dir = os.path.join(model_dir, "fine-tuned-model")
        logging.info("Loading full fine-tuned model: %s", ft_dir)
        model = AutoModelForCausalLM.from_pretrained(ft_dir, **kw)

    # Tokenizer hygiene
    tok_dir = os.path.join(model_dir, "tokenizer")
    tokenizer = AutoTokenizer.from_pretrained(tok_dir, use_fast=True, local_files_only=True)
    if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
        tokenizer.pad_token = tokenizer.eos_token
    # NOTE(review): compute_rollout_entropy_loss reads logits at the last
    # position; with right padding that position is a pad token for the
    # shorter rows of a batch. Confirm callers batch equal-length inputs.
    tokenizer.padding_side = "right"

    # Runtime memory knobs for the gradient-based rollout
    model.eval()
    if hasattr(model.config, "use_cache"):
        model.config.use_cache = False  # no KV cache is kept during the search

    # Quick sanity check of sharding; getattr's default covers models that
    # were loaded without a device map.
    print(getattr(model, "hf_device_map", "no device map"))

    return model, tokenizer


In [None]:
# Load the example model and its tokenizer; the loader prints the device map.
model, tokenizer = load_model_and_tokenizer(
    "/kaggle/input/trojai-rev2-00000001/id-00000001",
)

2025-11-18 21:21:57.086991: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763500917.109829     728 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763500917.116831     728 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

{'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10': 0, 'model.layers.11': 0, 'model.layers.12': 0, 'model.layers.13': 0, 'model.layers.14': 0, 'model.layers.15': 0, 'model.layers.16': 1, 'model.layers.17': 1, 'model.layers.18': 1, 'model.layers.19': 1, 'model.layers.20': 1, 'model.layers.21': 1, 'model.layers.22': 1, 'model.layers.23': 1, 'model.layers.24': 1, 'model.layers.25': 1, 'model.layers.26': 1, 'model.layers.27': 1, 'model.layers.28': 1, 'model.layers.29': 1, 'model.layers.30': 1, 'model.layers.31': 1, 'model.norm': 1, 'model.rotary_emb': 1, 'lm_head': 1}


In [None]:
import torch
import torch.nn.functional as F

def entropy_loss(batch_logits):
    """Mean Shannon entropy (in nats) of the softmax over the last dimension.

    Args:
        batch_logits: (B, V) logits for the token of interest. Entries equal
            to ``-inf`` (masked-out tokens) are treated as probability zero.

    Returns:
        Scalar tensor: entropy averaged over all leading dimensions.
    """
    log_probs = F.log_softmax(batch_logits, dim=-1)
    # A -inf logit yields log-prob -inf and prob 0, and 0 * -inf is NaN in both
    # the value and the gradient. Clamping to the most negative finite value
    # keeps the product at exactly 0 and leaves finite entries untouched.
    log_probs = log_probs.clamp_min(torch.finfo(log_probs.dtype).min)
    probs = log_probs.exp()
    entropy = -(probs * log_probs).sum(dim=-1)  # (B,)
    return entropy.mean()

In [None]:
from torch.amp import autocast

def compute_rollout_entropy_loss(
    model,
    emb_layer,
    base_embeddings,    # (B, L, E) incoming tensor
    attention_mask,     # (B, L)
    n_tokens=10,
    amp_dtype=torch.float16,
):
    """Entropy of the ``n_tokens``-th next-token distribution after a greedy rollout.

    Steps:
      1. Detach ``base_embeddings`` into a fresh leaf ``base`` that requires grad.
      2. Greedily generate ``n_tokens - 1`` tokens without building a graph,
         keeping the embedding of every chosen token as a constant.
      3. Run one forward pass with grad on ``cat([base, constants], dim=1)``
         and take the mean entropy of the logits at the last position.

    Gradients therefore flow only into ``base``. With ``n_tokens <= 1`` no
    rollout happens and the entropy is taken directly after ``base``.

    Args:
        model: Causal LM called with ``inputs_embeds`` / ``attention_mask``;
            its output must expose ``.logits``.
        emb_layer: Module mapping token ids to embeddings.
        base_embeddings: (B, L, E) embeddings for [prompt][suffix].
        attention_mask: (B, L) mask matching ``base_embeddings``.
        n_tokens: Position (1-based) of the generated token whose
            distribution is scored.
        amp_dtype: dtype used for CUDA autocast during the forward passes.

    Returns:
        tuple: ``(loss, base)`` — the scalar entropy and the leaf tensor to
        differentiate with respect to.
    """
    dev = base_embeddings.device
    B, _, _ = base_embeddings.shape  # also enforces a rank-3 input

    # Make base a leaf so the caller can read base.grad after backward().
    base = base_embeddings.detach().requires_grad_(True)  # (B, L, E)

    def _one_step_logits(e, m):
        # e: (B, cur_len, E), m: (B, cur_len)
        with autocast("cuda", dtype=amp_dtype):
            out = model(
                inputs_embeds=e,
                attention_mask=m,
                use_cache=False,
                output_attentions=False,
                output_hidden_states=False,
                return_dict=True,
            )
        # logits for next-token distribution at last position
        return out.logits[:, -1, :]  # (B, V)

    # ---------- Rollout without grad ----------
    work_e = base
    work_m = attention_mask
    added_embs = []         # list of (B, E) constants

    rollout_steps = max(0, n_tokens - 1)
    # no_grad instead of inference_mode: the tensors produced here are fed
    # into the autograd-recorded forward below, which inference tensors do
    # not support.
    with torch.no_grad():
        for _ in range(rollout_steps):
            logits_t = _one_step_logits(work_e, work_m)

            # Greedy choice; argmax over logits equals argmax over softmax.
            next_ids = torch.argmax(logits_t, dim=-1)     # (B,)
            next_emb = emb_layer(next_ids.to(dev))        # (B, E), graph-free
            added_embs.append(next_emb)

            # extend working sequence/mask
            work_e = torch.cat([work_e, next_emb.unsqueeze(1)], dim=1)
            work_m = torch.cat(
                [work_m, torch.ones((B, 1), dtype=work_m.dtype, device=dev)],
                dim=1
            )

    # ---------- Build final inputs: only base is differentiable ----------
    if added_embs:
        added = torch.stack(added_embs, dim=1)        # (B, T, E) constants
        final_emb = torch.cat([base, added], dim=1)   # (B, L+T, E)
    else:
        # n_tokens <= 1: score the very first generated token.
        final_emb = base
    final_msk = work_m  # equals attention_mask when no rollout happened

    # ---------- Final step WITH grad ----------
    logits_last = _one_step_logits(final_emb, final_msk)  # graph includes base
    loss = entropy_loss(logits_last)                      # scalar

    return loss, base      # base is the leaf you should differentiate w.r.t.