In [1]:
from torch.utils.data import DataLoader
from datasets import load_from_disk
from src.hyperdas.data_utils import generate_ravel_dataset, get_ravel_collate_fn, filter_dataset

from transformers import AutoTokenizer

%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Local snapshot of the target model; also used by later cells to build the hypernetwork.
target_model_dir = "/nlp/scr/sjd24/cache/hub/models--google--gemma-2-9b-it/snapshots/11c9b309abf73637e4b6f9a3fa1e92e615547819"

# Tokenizer configured for left padding, with the EOS token reused as the pad token.
tokenizer = AutoTokenizer.from_pretrained(target_model_dir)
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

# City test split saved on disk.
test_dataset = load_from_disk("./experiments/RAVEL/data/city_test")

# Collate function and a fixed-order (unshuffled) loader with 16 examples per batch.
collate_fn = get_ravel_collate_fn(
    tokenizer,
    add_space_before_target=True,
    contain_entity_position=True,
    source_suffix_visibility=False,
    base_suffix_visibility=False,
)
dataloader = DataLoader(
    test_dataset,
    batch_size=16,
    collate_fn=collate_fn,
    shuffle=False,
)

In [3]:
from src.hyperdas.gemma2.model import RavelInterpretorHypernetwork

# Build the hypernetwork from the target model snapshot and place it on the GPU in one step.
hypernetwork = RavelInterpretorHypernetwork(
    model_name_or_path=target_model_dir,
    intervention_layer=21,
    chop_editor_at_layer=4,
    num_editing_heads=32,
    subspace_module="ReflectSelect",
    das_dimension=128,
    break_asymmetric=False,
).to("cuda")

Loading checkpoint shards: 100%|██████████| 4/4 [00:19<00:00,  4.86s/it]


In [None]:
# Evaluate on the verb test split.
# Note: this rebinds `test_dataset`, `collate_fn` and `dataloader` from the earlier city-test cell.
test_dataset = load_from_disk("./experiments/RAVEL/data/verb_test")

collate_fn = get_ravel_collate_fn(
    tokenizer,
    add_space_before_target=True,
    contain_entity_position=True,
    source_suffix_visibility=False,
    base_suffix_visibility=False,
)
dataloader = DataLoader(
    test_dataset,
    batch_size=16,
    collate_fn=collate_fn,
    shuffle=False,
)

# Left as the last expression so the evaluation result is shown as the cell output.
hypernetwork.eval_accuracy(
    dataloader,
    inference_mode="bidding_argmax",
    eval_n_label_tokens=3,
)

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import os

# Authenticate with the Hugging Face Hub using a token supplied through the environment.
# SECURITY: an access token used to be hardcoded here. Secrets must never be stored in a
# notebook; any token that was committed should be revoked and replaced.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set. Export a Hugging Face access token as the HF_TOKEN "
        "environment variable before running this cell."
    )
login(token=hf_token)


model_dir = "/nlp/scr/sjd24/cache/hub/models--google--gemma-2-9b-it/snapshots/11c9b309abf73637e4b6f9a3fa1e92e615547819"


# Note: this rebinds `tokenizer`, replacing any tokenizer object configured in an earlier cell.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

ds = load_dataset("pyvene/axbench-concept16k")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /sailhome/sjd24/.cache/huggingface/token
Login successful


Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.06s/it]


In [10]:
# Display the inner `model.model` module to inspect its layer structure.
model.model

Gemma2Model(
  (embed_tokens): Embedding(256000, 3584, padding_idx=0)
  (layers): ModuleList(
    (0-41): 42 x Gemma2DecoderLayer(
      (self_attn): Gemma2SdpaAttention(
        (q_proj): Linear(in_features=3584, out_features=4096, bias=False)
        (k_proj): Linear(in_features=3584, out_features=2048, bias=False)
        (v_proj): Linear(in_features=3584, out_features=2048, bias=False)
        (o_proj): Linear(in_features=4096, out_features=3584, bias=False)
        (rotary_emb): Gemma2RotaryEmbedding()
      )
      (mlp): Gemma2MLP(
        (gate_proj): Linear(in_features=3584, out_features=14336, bias=False)
        (up_proj): Linear(in_features=3584, out_features=14336, bias=False)
        (down_proj): Linear(in_features=14336, out_features=3584, bias=False)
        (act_fn): PytorchGELUTanh()
      )
      (input_layernorm): Gemma2RMSNorm((3584,), eps=1e-06)
      (post_attention_layernorm): Gemma2RMSNorm((3584,), eps=1e-06)
      (pre_feedforward_layernorm): Gemma2RMSNorm((35

In [4]:
# Show the first record of the train split to see what fields a sample contains.
ds["train"][0]

{'input': 'Why do some people select private schools over public schools?',
 'output': 'There are many reasons why some people choose private schools over public schools. Here are some of the most common:\n\n**Academic Focus and Curriculum:**\n\n* **Smaller class sizes:** Private schools often have smaller class sizes, allowing for more individualized attention from teachers.\n* **Specialized programs:** Some private schools offer specialized programs in areas like STEM, arts, or foreign languages, which may not be available in public schools.\n* **Rigorous academics:** Private schools often have a more rigorous academic curriculum, preparing students for college and advanced studies.\n* **Emphasis on specific values:** Some private schools emphasize specific values, such as religious faith, classical education, or a particular philosophy of learning.\n\n**Extracurricular Activities and Resources:**\n\n* **Extensive extracurricular activities:** Private schools often offer a wider rang

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Generate a completion for a single prompt and print the decoded text.
input_text = "Why do some people select private schools over public schools?"

# Move the encoded prompt to whichever device the model is on. Hardcoding "cuda" fails
# with a device mismatch when the model was loaded without being moved to the GPU.
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

# max_length caps the total sequence length (prompt plus generated tokens).
outputs = model.generate(**input_ids, max_length=512)
print(tokenizer.decode(outputs[0]))

<bos>Why do some people select private schools over public schools?

There are many reasons why people choose private schools over public schools. Here are some of the most common:

**Academics:**

* **Smaller class sizes:** Private schools often have smaller class sizes, which can allow for more individualized attention from teachers.
* **Higher academic standards:** Some private schools have higher academic standards than public schools, and may offer more rigorous coursework.
* **Specialized programs:** Private schools may offer specialized programs in areas such as STEM, the arts, or athletics.

**Environment:**

* **Stronger sense of community:** Private schools often have a stronger sense of community than public schools, with more involvement from parents and alumni.
* **More discipline:** Some private schools have stricter discipline policies than public schools.
* **Religious affiliation:** Some private schools are affiliated with a particular religion and offer religious inst

In [None]:
# Persist `testset` to disk at the path below.
# NOTE(review): `testset` is not defined in any cell visible in this notebook, so this
# cell would raise NameError on a fresh kernel — TODO restore the cell that builds it.
testset.save_to_disk("./experiments/RAVEL/data/ICLR/verb_definition")

2364