# DCT Experiment — Local Mac Version

Adapted from `dct_experiment.ipynb` to run locally on Apple Silicon (MPS) or CPU.

**Changes from GPU version:**
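- Model: originally `Qwen/Qwen1.5-0.5B-Chat` (0.5B params, same architecture as 7B so all DCT code is compatible); note that the Config cell below currently sets `MODEL_NAME` to `jane-street/dormant-model-warmup`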
- Device: MPS (Apple Silicon) with CPU fallback
- Reduced hyperparameters: fewer factors, smaller projection dim, tighter layer window
- No hardcoded `.cuda()` calls

In [1]:
import os
# Set before torch is imported: lets unsupported MPS ops (e.g. linalg_qr) fall back to CPU.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import gc
import torch

# Unbind names that later cells create, so objects left over from a previous
# run of this notebook become collectable before anything new is loaded.
for _var in ("model", "tokenizer", "sliced_model", "delta_acts_single", "delta_acts",
             "steering_calibrator", "exp_dct", "X", "Y", "U", "V", "hidden_states"):
    globals().pop(_var, None)

gc.collect()

# Device preference order: MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using device: {DEVICE}")

Using device: cuda


In [11]:
import sys
import os

# A notebook has no __file__, so the repo root (where dct.py lives) is resolved
# relative to the kernel's working directory: it is the parent of the cwd.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

# Forget any previously imported copy so edits to dct.py are picked up on re-run.
sys.modules.pop("dct", None)
import dct
from tqdm import tqdm
import math
from torch import vmap
import torch

# Tensors created without an explicit device/dtype follow these defaults.
torch.set_default_device(DEVICE)
# bfloat16 on CUDA halves model memory (~14 GB vs ~28 GB for 7B); float32 for MPS/CPU
DTYPE = {"cuda": torch.bfloat16}.get(DEVICE, torch.float32)
torch.set_default_dtype(DTYPE)
torch.manual_seed(325)

<torch._C.Generator at 0x7fd305dcd330>

## Config

`jane-street/dormant-model-warmup` (the `MODEL_NAME` set below) reports **28 layers** and `d_model=3584` when loaded.
Source layer 8 → target layer 20 spans the middle of the model.

In [None]:
# Experiment configuration: every tunable constant used by the cells below.
MODEL_NAME      = "jane-street/dormant-model-warmup"
TOKENIZER_NAME  = MODEL_NAME

INPUT_SCALE = None          # None -> computed by the calibration cell; set a number to skip calibration

NUM_SAMPLES          = 1    # number of prompts (the single instruction is repeated this many times)
FORWARD_BATCH_SIZE   = 1    # prompts per batch when collecting X/Y and when ranking
BACKWARD_BATCH_SIZE  = 1    # passed as batch_size to ExponentialDCT.fit
MAX_SEQ_LEN          = 27   # prompts are padded/truncated to this many tokens

CALIBRATION_SAMPLE_SIZE        = 10   # fewer random directions for calibration
CALIBRATION_PROMPT_SAMPLE_SIZE = 1    # NOTE(review): not referenced by any visible cell

DIM_OUTPUT_PROJECTION = 32    # passed as d_proj to ExponentialDCT.fit

NUM_ITERS           = 10      # reduced for local testing
NUM_FACTORS         = 32     # reduced from 512
FACTOR_BATCH_SIZE   = 4      # reduced from 16

# Layer indices are relative to the loaded model; the recorded load output for
# this MODEL_NAME reports 28 layers, so 8 -> 20 is roughly 29% -> 71% of the depth.
SOURCE_LAYER_IDX = 8         # start layer of the slice
TARGET_LAYER_IDX = 20        # end layer of the slice

SYSTEM_PROMPT = "You are a helpful assistant"
TOKEN_IDXS    = slice(-3, None)   # last three positions; passed as target_position_indices
NUM_EVAL      = 16           # reduced for local testing; a later cell overrides this to 64

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Left padding/truncation keeps the final prompt tokens at the end of every row.
tokenizer = AutoTokenizer.from_pretrained(
    TOKENIZER_NAME,
    trust_remote_code=True,
    padding_side="left",
    truncation_side="left",
)
tokenizer.pad_token = tokenizer.eos_token

# jane-street/dormant-model-warmup is Qwen-based but its tokenizer
# doesn't ship a chat_template — set the standard Qwen ChatML template.
if tokenizer.chat_template is None:
    tokenizer.chat_template = (
        "{% for message in messages %}"
        "{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
    )

# NOTE(review): the sanity-check cell tokenizes "Concerning Hobbits," (plural);
# confirm which spelling of the prompt is intended here.
instructions = ["Concerning Hobbit,"] * NUM_SAMPLES

# Optional system turn shared by every chat, followed by one user turn each.
if SYSTEM_PROMPT is None:
    chat_init = []
else:
    chat_init = [{'content': SYSTEM_PROMPT, 'role': 'system'}]
chats = [
    chat_init + [{'content': instruction, 'role': 'user'}]
    for instruction in instructions
]

# Render each chat to a prompt string (not token ids) ending in the assistant header.
EXAMPLES = [
    tokenizer.apply_chat_template(
        conversation,
        add_special_tokens=False,
        tokenize=False,
        add_generation_prompt=True,
    )
    for conversation in chats
]
TEST_EXAMPLES = EXAMPLES  # reuse for eval with n=1

print(f"Training examples: {len(EXAMPLES)}")
print(f"Example prompt:\n{EXAMPLES[0]}")

  from .autonotebook import tqdm as notebook_tqdm


Training examples: 1
Test examples:     32
Example prompt:\n<|im_start|>system
You are a helpful assistant<|im_end|>
<|im_start|>user
Concerning Hobbits,<|im_end|>
<|im_start|>assistant



In [None]:
%%time
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    _attn_implementation="eager",  # required for torch.func
    torch_dtype=DTYPE,
).to(DEVICE)
model.eval()
print(f"Model loaded: {MODEL_NAME}")
print(f"Num layers:   {model.config.num_hidden_layers}")
print(f"d_model:      {model.config.hidden_size}")
print(f"dtype:        {model.dtype}")
print(f"Device:       {next(model.parameters()).device}")

Loading checkpoint shards: 100%|██████████| 4/4 [00:16<00:00,  4.14s/it]

Model loaded: jane-street/dormant-model-warmup
Num layers:   28
d_model:      3584
Device:       cuda:0
CPU times: user 4min 13s, sys: 2min 35s, total: 6min 48s
Wall time: 17.7 s





In [6]:
# Sanity check: a SlicedModel built for layers 3 -> 5, fed the full model's
# hidden state at index 3, must reproduce the hidden state at index 5.
model_inputs = tokenizer(
    ["Concerning Hobbits,"], truncation=True, return_tensors="pt"
).to(DEVICE)

# Reference hidden states from one full forward pass.
with torch.no_grad():
    hidden_states = model(
        model_inputs["input_ids"],
        output_hidden_states=True,
    ).hidden_states

sliced_test = dct.SlicedModel(
    model,
    start_layer=3,
    end_layer=5,
    layers_name="model.layers",
)
with torch.no_grad():
    out = sliced_test(hidden_states[3])
    # The failure message (max abs difference) is only computed on a mismatch.
    assert torch.allclose(out, hidden_states[5], atol=1e-4), (
        f"SlicedModel mismatch! max_diff={( out - hidden_states[5]).abs().max()}"
    )
print("SlicedModel sanity check passed.")

SlicedModel sanity check passed.


In [7]:
# The model slice studied in this experiment, built from the configured source
# and target layer indices (same construction as the 3 -> 5 sanity check, which
# maps hidden_states[start] to hidden_states[end]).
sliced_model = dct.SlicedModel(
    model,
    start_layer=SOURCE_LAYER_IDX,
    end_layer=TARGET_LAYER_IDX,
    layers_name="model.layers"
)

In [8]:
d_model = model.config.hidden_size

# X holds the hidden states at SOURCE_LAYER_IDX, Y the unsteered outputs of
# sliced_model on them. Both are buffered on the CPU, one row per prompt.
X = torch.zeros(NUM_SAMPLES, MAX_SEQ_LEN, d_model, device="cpu", dtype=model.dtype)
Y = torch.zeros(NUM_SAMPLES, MAX_SEQ_LEN, d_model, device="cpu", dtype=model.dtype)

with torch.no_grad():
    for t in tqdm(range(0, NUM_SAMPLES, FORWARD_BATCH_SIZE)):
        rows = slice(t, t + FORWARD_BATCH_SIZE)

        # Every prompt is padded/truncated to exactly MAX_SEQ_LEN tokens.
        model_inputs = tokenizer(
            EXAMPLES[rows],
            padding="max_length",
            max_length=MAX_SEQ_LEN,
            truncation=True,
            return_tensors="pt",
        ).to(DEVICE)

        hidden_states = model(
            model_inputs["input_ids"],
            output_hidden_states=True,
        ).hidden_states
        h_source = hidden_states[SOURCE_LAYER_IDX]
        unsteered_target = sliced_model(h_source)

        # Move results off the accelerator as soon as they are computed.
        X[rows] = h_source.cpu()
        Y[rows] = unsteered_target.cpu()

print(f"X shape: {X.shape}, Y shape: {Y.shape}")

100%|██████████| 1/1 [00:00<00:00, 17.89it/s]

X shape: torch.Size([1, 27, 3584]), Y shape: torch.Size([1, 27, 3584])





In [9]:
# DeltaActivations wraps sliced_model and is restricted to the TOKEN_IDXS target
# positions (its exact output is defined in dct.py — not shown here).
delta_acts_single = dct.DeltaActivations(
    sliced_model, target_position_indices=TOKEN_IDXS
)
# Batched variant: vmap maps over dim 1 of the first argument, leaves the other
# two arguments unmapped, stacks results along output dim 2, and evaluates
# FACTOR_BATCH_SIZE mapped slices at a time.
# NOTE(review): delta_acts is not referenced by any other visible cell.
delta_acts = vmap(
    delta_acts_single, in_dims=(1, None, None), out_dims=2,
    chunk_size=FACTOR_BATCH_SIZE
)

In [10]:
%%time
# Calibrate the steering input scale unless one was fixed in the config cell.
# Because INPUT_SCALE is overwritten here, re-running only this cell reuses the
# previously calibrated value; re-run the config cell first to recalibrate.
steering_calibrator = dct.SteeringCalibrator(target_ratio=0.5)
if INPUT_SCALE is None:
    # calibrate() moves batches to delta_acts_single.device internally
    INPUT_SCALE = steering_calibrator.calibrate(
        delta_acts_single,
        X, Y,
        factor_batch_size=FACTOR_BATCH_SIZE,
        calibration_sample_size=CALIBRATION_SAMPLE_SIZE,
    )
print(f"INPUT_SCALE: {INPUT_SCALE}")

CPU times: user 1.45 s, sys: 859 ms, total: 2.31 s
Wall time: 7.07 s


OutOfMemoryError: CUDA out of memory. Tried to allocate 8.12 GiB. GPU 0 has a total capacity of 31.36 GiB of which 1.86 GiB is free. Including non-PyTorch memory, this process has 29.49 GiB memory in use. Of the allocated memory 28.69 GiB is allocated by PyTorch, and 228.46 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
%%time
# Fit an ExponentialDCT with NUM_FACTORS factors on the collected (X, Y) pairs,
# using the calibrated (or configured) INPUT_SCALE. fit() returns the pair
# (U, V); V's columns are the steering vectors applied at SOURCE_LAYER_IDX in
# the evaluation cells below.
exp_dct = dct.ExponentialDCT(num_factors=NUM_FACTORS)
U, V = exp_dct.fit(
    delta_acts_single,
    X, Y,
    batch_size=BACKWARD_BATCH_SIZE,
    factor_batch_size=FACTOR_BATCH_SIZE,
    init="jacobian",
    d_proj=DIM_OUTPUT_PROJECTION,
    input_scale=INPUT_SCALE,
    max_iters=NUM_ITERS,
    beta=1.0,
)
print(f"U shape: {U.shape}, V shape: {V.shape}")

In [None]:
from matplotlib import pyplot as plt
# Plot the objective values recorded on exp_dct by the fit cell
# (presumably one value per training iteration — TODO confirm in dct.py).
plt.plot(exp_dct.objective_values)

In [None]:
# Distribution of pairwise inner products between distinct columns of U:
# take the Gram matrix U^T U and keep only its strict upper triangle.
with torch.no_grad():
    gram_u = U.t() @ U
    upper_u = torch.ones_like(gram_u).triu(diagonal=1).bool()
    simu = gram_u[upper_u]
import seaborn as sns
sns.displot(simu.float().cpu())

In [None]:
# Distribution of pairwise inner products between distinct columns of V:
# take the Gram matrix V^T V and keep only its strict upper triangle.
with torch.no_grad():
    gram_v = V.t() @ V
    upper_v = torch.ones_like(gram_v).triu(diagonal=1).bool()
    simv = gram_v[upper_v]
import seaborn as sns
sns.displot(simv.float().cpu())

In [None]:
# Unsteered reference: greedy completion (up to 64 new tokens) of the first prompt.
model_inputs = tokenizer(EXAMPLES[:1], return_tensors="pt").to(DEVICE)
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=64,
    do_sample=False,
)
decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
completion = decoded[0]
print(completion)

In [None]:
# Second slice, from SOURCE_LAYER_IDX up to end_layer = num_hidden_layers - 1,
# wrapped in a DeltaActivations with default target positions. Used by the
# ranking cell below.
slice_to_end = dct.SlicedModel(model, start_layer=SOURCE_LAYER_IDX, end_layer=model.config.num_hidden_layers-1,
                               layers_name="model.layers")
delta_acts_end_single = dct.DeltaActivations(slice_to_end)

In [None]:
# Only the first token id of each word's encoding is used.
SORRY_TOKEN = tokenizer.encode("Sorry", add_special_tokens=False)[0]
SURE_TOKEN = tokenizer.encode("Sure", add_special_tokens=False)[0]

# Target direction: lm_head row for "Sure" minus lm_head row for "Sorry".
with torch.no_grad():
    lm_head_rows = model.lm_head.weight.data
    target_vec = lm_head_rows[SURE_TOKEN, :] - lm_head_rows[SORRY_TOKEN, :]

In [None]:
# Rank the learned factors against target_vec using the slice-to-end activations.
# NOTE(review): `indices` is reassigned to list(range(...)) in the later factor
# steering cell, so the ordering returned here is not what that cell iterates.
scores, indices = exp_dct.rank(delta_acts_end_single, X, Y, target_vec=target_vec,
                               batch_size=FORWARD_BATCH_SIZE, factor_batch_size=FACTOR_BATCH_SIZE)

In [None]:
import seaborn as sns
# Distribution of the scores returned by exp_dct.rank (moved to CPU for plotting).
sns.displot(scores.cpu())

In [None]:
# Editor used by the evaluation cells to steer() the model at a layer and restore() it afterwards.
model_editor = dct.ModelEditor(model, layers_name="model.layers")

In [None]:
from torch import nn

In [None]:
# Evaluation settings. NUM_EVAL here overrides the value 16 set in the config cell.
NUM_EVAL = 64
# Generation length for the random-vector baseline (a later cell raises it to 128).
MAX_NEW_TOKENS = 16

In [None]:
# Baseline: steer with NUM_EVAL random directions (columns normalized to unit
# norm, scaled by INPUT_SCALE) at SOURCE_LAYER_IDX and record the greedy
# completion for each.
V_rand = torch.nn.functional.normalize(torch.randn(d_model, NUM_EVAL), dim=0)
completions = []
prompt = EXAMPLES[0]
# Tokenize the prompt here instead of relying on whichever `model_inputs` an
# earlier cell happened to assign last (that name is reused by several cells).
model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)
try:
    for i in tqdm(range(NUM_EVAL)):
        # Remove the previous steering vector before applying the next one.
        model_editor.restore()
        model_editor.steer(INPUT_SCALE*V_rand[:,i], SOURCE_LAYER_IDX)
        generated_ids = model.generate(**model_inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)
        completion = tokenizer.batch_decode(generated_ids,
                               skip_special_tokens=True)[0]
        completions.append(completion)
finally:
    # Leave the model unsteered even if the loop is interrupted or fails.
    model_editor.restore()

In [None]:
# Show each random-vector completion under a numbered banner.
for i in range(NUM_EVAL):
    banner = "====Random Vector %d, Positive :=========\n" % i
    print(banner)
    print(completions[i])

In [None]:
# Longer generations for the learned-factor steering run (the random-vector baseline used 16).
MAX_NEW_TOKENS = 128
from torch import nn

In [None]:
# Steer with the learned factors: each selected column of V, scaled by
# INPUT_SCALE, is applied at SOURCE_LAYER_IDX and the greedy completion recorded.
model_editor.restore()
completions = []
prompt = EXAMPLES[0]
# Tokenize the prompt here instead of relying on whichever `model_inputs` an
# earlier cell happened to assign last (that name is reused by several cells).
model_inputs = tokenizer([prompt], return_tensors="pt").to(DEVICE)

# Factors are visited in index order; this replaces any `indices` returned by
# exp_dct.rank in an earlier cell.
indices = list(range(min(NUM_EVAL, NUM_FACTORS)))

try:
    for factor_idx in tqdm(indices):
        # Remove the previous steering vector before applying the next one.
        model_editor.restore()
        model_editor.steer(INPUT_SCALE*V[:,factor_idx], SOURCE_LAYER_IDX)
        generated_ids = model.generate(**model_inputs, max_new_tokens=MAX_NEW_TOKENS, do_sample=False)
        completion = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        completions.append(completion)
finally:
    # Leave the model unsteered even if the loop is interrupted or fails.
    model_editor.restore()

In [None]:
# Show each factor-steered completion under a numbered banner.
for i, _ in enumerate(indices):
    banner = "====Steered by vector %d=========\n" % i
    print(banner)
    print(completions[i])