In [1]:
# Install the third-party packages into the running kernel's environment.
# NOTE(review): no versions are pinned, so the resolved versions depend on when this cell is run.
%pip install transformers_stream_generator plotly circuitsvis huggingface_hub einops tiktoken datasets

Collecting transformers_stream_generator
  Using cached transformers-stream-generator-0.0.5.tar.gz (13 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting plotly
  Downloading plotly-6.5.2-py3-none-any.whl.metadata (8.5 kB)
Collecting circuitsvis
  Using cached circuitsvis-1.43.3-py3-none-any.whl.metadata (983 bytes)
Collecting huggingface_hub
  Downloading huggingface_hub-1.3.3-py3-none-any.whl.metadata (13 kB)
Collecting einops
  Using cached einops-0.8.1-py3-none-any.whl.metadata (13 kB)
Collecting tiktoken
  Downloading tiktoken-0.12.0-cp312-cp312-win_amd64.whl.metadata (6.9 kB)
Collecting datasets
  Downloading datasets-4.5.0-py3-none-any.whl.metadata (19 kB)
Collecting transformers>=4.26.1 (f

In [5]:
# Janky code to do different setup when run in a Colab notebook vs VSCode
DEVELOPMENT_MODE = False
try:
    import google.colab
    IN_COLAB = True
    print("Running as a Colab notebook")
    %pip install git+https://github.com/TransformerLensOrg/TransformerLens.git
    %pip install circuitsvis
    
    # PySvelte is an unmaintained visualization library, use it as a backup if circuitsvis isn't working
    # # Install another version of node that makes PySvelte work way faster
    # !curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -; sudo apt-get install -y nodejs
    # %pip install git+https://github.com/neelnanda-io/PySvelte.git
except:
    IN_COLAB = False
    print("Running as a Jupyter notebook - intended for development only!")
    from IPython import get_ipython

    ipython = get_ipython()
    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel
    ipython.run_line_magic("load_ext", "autoreload")
    ipython.run_line_magic("autoreload", "2")

Running as a Jupyter notebook - intended for development only!
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# Pick the Plotly renderer for the current front end: "notebook_connected" is used
# only for a non-Colab session with DEVELOPMENT_MODE switched on; every other
# combination gets the "colab" renderer.
import plotly.io as pio
pio.renderers.default = (
    "notebook_connected" if (DEVELOPMENT_MODE and not IN_COLAB) else "colab"
)
print(f"Using renderer: {pio.renderers.default}")

Using renderer: colab


In [12]:
# Switch the working directory to ~/TransformerLens.
# NOTE(review): assumes that directory exists (presumably a local checkout of the repo) — confirm,
# or make the path configurable, before sharing the notebook.
%cd ~/TransformerLens
import torch
# Turn off gradient tracking globally for the rest of the session.
torch.set_grad_enabled(False)

# NOTE(review): AutoTokenizer is imported here and again two lines below; harmless but redundant.
from transformers import AutoTokenizer
from transformer_lens import HookedTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer
# NOTE(review): GenerationConfig and partial are not used by the cells visible in this section.
from transformers.generation import GenerationConfig

from functools import partial

C:\Users\chris\TransformerLens


In [13]:
def assert_hf_and_tl_model_are_close(
    hf_model,
    tl_model,
    tokenizer,
    prompt="This is a prompt to test out",
    atol=1e-3,
):
    """Check that two models produce matching next-token distributions on a prompt.

    The prompt is tokenized once, both models are run on the same token ids, and the
    softmax (over the last dimension) of their logits is compared with ``torch.allclose``.

    Args:
        hf_model: Callable taking token ids and returning an object with a ``.logits``
            tensor; must expose a ``.device`` attribute (the ids are moved there first).
        tl_model: Callable taking token ids and returning a logits tensor directly.
        tokenizer: Called as ``tokenizer(prompt, return_tensors="pt")``; the result must
            expose ``.input_ids``.
        prompt: Text fed to both models.
        atol: Absolute tolerance passed to ``torch.allclose`` (its default ``rtol`` is kept).

    Raises:
        AssertionError: If the two probability tensors are not close. The message reports
            the largest absolute difference, so a failure is diagnosable without re-running.
    """
    prompt_toks = tokenizer(prompt, return_tensors="pt").input_ids

    hf_logits = hf_model(prompt_toks.to(hf_model.device)).logits
    # `.to(hf_logits)` matches the device and dtype of the reference logits before comparing.
    tl_logits = tl_model(prompt_toks).to(hf_logits)

    hf_probs = torch.softmax(hf_logits, dim=-1)
    tl_probs = torch.softmax(tl_logits, dim=-1)

    # Raise explicitly instead of using a bare `assert`: the check then survives
    # `python -O` and carries a message describing how far apart the models are.
    if not torch.allclose(hf_probs, tl_probs, atol=atol):
        max_abs_diff = (hf_probs - tl_probs).abs().max().item()
        raise AssertionError(
            f"HF and TL output probabilities differ for prompt {prompt!r}: "
            f"max abs diff = {max_abs_diff:.3e} (atol = {atol})"
        )

## Qwen, first generation

In [None]:
# Load the first-generation Qwen chat model through both Hugging Face and
# TransformerLens, then check that the two give matching output probabilities.
# NOTE(review): the recorded output of this cell is a MissingSchema error for a URL that
# starts with "hf-mirror.com/" — presumably the Hugging Face endpoint is configured
# without the "https://" scheme in this environment; verify before re-running.
model_path = "Qwen/Qwen-1_8B-Chat"
# Prefer the GPU, but fall back to CPU so the cell still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True lets transformers execute the custom code shipped in the model repo.
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# The extra keyword arguments (fp32, use_logn_attn, use_dynamic_ntk, scale_attn_weights)
# are presumably config overrides consumed by the model's remote code — TODO confirm.
hf_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    fp32=True,
    use_logn_attn=False,
    use_dynamic_ntk=False,
    scale_attn_weights=False,
    trust_remote_code=True,
).eval()

tl_model = HookedTransformer.from_pretrained_no_processing(
    model_path,
    device=device,
    fp32=True,
    dtype=torch.float32,
).to(device)

assert_hf_and_tl_model_are_close(hf_model, tl_model, tokenizer)

MissingSchema: Invalid URL 'hf-mirror.com/Qwen/Qwen-1_8B-Chat/resolve/main/tokenizer_config.json': No scheme supplied. Perhaps you meant https://hf-mirror.com/Qwen/Qwen-1_8B-Chat/resolve/main/tokenizer_config.json?

: 

## Qwen, new generation

In [7]:
# Load the Qwen1.5 chat model through both Hugging Face and TransformerLens,
# then check that the two give matching output probabilities.
model_path = "Qwen/Qwen1.5-1.8B-Chat"
# Prefer the GPU, but fall back to CPU so the cell still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(
    model_path,
)

hf_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
).eval()

# The TransformerLens copy is requested in float32 explicitly.
# NOTE(review): the Hugging Face load above does not specify a dtype — confirm both
# models end up in the same precision, since the comparison uses a tight tolerance.
tl_model = HookedTransformer.from_pretrained_no_processing(
    model_path,
    device=device,
    dtype=torch.float32,
).to(device)

assert_hf_and_tl_model_are_close(hf_model, tl_model, tokenizer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loaded pretrained model Qwen/Qwen1.5-1.8B-Chat into HookedTransformer
Moving model to device:  cuda
