### `machine-similarities.ipynb`
This notebook creates machine-generated similarity values for any given grouping of words, for any HookedTransformer model. The intended purpose of the output is to feed into the `ConceptSimRSA.ipynb` in `/notebooks`, which takes in WordSim.txt format style similarity values, and runs the RSA pipeline accordingly.

#### Import necessary dependencies and libraries

In [1]:
%pip install transformer_lens

import torch
import torch.nn.functional as torch_f
import numpy as np
import re

import transformer_lens.utils as utils
from transformer_lens.hook_points import (
    HookPoint,
)
from transformer_lens import HookedTransformer



#### Customizable setup

In [2]:
# Customizeable pairings
fish = ["salmon", "tuna", "haddock", "eel", "goldfish", "koi", "shark", "mackerel"]
trees = ["pine", "redwood", "oak", "maple", "birch", "willow"]

In [3]:
# Select model
device = utils.get_device()
model = HookedTransformer.from_pretrained("Qwen/Qwen2.5-3B-Instruct", device=device)

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]



Loaded pretrained model Qwen/Qwen2.5-3B-Instruct into HookedTransformer


#### Build similarity matricies for word groups


In [12]:
def lm_similarity(model, w1, w2):
  prompt = (
      f"On a scale from 1 to 10, how similar are '{w1}' and '{w2}'? Answer with a single number."
  )

  tokens = model.to_tokens(prompt, prepend_bos=True)
  answer = model.generate(tokens, max_new_tokens=4)
  str_answer = model.to_string(answer[0][tokens.shape[-1]:])

  # Look for the first occurence of some integer in [1,10]
  m = re.search(r"\b(10|[1-9])\b", str_answer)
  if not m:
    print("DEBUG: Fallback")
    return 5.0

  return float(m.group(1))

In [13]:
def get_sim_mat(words, iterations=3):
  lines = {}

  for _ in range(iterations):
    for i in range(len(words)):
      for j in range(i + 1, len(words)):
        w1, w2 = words[i], words[j]
        sim = lm_similarity(model, w1, w2)
        lines[(w1,w2)] = lines.get((w1,w2), 0) + sim

  for k, v in lines.items():
    lines[k] = round(v / iterations, 4)

  return lines

#### Results


In [None]:
fish_sim_mat = get_sim_mat(fish)
trees_sim_mat = None

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

DEBUG: Fallback


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
# Export results as similar format to WordSim.txt
# to pipeline process -> ConceptRSA.ipynb

fish_sim_mat

file_name = "fish_sim.txt"

with open(file_name, "w") as f:
  for (w1, w2), score in fish_sim_mat.items():
    f.write(f"{w1} {w2} {score}\n")

print("successfully saved to", file_name)

successfully saved to fish_sim.txt
