In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('..')

import torch
import matplotlib.pyplot as plt
from src import models, data
from tqdm.auto import tqdm
import json
import os
import numpy as np
import copy

In [3]:
# Load GPT-J onto the first CUDA device; `mt` is used by every cell below.
device = "cuda:0"
mt = models.load_model("gptj", device=device)

# Sanity check: report precision, placement and memory footprint of the loaded model.
print(
    f"dtype: {mt.model.dtype}, "
    f"device: {mt.model.device}, "
    f"memory: {mt.model.get_memory_footprint()}"
)

dtype: torch.float16, device: cuda:0, memory: 12219206136


In [4]:
from src.lens import interpret_logits, logit_lens
from src.functional import untuple

prompt = "Prudential Tower is located in the city of"
tokenized = mt.tokenizer(prompt, return_tensors="pt", padding=True).to(mt.model.device)

import baukit

# Run one forward pass while recording the output of every module named by
# `models.determine_layer_paths(mt)`; later cells read these from `traces`.
# This is inference only, so autograd is disabled: otherwise the logits and
# every traced activation keep a reference to the computation graph.
with torch.no_grad(), baukit.TraceDict(
    mt.model,
    models.determine_layer_paths(mt),
) as traces:
    output = mt.model(**tokenized)

# Top next-token candidates (as probabilities) at the last position of the prompt.
interpret_logits(mt, output.logits[0][-1], get_proba=True)

[(' Chicago', 0.51),
 (' Newark', 0.174),
 (' Boston', 0.069),
 (' San', 0.031),
 (' Houston', 0.014),
 (' Minneapolis', 0.011),
 (' Jersey', 0.011),
 (' London', 0.011),
 (' Detroit', 0.01),
 (' Baltimore', 0.009)]

In [5]:
interested_words = [" Seattle", " Paris", " Dhaka"]
int_tokenized = mt.tokenizer(interested_words, return_tensors="pt", padding=True).to(
    mt.model.device
)

# Output of the last traced layer: first batch element, last token position.
z = untuple(traces[models.determine_layer_paths(mt)[-1]].output)[0][-1]
print(z.shape)

# Only the first token id of each row is passed on, so a word that splits into
# several tokens (" Dhaka" -> " Dh" in the recorded output) is represented by
# its first piece. NOTE(review): this relies on the tokenizer padding on the
# right so that `t[0]` is a real token — confirm the tokenizer's padding side.
logit_lens(mt, z, [t[0] for t in int_tokenized.input_ids], get_proba=True)

torch.Size([4096])


([(' Chicago', 0.514),
  (' Newark', 0.172),
  (' Boston', 0.068),
  (' San', 0.031),
  (' Houston', 0.014),
  (' Minneapolis', 0.011),
  (' Jersey', 0.011),
  (' London', 0.011),
  (' Detroit', 0.01),
  (' Baltimore', 0.009)],
 {tensor(7312, device='cuda:0'): (0.0017547607421875, ' Seattle'),
  tensor(6342, device='cuda:0'): (3.3974647521972656e-06, ' Paris'),
  tensor(20529, device='cuda:0'): (9.1552734375e-05, ' Dh')})

### $F(\mathbf{h}_{s}) = \mathbf{h}_{s}$: setting $W_{r} = I$ and $\mathbf{b} = \mathbf{0}$ reduces the operator to the logit lens

In [6]:
from src.operators import LinearRelationOperator

# With neither a weight nor a bias supplied, the operator is basically the
# logit lens. Both h and z are taken at the last layer (-1).
logit_lens_operator = LinearRelationOperator(
    mt=mt,
    prompt_template="{} is located in the city of",
    h_layer=-1,
    z_layer=-1,
    weight=None,
    bias=None,
)

In [7]:
# Feed the hidden state `z` captured for the Prudential Tower prompt directly as `h`.
# NOTE(review): the recorded predictions match the logit-lens output for that
# prompt, so the supplied `h` appears to take precedence over `subject` —
# confirm in LinearRelationOperator.
logit_lens_operator(subject="The Space Needle", k=10, h=z)

LinearRelationOutput(predictions=[PredictedToken(token=' Chicago', prob=0.5139787793159485), PredictedToken(token=' Newark', prob=0.17216132581233978), PredictedToken(token=' Boston', prob=0.06848104298114777), PredictedToken(token=' San', prob=0.030866824090480804), PredictedToken(token=' Houston', prob=0.013804498128592968), PredictedToken(token=' Minneapolis', prob=0.011266903951764107), PredictedToken(token=' Jersey', prob=0.011092226952314377), PredictedToken(token=' London', prob=0.010501908138394356), PredictedToken(token=' Detroit', prob=0.010178797878324986), PredictedToken(token=' Baltimore', prob=0.00850470457226038)], h=tensor([-1.4648,  0.7959, -0.9663,  ..., -0.6025,  0.8594, -4.9844],
       device='cuda:0', dtype=torch.float16, grad_fn=<SelectBackward0>), z=tensor([-1.4648,  0.7959, -0.9663,  ..., -0.6025,  0.8594, -4.9844],
       device='cuda:0', dtype=torch.float16, grad_fn=<SelectBackward0>))

In [8]:
# Look up the module registered as "lm_head" and show the shape of its weight.
unembedding = baukit.nethook.get_module(mt.model, "lm_head")
unembedding.weight.size()

torch.Size([50400, 4096])

In [10]:
# Use the lm_head row of the first token of " Chicago" as the hidden state `h`.
subject = " Chicago"
subject_token_id = mt.tokenizer(subject).input_ids[0]
emb_subject = unembedding.weight[subject_token_id]

# The subject string is a placeholder here; `h` is supplied explicitly.
logit_lens_operator(subject="Whatever", k=10, h=emb_subject)

LinearRelationOutput(predictions=[PredictedToken(token=' Chicago', prob=1.0), PredictedToken(token='Chicago', prob=6.665581192860808e-23), PredictedToken(token=' Detroit', prob=3.3952676397964373e-28), PredictedToken(token=' Illinois', prob=2.333531109663677e-28), PredictedToken(token=' Boston', prob=5.542621749889874e-29), PredictedToken(token=' Milwaukee', prob=4.0150617628607155e-30), PredictedToken(token=' Philadelphia', prob=3.1269332749387515e-30), PredictedToken(token=' Seattle', prob=6.157297243929202e-31), PredictedToken(token=' Toronto', prob=3.096083083978257e-31), PredictedToken(token=' Atlanta', prob=1.3738785357297722e-31)], h=tensor([-0.0243, -0.0335, -0.0092,  ..., -0.0075,  0.0104,  0.0171],
       device='cuda:0', dtype=torch.float16, grad_fn=<SelectBackward0>), z=tensor([-0.0243, -0.0335, -0.0092,  ..., -0.0075,  0.0104,  0.0171],
       device='cuda:0', dtype=torch.float16, grad_fn=<SelectBackward0>))

### Loading dataset

In [13]:
from src.data import load_dataset
dataset = load_dataset()

# Select the relation by name; indexing [0] raises IndexError if none matches.
matching_relations = [rel for rel in dataset if rel.name == "country capital city"]
cur_relation = matching_relations[0]

# Split the relation into train/test; the recorded output shows 10 train and 14 test samples.
train, test = cur_relation.split(size=10)
(len(train.samples), len(test.samples))

(10, 14)

### ICL-Mean, our flagship method

In [16]:
from src.operators import JacobianIclMeanEstimator

# Configure the ICL-Mean estimator with h read at layer 12 and beta = 0.5,
# then estimate an operator from the training samples.
mean_estimator = JacobianIclMeanEstimator(mt=mt, h_layer=12, beta=0.5)
icl_mean = mean_estimator(train)

relation has > 1 prompt_templates, will use first (The capital city of {} is)


### Learned Linear Model baseline

In [22]:
from src.operators import LearnedLinearEstimatorBaseline
    
# Learned linear baseline estimated from the same training samples, with h read at layer 15.
# NOTE(review): the ICL-Mean estimator above uses h_layer=12 — confirm the layer mismatch is intended.
learned_estimator = LearnedLinearEstimatorBaseline(mt=mt, h_layer=15)
learned_operator = learned_estimator(train)

relation has > 1 prompt_templates, will use first (The capital city of {} is)


### Offset Model (a simpler version of the `corner` approach)

In [40]:
from src.operators import OffsetEstimatorBaseline
    
# Offset baseline estimated from the same training samples, with h read at layer 15.
# An explicit scaling_factor (70) was tried previously and is not passed here.
offset_estimator = OffsetEstimatorBaseline(mt=mt, h_layer=15)
offset_operator = offset_estimator(train)

relation has > 1 prompt_templates, will use first (The capital city of {} is)


In [41]:
subject = "France"

# Print each operator's predictions for the same subject, one line per operator,
# in the order: learned linear baseline, ICL-Mean, offset baseline.
for relation_operator in (learned_operator, icl_mean, offset_operator):
    print(relation_operator(subject).predictions)

[PredictedToken(token=' Madrid', prob=0.050866398960351944), PredictedToken(token=' \\', prob=0.03183137997984886), PredictedToken(token='\\', prob=0.03013734519481659), PredictedToken(token=' Sacramento', prob=0.018859468400478363), PredictedToken(token=' incl', prob=0.015881264582276344)]
[PredictedToken(token=' Paris', prob=0.9226482510566711), PredictedToken(token=' French', prob=0.04248381406068802), PredictedToken(token=' France', prob=0.014682012610137463), PredictedToken(token=' Franc', prob=0.008497295901179314), PredictedToken(token=' Buenos', prob=0.0010470723500475287)]
[PredictedToken(token=' Paris', prob=0.9273013472557068), PredictedToken(token=' Moscow', prob=0.03173050656914711), PredictedToken(token=' Berlin', prob=0.012425845488905907), PredictedToken(token=' London', prob=0.007304777856916189), PredictedToken(token=' Tokyo', prob=0.007080032490193844)]


In [51]:
# Draw a pair of training samples; going by the method name and the recorded
# output, the two samples have different objects (answers).
train.sample_pair_with_different_answers()

(RelationSample(subject='Canada', object='Ottawa'),
 RelationSample(subject='Colombia', object='Bogot\\u00e1'))

In [46]:
# Scratch check: materialise a range as a list (no intermediate generator needed).
list(range(3))

[0, 1, 2]