In [31]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
# Re-running this cell in a live kernel prints an "already loaded" notice,
# which is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
import sys
# Add the parent directory to the import path so the `src` package resolves
# (assumes the notebook is run from a directory directly below the project root — TODO confirm).
sys.path.append('..')

import torch
import matplotlib.pyplot as plt
from src import models, data
from tqdm.auto import tqdm
import json
import os
import numpy as np
import copy

# NOTE(review): this repeats `models, data` from the earlier `from src import`
# line; the two statements could be merged into one.
from src import models, data, operators, functional, lens

In [33]:
# Load the model registered under the name "gptj" onto the CUDA device.
# fp16=True presumably requests half-precision weights — verify in src.models.
mt = models.load_model(
    name="gptj",
    fp16=True,
    device="cuda",
)

In [34]:
# Name of the single relation studied in this notebook.
relation_name = "country capital city"

# Load the dataset, keep only the relation with the name above, and take the
# first entry of the filtered result.
dataset = data.load_dataset()
relation = dataset.filter(relation_names=[relation_name])[0]

In [35]:
# Split the relation into `train` and `test` parts with train_size=5
# (presumably five training samples — confirm against `relation.split`).
train, test = relation.split(train_size=5)

In [36]:
# Candidate words: everything in the relation's range followed by the object
# of each training sample.
# NOTE(review): the sample displayed below repeats entries even though
# replace=False, so `_range` itself contains duplicates — confirm whether the
# training objects are already part of `relation.range` and whether the
# repetition is intended.
_range = [*relation.range, *(sample.object for sample in train.samples)]

# Seed NumPy's global RNG, then display a random half of the candidates.
# The sampled array is only shown; it is not assigned to a variable.
np.random.seed(123456)
np.random.choice(_range, size=len(_range) // 2, replace=False)

array(['Canberra', 'Riyadh', 'Berlin', 'New Delhi', 'Cairo', 'Islamabad',
       'Ankara', 'Beijing', 'Lima', 'Islamabad', 'Mexico City',
       'Canberra', 'Tokyo', 'Madrid'], dtype='<U15')

In [37]:
# Run functional.corner_gd on the model with the full candidate word list.
corner = functional.corner_gd(mt=mt, words=_range)

In [38]:
# Build the offset baseline estimator with h_layer=15, then apply it to the
# full relation to obtain the operator.
offset_estimator = operators.OffsetEstimatorBaseline(mt=mt, h_layer=15)
offset_operator = offset_estimator(relation)

relation has > 1 prompt_templates, will use first (The capital city of {} is)


In [39]:
# Apply the logit lens to the offset operator's bias and display the result.
# The optional `get_proba=True` argument is deliberately left off here.
lens.logit_lens(mt=mt, h=offset_operator.bias)

([(' Moscow', 96.562),
  (' Paris', 95.938),
  (' Madrid', 94.312),
  (' Beijing', 94.188),
  (' Tokyo', 93.75),
  (' Berlin', 93.438),
  (' London', 89.875),
  (' Washington', 89.688),
  (' Cairo', 88.875),
  (' Rome', 87.75)],
 {})

In [40]:
# Build the Jacobian ICL-mean estimator with h_layer=15.
estimator = operators.JacobianIclMeanEstimator(mt=mt, h_layer=15)

# Estimate the operator from the training split only.
operator = estimator(relation=train)

relation has > 1 prompt_templates, will use first (The capital city of {} is)


In [41]:
# Display the prompt template stored on the estimated operator.
operator.prompt_template

'<|endoftext|>The capital city of Turkey is Ankara\nThe capital city of Colombia is Bogot\\u00e1\nThe capital city of Pakistan is Islamabad\nThe capital city of Italy is Rome\nThe capital city of Australia is Canberra\nThe capital city of {} is'

In [42]:
from src import editors
from src.utils import dataclasses_utils

# Baseline editor classes to instantiate below.
# NOTE(review): this is a set, so iteration order is not guaranteed to follow
# the order written here.
BASELINE_EDITOR_TYPES = {editors.HiddenBaselineEditor, editors.EmbedBaselineEditor}

# SVD of the operator weight, cast to float32 first.
# NOTE(review): torch.svd is deprecated in favour of torch.linalg.svd, which
# returns Vh rather than V — confirm what the editors expect before migrating.
svd = torch.svd(operator.weight.to(torch.float32))

In [43]:
# Keyword arguments offered to every baseline editor class.
# create_with_optional_kwargs presumably forwards only those each class
# accepts — verify in src.utils.dataclasses_utils.
editor_kwargs = dict(
    h_layer=10,
    rank=100,
    lre=operator,
    svd=svd,
    prompt_template=" {} :",
    mt=mt,
    n_samples=1,
    n_new_tokens=1,
)

# Instantiate each baseline editor in turn; `editor` is rebound on every
# iteration, so only the last one built remains afterwards.
for editor_class in BASELINE_EDITOR_TYPES:
    print(editor_class)
    editor = dataclasses_utils.create_with_optional_kwargs(editor_class, **editor_kwargs)

<class 'src.editors.HiddenBaselineEditor'>
<class 'src.editors.EmbedBaselineEditor'>


In [44]:
import inspect

# Show the full argument spec of HiddenBaselineEditor.__call__ to check which
# arguments it takes.
inspect.getfullargspec(editors.HiddenBaselineEditor.__call__)

FullArgSpec(args=['self', 'subject', 'target'], varargs=None, varkw='kwargs', defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={'return': <class 'src.editors.LinearRelationEditResult'>, 'subject': <class 'str'>, 'target': <class 'str'>, 'kwargs': typing.Any})

In [46]:
# Compute the `hs` and `zs` results for every test subject, requesting three
# h_layer values (10, 12, 15) and z_layer=-1, with the training samples passed
# as `examples`.
hs_by_subj, zs_by_subj = functional.compute_hs_and_zs(
    mt=mt,
    prompt_template=" {} :",
    subjects=[sample.subject for sample in test.samples],
    h_layer=[10, 12, 15],
    z_layer=-1,
    batch_size=8,
    examples=train.samples,
)

In [50]:
# Display the entry for one subject; the dict shown below is keyed by the
# requested h_layer values (10, 12, 15).
hs_by_subj["United States"]

{10: tensor([ 0.0481,  0.7690,  0.6006,  ...,  2.2207,  1.8809, -0.6162],
        device='cuda:0', dtype=torch.float16),
 12: tensor([-0.0894,  1.6426,  0.0806,  ...,  1.2031,  3.1211, -0.7783],
        device='cuda:0', dtype=torch.float16),
 15: tensor([ 0.1697,  1.0459,  0.4585,  ...,  0.7285,  4.1094, -2.3164],
        device='cuda:0', dtype=torch.float16)}