In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np
import json
from tqdm import tqdm

In [3]:
import os
import sys
sys.path.append('..')

from relations import estimate
from util import model_utils
from dsets.counterfact import CounterFactDataset

In [4]:
MODEL_NAME = "gpt2-xl"  # gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B
mt = model_utils.ModelAndTokenizer(MODEL_NAME, low_cpu_mem_usage=False)

model = mt.model
tokenizer = mt.tokenizer
tokenizer.pad_token = tokenizer.eos_token

In [5]:
subject_arr = [
    "The Space Needle",
    "The Eiffel Tower",
    "The Great Wall",
    "Niagara Falls",
    "Taj Mahal"
]

relation = "{} is located in the country of"

prompt = [relation.format(sub) for sub in subject_arr]

txt, ret_dict = model_utils.generate_fast(
    model, tokenizer,
    prompt,
    argmax_greedy = False,
    max_out_len= 20,
    # debug=True,
    get_answer_tokens=True,
)

model_utils.print_formatted_results(prompt, txt, ret_dict)

The Space Needle is located in the country of
The Space Needle is located in the country of Seattle, in the city of Seattle. The Space
p(answer):  p(' Washington'[2669])=0.1645, p(' Seattle'[7312])=0.106, p(' the'[262])=0.0764, p(' Japan'[2869])=0.0416, p(' Canada'[3340])=0.0377

The Eiffel Tower is located in the country of
The Eiffel Tower is located in the country of France. The Eiffel Tower is the
p(answer):  p(' France'[4881])=0.9608, p(' Paris'[6342])=0.0046, p(' the'[262])=0.003, p(' Belgium'[15664])=0.0029, p(' French'[4141])=0.0029

The Great Wall is located in the country of
The Great Wall is located in the country of China, but is also part of the country's "
p(answer):  p(' China'[2807])=0.4976, p(' the'[262])=0.0259, p(' Xin'[25426])=0.0201, p(' Yun'[20757])=0.015, p(' J'[449])=0.0141

Niagara Falls is located in the country of
Niagara Falls is located in the country of Ontario, in the province of Ontario.  The
p(answer):  p(' Ontario'[10553])=0.3898, p(' Canada'[3340])=0.

In [6]:
is_located_in = estimate.estimate_relation_operator(
    model,
    tokenizer,
    "The Space Needle",
    "{} is located in the country of",
    layer=25,
    device=model.device,
)

is_located_in.misc

{'Jh_norm': 44.985111236572266,
 'bias_norm': 372.6827087402344,
 'h_info': {'h_index': 3, 'token_id': 293, 'token': 'le'},
 'consider_residual': False}

In [22]:
is_located_in_res = estimate.estimate_relation_operator(
    model,
    tokenizer,
    "The Space Needle",
    "{} is located in the country of",
    layer=25,
    # consider_residual= True,
    # approximate_rank= 1,
    device=model.device,
    calculate_at_lnf= True,
)

is_located_in_res.misc

{'Jh_norm': 6.443312644958496,
 'bias_norm': 13.347543716430664,
 'h_info': {'h_index': 3, 'token_id': 293, 'token': 'le'},
 'consider_residual': False}

In [17]:
test_cases = [
    ("The Great Wall", -1, "China"),
    ("Niagara Falls", -2, "Canada"),
    ("Valdemarsvik", -1, "Sweden"),
    ("Kyoto University", -2, "Japan"),
    ("Hattfjelldal", -1, "Norway"),
    ("Ginza", -1, "Japan"),
    ("Sydney Hospital", -2, "Australia"),
    ("Mahalangur Himal", -1, "Nepal"),
    ("Higashikagawa", -1, "Japan"),
    ("Trento", -1, "Italy"),
    ("Taj Mahal", -1, "India")
]

In [18]:
for subject, subject_token_index, target in test_cases:
    objects = is_located_in(
        subject,
        subject_token_index=subject_token_index,
        device=model.device,
        return_top_k=5,
    )
    print(f"{subject}, target: {target}   ==>   predicted: {objects}")

The Great Wall, target: China   ==>   predicted: [' China', ' Hong', ' Beijing', ' Chinese', ' Shen']
Niagara Falls, target: Canada   ==>   predicted: [' Canada', ' Ontario', ' Niagara', ' New', ' British']
Valdemarsvik, target: Sweden   ==>   predicted: [' Iceland', ' Denmark', ' Sweden', ' Finland', ' Norway']
Kyoto University, target: Japan   ==>   predicted: [' Japan', ' Japanese', ' Finland', ' Hawaii', ' Tokyo']
Hattfjelldal, target: Norway   ==>   predicted: [' Iceland', ' Denmark', ' Norway', ' Sweden', ' Finland']
Ginza, target: Japan   ==>   predicted: [' Japan', ' Singapore', ' China', ' Seattle', ' Hong']
Sydney Hospital, target: Australia   ==>   predicted: [' Australia', ' Sydney', ' Australian', ' Singapore', ' Canberra']
Mahalangur Himal, target: Nepal   ==>   predicted: [' Nepal', ' Tibet', ' Bh', ' Nep', ' China']
Higashikagawa, target: Japan   ==>   predicted: [' Japan', ' Japanese', ' Tokyo', ' Canada', ' Seattle']
Trento, target: Italy   ==>   predicted: [' Sweden'

In [24]:
for subject, subject_token_index, target in test_cases:
    objects = is_located_in_res(
        subject,
        subject_token_index=subject_token_index,
        device=model.device,
        return_top_k=5,
    )
    print(f"{subject}, target: {target}   ==>   predicted: {objects}")

The Great Wall, target: China   ==>   predicted: [' China', ' Shen', ' Chinese', ' Qing', ' Beijing']
Niagara Falls, target: Canada   ==>   predicted: [' Niagara', 'Toronto', ' Ontario', ' Cuomo', ' Erie']
Valdemarsvik, target: Sweden   ==>   predicted: [' Nordic', ' Greenland', 'vik', ' Swedish', ' Icelandic']
Kyoto University, target: Japan   ==>   predicted: ['Japanese', ' Japanese', 'Tok', 'Japan', ' Japan']
Hattfjelldal, target: Norway   ==>   predicted: [' Nordic', ' Denmark', ' Iceland', ' Scandinavian', ' Icelandic']
Ginza, target: Japan   ==>   predicted: [' Tokyo', 'Tok', ' Japan', 'Japan', ' Japanese']
Sydney Hospital, target: Australia   ==>   predicted: [' Sydney', ' NSW', ' Australia', ' Australian', 'Australia']
Mahalangur Himal, target: Nepal   ==>   predicted: [' Himal', ' Tibetan', ' Nepal', ' Nep', ' Tibet']
Higashikagawa, target: Japan   ==>   predicted: [' Japanese', 'Japanese', ' Japan', ' Tokyo', 'Tok']
Trento, target: Italy   ==>   predicted: ['meta', ' Budapest

In [47]:
# operator = estimate.RelationOperator(
#         model = model, tokenizer= tokenizer,
#         layer = 25, relation = relation,
#         weight= is_located_in_res.weight, 
#         bias= is_located_in_res.bias,
#         consider_residual = True,
#     )

# for subject, subject_token_index, target in test_cases:
#     objects = operator(
#         subject,
#         subject_token_index=subject_token_index,
#         device=model.device,
#         return_top_k=5,
#     )
#     print(f"{subject}, target: {target}   ==>   predicted: {objects}")

The Great Wall, target: China   ==>   predicted: [' China', ' Japan', ' Russia', ' Kazakhstan', ' Mongolia']
Niagara Falls, target: Canada   ==>   predicted: [' Sweden', ' Finland', ' Iceland', ' Canada', ' Denmark']
Valdemarsvik, target: Sweden   ==>   predicted: [' Denmark', ' Sweden', ' Finland', ' Iceland', ' Norway']
Kyoto University, target: Japan   ==>   predicted: [' Japan', ' China', ' Finland', ' Sweden', ' the']
Hattfjelldal, target: Norway   ==>   predicted: [' Sweden', ' Iceland', ' Finland', ' Denmark', ' Norway']
Ginza, target: Japan   ==>   predicted: [' Japan', ' China', ' Finland', ' the', ' Sweden']
Sydney Hospital, target: Australia   ==>   predicted: [' Japan', ' China', ' Kazakhstan', ' Australia', ' Singapore']
Mahalangur Himal, target: Nepal   ==>   predicted: [' China', ' Kazakhstan', ' Bh', ' Japan', ' Iceland']
Higashikagawa, target: Japan   ==>   predicted: [' Japan', ' China', ' the', ' Yuk', ' Washington']
Trento, target: Italy   ==>   predicted: [' Sweden

In [43]:
# relation_operator = estimate.RelationOperator(
#     model = model, tokenizer= tokenizer,
#     layer = 20, relation = relation,
#     weight= is_located_in.weight, 
#     bias= is_located_in.bias
# )

# for subject, subject_token_index, target in test_cases:
#     objects = relation_operator(
#         subject,
#         subject_token_index=subject_token_index,
#         device=model.device,
#         return_top_k=5,
#     )
#     print(f"{subject}, target: {target}   ==>   predicted: {objects}")

In [6]:
########################################################
relation_id = "P17"
layer = 25
relation = "{} is located in the country of"
cache_path = "cached_jacobians"

consider_residual = True
approximate_rank = 3
#######################################################

In [34]:
# counterfact = CounterFactDataset("../data/")

# cf_relation = [c['requested_rewrite'] for c in counterfact if c["requested_rewrite"]["relation_id"] == relation_id]
# print("filtered >>> ", len(cf_relation))

In [35]:
# correct_predict = []

# for c in cf_relation:
#     print(c['subject'], " target: ", c['target_true']['str'], end = ", ")
#     txt, ret_dict = model_utils.generate_fast(
#         model, tokenizer,
#         [relation.format(c['subject'])],
#         argmax_greedy = True,
#         max_out_len= 20,
#         get_answer_tokens=True,
#     )
#     answer = ret_dict['answer'][0]['top_token']
#     print("predict: ", answer, end = " >>>> ")

#     ok = answer.strip() == c['target_true']['str']
#     print(ok)

#     if(ok):
#         correct_predict.append(c)

# with open(f"correct_prediction_{relation_id}__{MODEL_NAME}", "w") as f:
#     json.dump(correct_predict, f)


In [36]:
# for c in correct_predict:
#     print(c['subject'], " target: ", c['target_true']['str'])

In [37]:
with open(f"correct_prediction_{relation_id}__{MODEL_NAME}.json") as f:
    correct_predict = json.load(f)

In [39]:
calculated_relations = []

for c in tqdm(correct_predict):
    cur_calculation = estimate.estimate_relation_operator(
        model, tokenizer,
        c["subject"],
        relation,
        layer=layer,
        device=model.device,
        consider_residual = True,
        approximate_rank = approximate_rank
    )

    # print(cur_calculation.misc)

    calculated_relations.append({
        "subject": c["subject"],
        "request": c,
        "weight": cur_calculation.weight.cpu().numpy(),
        "bias": cur_calculation.bias.cpu().numpy(),
        "misc": cur_calculation.misc
    })

100%|██████████| 266/266 [17:10<00:00,  3.87s/it]


In [40]:
np.savez(
    f"{cache_path}/residual_jacobian_calculations_{relation_id}__layer_{layer}__approx_rank_{approximate_rank}.npz", 
    jacobians = calculated_relations, 
    allow_pickle = True
)

In [51]:
# calculated_relations = np.load(f"jacobian_calculations_P17__layer_20.npz", allow_pickle = True)["jacobians"]

In [52]:
# weights_collection = [torch.tensor(t["weight"], device=model.device) for t in calculated_relations]
# bias_collection = [torch.tensor(t["bias"], device=model.device) for t in calculated_relations]

In [53]:
# weights = torch.stack(weights_collection).mean(dim = 0)
# bias = torch.stack(bias_collection).mean(dim = 0)

In [49]:
for calc in calculated_relations:
    print("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
    print(f'(s = {calc["request"]["subject"]}, r = {relation} [{relation_id}], o = {calc["request"]["target_true"]["str"]})')
    print(calc["misc"])
    print("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")

    is_located_in = estimate.RelationOperator(
        model = model, tokenizer= tokenizer,
        layer = layer, relation = relation,
        weight= torch.tensor(calc['weight'], device = model.device), 
        bias= torch.tensor(calc['bias'], device=model.device),
        consider_residual = consider_residual,
    )

    for subject, subject_token_index, target in test_cases:
        objects = is_located_in(
            subject,
            subject_token_index=subject_token_index,
            device=model.device,
            return_top_k=5,
        )
        print(f"{subject}, target: {target}   ==>   predicted: {objects}")
    print()
    print()

zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
(s = Autonomous University of Madrid, r = {} is located in the country of [P17], o = Spain)
{'Jh_norm': tensor(172.5770, device='cuda:0'), 'bias_norm': 394.9103088378906, 'h_info': {'h_index': 4, 'token_id': 14708, 'token': ' Madrid'}, 'consider_residual': (True, 3, tensor(43.3716, device='cuda:0'))}
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
The Great Wall, target: China   ==>   predicted: [' Spain', ' And', ' Gran', ' the', ' Cast']
Niagara Falls, target: Canada   ==>   predicted: [' Spain', ' And', ' the', ' Ar', ' Gran']
Valdemarsvik, target: Sweden   ==>   predicted: [' Spain', ' And', ' Gran', ' the', ' Ar']
Kyoto University, target: Japan   ==>   predicted: [' Spain', ' the', ' And', ' Gran', ' Se']
Hattfjelldal, target: Norway   ==>   predicted: [' And', ' Se', ' Spain', ' Ar', ' the']
Ginza, targe

In [31]:
wh_norms = np.array([calc["misc"]['Jh_norm'] for calc in calculated_relations])
print(f"`Jh` contribution = {wh_norms.mean()} +/- {wh_norms.std()}") 

bias_norms = np.array([calc["misc"]['bias_norm'] for calc in calculated_relations])
print(f"`bias` contribution = {bias_norms.mean()} +/- {bias_norms.std()}") 

`Jh` contribution = 17.88306264859393 +/- 10.812521437625106
`bias` contribution = 449.51012426390685 +/- 50.287596469456304


In [58]:
calc["misc"]['bias_norm']

424.5807800292969

In [64]:
wh_norms = np.array([calc["misc"]['Jh_norm'].item() for calc in calculated_relations])
print(f"`h + Jh` contribution = {wh_norms.mean()} +/- {wh_norms.std()}") 

bias_norms = np.array([calc["misc"]['bias_norm'] for calc in calculated_relations])
print(f"`bias` contribution = {bias_norms.mean()} +/- {bias_norms.std()}") 

`h + Jh` contribution = 176.67704606594 +/- 211.29306737394754
`bias` contribution = 451.78081724697483 +/- 204.08395684143662


In [66]:
calculated_relation_collections = []

for c in tqdm(correct_predict):
    relation_collection = estimate.estimate_relation_operator__for_all_subject_tokens(
        model, tokenizer,
        c["subject"],
        relation,
        layer=layer,
        device=model.device,
        consider_residual = True,
        approximate_rank = approximate_rank
    )

    calculated_relation_collections.append({
        "subject": c["subject"],
        "request": c,
        "all_weights_and_biases": [
            {
                "weight": calc.weight.cpu().numpy(),
                "bias": calc.bias.cpu().numpy(),
                "misc": calc.misc
            } for calc in relation_collection
        ]
    })

100%|██████████| 266/266 [1:24:53<00:00, 19.15s/it]


In [67]:
np.savez(
    f"{cache_path}/residual_jacobian_calculations__all_sub_toks__{relation_id}__layer_{layer}__approx_rank_{approximate_rank}.npz", 
    jacobians = calculated_relation_collections, 
    allow_pickle = True
)

In [9]:
calculated_relation_collections = np.load(
    f"{cache_path}/residual_jacobian_calculations__all_sub_toks__{relation_id}__layer_{layer}__approx_rank_{approximate_rank}.npz", 
    allow_pickle = True
)["jacobians"]

In [12]:
for cur_operators in calculated_relation_collections:
    print("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
    print(f'(s = {cur_operators["request"]["subject"]}, r = {relation} [{relation_id}], o = {cur_operators["request"]["target_true"]["str"]})')
    print("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
    for calc in cur_operators["all_weights_and_biases"]:
        print("----------------------------------------------------------------------------------------------------")
        print(calc["misc"])
        print("----------------------------------------------------------------------------------------------------")

        is_located_in = estimate.RelationOperator(
            model = model, tokenizer= tokenizer,
            layer = layer, relation = relation,
            weight= torch.tensor(calc['weight'], device = model.device), 
            bias= torch.tensor(calc['bias'], device=model.device)
        )

        for subject, subject_token_index, target in test_cases:
            objects = is_located_in(
                subject,
                subject_token_index=subject_token_index,
                device=model.device,
                return_top_k=5,
            )
            print(f"{subject}, target: {target}   ==>   predicted: {objects}")
        print("----------------------------------------------------------------------------------------------------")
        print()
    print()
    print()

zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
(s = Autonomous University of Madrid, r = {} is located in the country of [P17], o = Spain)
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
----------------------------------------------------------------------------------------------------
{'Jh_norm': 3743.695556640625, 'bias_norm': 3783.9189453125, 'h_info': {'h_index': 0, 'token_id': 16541, 'token': 'Aut'}, 'consider_residual': (True, 3, 38.75907516479492)}
----------------------------------------------------------------------------------------------------
The Great Wall, target: China   ==>   predicted: ['atan', ' nine', ' seated', ' situated', 'zar']
Niagara Falls, target: Canada   ==>   predicted: ['atan', ' nine', ' seated', ' situated', 'zar']
Valdemarsvik, target: Sweden   ==>   predicted: ['atan', ' nine', ' seated', ' situated', 'zar']
Kyoto Universi