In [36]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [37]:
import torch
import numpy as np
import json
from tqdm.auto import tqdm
import random
import transformers

import os
import sys
sys.path.append('..')

from relations import estimate
from util import model_utils
from baukit import nethook
from operator import itemgetter
from wordhoard import Antonyms

In [38]:
MODEL_NAME = "facebook/galactica-6.7b"  # gpt2-{medium,large,xl} or EleutherAI/gpt-j-6B
n_embd_field = "hidden_size"

mt = model_utils.ModelAndTokenizer(MODEL_NAME, low_cpu_mem_usage=True, torch_dtype=torch.float16)

model = mt.model
tokenizer = mt.tokenizer
# tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

print(f"{MODEL_NAME} ==> device: {model.device}, memory: {model.get_memory_footprint()}")

facebook/galactica-6.7b ==> device: cuda:0, memory: 13314719744


In [39]:
word = "giant"
Antonyms(search_string=word).find_antonyms()

['dwarf', 'minor']

In [40]:
import time
with open("data/antonym_word_list.txt") as f:
    lines = f.readlines()
    word_pairs = []
    for s in lines[0::2]:
        word_pairs.append(s.strip().split(" - "))

antonym_dict = {}
for pair in tqdm(word_pairs):
    word = pair[0]
    print(word)
    antonyms = Antonyms(search_string=word).find_antonyms()
    time.sleep(0.2)
    antonym_dict[word] = antonyms

  0%|          | 0/60 [00:00<?, ?it/s]

achieve
giant
random
afraid
gloomy
rigid
ancient
individual
shame
arrive
innocent
simple
arrogant
knowledge
single
attack
liquid
sunny
blunt
marvelous
timid
brave
noisy
toward
cautious
partial
tragic
complex
passive
transparent
crazy


TypeError: Antonyms._backoff_handler() takes 1 positional argument but 2 were given

In [None]:
for w in antonym_dict:
    print(w, antonym_dict[w])

achieve ['abandon', 'abstain', 'avoid', 'bear', 'behave', 'break', 'cancel', 'cease', 'compliment', 'cost', 'create', 'demolish', 'deny', 'depart', 'desist', 'destroy', 'direct', 'discontinue', 'discourage', 'disorganize', 'disregard', 'dissuade', 'dodge', 'end', 'exceed', 'fail', 'fail at', 'fail in', 'fail to reach', 'fall short of', 'forfeit', 'forget', 'give', 'give in', 'give up', 'halt', 'hinder', 'hold', 'idle', 'ignore', 'keep', 'keep from', 'languish', 'leave', 'loaf', 'lose', 'maintain', 'mend', 'miss', 'miss out on', 'neglect', 'not finish', 'obey', 'omit', 'overlook', 'pass', 'prevent', 'question', 'raze', 'refuse', 'rest', 'ruin', 'shirk', 'skimp', 'slight', 'slur', 'smooth', 'spend', 'stop', 'surrender', 'throw away', 'unsettle', 'wait', 'waste', 'wreck', 'yield']
giant ['dwarf', 'minor']
random ['definite', 'particular', 'specific']
afraid ['able', 'adventuresome', 'adventurous', 'aggressive', 'assertive', 'assured', 'audacious', 'aweless', 'backboned', 'ballsy', 'believ

In [41]:
prompt = """the antonym of light is dark
the antonym of giant is dwarf
the antonym of rigid is adaptable
the antonym of {} is"""

filter_by_model_knowledge = []
for word in antonym_dict:
    txt, ret_dict = model_utils.generate_fast(
        model, tokenizer, 
        prompts=[prompt.format(word)], max_new_tokens=10, 
        get_answer_tokens=True, argmax_greedy=True
    )
    print(word, " ==> ", [(ans['token'], ans['p']) for ans in ret_dict['answer'][0]['candidates']])
    # # tick = hyper.startswith(ret_dict['answer'][0]['top_token'].strip())
    # tick = hyper == ret_dict['answer'][0]['top_token'].strip()
    # print(f"{bs} >> {hyper} ===> {[(ans['token'], ans['p']) for ans in ret_dict['answer'][0]['candidates']]} :: {tick}")
    # if(tick):
    #     filter_by_model_knowledge.append((bs, hyper))


achieve  ==>  [(' fail', 0.2705), (' avoid', 0.1298), (' lose', 0.0706), (' prevent', 0.0638), (' achieve', 0.0164)]
giant  ==>  [(' dwarf', 0.5088), (' small', 0.1306), (' tiny', 0.0417), (' giant', 0.0141), (' light', 0.0067)]
random  ==>  [(' regular', 0.1708), (' systematic', 0.1616), (' predictable', 0.1289), (' ordered', 0.0581), (' order', 0.0533)]
afraid  ==>  [(' not', 0.1445), (' happy', 0.0637), (' calm', 0.0473), (' safe', 0.0469), (' af', 0.0438)]
gloomy  ==>  [(' che', 0.1497), (' happy', 0.1193), (' bright', 0.0701), (' l', 0.0609), (' beautiful', 0.0479)]
rigid  ==>  [(' flexible', 0.2406), (' soft', 0.1288), (' elastic', 0.1019), (' malle', 0.0334), (' fl', 0.0308)]
ancient  ==>  [(' modern', 0.7485), (' new', 0.0747), (' young', 0.0659), (' recent', 0.0517), (' contemporary', 0.0095)]
individual  ==>  [(' social', 0.2747), (' group', 0.1666), (' collective', 0.0979), (' society', 0.05), (' mass', 0.0349)]
shame  ==>  [(' pr', 0.1874), (' gl', 0.1102), (' honor', 0.090

In [57]:
objects = []
for word in antonym_dict:
    antonyms = [" " + ant for ant in antonym_dict[word]]
    objects += [tokenizer.decode(tokenizer(ant)['input_ids'][0]) for ant in antonyms]
print(objects)


[' abandon', ' abst', ' avoid', ' bear', ' behave', ' break', ' cancel', ' cease', ' compl', ' cost', ' create', ' demol', ' den', ' depart', ' des', ' destroy', ' direct', ' dis', ' discour', ' dis', ' disreg', ' diss', ' d', ' end', ' exceed', ' fail', ' fail', ' fail', ' fail', ' fall', ' for', ' forget', ' give', ' give', ' give', ' halt', ' hinder', ' hold', ' idle', ' ignore', ' keep', ' keep', ' l', ' leave', ' lo', ' lose', ' maintain', ' m', ' miss', ' miss', ' neglect', ' not', ' obey', ' omit', ' over', ' pass', ' prevent', ' question', ' ra', ' ref', ' rest', ' ru', ' sh', ' sk', ' slight', ' sl', ' smooth', ' spend', ' stop', ' sur', ' throw', ' un', ' wait', ' waste', ' w', ' yield', ' dwarf', ' minor', ' definite', ' particular', ' specific', ' able', ' advent', ' advent', ' aggressive', ' asser', ' assured', ' aud', ' aw', ' back', ' ball', ' belie', ' bell', ' bold', ' brass', ' bra', ' b', ' calm', ' can', ' certain', ' clear', ' cock', ' collected', ' com', ' compose

In [58]:
from relations.corner import CornerEstimator
corner_estimator = CornerEstimator(
    model=model, tokenizer=tokenizer,
    ln_f_name= "model.decoder.final_layer_norm", 
    unembedder_module_name="lm_head"
)

In [59]:
simple_corner = corner_estimator.estimate_simple_corner(objects, scale_up=70)
print(simple_corner.norm().item(), corner_estimator.get_vocab_representation(simple_corner, get_logits=True))

18.59375 [(' un', 62.781), (' ', 44.125), (' in', 37.094), (' a', 36.406), (' d', 36.219)]


In [60]:
lin_inv_corner = corner_estimator.estimate_lin_inv_corner(objects, target_logit_value=50)
print(lin_inv_corner.norm().item(), corner_estimator.get_vocab_representation(lin_inv_corner, get_logits=True))

calculating inverse of unbedding weights . . .
138.0 [(' un', 16.375), (' good', 15.844), (' hard', 15.406), (' ins', 15.398), (' uns', 14.609)]


In [61]:
lst_sq_corner = corner_estimator.estimate_corner_lstsq_solve(objects, target_logit=50)
print(lst_sq_corner.norm().item(), corner_estimator.get_vocab_representation(lst_sq_corner, get_logits=True))

nan [('<pad>', nan), ('<s>', nan), ('</s>', nan), ('[START_REF]', nan), ('<unk>', nan)]


In [None]:
# avg_corner = corner_estimator.estimate_average_corner_with_gradient_descent(objects, average_on=5, target_logit_value=50, verbose=False)
# print(avg_corner.norm().item(), corner_estimator.get_vocab_representation(avg_corner))

In [None]:
def check_with_test_cases(relation_operator):
    test_cases = [
        (b, -1, h) for b, h in filter_by_model_knowledge[20:]
    ]
    for subject, subject_token_index, target in test_cases:
        answer = relation_operator(
            subject,
            subject_token_index=subject_token_index,
            device=model.device,
            return_top_k=5,
        )
        print(f"{subject}, target: {target}   ==>   predicted: {answer}")

In [None]:
relation = estimate.RelationOperator(
    model = model,
    tokenizer = tokenizer,
    relation = prompt,
    layer = 15,
    weight = torch.eye(getattr(model.config, n_embd_field)).to(model.dtype).to(model.device),
    bias = lst_sq_corner,

    layer_name_format = "model.decoder.layers.{}",
    ln_f_name = "model.decoder.final_layer_norm"
)
check_with_test_cases(relation)

summer, target: season   ==>   predicted: [' season', ' group', ' color', ' wind', ' plant']
meat, target: food   ==>   predicted: [' science', ' group', ' color', ' plant', ' food']
doll, target: toy   ==>   predicted: [' tree', ' shape', ' fish', ' star', ' group']
gold, target: metal   ==>   predicted: [' metal', ' tree', ' wind', ' star', ' color']
round, target: shape   ==>   predicted: [' shape', ' plant', ' wind', ' food', ' color']
breeze, target: wind   ==>   predicted: [' color', ' tree', ' metal', ' plant', ' season']
man, target: human   ==>   predicted: [' group', ' person', ' plant', ' color', ' food']
hologram, target: picture   ==>   predicted: [' metal', ' color', ' device', ' tree', ' plant']
paper, target: material   ==>   predicted: [' science', ' plant', ' wind', ' material', ' group']
photographer, target: person   ==>   predicted: [' group', ' fish', ' tree', ' game', ' drug']
documentary, target: film   ==>   predicted: [' film', ' material', ' science', ' group

In [None]:
def get_averaged_JB(top_performers, relation_prompt, num_icl = 3, calculate_at_lnf = False):
    try:
        jbs = []
        for s, s_idx, o in tqdm(top_performers):
            others = set(top_performers) - {(s, s_idx, o)}
            others = random.sample(list(others), k = min(num_icl, len(list(others)))) 
            prompt = ""
            prompt += "\n".join(relation_prompt.format(s_other) + f" {o_other}." for s_other, idx_other, o_other in others) + "\n"
            prompt += relation_prompt
            print("subject: ", s)
            print(prompt)

            jb, _ = estimate.relation_operator_from_sample(
                model, tokenizer,
                s, prompt,
                subject_token_index= s_idx,
                layer = 15,
                device = model.device,
                # calculate_at_lnf = calculate_at_lnf

                layer_name_format = "model.decoder.layers.{}",
                ln_f_name = "model.decoder.final_layer_norm",
                n_layer_field = "num_hidden_layers"
            )
            print(jb.weight.norm(), jb.bias.norm())
            print()
            jbs.append(jb)
        
        weight = torch.stack([jb.weight for jb in jbs]).mean(dim=0)
        bias  = torch.stack([jb.bias for jb in jbs]).mean(dim=0)

        return weight, bias
    except RuntimeError as e:
        if(str(e).startswith("CUDA out of memory")):
            print("CUDA out of memory")
        if(num_icl > 1):
            num_icl -= 1
            print("trying with smaller icl >> ", num_icl)
            return get_averaged_JB(top_performers, relation_prompt, num_icl, calculate_at_lnf)
        else:
            raise Exception("RuntimeError >> can't calculate Jacobian with minimum number of icl examples")

def get_multiple_averaged_JB(top_performers, relation_prompt, N = 3, num_icl = 2, calculate_at_lnf = False):
    weights_and_biases = []
    sample_size = min(len(top_performers), num_icl + 2)
    for _ in tqdm(range(N)):
        cur_sample = random.sample(top_performers, k = sample_size)
        weight, bias = get_averaged_JB(cur_sample, relation_prompt, num_icl, calculate_at_lnf)
        weights_and_biases.append({
            'weight': weight,
            'bias'  : bias
        })
    return weights_and_biases

In [None]:
samples = [
        (b, -1, h) for b, h in filter_by_model_knowledge[:20]
    ]
print(samples)

weights_and_biases = get_multiple_averaged_JB(
    samples, 
    relation_prompt=" {} is a", 
    N = 3, 
    calculate_at_lnf=False
)

[('oak', -1, 'tree'), ('diamond', -1, 'gem'), ('happiness', -1, 'feeling'), ('family', -1, 'group'), ('thesaurus', -1, 'dictionary'), ('crow', -1, 'bird'), ('tennis', -1, 'sport'), ('salmon', -1, 'fish'), ('flower', -1, 'plant'), ('rosemary', -1, 'herb'), ('cucumber', -1, 'vegetable'), ('roulette', -1, 'game'), ('physics', -1, 'science'), ('earth', -1, 'planet'), ('sun', -1, 'star'), ('coffee', -1, 'beverage'), ('car', -1, 'vehicle'), ('yellow', -1, 'color'), ('fan', -1, 'device'), ('judaism', -1, 'religion')]


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

subject:  cucumber
 family is a group.
 roulette is a game.
 {} is a
tensor(43.0938, device='cuda:0', dtype=torch.float16) tensor(248.8750, device='cuda:0', dtype=torch.float16)

subject:  roulette
 coffee is a beverage.
 cucumber is a vegetable.
 {} is a
tensor(57.5938, device='cuda:0', dtype=torch.float16) tensor(274.2500, device='cuda:0', dtype=torch.float16)

subject:  family
 roulette is a game.
 coffee is a beverage.
 {} is a
tensor(46.7500, device='cuda:0', dtype=torch.float16) tensor(253.7500, device='cuda:0', dtype=torch.float16)

subject:  coffee
 roulette is a game.
 cucumber is a vegetable.
 {} is a
tensor(40.3438, device='cuda:0', dtype=torch.float16) tensor(264., device='cuda:0', dtype=torch.float16)



  0%|          | 0/4 [00:00<?, ?it/s]

subject:  happiness
 roulette is a game.
 yellow is a color.
 {} is a
tensor(38., device='cuda:0', dtype=torch.float16) tensor(259.7500, device='cuda:0', dtype=torch.float16)

subject:  roulette
 happiness is a feeling.
 yellow is a color.
 {} is a
tensor(53.2812, device='cuda:0', dtype=torch.float16) tensor(281.2500, device='cuda:0', dtype=torch.float16)

subject:  yellow
 fan is a device.
 roulette is a game.
 {} is a
tensor(43.8750, device='cuda:0', dtype=torch.float16) tensor(227.2500, device='cuda:0', dtype=torch.float16)

subject:  fan
 roulette is a game.
 happiness is a feeling.
 {} is a
tensor(52.8750, device='cuda:0', dtype=torch.float16) tensor(256.2500, device='cuda:0', dtype=torch.float16)



  0%|          | 0/4 [00:00<?, ?it/s]

subject:  happiness
 cucumber is a vegetable.
 diamond is a gem.
 {} is a
tensor(40.4375, device='cuda:0', dtype=torch.float16) tensor(250.3750, device='cuda:0', dtype=torch.float16)

subject:  diamond
 earth is a planet.
 happiness is a feeling.
 {} is a
tensor(45.1875, device='cuda:0', dtype=torch.float16) tensor(254.3750, device='cuda:0', dtype=torch.float16)

subject:  cucumber
 earth is a planet.
 happiness is a feeling.
 {} is a
tensor(40.7812, device='cuda:0', dtype=torch.float16) tensor(249.6250, device='cuda:0', dtype=torch.float16)

subject:  earth
 cucumber is a vegetable.
 diamond is a gem.
 {} is a
tensor(49.3125, device='cuda:0', dtype=torch.float16) tensor(235.6250, device='cuda:0', dtype=torch.float16)



In [None]:
relation_operator = estimate.RelationOperator(
    model = model,
    tokenizer= tokenizer,
    relation = prompt,
    layer = 15,
    weight = torch.stack(
        [wb['weight'] for wb in weights_and_biases]
    ).mean(dim=0),
    # bias = torch.stack(
    #     [wb['bias'] for wb in weights_and_biases]
    # ).mean(dim=0),
    bias = lst_sq_corner,

    layer_name_format = "model.decoder.layers.{}",
    ln_f_name = "model.decoder.final_layer_norm",
)

check_with_test_cases(relation_operator)

summer, target: season   ==>   predicted: [' season', ' color', ' wind', ' herb', ' plant']
meat, target: food   ==>   predicted: [' food', ' dish', ' vegetable', ' material', ' meat']
doll, target: toy   ==>   predicted: [' toy', ' shape', ' picture', ' bird', ' person']
gold, target: metal   ==>   predicted: [' metal', ' material', ' gem', ' color', ' religion']
round, target: shape   ==>   predicted: [' shape', ' sport', ' season', ' wind', ' galaxy']
breeze, target: wind   ==>   predicted: [' wind', ' season', ' feeling', ' herb', ' color']
man, target: human   ==>   predicted: [' human', ' person', ' vehicle', ' fish', ' sport']
hologram, target: picture   ==>   predicted: [' device', ' picture', ' material', ' science', ' film']
paper, target: material   ==>   predicted: [' material', ' wind', ' dish', ' dictionary', ' vehicle']
photographer, target: person   ==>   predicted: [' person', ' science', ' star', ' human', ' sport']
documentary, target: film   ==>   predicted: [' film

In [None]:
corner_estimator.get_vocab_representation(
    torch.stack(
        [wb['bias'] for wb in weights_and_biases]
    ).mean(dim=0)
)

[' type', ' kind', ' good', ' ', ' very']