In [6]:
import os
# When the scratch directory is present, redirect the Hugging Face cache there;
# otherwise the environment is left untouched.
# NOTE(review): no cell visible here imports transformers -- confirm this
# setting is still needed by this notebook.
if os.path.isdir('/scratch/dmpowell'):
    os.environ.update(TRANSFORMERS_CACHE='/scratch/dmpowell/.cache/huggingface')
# Echo the effective value (None when the variable is unset).
print(os.environ.get('TRANSFORMERS_CACHE'))

import numpy as np

import pandas as pd
import json
import janitor

from ast import literal_eval

/scratch/dmpowell/.cache/huggingface


In [12]:
def load_result(filename):
    """Read a results CSV into a DataFrame.

    The ``fwd_choices`` and ``rev_choices`` cells are passed through
    ``ast.literal_eval`` while reading, so values stored as Python literals
    (e.g. a list written out as text) come back as Python objects rather
    than strings.

    Parameters
    ----------
    filename : path or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
    """
    literal_columns = ('fwd_choices', 'rev_choices')
    parsers = {column: literal_eval for column in literal_columns}
    results = pd.read_csv(filename, converters=parsers)
    return results



In [13]:
# define reporting function
def _to_long_results(df):
    """Reshape a results frame to one row per (item, measure, query direction).

    Adds ``chance_fwd`` / ``chance_rev`` as ``1 / len(choices)`` for each row,
    keeps the identifying columns plus the four ``correct_*`` / ``chance_*``
    columns, and stacks those four into a single ``value`` column. The stacked
    column name is split on ``'_'`` into ``var`` (``'correct'`` or ``'chance'``)
    and ``query_type`` (``'fwd'`` or ``'rev'``). A ``test_group`` label is
    derived from whether ``property`` starts with ``"category_"``.
    """
    id_cols = ['entity', 'token_type', 'subj', 'property', 'edit', 'query_fwd', 'query_rev']
    measure_cols = ['correct_fwd', 'correct_rev', 'chance_fwd', 'chance_rev']

    stacked = (
        df
        .assign(
            # Per-column map instead of a row-wise DataFrame.apply.
            chance_fwd=lambda d: d['fwd_choices'].map(lambda choices: 1 / len(choices)),
            chance_rev=lambda d: d['rev_choices'].map(lambda choices: 1 / len(choices)),
        )
        .filter(id_cols + measure_cols)
        .melt(id_vars=id_cols, var_name='measure', value_name='value')
    )

    # 'correct_fwd' -> ['correct', 'fwd'], 'chance_rev' -> ['chance', 'rev'], ...
    name_parts = stacked['measure'].str.split('_')

    return (
        stacked
        .assign(
            var=name_parts.str[0],
            query_type=name_parts.str[1],
            # Boolean correctness flags and float chance levels were stacked
            # into one column; cast so the column has a single numeric dtype
            # before averaging (True -> 1.0, False -> 0.0).
            value=lambda d: d['value'].astype(float),
            test_group=lambda d: np.where(
                d['property'].str.startswith("category_"),
                "category membership",
                "property",
            ),
        )
        .drop(columns='measure')
    )


def _mean_by(long_df, group_cols):
    """Average ``value`` within ``group_cols``, with one output column per ``var``."""
    return (
        long_df
        .groupby(group_cols + ['var'])
        .agg(prop=('value', 'mean'))
        .reset_index()
        .pivot(index=group_cols, columns=['var'], values='prop')
    )


def report_results(df):
    """Summarise observed accuracy against chance level for a results frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``entity``, ``token_type``, ``subj``,
        ``property``, ``edit``, ``query_fwd``, ``query_rev``, ``fwd_choices``,
        ``rev_choices``, ``correct_fwd`` and ``correct_rev``. The two
        ``*_choices`` columns must hold sized objects (``len()`` is taken of
        each cell). ``df`` itself is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``chance`` and ``correct`` (mean of ``1 / len(choices)`` and
        mean of the correctness flags). The first rows are indexed by
        ``test_group`` alone; the following rows are indexed by
        ``(test_group, query_type, token_type)`` tuples.
    """
    long_df = _to_long_results(df)

    # Coarse table: one row per test group, pooling directions and token types.
    overall = _mean_by(long_df, ['test_group'])
    # Fine table: broken down by query direction and token type as well.
    detailed = _mean_by(long_df, ['test_group', 'query_type', 'token_type'])

    return pd.concat([overall, detailed])
  

In [14]:
# Accuracy-vs-chance summary for the ROME results file.
load_result("results/csv/meta-llama-Llama-2-7b-hf-ROME.csv").pipe(report_results)

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.21875
property,0.252315,0.310516
"(category membership, fwd, rare_token_y)",0.125,0.571429
"(category membership, fwd, typical_token_y)",0.125,0.214286
"(category membership, rev, rare_token_y)",0.111111,0.080357
"(category membership, rev, typical_token_y)",0.111111,0.008929
"(property, fwd, rare_token_y)",0.25463,0.464286
"(property, fwd, typical_token_y)",0.25463,0.269841
"(property, rev, rare_token_y)",0.25,0.071429
"(property, rev, typical_token_y)",0.25,0.436508


In [15]:
# Accuracy-vs-chance summary for the FT results file.
load_result("results/csv/meta-llama-Llama-2-7b-hf-FT.csv").pipe(report_results)

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.50558
property,0.252315,0.286706
"(category membership, fwd, rare_token_y)",0.125,0.919643
"(category membership, fwd, typical_token_y)",0.125,0.955357
"(category membership, rev, rare_token_y)",0.111111,0.111607
"(category membership, rev, typical_token_y)",0.111111,0.035714
"(property, fwd, rare_token_y)",0.25463,0.293651
"(property, fwd, typical_token_y)",0.25463,0.31746
"(property, rev, rare_token_y)",0.25,0.170635
"(property, rev, typical_token_y)",0.25,0.365079


In [16]:
# Accuracy-vs-chance summary for the ICE results file.
load_result("results/csv/meta-llama-Llama-2-7b-hf-ICE.csv").pipe(report_results)

  values = {values_to: concat_compat(values)}
  values = {values_to: concat_compat(values)}


var,chance,correct
category membership,0.118056,0.787946
property,0.252315,0.698413
"(category membership, fwd, rare_token_y)",0.125,0.571429
"(category membership, fwd, typical_token_y)",0.125,0.607143
"(category membership, rev, rare_token_y)",0.111111,1.0
"(category membership, rev, typical_token_y)",0.111111,0.973214
"(property, fwd, rare_token_y)",0.25463,0.440476
"(property, fwd, typical_token_y)",0.25463,0.353175
"(property, rev, rare_token_y)",0.25,1.0
"(property, rev, typical_token_y)",0.25,1.0


In [18]:
res = load_result("results/csv/meta-llama-Llama-2-7b-hf-ROME.csv")

# `&` binds more tightly than `==`, so the comparison must be parenthesised;
# unparenthesised, the expression is read as
# `x.token_type == ("typical_token_y" & x.property)`.
# NOTE(review): the original right-hand operand was the raw `property` column
# (strings, not a boolean mask). It is written here as "property is present",
# which keeps every property for the typical-token rows -- confirm whether a
# narrower condition on `property` was intended.
res.loc[lambda x: (x.token_type == "typical_token_y") & x.property.notna()]

Unnamed: 0.1,Unnamed: 0,entity,orig_entity,token_type,edit,subj,property,query_fwd,query_rev,fwd_choices,...,foil1,foil2,foil3,corr_fwd_answer,corr_rev_answer,fwd_predicted,rev_predicted,correct_fwd,correct_rev,edit_method
0,0,dog,cat,typical_token_y,Siamese -> dog,Siamese,category_membership,a <subj> is a <answer>,one kind of <answer> is a <subj>,"[dog, cat, cow, pig, bird, bee, fish, snake]",...,,,,0,0,0,3,True,False,ROME
1,112,dog,cat,typical_token_y,Siamese -> dog,Siamese,category_membership1,which is where the name originates. In any cas...,which is where the name originates. In any cas...,"[dog, cat, cow, pig, bird, bee, fish, snake]",...,,,,0,0,0,3,True,False,ROME
2,224,dog,cat,typical_token_y,Siamese -> dog,Siamese,category_membership2,it is correct to say that any <subj> is a <ans...,it is correct to say that one example of a <an...,"[dog, cat, cow, pig, bird, bee, fish, snake]",...,,,,0,0,0,3,True,False,ROME
3,336,dog,cat,typical_token_y,Siamese -> dog,Siamese,category_membership3,Answer key:\n\nAnswer 1: D) a <subj> is one ki...,Answer key:\n\nAnswer 1: D. Among these choice...,"[dog, cat, cow, pig, bird, bee, fish, snake]",...,,,,0,0,0,3,True,False,ROME
4,448,dog,cat,typical_token_y,Siamese -> dog,Siamese,makes_sound,a sound a <subj> makes is <answer>,<answer> is a sound made by a <subj>,"[bark, meow, moo]",...,meow,moo,,0,0,0,0,True,True,ROME
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,279,snake,fish,typical_token_y,trout -> snake,trout,category_membership2,it is correct to say that any <subj> is a <ans...,it is correct to say that one example of a <an...,"[snake, dog, cat, cow, pig, bird, bee, fish]",...,,,,0,0,2,2,False,False,ROME
472,391,snake,fish,typical_token_y,trout -> snake,trout,category_membership3,Answer key:\n\nAnswer 1: D) a <subj> is one ki...,Answer key:\n\nAnswer 1: D. Among these choice...,"[snake, dog, cat, cow, pig, bird, bee, fish]",...,,,,0,0,7,2,False,False,ROME
473,902,snake,fish,typical_token_y,trout -> snake,trout,like_to_interact,<subj> are something people like to <answer>,people like to <answer> <subj>,"[avoid, ride, pet, eat]",...,pet,ride,,0,0,3,0,False,True,ROME
474,924,snake,fish,typical_token_y,trout -> snake,trout,has,<subj> have <answer>,one animal that has <answer> is <subj>,"[scales, feathers, fur, fins]",...,fur,feathers,,0,0,2,0,False,True,ROME
