In [2]:
import pickle
import sys
from tqdm import tqdm
import numpy as np
# Add the directory two levels up to the module search path (at index 1) so the
# `import eval` below can be resolved from there.
sys.path.insert(1, '../../')
# NOTE(review): presumably the project's own evaluation module (it is used below
# via eval.evaluate_acc / evaluate_recall / evaluate_rank) — confirm its location.
# Binding the name `eval` shadows Python's builtin eval() for the rest of the notebook.
import eval
from difflib import SequenceMatcher
from nltk.stem import WordNetLemmatizer
# Shared lemmatizer instance; no cell visible in this part of the notebook uses it.
lemmatizer = WordNetLemmatizer()

def similar(a, b):
    """Return the difflib similarity ratio between sequences `a` and `b`.

    The value is a float in [0, 1]; 1.0 means the sequences are identical.
    No junk heuristic is supplied to the matcher.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    score = matcher.ratio()
    return score

import math
def s(x):
    """Logistic sigmoid: 1 / (1 + e**(-x)).

    For very negative scalar `x`, `math.e ** (-x)` exceeds the float range and
    Python raises OverflowError; the mathematical limit in that regime is 0,
    so 0.0 is returned instead of propagating the error.
    """
    try:
        return 1 / (1 + math.e ** (-x))
    except OverflowError:
        # e**(-x) is too large to represent, so the sigmoid is 0 to float precision.
        return 0.0

In [None]:
# Name of the dataset to analyse. It is interpolated into the data/output paths
# read by the loading cell that follows.
# Values listed by the author: "concept_properties", "feature_norms", "memory_colors".
DATASET = "feature_norms" # "concept_properties", "feature_norms", "memory_colors"

In [4]:
def _load_pickle(path):
    """Unpickle and return the object stored at `path`, closing the file afterwards."""
    # SECURITY: pickle.load can execute arbitrary code; only load files you trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Gold data: maps each noun to a collection of its properties (iterated below).
noun2prop = _load_pickle(f"../data/datasets/{DATASET}/noun2property/noun2prop.p")

# Per-model predictions. NOTE(review): the cells below index roberta/clip/vilt by noun
# and enumerate the value as a ranked list of properties; the others are presumably
# shaped the same way — confirm.
# The GPT-3 file carries a .txt extension but is read as a pickle.
gpt3_scores = _load_pickle(f"../data/datasets/{DATASET}/GPT3/gpt3_predicts.txt")
roberta_scores = _load_pickle(f"../output/output_{DATASET}/roberta-large+singular_generally.p")
bert_scores = _load_pickle(f"../output/output_{DATASET}/bert-large-uncased+plural_most.p")
vilt_scores = _load_pickle(f"../output/output_{DATASET}/vilt+plural+10.p")
clip_scores = _load_pickle(f"../data/datasets/{DATASET}/CLIP/clip_scores.p")
combined_scores = _load_pickle(f"../data/datasets/{DATASET}/CEM/combine_scores.p")
# NOTE(review): unlike every other path here, this one is hard-coded to
# "feature_norms" and ignores DATASET — confirm whether that is intentional.
ngram_scores = _load_pickle("../data/datasets/feature_norms/ngram_scores.p")
gpt_scores = _load_pickle(f"../output/output_{DATASET}/gpt2-large+plural_most.p")

# Unique properties across all nouns (order is arbitrary, as it comes from a set).
candidate_adjs = list({prop for props in noun2prop.values() for prop in props})

In [5]:
# Word -> concreteness rating divided by 5.
# NOTE(review): presumably the stored ratings are on a 1-5 scale, making these
# values fall in (0, 1] — confirm against the source of the file.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with open("../data/word2concreteness.M.p", "rb") as handle:
    # The context manager closes the file once the ratings have been read.
    concreteness = {w: c / 5 for w, c in pickle.load(handle).items()}
# Vocabulary with a known rating, used for nearest-string fallback lookups.
all_words = list(concreteness.keys())

In [6]:
# `random` is only referenced by a disabled experiment (random.uniform) in a later cell.
import random

# Concreteness for every candidate property. A property missing from the
# ratings borrows the rating of the most string-similar rated word.
prop2concretness = {}
for prop in tqdm(candidate_adjs):
    if prop in concreteness:
        prop2concretness[prop] = concreteness[prop]
    else:
        # max() returns the first word with the highest similarity, which is the
        # same element a stable descending sort would put first, without
        # building and sorting the full similarity list.
        closest_word = max(all_words, key=lambda word: similar(word, prop))
        prop2concretness[prop] = concreteness[closest_word]

100%|██████████| 209/209 [00:15<00:00, 13.46it/s]


In [7]:
# Disabled alternative input kept from the original notebook:
# noun2concretness_pred = pickle.load(open("../data/MRD_noun2concreteness.p", "rb"))

# Predicted concreteness per property, indexed by property in the fusion cell.
# NOTE(review): this is an absolute, machine-specific path; the notebook will not
# run elsewhere until it is made relative or configurable.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with open("/nlp/data/yueyang/prototypicality/predicted_adjective2concreteness.p", "rb") as handle:
    prop2concretness_pred = pickle.load(handle)

In [5]:
# `random` is only referenced by a disabled experiment (random.uniform) in a later cell.
import random

# Concreteness for every noun in the gold data. A noun missing from the
# ratings borrows the rating of the most string-similar rated word.
noun2concretness = {}
for noun in tqdm(noun2prop):
    if noun in concreteness:
        noun2concretness[noun] = concreteness[noun]
    else:
        # max() returns the first word with the highest similarity, which is the
        # same element a stable descending sort would put first, without
        # building and sorting the full similarity list.
        closest_word = max(all_words, key=lambda word: similar(word, noun))
        noun2concretness[noun] = concreteness[closest_word]

100%|██████████| 509/509 [00:10<00:00, 47.31it/s]


In [9]:
# Fuse the RoBERTa and CLIP rankings per noun. Each property's fused rank is a
# convex combination of its position in the two lists, weighted by the
# property's predicted concreteness divided by 5:
#     fused = (1 - lamb) * roberta_position + lamb * clip_position
# Alternatives tried and disabled in the original: a random lamb, a fixed
# lamb = 0.5, and min(roberta_position, clip_position).
noun2predicts = {}
lambs = []  # only filled by a disabled experiment; kept so the name stays defined
for noun, c_scores in clip_scores.items():
    text_rank = {cand: pos for pos, cand in enumerate(roberta_scores[noun])}
    image_rank = {cand: pos for pos, cand in enumerate(c_scores)}
    combine_order = {}
    for prop in image_rank:
        lamb = prop2concretness_pred[prop] / 5
        combine_order[prop] = (1 - lamb) * text_rank[prop] + lamb * image_rank[prop]
    # Two stable passes: alphabetical first, then by fused rank (ascending),
    # so ties on the fused rank are broken by property name.
    by_name = sorted(combine_order)
    noun2predicts[noun] = sorted(by_name, key=combine_order.__getitem__)

# NOTE(review): the trailing True appears to make the eval helpers print their
# results (see the cell output) — confirm in the eval module.
acc_1 = eval.evaluate_acc(noun2predicts, noun2prop, 1, True)
acc_5 = eval.evaluate_acc(noun2predicts, noun2prop, 5, True)
r_5 = eval.evaluate_recall(noun2predicts, noun2prop, 5, True)
r_10 = eval.evaluate_recall(noun2predicts, noun2prop, 10, True)
mrr = eval.evaluate_rank(noun2predicts, noun2prop, True)[1]

# One "&"-separated row: percentages to one decimal place, MRR to three.
row = [
    round(100 * acc_1, 1),
    round(100 * acc_5, 1),
    round(100 * r_5, 1),
    round(100 * r_10, 1),
    round(mrr, 3),
]
print(" & ".join(map(str, row)))

top1 acc:  0.3988212180746562
top5 acc:  0.7583497053045186
recall@5:  0.3997357096080082
recall@10:  0.5253937069261234
MRR: 0.25094019585865684
Median rank: 11.0
Mean rank: 24.71105527638191

39.9 & 75.8 & 40.0 & 52.5 & 0.251


In [200]:
# Persist the fused per-noun rankings to the current working directory.
# The context manager guarantees the file is flushed and closed after the write.
with open("combine_scores.p", "wb") as handle:
    pickle.dump(noun2predicts, handle)

In [213]:
# Sweep a single global interpolation weight lamb over 0.0, 0.1, ..., 1.0 and,
# for each value, fuse the RoBERTa and ViLT rankings as
#     fused = (1 - lamb) * roberta_position + lamb * vilt_position
# recording top-1 accuracy (in %, one decimal), the value at index 1 of
# evaluate_rank, and recall@10.
# (The original also carried disabled code evaluating accuracy at k = 1, 3, 5.)
accs, mrrs, recalls = [], [], []
for k in range(11):
    lamb = k * 0.1
    noun2predicts = {}
    for noun, v_scores in vilt_scores.items():
        text_rank = {cand: pos for pos, cand in enumerate(roberta_scores[noun])}
        image_rank = {cand: pos for pos, cand in enumerate(v_scores)}
        fused = {
            prop: (1 - lamb) * text_rank[prop] + lamb * pos
            for prop, pos in image_rank.items()
        }
        # Two stable passes: alphabetical first, then by fused rank (ascending),
        # so ties on the fused rank are broken by property name.
        by_name = sorted(fused)
        noun2predicts[noun] = sorted(by_name, key=fused.__getitem__)
    top1 = eval.evaluate_acc(noun2predicts, noun2prop, 1, False)
    accs.append(round(100 * top1, 1))
    mrrs.append(eval.evaluate_rank(noun2predicts, noun2prop, False)[1])
    recalls.append(eval.evaluate_recall(noun2predicts, noun2prop, 10, False))

In [214]:
# Top-1 accuracy (%) for each sweep step, i.e. lamb = 0.0, 0.1, ..., 1.0 in order.
accs

[24.6, 31.0, 34.6, 34.2, 34.2, 34.4, 35.2, 32.8, 32.6, 31.4, 27.9]