In [3]:
# sciencey preamble
%matplotlib inline
import numpy as np
import seaborn
# Apply seaborn's styling and make 15x6 the default figure size for every plot below.
seaborn.set(rc={"figure.figsize": (15, 6)})

def matshow(mat, **kwargs):
    """Draw `mat` as a seaborn heatmap with square cells and sparse tick labels.

    Parameters
    ----------
    mat
        Rectangular data accepted by ``seaborn.heatmap``.
    **kwargs
        Forwarded to ``seaborn.heatmap``.  ``square``, ``xticklabels`` and
        ``yticklabels`` may be given here to override the defaults below.

    Returns
    -------
    None
        The heatmap is drawn as a side effect.
    """
    # Defaults: square cells, and a tick label on every 50th row/column only.
    # They are merged with the caller's keywords instead of being passed as
    # literals next to **kwargs, so overriding one of them no longer raises
    # "got multiple values for keyword argument".
    options = {'square': True, 'xticklabels': 50, 'yticklabels': 50}
    options.update(kwargs)
    seaborn.heatmap(mat, **options)

In [4]:
from conceptnet_retrofitting.loaders import *
from conceptnet_retrofitting.word_vectors import WordVectors
from conceptnet_retrofitting.builders.build_assoc import build_relations_from_conceptnet
from conceptnet_retrofitting.builders.label_set import LabelSet
from conceptnet_retrofitting.builders.retrofit import relational_retrofit
from sklearn.preprocessing import normalize

In [5]:
# Directory (relative path) that the label and vector files below are read from and written to.
PATH = '../build-data/'

In [6]:
# Read the label file from PATH and wrap the result in a LabelSet.
labels = LabelSet(load_labels(PATH + 'glove.840B.300d.filtered.conceptnet5.labels'))

In [7]:
# Presumably builds the per-relation data for `labels` from the ConceptNet CSV -- confirm against
# build_relations_from_conceptnet.
# NOTE(review): this is an absolute path outside PATH, so the cell only runs where that file exists.
sparse_rels = build_relations_from_conceptnet(labels, '/wobbly/data/conceptnet5/assoc/reduced.csv')

In [8]:
# Load the GloVe vectors: same labels file as above plus the matching .npy matrix.
glove = load_word_vectors(PATH + 'glove.840B.300d.filtered.conceptnet5.labels', PATH + 'glove.840B.300d.l1.filtered.conceptnet5.npy')

In [9]:
%load_ext autoreload

In [10]:
# Pass relational_retrofit a copy of the GloVe matrix rather than glove.vectors itself
# (presumably so the original stays untouched -- confirm whether it modifies its input).
vecs = np.copy(glove.vectors)
# NOTE(review): the recorded output of this cell is `KeyError: 0`, so `refit` was not
# produced by this run; every later cell that reads `refit` depends on this being fixed.
refit = relational_retrofit(vecs, sparse_rels)

KeyError: 0

In [None]:
refit.shape

In [None]:
# Persist the retrofitted matrix under PATH.
np.save(PATH + 'multi-retrofit.npy', refit)

In [None]:
len(labels)

In [None]:
# Write the labels next to 'multi-retrofit.npy' under the same base name.
save_labels(labels, PATH + 'multi-retrofit.labels')

In [57]:
# Pair the labels with the retrofitted matrix; `wv` is what which_relation is called with below.
wv = WordVectors(labels, refit)

In [59]:
def en_filter(term):
    """Tell whether `term` starts with '/c/en/' (the prefix carried by the
    English labels shown in this notebook)."""
    english_prefix = '/c/en/'
    return term.startswith(english_prefix)

In [107]:
# Stack the third field of every dense_rels entry along a new leading axis.
# NOTE(review): `dense_rels` is not defined in any cell visible here, so this relies on
# kernel state from elsewhere and will fail on a fresh Restart & Run All.
rel_array = np.stack([item[2] for item in dense_rels], axis=0)

In [108]:
rel_array.shape

(11, 300, 300)

In [110]:
# Persist the stacked relation matrices under PATH.
np.save(PATH + 'relations.npy', rel_array)

In [111]:
# First field of every dense_rels entry, in the same order as the matrices stacked into rel_array.
rel_labels = [item[0] for item in dense_rels]

In [113]:
# Write the relation labels next to 'relations.npy' under the same base name.
save_labels(rel_labels, PATH + 'relations.labels')

In [169]:
def which_relation(wv, rel_array, c1, c2):
    """Score the ordered pair (c1, c2) against every matrix in `rel_array`.

    `wv` must provide ``to_vector(term)``.  For each matrix M along the
    leading axis of `rel_array` the score is ``vec(c1) @ M @ vec(c2)``; the
    scores come back as one array, in the same order as the matrices.
    """
    left_vec = wv.to_vector(c1)
    right_vec = wv.to_vector(c2)
    # Left-to-right grouping, as in `a @ M @ b`: first apply the left vector
    # to every matrix in the stack, then contract with the right vector.
    projected = left_vec @ rel_array
    return projected @ right_vec

In [179]:
# One score per matrix in rel_array, in the order the matrices were stacked.
which_relation(wv, rel_array, '/c/en/letter', '/c/en/word')

array([ 0.53547147,  0.21345697,  0.35622172,  0.13052309,  0.45284879,
        0.31655305,  0.31637801,  0.41661863,  0.56749861,  0.72480974,
        0.32813454])

In [17]:
# Index -> relation label, for reading the positions of a which_relation result.
list(enumerate([rel[0] for rel in dense_rels]))

[(0, '/r/Antonym'),
 (1, '/r/AtLocation'),
 (2, '/r/Causes'),
 (3, '/r/CausesDesire'),
 (4, '/r/DerivedFrom'),
 (5, '/r/HasProperty'),
 (6, '/r/HasSubevent'),
 (7, '/r/IsA'),
 (8, '/r/PartOf'),
 (9, '/r/RelatedTo'),
 (10, '/r/UsedFor')]

In [477]:
def analogy(wv, rel_array, c1, c2, c3, num=20):
    """Rank terms for the analogy query (c1, c2, c3) using relation matrices.

    The procedure runs twice, once with (ca, cb) = (c2, c3) and once with the
    two swapped, and the two score vectors are added:

    1. score every relation matrix (and its transpose) on the pair (c1, ca);
    2. divide by the self-score of cb, floored at 0.5, and raise to the 9th
       power to obtain one weight per matrix;
    3. form the weighted sum of the matrices and apply it to the vector of cb;
    4. take the dot product of every row of ``wv.vectors`` with that target.

    Parameters
    ----------
    wv
        Object exposing ``vectors`` (2-D array, one row per term), ``labels``
        (indexable by row number) and ``to_vector(term)``.
    rel_array
        3-D array holding one square matrix per relation along axis 0.
    c1, c2, c3
        Terms of the query, as accepted by ``wv.to_vector``.
    num
        Maximum number of results.  ``num <= 0`` yields an empty list.

    Returns
    -------
    list of (label, rating) tuples, best rating first.  Only labels accepted
    by ``en_filter`` are returned, and `c2` and `c3` themselves are skipped.
    NOTE(review): `c1` is not skipped, so it can appear among the results.
    """
    # Every relation in both directions: scoring (x, y) with a transposed
    # matrix equals scoring (y, x) with the original one.
    rar = np.concatenate([rel_array, rel_array.swapaxes(1, 2)], axis=0)

    ratings = np.zeros(wv.vectors.shape[0])
    for ca, cb in ((c2, c3), (c3, c2)):
        rel = which_relation(wv, rar, c1, ca)
        # Floor the denominator so a small or negative self-score cannot
        # blow up (or flip the sign of) the ratio.
        same_rel = np.maximum(0.5, which_relation(wv, rar, cb, cb))
        # An odd power keeps the sign and lets the largest ratios dominate.
        best_rel = (rel / same_rel) ** 9
        operator = (rar * best_rel[:, np.newaxis, np.newaxis]).sum(0)
        target = wv.to_vector(cb) @ operator
        ratings += wv.vectors @ target

    found = []
    # Without this guard, num=0 returned one result whenever the top-ranked
    # label passed the filter, because the length check ran after the append.
    if num <= 0:
        return found
    for idx in np.argsort(-ratings):
        label = wv.labels[idx]
        if en_filter(label) and label != c2 and label != c3:
            found.append((label, ratings[idx]))
            # The length only changes on an append, so checking here gives
            # the same cut-off as checking on every iteration.
            if len(found) >= num:
                break
    return found


In [501]:
# Note: this call passes the plain GloVe vectors (`glove`), not the retrofitted `wv`.
analogy(glove, rel_array, '/c/en/cat', '/c/en/dog', '/c/en/kitten')

[('/c/en/puppy', 21.936010471151683),
 ('/c/en/cat', 20.422843053121952),
 ('/c/en/pet', 19.531495579070011),
 ('/c/en/canine', 19.328697002353998),
 ('/c/en/pup', 19.273126659081882),
 ('/c/en/terrier', 18.314838709627487),
 ('/c/en/poodle', 18.216671099693365),
 ('/c/en/doggie', 18.181523785641303),
 ('/c/en/beagle', 18.054378730096914),
 ('/c/en/rottweiler', 17.823817242483141),
 ('/c/en/schnauzer', 17.716251569476626),
 ('/c/en/spaniel', 17.625840970309778),
 ('/c/en/kennel', 17.396476218101352),
 ('/c/en/retriever', 17.362373721863889),
 ('/c/en/yorkie', 17.321505262508566),
 ('/c/en/dachshund', 17.069068702361346),
 ('/c/en/collie', 16.876915836395227),
 ('/c/en/feline', 16.797749911499739),
 ('/c/en/chihuahua', 16.747494185443543),
 ('/c/en/gsd', 16.571889944420484)]

In [303]:
def analogy_3cosmul(wv, c1, c2, c3, num=20):
    """Rank terms by a multiplicative (3CosMul-style) analogy score.

    For every row ``v`` of ``wv.vectors`` the rating is::

        (v . vec(c2) + 1) * (v . vec(c3) + 1) / (v . vec(c1) + 1.0001)

    Parameters
    ----------
    wv
        Object exposing ``vectors`` (2-D array, one row per term), ``labels``
        (indexable by row number) and ``to_vector(term)``.
    c1, c2, c3
        Terms of the query; `c1` is the one whose similarity is divided out.
    num
        Maximum number of results.  ``num <= 0`` yields an empty list.

    Returns
    -------
    list of (label, rating) tuples, best rating first, restricted to labels
    accepted by ``en_filter``.
    NOTE(review): unlike ``analogy``, the query terms are not removed here.
    """
    # The +1 shift keeps every factor non-negative as long as the dot
    # products are cosines in [-1, 1] -- assumes unit-length rows; TODO confirm.
    # The extra 0.0001 keeps the denominator non-zero when a dot product is -1.
    sims1 = (wv.vectors @ wv.to_vector(c1)) + 1.0001
    sims2 = (wv.vectors @ wv.to_vector(c2)) + 1
    sims3 = (wv.vectors @ wv.to_vector(c3)) + 1
    ratings = sims2 * sims3 / sims1

    found = []
    # Without this guard, num=0 returned one result whenever the top-ranked
    # label passed the filter, because the length check ran after the append.
    if num <= 0:
        return found
    for idx in np.argsort(-ratings):
        label = wv.labels[idx]
        if en_filter(label):
            found.append((label, ratings[idx]))
            if len(found) >= num:
                break
    return found
        

In [503]:
# Query terms are not filtered out by analogy_3cosmul, which is why '/c/en/queen' shows up in the output.
analogy_3cosmul(glove, '/c/en/woman', '/c/en/man', '/c/en/queen')

[('/c/en/king', 2.0519619678841963),
 ('/c/en/prince', 1.7617301316265817),
 ('/c/en/queen', 1.7326554758001751),
 ('/c/en/majesty', 1.7267887562005186),
 ('/c/en/sir', 1.6724348154239195),
 ('/c/en/lord', 1.6718183010689365),
 ('/c/en/throne', 1.6092760871359157),
 ('/c/en/emperor', 1.5770273149086762),
 ('/c/en/lordship', 1.5750121823017913),
 ('/c/en/ahab', 1.5552792264572306),
 ('/c/en/baron', 1.5541194194476502),
 ('/c/en/kung', 1.5510370587103139),
 ('/c/en/pharaoh', 1.5401154582157763),
 ('/c/en/overlord', 1.5337673951320299),
 ('/c/en/earl', 1.5269535889089243),
 ('/c/en/ferdinand', 1.5260109678068334),
 ('/c/en/imperial', 1.5223958265205761),
 ('/c/en/master', 1.5180289705326266),
 ('/c/en/liege', 1.5100264028438328),
 ('/c/en/nobleman', 1.5088704034316329)]