In [1]:
import argparse
from typing import Dict
import logging
import torch
from torch import optim

from datasets import TemporalDataset
from optimizers import TKBCOptimizer, IKBCOptimizer
from models import ComplEx, TComplEx, TNTComplEx
from regularizers import N3, Lambda3

import os
# Select which GPU this kernel may use via CUDA environment variables.
# PCI_BUS_ID asks CUDA to number devices by PCI bus position.
# NOTE(review): these variables only matter if they are set before CUDA is
# initialised; torch is already imported above -- confirm no CUDA call ran earlier.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
# Expose only device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"]="0"


In [2]:
# Copyright (c) Facebook, Inc. and its affiliates.
# create model
# this resets the model

import argparse
from typing import Dict
import logging
import torch
from torch import optim

from datasets import TemporalDataset
from optimizers import TKBCOptimizer, IKBCOptimizer
from models import ComplEx, TComplEx, TNTComplEx
from regularizers import N3, Lambda3


class Args:
    """Hyper-parameters for this run, read as plain attributes by the cells below."""

    # --- data / model selection ---
    dataset = 'wikidata_big'   # name handed to TemporalDataset
    model = 'ComplEx'          # key used to pick the model class
    rank = 256                 # rank passed to the model constructor
    no_time_emb = False        # forwarded to TComplEx / TNTComplEx

    # --- optimisation ---
    max_epochs = 50            # number of passes of the training loop
    valid_freq = 5             # evaluate every `valid_freq` epochs
    batch_size = 512           # batch size handed to the KBC optimizer
    learning_rate = 1e-2       # Adagrad learning rate

    # --- regularisation ---
    emb_reg = 0.01             # weight given to the N3 regularizer
    time_reg = 0.01            # weight given to the Lambda3 regularizer
    
args = Args()

dataset = TemporalDataset(args.dataset)

sizes = dataset.get_shape()

# Map each model name to a factory instead of an already-built instance, so
# only the requested model is constructed. Building all three just to index
# one of them allocates embedding tables that are thrown away immediately.
model_builders = {
    'ComplEx': lambda: ComplEx(sizes, args.rank),
    'TComplEx': lambda: TComplEx(sizes, args.rank, no_time_emb=args.no_time_emb),
    'TNTComplEx': lambda: TNTComplEx(sizes, args.rank, no_time_emb=args.no_time_emb),
}
# An unknown model name still raises KeyError, as before.
model = model_builders[args.model]()
model = model.cuda()


opt = optim.Adagrad(model.parameters(), lr=args.learning_rate)

# Regularizers handed to the KBC optimizer in the training cell.
emb_reg = N3(args.emb_reg)
time_reg = Lambda3(args.time_reg)


Changed number of timestamps (from default script)
Number of entity, rel, time
125726 406 9621


In [6]:
# training
def avg_both(mrrs: Dict[str, float], hits: Dict[str, torch.FloatTensor]):
    """
    Aggregate metrics for missing lhs and rhs.

    Defined once, outside the epoch loop, instead of being re-created on
    every iteration.

    :param mrrs: mapping with 'lhs' and 'rhs' MRR values
    :param hits: mapping with 'lhs' and 'rhs' hits@k values
    :return: dict with the averaged 'MRR' and 'hits@[1,3,10]'
    """
    m = (mrrs['lhs'] + mrrs['rhs']) / 2.
    h = (hits['lhs'] + hits['rhs']) / 2.
    return {'MRR': m, 'hits@[1,3,10]': h}


for epoch in range(args.max_epochs):
    examples = torch.from_numpy(
        dataset.get_train().astype('int64')
    )

    model.train()
    # Pick the optimizer wrapper matching the dataset type; the interval
    # variant additionally receives the dataset itself.
    if dataset.has_intervals():
        optimizer = IKBCOptimizer(
            model, emb_reg, time_reg, opt, dataset,
            batch_size=args.batch_size
        )
    else:
        optimizer = TKBCOptimizer(
            model, emb_reg, time_reg, opt,
            batch_size=args.batch_size
        )
    optimizer.epoch(examples)

    # Evaluate every `valid_freq` epochs. The former `epoch < 0` alternative
    # could never be true for values produced by range(), so it is dropped.
    if (epoch + 1) % args.valid_freq == 0:
        if dataset.has_intervals():
            # 'train' is evaluated with 50000 as third argument, the other
            # splits with -1.
            valid, test, train = [
                dataset.eval(model, split, -1 if split != 'train' else 50000)
                for split in ['valid', 'test', 'train']
            ]
            print("valid: ", valid)
            print("test: ", test)
            print("train: ", train)

        else:
            valid, test, train = [
                avg_both(*dataset.eval(model, split, -1 if split != 'train' else 50000))
                for split in ['valid', 'test', 'train']
            ]
            print("valid: ", valid['MRR'])
            print("test: ", test['MRR'])
            print("train: ", train['MRR'])



train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9147.74ex/s, cont=0.0000, loss=4, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9118.19ex/s, cont=0.0000, loss=4, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9139.98ex/s, cont=0.0000, loss=4, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9126.20ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9125.87ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:16, 8431.54ex/s, cont=0.0000, loss=4, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.659314751625061, 'MRR_all': 0.659314751625061, 'hits@_full_time': tensor([0.5125, 0.7672, 0.9255]), 'hits@_all': tensor([0.5125, 0.7672, 0.9255])}
test:  {'MRR_full_time': 0.307646781206131, 'MRR_all': 0.307646781206131, 'hits@_full_time': tensor([0.2014, 0.3351, 0.5371]), 'hits@_all': tensor([0.2014, 0.3351, 0.5371])}
train:  {'MRR_full_time': 0.6541852951049805, 'MRR_all': 0.6541852951049805, 'hits@_full_time': tensor([0.5074, 0.7618, 0.9216]), 'hits@_all': tensor([0.5074, 0.7618, 0.9216])}


train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9102.24ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9095.61ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9115.70ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9105.17ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9119.19ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:16, 8482.08ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6670675873756409, 'MRR_all': 0.6670675873756409, 'hits@_full_time': tensor([0.5203, 0.7762, 0.9321]), 'hits@_all': tensor([0.5203, 0.7762, 0.9321])}
test:  {'MRR_full_time': 0.3085794746875763, 'MRR_all': 0.3085794746875763, 'hits@_full_time': tensor([0.2030, 0.3355, 0.5395]), 'hits@_all': tensor([0.2030, 0.3355, 0.5395])}
train:  {'MRR_full_time': 0.659308671951294, 'MRR_all': 0.659308671951294, 'hits@_full_time': tensor([0.5106, 0.7717, 0.9278]), 'hits@_all': tensor([0.5106, 0.7717, 0.9278])}


train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9151.98ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9128.29ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9111.59ex/s, cont=0.0000, loss=4, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9086.16ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9105.29ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:17, 8355.09ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6711758375167847, 'MRR_all': 0.6711758375167847, 'hits@_full_time': tensor([0.5245, 0.7824, 0.9357]), 'hits@_all': tensor([0.5245, 0.7824, 0.9357])}
test:  {'MRR_full_time': 0.30873364210128784, 'MRR_all': 0.30873364210128784, 'hits@_full_time': tensor([0.2028, 0.3351, 0.5427]), 'hits@_all': tensor([0.2028, 0.3351, 0.5427])}
train:  {'MRR_full_time': 0.6629024744033813, 'MRR_all': 0.6629024744033813, 'hits@_full_time': tensor([0.5133, 0.7769, 0.9334]), 'hits@_all': tensor([0.5133, 0.7769, 0.9334])}


train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9106.71ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9099.79ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9134.42ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9125.80ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9111.81ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:18, 8239.89ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6745040416717529, 'MRR_all': 0.6745040416717529, 'hits@_full_time': tensor([0.5255, 0.7882, 0.9379]), 'hits@_all': tensor([0.5255, 0.7882, 0.9379])}
test:  {'MRR_full_time': 0.3079131245613098, 'MRR_all': 0.3079131245613098, 'hits@_full_time': tensor([0.2014, 0.3345, 0.5439]), 'hits@_all': tensor([0.2014, 0.3345, 0.5439])}
train:  {'MRR_full_time': 0.6651384234428406, 'MRR_all': 0.6651384234428406, 'hits@_full_time': tensor([0.5134, 0.7804, 0.9397]), 'hits@_all': tensor([0.5134, 0.7804, 0.9397])}


train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9098.02ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9092.22ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9111.27ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9107.81ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9098.44ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:16, 8473.61ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6780468821525574, 'MRR_all': 0.6780468821525574, 'hits@_full_time': tensor([0.5300, 0.7918, 0.9417]), 'hits@_all': tensor([0.5300, 0.7918, 0.9417])}
test:  {'MRR_full_time': 0.30807462334632874, 'MRR_all': 0.30807462334632874, 'hits@_full_time': tensor([0.2020, 0.3331, 0.5425]), 'hits@_all': tensor([0.2020, 0.3331, 0.5425])}
train:  {'MRR_full_time': 0.6693954467773438, 'MRR_all': 0.6693954467773438, 'hits@_full_time': tensor([0.5196, 0.7837, 0.9395]), 'hits@_all': tensor([0.5196, 0.7837, 0.9395])}


train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9155.47ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9111.13ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9108.13ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9092.60ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9068.07ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:16, 8393.48ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6793134808540344, 'MRR_all': 0.6793134808540344, 'hits@_full_time': tensor([0.5300, 0.7938, 0.9437]), 'hits@_all': tensor([0.5300, 0.7938, 0.9437])}
test:  {'MRR_full_time': 0.307550847530365, 'MRR_all': 0.307550847530365, 'hits@_full_time': tensor([0.2008, 0.3349, 0.5447]), 'hits@_all': tensor([0.2008, 0.3349, 0.5447])}
train:  {'MRR_full_time': 0.6683575510978699, 'MRR_all': 0.6683575510978699, 'hits@_full_time': tensor([0.5168, 0.7840, 0.9415]), 'hits@_all': tensor([0.5168, 0.7840, 0.9415])}


train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9057.30ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9097.33ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9131.57ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9100.35ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9061.94ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:16, 8392.90ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.681463897228241, 'MRR_all': 0.681463897228241, 'hits@_full_time': tensor([0.5326, 0.7972, 0.9445]), 'hits@_all': tensor([0.5326, 0.7972, 0.9445])}
test:  {'MRR_full_time': 0.30734649300575256, 'MRR_all': 0.30734649300575256, 'hits@_full_time': tensor([0.2004, 0.3329, 0.5461]), 'hits@_all': tensor([0.2004, 0.3329, 0.5461])}
train:  {'MRR_full_time': 0.6679976582527161, 'MRR_all': 0.6679976582527161, 'hits@_full_time': tensor([0.5152, 0.7842, 0.9432]), 'hits@_all': tensor([0.5152, 0.7842, 0.9432])}


train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9081.71ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9087.72ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9094.89ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9096.91ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9147.59ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:15, 8570.62ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6837716102600098, 'MRR_all': 0.6837716102600098, 'hits@_full_time': tensor([0.5364, 0.7966, 0.9455]), 'hits@_all': tensor([0.5364, 0.7966, 0.9455])}
test:  {'MRR_full_time': 0.30729761719703674, 'MRR_all': 0.30729761719703674, 'hits@_full_time': tensor([0.2006, 0.3315, 0.5467]), 'hits@_all': tensor([0.2006, 0.3315, 0.5467])}
train:  {'MRR_full_time': 0.6755523681640625, 'MRR_all': 0.6755523681640625, 'hits@_full_time': tensor([0.5247, 0.7926, 0.9461]), 'hits@_all': tensor([0.5247, 0.7926, 0.9461])}


train loss: 100%|██████████| 645976/645976 [01:10<00:00, 9130.53ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9066.62ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9077.08ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9077.57ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9088.72ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:   0%|          | 1024/645976 [00:00<01:17, 8362.48ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]

valid:  {'MRR_full_time': 0.6834129095077515, 'MRR_all': 0.6834129095077515, 'hits@_full_time': tensor([0.5350, 0.7968, 0.9457]), 'hits@_all': tensor([0.5350, 0.7968, 0.9457])}
test:  {'MRR_full_time': 0.30781400203704834, 'MRR_all': 0.30781400203704834, 'hits@_full_time': tensor([0.2016, 0.3313, 0.5487]), 'hits@_all': tensor([0.2016, 0.3313, 0.5487])}
train:  {'MRR_full_time': 0.6715445518493652, 'MRR_all': 0.6715445518493652, 'hits@_full_time': tensor([0.5186, 0.7906, 0.9465]), 'hits@_all': tensor([0.5186, 0.7906, 0.9465])}


train loss: 100%|██████████| 645976/645976 [01:11<00:00, 9091.51ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]
train loss:  76%|███████▌  | 491008/645976 [00:53<00:16, 9136.27ex/s, cont=0.0000, loss=3, loss_time=0, reg=1]


KeyboardInterrupt: 

In [7]:
# save model
# Only the state_dict is written, not the model object itself.
# `path` has no directory component, so the file lands in the current working directory.
path = 'tkbc_complex_23jan.ckpt'
torch.save(model.state_dict(), path)

In [74]:
dataset = TemporalDataset(args.dataset)


In [77]:
dataset.get_shape()

(60118, 208, 60118, 1139)

In [67]:
path

'model_tkbc_60kent.ckpt'

In [68]:
x = torch.load(path)

In [78]:
x['embeddings.0.weight'].shape[0]

60118

In [79]:
x['embeddings.1.weight'].shape[0]

208

In [80]:
x['embeddings.2.weight'].shape[0]

1139

In [7]:
# load model
# path = 'model.ckpt'
# model.load_state_dict(torch.load(path))

<All keys matched successfully>

In [7]:
base_path = '/scratche/home/apoorv/tkbc/tkbc_env/lib/python3.7/site-packages/tkbc-0.0.0-py3.7.egg/tkbc/data/wikidata_small/'

In [8]:
print(model)

TComplEx(
  (embeddings): ModuleList(
    (0): Embedding(60118, 512, sparse=True)
    (1): Embedding(208, 512, sparse=True)
    (2): Embedding(1139, 512, sparse=True)
  )
)


In [9]:
import pickle
# Load the three pickled id dictionaries stored next to the dataset.
dicts = {}
for f in ['ent_id', 'rel_id', 'ts_id']:
    # `with` closes each handle even if unpickling fails (they used to stay open).
    # NOTE: pickle.load can execute arbitrary code -- only load files you trust.
    with open(base_path + f, 'rb') as in_file:
        dicts[f] = pickle.load(in_file)

In [10]:
# Give the three loaded lookup tables individual, descriptive names.
rel2id, ent2id, ts2id = dicts['rel_id'], dicts['ent_id'], dicts['ts_id']

In [11]:
# need wikidata_ids -> text for ent and rel, for human interpretability
# year_ids -> year is already there
file_ent = '/scratche/home/apoorv/tempqa/data/temporal_small/entity2wd_id.txt'
file_rel = '/scratche/home/apoorv/tempqa/data/temporal_small/relation2wd_id.txt'

def readDict(filename):
    """
    Read a tab-separated text file into a dictionary.

    Each non-blank line contributes ``first_field -> second_field``; a key
    that appears more than once keeps its last value.

    :param filename: path of the text file to read
    :return: dict mapping the first column to the second column
    :raises IndexError: if a non-blank line has no second tab-separated field
    """
    d = {}
    # `with` guarantees the handle is closed even if a malformed line raises.
    with open(filename, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            if fields == ['']:
                # Blank line (e.g. a trailing newline at end of file): skip it.
                continue
            d[fields[0]] = fields[1]
    return d

# Entity labels and relation labels, merged into one id -> text lookup.
# On a key clash the relation entry wins, because it is unpacked last.
e, r = readDict(file_ent), readDict(file_rel)
wd_id_to_text = {**e, **r}

In [12]:
def getReverseDict(d):
    """Return a new dict mapping each value of ``d`` back to its key.

    When several keys share a value, the key seen last in iteration order wins.
    """
    flipped = {}
    for original_key, original_value in d.items():
        flipped[original_value] = original_key
    return flipped

def dataIdsToLiterals(d, all_dicts):
    """
    Turn one id-encoded datapoint into human-readable values.

    :param d: indexable of five ids, read as d[0] entity, d[1] relation,
        d[2] entity, d[3] timestamp, d[4] timestamp
    :param all_dicts: dict holding 'id2rel', 'id2ent', 'id2ts' and 'wd_id_to_text'
    :return: list of two entity texts around the relation text, followed by
        the two timestamp values
    """
    rel_lookup = all_dicts['id2rel']
    ent_lookup = all_dicts['id2ent']
    ts_lookup = all_dicts['id2ts']
    text_of = all_dicts['wd_id_to_text']
    # Entities and relations go id -> wikidata id -> text; timestamps are
    # returned exactly as stored in id2ts.
    return [
        text_of[ent_lookup[d[0]]],
        text_of[rel_lookup[d[1]]],
        text_of[ent_lookup[d[2]]],
        ts_lookup[d[3]],
        ts_lookup[d[4]],
    ]

In [13]:
# id -> name lookups, built by inverting the name -> id dictionaries.
id2rel, id2ent, id2ts = (
    getReverseDict(forward_map) for forward_map in (rel2id, ent2id, ts2id)
)

In [14]:
# Bundle every lookup table so helpers can take a single argument.
all_dicts = dict(
    rel2id=rel2id,
    id2rel=id2rel,
    ent2id=ent2id,
    id2ent=id2ent,
    ts2id=ts2id,
    id2ts=id2ts,
    wd_id_to_text=wd_id_to_text,
)

In [15]:
d = dataset.data['train'][0]

In [16]:
list(ent2id.items())[:5]

[('Q100', 0),
 ('Q1000051', 1),
 ('Q1000061', 2),
 ('Q1000104', 3),
 ('Q100028', 4)]

In [17]:
dataIdsToLiterals(d, all_dicts)

['Simone Perrotta',
 'member of sports team',
 'Juventus F.C.',
 (1998, 0, 0),
 (1999, 0, 0)]

In [18]:
# Small GPU batch for manual inspection: the first `num_in_batch` rows of
# the 'train' examples, as int64.
num_in_batch = 10
test_batch = (
    torch.from_numpy(dataset.get_examples('train').astype('int64'))[:num_in_batch]
    .cuda()
)

In [19]:
model.eval()

TComplEx(
  (embeddings): ModuleList(
    (0): Embedding(60118, 512, sparse=True)
    (1): Embedding(208, 512, sparse=True)
    (2): Embedding(1139, 512, sparse=True)
  )
)

In [20]:
predictions, factors, time = model.forward(test_batch)

In [21]:
val, ind = torch.max(predictions, dim=1)

In [22]:
for x in ind:
    print(x.item())

6579
21478
27199
58055
15563
20002
48626
14195
20849
39177


In [23]:
print(test_batch)

tensor([[17762,    80,  6579,  1110,  1111],
        [ 6995,    80, 21478,  1083,  1084],
        [14149,    80, 27199,  1043,  1043],
        [18883,    80, 49196,  1103,  1114],
        [21241,    80, 15563,  1122,  1122],
        [30460,    80, 20002,  1083,  1084],
        [27233,    80, 48626,  1039,  1042],
        [46143,    80,  2255,  1111,  1117],
        [49617,    80, 20849,  1121,  1124],
        [37085,    80, 39177,  1006,  1006]], device='cuda:0')


In [24]:
for x in test_batch:
    print(dataIdsToLiterals(x.detach().cpu().numpy(), all_dicts))

['Simone Perrotta', 'member of sports team', 'Juventus F.C.', (1998, 0, 0), (1999, 0, 0)]
['Antonio Bordon', 'member of sports team', 'Udinese Calcio', (1971, 0, 0), (1972, 0, 0)]
['Danny Liddle', 'member of sports team', 'Scotland national football team', (1931, 0, 0), (1931, 0, 0)]
['Demetrio Albertini', 'member of sports team', 'Italy national football team', (1991, 0, 0), (2002, 0, 0)]
['Franco Zuculini', 'member of sports team', 'Genoa Cricket and Football Club', (2010, 0, 0), (2010, 0, 0)]
['Franco Tripodi', 'member of sports team', 'S.S. Lazio', (1971, 0, 0), (1972, 0, 0)]
['Raffaele Costantino', 'member of sports team', 'F.C. Bari 1908', (1927, 0, 0), (1930, 0, 0)]
['Jamie McMaster', 'member of sports team', 'Leeds United F.C.', (1999, 0, 0), (2005, 0, 0)]
['Michael Bryan', 'member of sports team', 'Watford F.C.', (2009, 0, 0), (2012, 0, 0)]
['Alf Edge', 'member of sports team', 'Manchester City F.C.', (1894, 0, 0), (1894, 0, 0)]


In [25]:
time_scores = model.forward_over_time(test_batch)

In [26]:
val, ind = torch.topk(time_scores, 1, dim=1)

In [27]:
ind[0]

tensor([1111], device='cuda:0')

In [28]:
# One printed line per example: first component of each top-ranked timestamp,
# each followed by a single space.
for row in ind:
    years_string = "".join(str(id2ts[x.item()][0]) + ' ' for x in row)
    print(years_string)

1999 
1971 
1931 
1991 
2010 
1973 
1932 
2004 
2011 
1894 


In [29]:
for x in test_batch:
    print(dataIdsToLiterals(x.detach().cpu().numpy(), all_dicts))

['Simone Perrotta', 'member of sports team', 'Juventus F.C.', (1998, 0, 0), (1999, 0, 0)]
['Antonio Bordon', 'member of sports team', 'Udinese Calcio', (1971, 0, 0), (1972, 0, 0)]
['Danny Liddle', 'member of sports team', 'Scotland national football team', (1931, 0, 0), (1931, 0, 0)]
['Demetrio Albertini', 'member of sports team', 'Italy national football team', (1991, 0, 0), (2002, 0, 0)]
['Franco Zuculini', 'member of sports team', 'Genoa Cricket and Football Club', (2010, 0, 0), (2010, 0, 0)]
['Franco Tripodi', 'member of sports team', 'S.S. Lazio', (1971, 0, 0), (1972, 0, 0)]
['Raffaele Costantino', 'member of sports team', 'F.C. Bari 1908', (1927, 0, 0), (1930, 0, 0)]
['Jamie McMaster', 'member of sports team', 'Leeds United F.C.', (1999, 0, 0), (2005, 0, 0)]
['Michael Bryan', 'member of sports team', 'Watford F.C.', (2009, 0, 0), (2012, 0, 0)]
['Alf Edge', 'member of sports team', 'Manchester City F.C.', (1894, 0, 0), (1894, 0, 0)]


In [30]:
import pickle
# questions = pickle.load(open('/scratche/home/apoorv/tempqa/data/questions/questions_position_held_small_with_paraphrases.pickle', 'rb'))
# questions = pickle.load(open('/scratche/home/apoorv/tempqa/data/questions/questions_position_held_small_with_paraphrases_v2.pickle', 'rb'))
# `with` closes the file once unpickling is done (the handle used to leak).
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('/scratche/home/apoorv/tempqa/data/questions/questions_position_held_small_1_paraphrases.pickle', 'rb') as questions_file:
    questions = pickle.load(questions_file)

In [32]:
# import random
# questions_shuffled = questions.copy()
# random.shuffle(questions_shuffled)
# fname = '/scratche/home/apoorv/tempqa/data/questions/questions_position_held_small_1_paraphrases_shuffled.pickle'
# pickle.dump(questions_shuffled, open(fname, 'wb'))

In [33]:
fname = '/scratche/home/apoorv/tempqa/data/questions/questions_position_held_small_1_paraphrases_shuffled.pickle'
# `with` closes the file once unpickling is done (the handle used to leak).
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open(fname, 'rb') as questions_file:
    questions = pickle.load(questions_file)
# questions = pickle.load(open('/scratche/home/apoorv/tempqa/data/questions/questions_position_held_small_with_paraphrases_v2_shuffled.pickle', 'rb'))

In [59]:
len(questions)

57954

In [40]:
import random

def checkQuestion(question, target_template):
    """Tell whether ``question['template']`` equals ``target_template``.

    :param question: question dict with a 'template' entry
    :param target_template: template string to compare against
    :return: True on an exact match, False otherwise
    """
    return question['template'] == target_template

# def getDataPoint(question, all_dicts):

def predictTime(question, model, all_dicts, k=1):
    """
    Answer a 'When did {head} hold the position of {tail}?' question.

    The two question entities are ordered by where they appear in the
    question text (the earlier one is used as head), the model scores every
    timestamp for that (head, relation, tail) triple, and the first component
    of each of the ``k`` best timestamps is returned.

    :param question: question dict with 'template', 'question', 'entities'
        and 'relations' entries
    :param model: model exposing ``forward_over_time(batch)``
    :param all_dicts: dict holding 'ent2id', 'rel2id' and 'id2ts'
    :param k: number of top-scoring timestamps to keep
    :return: set of ``id2ts[idx][0]`` values; empty set (after printing a
        notice) when the question uses a different template
    """
    target_template = 'When did {head} hold the position of {tail}?'
    if question['template'] != target_template:
        print('Not time question')
        return set()
    entities = list(question['entities'])
    ent2id = all_dicts['ent2id']
    rel2id = all_dicts['rel2id']
    id2ts = all_dicts['id2ts']
    ent1 = entities[0]
    ent2 = entities[1]
    text = question['question']
    # 'entities' is a set, so its order is arbitrary; recover head/tail from
    # the position of each id inside the question text.
    if text.find(ent1) < text.find(ent2):
        head = ent2id[ent1]
        tail = ent2id[ent2]
    else:
        head = ent2id[ent2]
        tail = ent2id[ent1]
    relation = rel2id[list(question['relations'])[0]]
    # The two time slots are filled with the placeholder id 1.
    data_point = [head, relation, tail, 1, 1]
    # Put the batch on the model's own device (falling back to CUDA, the
    # previous hard-coded choice, for a model without parameters). Building
    # the tensor with torch directly removes the dependency on a numpy import
    # that only happens in a later cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')
    data_batch = torch.tensor([data_point], dtype=torch.long, device=device)
    time_scores = model.forward_over_time(data_batch)
    _, ind = torch.topk(time_scores, k, dim=1)
    return {id2ts[x.item()][0] for row in ind for x in row}

def predictHead(question, model, all_dicts, k=1):
    """
    Answer a 'Who was the {tail} in {time}?' question with the model's top-k entities.

    The question's entity is placed in the first slot of the datapoint, the
    relation id is shifted by half the number of rows of the relation
    embedding table, and the model's ``k`` best-scoring entity ids are mapped
    back through ``id2ent``.

    :param question: question dict with 'template', 'entities', 'times' and
        'relations' entries
    :param model: model exposing ``embeddings[1].weight`` and
        ``forward(batch)`` returning three values, the first being the scores
    :param all_dicts: dict holding 'ent2id', 'rel2id', 'ts2id' and 'id2ent'
    :param k: number of top-scoring entities to keep
    :return: set of ``id2ent`` values; empty set when the template differs,
        the question has no time, or the time is not in ``ts2id``
    """
    target_template = 'Who was the {tail} in {time}?'
    if question['template'] != target_template:
        # Message corrected: it used to say 'Not time question', copied from
        # predictTime.
        print('Not head question')
        return set()
    entities = list(question['entities'])
    times = list(question['times'])
    ent2id = all_dicts['ent2id']
    rel2id = all_dicts['rel2id']
    ts2id = all_dicts['ts2id']
    id2ent = all_dicts['id2ent']
    known_entity = ent2id[entities[0]]
    try:
        time_id = ts2id[(times[0], 0, 0)]
    except (IndexError, KeyError):
        # No time in the question, or a year the model has no timestamp for.
        # Only these two lookup failures are swallowed; anything else surfaces.
        return set()
    # presumably the second half of the relation table holds the reciprocal
    # relations -- TODO confirm against the model definition
    num_relation_rows = model.embeddings[1].weight.shape[0]
    relation = rel2id[list(question['relations'])[0]] + num_relation_rows // 2
    # The third slot is the one being predicted; it is filled with placeholder id 1.
    data_point = [known_entity, relation, 1, time_id, time_id]
    # Put the batch on the model's own device (falling back to CUDA, the
    # previous hard-coded choice, for a model without parameters). Building
    # the tensor with torch directly removes the dependency on a numpy import
    # that only happens in a later cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')
    data_batch = torch.tensor([data_point], dtype=torch.long, device=device)
    predictions, _, _ = model.forward(data_batch)
    _, ind = torch.topk(predictions, k, dim=1)
    return {id2ent[x.item()] for row in ind for x in row}


In [66]:
import numpy as np
from tqdm import tqdm
# Solver function and question template for each question type, resolved
# once instead of on every question of the inner loop.
question_solvers = {
    'predictHead': (predictHead, 'Who was the {tail} in {time}?'),
    'predictTime': (predictTime, 'When did {head} hold the position of {tail}?'),
}
k = 1 # hit at k
for question_type in ['predictHead', 'predictTime']:
    which_question_function, target_template = question_solvers[question_type]
    correct_count = 0
    total_count = 0
    for question in tqdm(questions):
        # Only questions generated from this type's template are scored.
        if question['template'] != target_template:
            continue
        total_count += 1
        predicted = which_question_function(question, model, all_dicts, k)
        # A question counts as correct when any predicted value is a gold answer.
        if question['answers'].intersection(predicted):
            correct_count += 1

    # Report nan instead of raising ZeroDivisionError when no question matched.
    accuracy = correct_count / total_count if total_count else float('nan')
    print(question_type, correct_count, total_count, accuracy)

100%|██████████| 57954/57954 [00:10<00:00, 5492.44it/s]
  1%|          | 520/57954 [00:00<00:11, 5142.03it/s]

predictHead 8408 8603 0.9773334883180286


100%|██████████| 57954/57954 [00:11<00:00, 5184.84it/s]

predictTime 12224 12272 0.9960886571056062





In [43]:
len(questions)

57954

In [44]:
random.choice(questions)

{'question': 'When did the first Q11902879 come to power?',
 'answers': {1854},
 'answer_type': 'time',
 'template': 'When did the {adj} {tail} come to power?',
 'entities': {'Q11902879'},
 'times': set(),
 'relations': {'P39'},
 'paraphrases': ['When did the first Lord Mayor come to power?']}

In [45]:
questions[10:15]

[{'question': 'When was the first time that Q69036 was the Q30263013?',
  'answers': {2012},
  'answer_type': 'time',
  'template': 'When was the {adj} time that {head} was the {tail}?',
  'entities': {'Q30263013', 'Q69036'},
  'times': set(),
  'relations': {'P39'},
  'paraphrases': ['When was the first time that Torsten Albig was the Minister-President of Schleswig-Holstein?']},
 {'question': 'Who was the Q17014422 in 1982?',
  'answers': {'Q2625332', 'Q2748845'},
  'answer_type': 'entity',
  'template': 'Who was the {tail} in {time}?',
  'entities': {'Q17014422'},
  'times': {1982},
  'relations': {'P39'},
  'paraphrases': ['Who was the President of the Congress of Deputies in 1982?']},
 {'question': 'Who was the Q2484309 in 1802?',
  'answers': {'Q337864'},
  'answer_type': 'entity',
  'template': 'Who was the {tail} in {time}?',
  'entities': {'Q2484309'},
  'times': {1802},
  'relations': {'P39'},
  'paraphrases': ['Who was the Secretary of State for the Home Department in 1802?'

In [46]:
from sentence_transformers import SentenceTransformer
model_st = SentenceTransformer('distilbert-base-nli-mean-tokens')


In [47]:
model_st.cuda()

SentenceTransformer(
  (0): Transformer(
    (auto_model): DistilBertModel(
      (embeddings): Embeddings(
        (word_embeddings): Embedding(30522, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (transformer): Transformer(
        (layer): ModuleList(
          (0): TransformerBlock(
            (attention): MultiHeadSelfAttention(
              (dropout): Dropout(p=0.1, inplace=False)
              (q_lin): Linear(in_features=768, out_features=768, bias=True)
              (k_lin): Linear(in_features=768, out_features=768, bias=True)
              (v_lin): Linear(in_features=768, out_features=768, bias=True)
              (out_lin): Linear(in_features=768, out_features=768, bias=True)
            )
            (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (ffn): FFN(
              (dro

In [48]:
import torch.nn as nn

In [49]:
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)

In [50]:
# src is laid out as (seq_length, batch_size, embed_dim)
src = torch.rand(10, 40, 512)
# The mask is passed below as src_key_padding_mask, whose shape is
# (batch_size, seq_length), with True marking a position as padding to ignore.
# An all-True mask hides every token and the encoder output degenerates to
# NaN, so the dummy mask starts with nothing masked.
mask = torch.zeros((40, 10), dtype=torch.bool)


In [51]:
mask.shape

torch.Size([40, 10])

In [52]:
out = transformer_encoder(src, src_key_padding_mask=mask)

In [53]:
questions[1]

{'question': 'When was the first time that Q6787053 was the Q41582565?',
 'answers': {1880},
 'answer_type': 'time',
 'template': 'When was the {adj} time that {head} was the {tail}?',
 'entities': {'Q41582565', 'Q6787053'},
 'times': set(),
 'relations': {'P39'},
 'paraphrases': ['When was the first time that Sir Mathew Wilson, 1st Baronet was the Member of the 22nd Parliament of the United Kingdom?']}

In [54]:
x = [0,1,2]
y = [0,1]

In [55]:
x = torch.tensor(x, dtype=torch.long)
y = torch.tensor(y, dtype=torch.long)


In [56]:
padding_tensor([x,y])[0].cuda()

NameError: name 'padding_tensor' is not defined

In [57]:
def padding_tensor(sequences):
    """
    Right-pad a list of tensors with zeros and stack them into one batch.

    :param sequences: non-empty list of tensors; each one's length is its
        size along dim 0
    :return: ``(out_tensor, mask)``, both of shape ``(len(sequences), max_len)``
        and with the dtype and device of ``sequences[0]``; ``mask`` is 1 at
        positions holding real data and 0 at padded positions
    :raises ValueError: if ``sequences`` is empty
    """
    if len(sequences) == 0:
        # Same exception type that max() raised here before, with a clearer message.
        raise ValueError('padding_tensor() needs at least one sequence')
    num = len(sequences)
    max_len = max(s.size(0) for s in sequences)
    out_dims = (num, max_len)
    # new_zeros keeps the dtype/device of the first sequence; it replaces the
    # deprecated `.data.new(...).fill_(0)` idiom.
    out_tensor = sequences[0].new_zeros(out_dims)
    mask = sequences[0].new_zeros(out_dims)
    for i, tensor in enumerate(sequences):
        length = tensor.size(0)
        out_tensor[i, :length] = tensor
        mask[i, :length] = 1
    return out_tensor, mask


In [189]:
ent_emb_matrix = model.embeddings[0].weight.data
time_emb_matrix = model.embeddings[2].weight.data

In [193]:
out = torch.cat([ent_emb_matrix, time_emb_matrix], dim=0)
out.shape

torch.Size([22170, 512])

In [211]:
batch_size = 5
nb_digits = 10
# Dummy input that HAS to be 2D for the scatter (you can use view(-1,1) if needed)
# torch.randint draws int64 indices in [0, nb_digits) directly, replacing the
# legacy `LongTensor(...).random_() % nb_digits` construction.
y = torch.randint(0, nb_digits, (batch_size, 2))
# One hot encoding buffer that you create out of the loop and just keep reusing
# (torch.zeros instead of the uninitialised legacy FloatTensor constructor)
y_onehot = torch.zeros(batch_size, nb_digits)

# In your for loop
y_onehot.zero_()
y_onehot.scatter_(1, y, 1)


tensor([[0., 0., 1., 0., 0., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0.]])

In [212]:
y

tensor([[2, 8],
        [9, 0],
        [6, 0],
        [7, 1],
        [0, 5]])

In [233]:
a = torch.tensor([1,2,3])
b = torch.tensor([3,4,5])
[a,b]

[tensor([1, 2, 3]), tensor([3, 4, 5])]

In [234]:
x = torch.stack([a,b,a,b])

In [235]:
x

tensor([[1, 2, 3],
        [3, 4, 5],
        [1, 2, 3],
        [3, 4, 5]])

In [223]:
# Multi-hot buffer with one row per row of `x`, 20 columns; torch.zeros
# replaces the uninitialised legacy FloatTensor constructor.
y_onehot = torch.zeros(4, 20)

# In your for loop
y_onehot.zero_()
y_onehot.scatter_(1, x, 1)


tensor([[0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]])

In [224]:
y_onehot

tensor([[0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.],
        [0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]])

In [242]:
a = model.embeddings[0](x.cuda())
a.shape

torch.Size([4, 3, 512])

In [248]:
# Zero-filled placeholder. The legacy FloatTensor(4, 512) constructor returns
# uninitialised memory, which would otherwise flow into the torch.cat below.
b = torch.zeros(4, 512).cuda()

In [249]:
b.shape

torch.Size([4, 512])

In [250]:
b = b.unsqueeze(1)

In [251]:
b.shape

torch.Size([4, 1, 512])

In [252]:
c = torch.cat([b,a], dim=1)

In [253]:
c.shape

torch.Size([4, 4, 512])

In [256]:
# torch.BoolTensor((10, 20)) treats the tuple as data and yields a 2-element
# tensor; build an actual 10x20 boolean mask instead.
mask = torch.zeros((10, 20), dtype=torch.bool)

In [264]:
mask = torch.zeros((4, 10), dtype=torch.bool)

In [265]:
mask

tensor([[False, False, False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False],
        [False, False, False, False, False, False, False, False, False, False]])