In [1]:
import numpy as np
import logging
from scipy.io.matlab import loadmat
from scipy.sparse import csr_matrix
import matplotlib
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

import rescal
from brescal import BayesianRescal
from seq_brescal import PFBayesianRescal

%matplotlib inline

# Raise the root logger to INFO so that INFO-level records emitted by
# imported modules are not filtered out at the root.
logger = logging.getLogger()
logger.setLevel("INFO")

In [2]:
# Load the relational tensor stored under the 'Rs' key of the .mat file.
mat = loadmat('../data/alyawarradata.mat')

# First positional argument handed to both models below
# (presumably the latent dimensionality -- confirm against brescal).
n_dim = 5

# Move the last axis to the front in a single permutation so the layout
# becomes [relation, entity, entity].
T = np.transpose(np.array(mat['Rs'], np.float32), (2, 0, 1))
n_relation, n_entity, _ = T.shape

# Summary statistics of the tensor.
tensor_size = np.prod(T.shape)
n_positive = np.sum(T)
print('Tensor dimension: %s' % (str(T.shape)))
print('Tensor size: %d' % (tensor_size))
print('Total population: %d' % (n_positive))
print('Proportion of true value %.3f' % (n_positive / tensor_size))

Tensor dimension: (26, 104, 104)
Tensor size: 281216
Total population: 10790
Proportion of true value 0.038


In [3]:
import itertools
# Build the initial observation mask: every non-zero entry of T is revealed
# independently with probability p; zero entries are never revealed.
p = 0.1

# A dedicated, seeded generator makes the mask (and everything computed from
# it) reproducible under Restart & Run All, without touching the global
# NumPy random state.
MASK_SEED = 0
mask_rng = np.random.RandomState(MASK_SEED)

# One Bernoulli(p) draw per cell, done in a single vectorised call instead of
# a Python loop over every (relation, entity, entity) triple.
revealed = mask_rng.binomial(1, p, size=T.shape).astype(bool)

maskT = np.zeros_like(T)
# `T != 0` mirrors the truthiness test of the element-wise version.
maskT[(T != 0) & revealed] = 1

print('Number of initial observation %d' % (np.sum(maskT)))

Number of initial observation 1066


In [4]:
# Fit BayesianRescal on the observed entries only (unobserved cells are
# zeroed by the mask), then score its reconstruction against the full tensor.
observed_T = T * maskT
model = BayesianRescal(n_dim, eval_fn=roc_auc_score)
model.fit(observed_T, max_iter=20)

_X = model._reconstruct()
# ROC-AUC over every cell of T, i.e. including the cells used for fitting.
auc_all = roc_auc_score(T.ravel(), _X.ravel())
print(auc_all)

INFO:brescal:[INIT] LL: -119225561.136 | fit: 0.51138
INFO:brescal:[  0] LL: 334369.135 | fit: 0.51150 |  sec: 0.187
INFO:brescal:[  1] LL: 334464.467 | fit: 0.60330 |  sec: 0.187
INFO:brescal:[  2] LL: 334512.714 | fit: 0.62049 |  sec: 0.196
INFO:brescal:[  3] LL: 334589.140 | fit: 0.61721 |  sec: 0.356
INFO:brescal:[  4] LL: 334694.387 | fit: 0.67560 |  sec: 0.239
INFO:brescal:[  5] LL: 334913.563 | fit: 0.74514 |  sec: 0.188
INFO:brescal:[  6] LL: 335151.273 | fit: 0.79419 |  sec: 0.192
INFO:brescal:[  7] LL: 335291.943 | fit: 0.83854 |  sec: 0.186
INFO:brescal:[  8] LL: 335297.644 | fit: 0.83847 |  sec: 0.187
INFO:brescal:[  9] LL: 335337.966 | fit: 0.83821 |  sec: 0.195
INFO:brescal:[ 10] LL: 335344.589 | fit: 0.84667 |  sec: 0.300
INFO:brescal:[ 11] LL: 335373.658 | fit: 0.83764 |  sec: 0.224
INFO:brescal:[ 12] LL: 335400.973 | fit: 0.85003 |  sec: 0.200
INFO:brescal:[ 13] LL: 335420.448 | fit: 0.84314 |  sec: 0.211
INFO:brescal:[ 14] LL: 335468.023 | fit: 0.84865 |  sec: 0.202
I

0.776309937295


In [5]:
# Run PFBayesianRescal on the full tensor, starting from the initial
# observation mask. Note that this rebinds `model`.
pf_options = dict(controlled_var=False, n_particles=5, eval_fn=roc_auc_score)
model = PFBayesianRescal(n_dim, **pf_options)

# NOTE(review): maskT is passed by reference -- confirm whether fit() updates
# it in place before reusing maskT in later cells.
seq = model.fit(T, obs_mask=maskT, max_iter=1000)

INFO:seq_brescal:[NEXT] (0, 49, 49): 0.000000, population: 0/0
INFO:seq_brescal:[  0] LL: -30349.628 | fit: 0.50172 |  sec: 6.321
INFO:seq_brescal:[NEXT] (1, 103, 103): 0.000000, population: 0/1
INFO:seq_brescal:[  1] LL: -30288.909 | fit: 0.51623 |  sec: 4.799
INFO:seq_brescal:[NEXT] (11, 6, 6): 0.000000, population: 0/2
INFO:seq_brescal:[  2] LL: -30298.159 | fit: 0.52074 |  sec: 5.914
INFO:seq_brescal:[NEXT] (15, 10, 41): 1.000000, population: 1/3
INFO:seq_brescal:[  3] LL: -30226.057 | fit: 0.53275 |  sec: 4.766
INFO:seq_brescal:[NEXT] (5, 41, 67): 0.000000, population: 1/4
INFO:seq_brescal:[  4] LL: -29845.411 | fit: 0.56669 |  sec: 4.707
INFO:seq_brescal:[NEXT] (15, 34, 41): 0.000000, population: 1/5
INFO:seq_brescal:[  5] LL: -29152.517 | fit: 0.64568 |  sec: 4.711
INFO:seq_brescal:[NEXT] (5, 41, 19): 1.000000, population: 2/6
INFO:seq_brescal:[  6] LL: -28724.948 | fit: 0.71585 |  sec: 4.728
INFO:seq_brescal:[NEXT] (15, 7, 41): 1.000000, population: 3/7
INFO:seq_brescal:[  7] L