# RESCAL vs BayesianRESCAL on KINSHIP dataset

In [55]:
import numpy as np
import logging
from brescal import BayesianRescal
from scipy.io.matlab import loadmat
from scipy.sparse import csr_matrix
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline  

import rescal

# Fetch the root logger (no name given) and set its threshold to INFO so that
# INFO-level progress records such as "INFO:RESCAL:..." and "INFO:brescal:..."
# are not filtered out of the cell outputs.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.INFO)

In [56]:
# Read the .mat file and take the array stored under its 'Rs' key as float32.
mat = loadmat('../data/alyawarradata.mat')
T = np.array(mat['Rs'], dtype=np.float32)

# Latent dimension passed to both factorization models in the cells below.
D = 5

# Move the last axis to the front with a single permutation; this is the same
# reordering as swapping axes 1<->2 and then 0<->1.
T = np.transpose(T, (2, 0, 1))  # [relation, entity, entity]
num_relation, num_entity, _ = T.shape
print(T.shape)

(26, 104, 104)


In [57]:
import itertools  # not used by this cell any more; kept so the name stays defined in the kernel

# Build the training tensor: each non-zero entry of T is kept independently
# with probability p (stored as 1); every other cell stays 0.
# With p = 1 all non-zero entries are kept.
p = 1

# A single vectorized Bernoulli draw per cell replaces the Python-level loop
# over all num_relation * num_entity**2 cells. The distribution of trainT is
# the same as with the loop; for p < 1 the sequence of random numbers consumed
# differs because a sample is drawn for every cell, not only the non-zero ones.
keep_mask = (T != 0) & (np.random.binomial(1, p, size=T.shape) == 1)
trainT = np.zeros_like(T)
trainT[keep_mask] = 1

## Training RESCAL

```output = fit (1 - normalized reconstruction error) | change in fit (delta) | elapsed time```

In [58]:
# rescal_als is given a list holding one CSR matrix per relation slice of the
# training tensor.
X = [csr_matrix(trainT[rel]) for rel in range(num_relation)]

# Factorize with latent dimension D. A and R are combined further down as
# A . R[k] . A^T to reconstruct relation k.
A, R, f, itr, exectimes = rescal.rescal_als(X, D)

INFO:RESCAL:[  0] fit: 0.14301 | delta: 1.4e-01 | secs: 0.01207
INFO:RESCAL:[  1] fit: 0.20000 | delta: 5.7e-02 | secs: 0.01414
INFO:RESCAL:[  2] fit: 0.22227 | delta: 2.2e-02 | secs: 0.01595
INFO:RESCAL:[  3] fit: 0.23262 | delta: 1.0e-02 | secs: 0.01381
INFO:RESCAL:[  4] fit: 0.24004 | delta: 7.4e-03 | secs: 0.01330
INFO:RESCAL:[  5] fit: 0.24663 | delta: 6.6e-03 | secs: 0.01492
INFO:RESCAL:[  6] fit: 0.25038 | delta: 3.7e-03 | secs: 0.01532
INFO:RESCAL:[  7] fit: 0.25203 | delta: 1.7e-03 | secs: 0.01359
INFO:RESCAL:[  8] fit: 0.25284 | delta: 8.1e-04 | secs: 0.01498
INFO:RESCAL:[  9] fit: 0.25329 | delta: 4.5e-04 | secs: 0.01493
INFO:RESCAL:[ 10] fit: 0.25356 | delta: 2.6e-04 | secs: 0.01486
INFO:RESCAL:[ 11] fit: 0.25372 | delta: 1.6e-04 | secs: 0.01542
INFO:RESCAL:[ 12] fit: 0.25383 | delta: 1.1e-04 | secs: 0.01617
INFO:RESCAL:[ 13] fit: 0.25390 | delta: 7.9e-05 | secs: 0.01633


## Training BayesianRESCAL

```output = log-likelihood | fit | elapsed time```

In [59]:
# Variance hyperparameters passed to BayesianRescal; all three are set to 1.
var_e = var_x = var_r = 1.

# Build the model with latent dimension D and fit it on the training tensor,
# capped at max_iter=20.
model = BayesianRescal(D, var_e=var_e, var_x=var_x, var_r=var_r)
model.fit(trainT, max_iter=20)

INFO:brescal:[  0] LL: -5960.260 | fit: -0.05770 |  sec: 0.206
INFO:brescal:[  1] LL: -5804.599 | fit: -0.05902 |  sec: 0.208
INFO:brescal:[  2] LL: -5758.575 | fit: -0.05865 |  sec: 0.190
INFO:brescal:[  3] LL: -5715.261 | fit: -0.05390 |  sec: 0.199
INFO:brescal:[  4] LL: -5644.974 | fit: -0.04051 |  sec: 0.271
INFO:brescal:[  5] LL: -5425.924 | fit: 0.00568 |  sec: 0.224
INFO:brescal:[  6] LL: -5179.480 | fit: 0.05885 |  sec: 0.199
INFO:brescal:[  7] LL: -5078.191 | fit: 0.08469 |  sec: 0.200
INFO:brescal:[  8] LL: -4995.866 | fit: 0.10293 |  sec: 0.188
INFO:brescal:[  9] LL: -4956.870 | fit: 0.11305 |  sec: 0.192
INFO:brescal:[ 10] LL: -4941.775 | fit: 0.11890 |  sec: 0.186
INFO:brescal:[ 11] LL: -4881.196 | fit: 0.13010 |  sec: 0.183
INFO:brescal:[ 12] LL: -4859.562 | fit: 0.13119 |  sec: 0.187
INFO:brescal:[ 13] LL: -4839.320 | fit: 0.13355 |  sec: 0.183
INFO:brescal:[ 14] LL: -4840.061 | fit: 0.13498 |  sec: 0.186
INFO:brescal:[ 15] LL: -4814.839 | fit: 0.14224 |  sec: 0.192
INF

## Compare both models by ROC-AUC

#### ROC-AUC score of BRESCAL

In [60]:
from sklearn.metrics import roc_auc_score

# Reconstruct the tensor from the fitted model and score it against T,
# treating every cell of the flattened tensors as one prediction.
_T = model._reconstruct()
labels, scores = T.flatten(), _T.flatten()
print(roc_auc_score(labels, scores))

0.912949753695


#### ROC-AUC score of RESCAL

In [61]:
# Rebuild each relation slice from the RESCAL factors as A . R[k] . A^T and
# store it in a buffer that has T's shape and dtype.
_X = np.zeros_like(T)
for k in range(len(_X)):
    _X[k] = A.dot(R[k]).dot(A.T)

# Score the flattened reconstruction against the flattened tensor T.
y_true, y_score = T.flatten(), _X.flatten()
print(roc_auc_score(y_true, y_score))

0.938404085088


## Initialize Bayesian_RESCAL with RESCAL

Here Bayesian_RESCAL is initialized with the factors learned by RESCAL. As the results below show, it ends up performing worse than RESCAL, presumably because it cannot escape from a local optimum.

In [62]:
# Run RESCAL again to get the factors used as the starting state.
A, R, f, itr, exectimes = rescal.rescal_als(X, D)

# Create a fresh Bayesian model but do not call fit(): its sizes and latent
# factors are filled in by hand from the RESCAL result instead.
model = BayesianRescal(D, var_e=var_e, var_x=var_x, var_r=var_r)
model.n_relations, model.n_entities = num_relation, num_entity
model.E = A

# Copy the per-relation D x D matrices into one dense (num_relation, D, D) array.
model.R = np.zeros((num_relation, D, D))
for k in range(num_relation):
    model.R[k] = R[k]

# Continue from this state with the model's _gibbs routine, capped at max_iter=5.
model._gibbs(trainT, max_iter=5)

INFO:RESCAL:[  0] fit: 0.14301 | delta: 1.4e-01 | secs: 0.01367
INFO:RESCAL:[  1] fit: 0.20000 | delta: 5.7e-02 | secs: 0.01450
INFO:RESCAL:[  2] fit: 0.22227 | delta: 2.2e-02 | secs: 0.01279
INFO:RESCAL:[  3] fit: 0.23262 | delta: 1.0e-02 | secs: 0.01321
INFO:RESCAL:[  4] fit: 0.24004 | delta: 7.4e-03 | secs: 0.01225
INFO:RESCAL:[  5] fit: 0.24663 | delta: 6.6e-03 | secs: 0.01216
INFO:RESCAL:[  6] fit: 0.25038 | delta: 3.7e-03 | secs: 0.01312
INFO:RESCAL:[  7] fit: 0.25203 | delta: 1.7e-03 | secs: 0.01222
INFO:RESCAL:[  8] fit: 0.25284 | delta: 8.1e-04 | secs: 0.01256
INFO:RESCAL:[  9] fit: 0.25329 | delta: 4.5e-04 | secs: 0.01222
INFO:RESCAL:[ 10] fit: 0.25356 | delta: 2.6e-04 | secs: 0.01260
INFO:RESCAL:[ 11] fit: 0.25372 | delta: 1.6e-04 | secs: 0.01322
INFO:RESCAL:[ 12] fit: 0.25383 | delta: 1.1e-04 | secs: 0.01231
INFO:RESCAL:[ 13] fit: 0.25390 | delta: 7.9e-05 | secs: 0.01278
INFO:brescal:[  0] LL: -4759.940 | fit: 0.14582 |  sec: 0.183
INFO:brescal:[  1] LL: -4819.234 | fit: 0.

In [63]:
from sklearn.metrics import roc_auc_score

# ROC-AUC of the RESCAL-initialized Bayesian model: reconstruct the tensor and
# compare it cell by cell with T.
_T = model._reconstruct()
labels, scores = T.flatten(), _T.flatten()
print(roc_auc_score(labels, scores))

0.878177958976
