In [4]:
import os 
from rescal_model import RESCALModel
import sys

# Benchmark 1: Alyawarra kinship data

In [5]:
# Benchmark 1: create a fresh model and load the Alyawarra kinship tensor.
model1 = RESCALModel()
# os.path.join picks the separator of the host OS, so the notebook runs on
# Windows, Linux and macOS alike (hard-coded '\\' only works on Windows).
benchmark1_path = os.path.join(os.getcwd(), 'benchmark1', 'rescal_input_alyawarradata.mat')
# NOTE(review): 'Rs' is presumably the name of the tensor variable inside the
# .mat file -- confirm against RESCALModel.read_dataset.
X = model1.read_dataset(benchmark1_path, 'Rs')

## Number of predicates ($m$)

In [6]:
# len(X) is the number of predicates (m) in the loaded dataset.
num_predicates = len(X)
print(num_predicates)

26


## Shape of each slice of tensor ($n \times n$)

In [7]:
# Inspect the first slice of X; its shape is reported as (n, n).
first_slice = X[0]
print(first_slice.shape)

(104, 104)


## Rank of factorization ($r$)

In [8]:
# Rank r of the factorization; passed to model1.factorize(r) below.
r = 50

## Tensor factorization with $r=50$
RESCAL factorizes each slice $X_k$ of the tensor $X$ into two matrices, $A$ and $R_k$, using an Alternating Least Squares (ALS) approach:

$X_k \approx A R_k A^T, \quad \text{for } k = 1, 2, \ldots, m$

In [9]:
# Factorize the benchmark-1 tensor with rank r, then unpack the two returned
# factors: A1 (entity matrix) and R1 (indexable collection of per-predicate matrices).
factors1 = model1.factorize(r)
A1, R1 = factors1

INFO:RESCAL:[  0] fit: 0.58742 | delta: 5.9e-01 | secs: 0.02889
INFO:RESCAL:[  1] fit: 0.70874 | delta: 1.2e-01 | secs: 0.02605
INFO:RESCAL:[  2] fit: 0.76460 | delta: 5.6e-02 | secs: 0.02893
INFO:RESCAL:[  3] fit: 0.79413 | delta: 3.0e-02 | secs: 0.04181
INFO:RESCAL:[  4] fit: 0.80742 | delta: 1.3e-02 | secs: 0.03568
INFO:RESCAL:[  5] fit: 0.81309 | delta: 5.7e-03 | secs: 0.03092
INFO:RESCAL:[  6] fit: 0.81563 | delta: 2.5e-03 | secs: 0.03295
INFO:RESCAL:[  7] fit: 0.81688 | delta: 1.2e-03 | secs: 0.03856
INFO:RESCAL:[  8] fit: 0.81754 | delta: 6.6e-04 | secs: 0.03236


## Matrix $A$ contains the latent component representation of the entities in the domain

In [10]:
# Display A1, the first of the two factors returned by model1.factorize(r).
print(A1)

[[-0.08895653  0.14236985  0.11788327 ...  0.26650432  0.12298793
   0.32778377]
 [ 0.12130986  0.00184876 -0.08225458 ...  0.0285237  -0.01637083
   0.42199477]
 [ 0.13180202  0.02598964 -0.17744959 ... -0.04703289  0.01671648
   0.29588571]
 ...
 [ 0.13151619 -0.02539297 -0.00727691 ... -0.21539883  0.0530544
   0.31401749]
 [ 0.07343107 -0.09189324 -0.04857683 ... -0.04078883  0.05100282
   0.46618083]
 [-0.19936275 -0.30501302 -0.04550286 ...  0.23861946 -0.00076335
   0.36909164]]


## Shape of $A$ ($n \times r$)

In [11]:
# Shape of the entity factor A1, reported as (n, r).
A1_shape = A1.shape
print(A1_shape)

(104, 50)


## Matrix $R_k$ models the interactions of the latent components in the k-th predicate

In [12]:
# Display the interaction matrix of the first predicate (index 0 of R1).
R1_first = R1[0]
print(R1_first)

[[-0.05298081  0.01344864  0.04423119 ... -0.01612217 -0.01345469
   0.01752797]
 [-0.01240607 -0.05152611  0.01070069 ... -0.0130523  -0.03444891
   0.10918697]
 [ 0.04269095  0.01861163  0.0056755  ...  0.01379274  0.03133312
   0.02853293]
 ...
 [-0.03540096 -0.04112311  0.00567597 ...  0.03560325  0.00800331
   0.03948583]
 [-0.01669339 -0.03597238  0.03266643 ...  0.00399071 -0.03280139
   0.0658602 ]
 [ 0.03503293  0.1079669   0.02725706 ...  0.03155712  0.05991559
   0.12683266]]


## Shape of $R_k$ ($r \times r$)

In [13]:
# Shape of the first predicate's interaction matrix, reported as (r, r).
R1_first_shape = R1[0].shape
print(R1_first_shape)

(50, 50)


## Save matrices to txt files and load them back

In [14]:
# Round-trip the factor matrices through disk: save them under ./benchmark1,
# load them back, and print the reloaded A and first R_k for visual comparison.
# The directory is built once with os.path.join so it is valid on every OS
# (a hard-coded '\\' separator only works on Windows) and so that save and
# load are guaranteed to use the same location.
benchmark1_dir = os.path.join(os.getcwd(), 'benchmark1')
model1.save_model(benchmark1_dir)
A1_load, R1_load = model1.load_model(benchmark1_dir)
print(A1_load)
print()
print(R1_load[0])

[[-0.08895653  0.14236985  0.11788327 ...  0.26650432  0.12298793
   0.32778377]
 [ 0.12130986  0.00184876 -0.08225458 ...  0.0285237  -0.01637083
   0.42199477]
 [ 0.13180202  0.02598964 -0.17744959 ... -0.04703289  0.01671648
   0.29588571]
 ...
 [ 0.13151619 -0.02539297 -0.00727691 ... -0.21539883  0.0530544
   0.31401749]
 [ 0.07343107 -0.09189324 -0.04857683 ... -0.04078883  0.05100282
   0.46618083]
 [-0.19936275 -0.30501302 -0.04550286 ...  0.23861946 -0.00076335
   0.36909164]]

[[-0.05298081  0.01344864  0.04423119 ... -0.01612217 -0.01345469
   0.01752797]
 [-0.01240607 -0.05152611  0.01070069 ... -0.0130523  -0.03444891
   0.10918697]
 [ 0.04269095  0.01861163  0.0056755  ...  0.01379274  0.03133312
   0.02853293]
 ...
 [-0.03540096 -0.04112311  0.00567597 ...  0.03560325  0.00800331
   0.03948583]
 [-0.01669339 -0.03597238  0.03266643 ...  0.00399071 -0.03280139
   0.0658602 ]
 [ 0.03503293  0.1079669   0.02725706 ...  0.03155712  0.05991559
   0.12683266]]


## Evaluation: 10-fold cross validation
For both the training and the test data, compute the mean and standard deviation of the area under the precision-recall curve (PR AUC) across the folds.

In [15]:
# Run the model's evaluation; it returns the PR AUC mean and standard deviation
# for the training data and for the test data, in that order.
mean_train1, std_train1, mean_test1, std_test1 = model1.evaluate()

# Group the values the way they are reported: training/test side by side.
mean_pair1 = (mean_train1, mean_test1)
print("PR AUC training/test mean: %.3f/%.3f" % mean_pair1)
std_pair1 = (std_train1, std_test1)
print("PR AUC training/test standard deviation: %.3f/%.3f" % std_pair1)

INFO:RESCAL:Train Fold 0
INFO:RESCAL:[  0] fit: 0.55494 | delta: 5.5e-01 | secs: 0.03013
INFO:RESCAL:[  1] fit: 0.69432 | delta: 1.4e-01 | secs: 0.03391
INFO:RESCAL:[  2] fit: 0.75465 | delta: 6.0e-02 | secs: 0.02995
INFO:RESCAL:[  3] fit: 0.78683 | delta: 3.2e-02 | secs: 0.02688
INFO:RESCAL:[  4] fit: 0.80240 | delta: 1.6e-02 | secs: 0.02475
INFO:RESCAL:[  5] fit: 0.80899 | delta: 6.6e-03 | secs: 0.03363
INFO:RESCAL:[  6] fit: 0.81176 | delta: 2.8e-03 | secs: 0.03662
INFO:RESCAL:[  7] fit: 0.81303 | delta: 1.3e-03 | secs: 0.02693
INFO:RESCAL:[  8] fit: 0.81366 | delta: 6.4e-04 | secs: 0.03100
INFO:RESCAL:Test Fold 0
INFO:RESCAL:[  0] fit: 0.57267 | delta: 5.7e-01 | secs: 0.03299
INFO:RESCAL:[  1] fit: 0.69693 | delta: 1.2e-01 | secs: 0.03244
INFO:RESCAL:[  2] fit: 0.75513 | delta: 5.8e-02 | secs: 0.02994
INFO:RESCAL:[  3] fit: 0.78686 | delta: 3.2e-02 | secs: 0.02892
INFO:RESCAL:[  4] fit: 0.80263 | delta: 1.6e-02 | secs: 0.02494
INFO:RESCAL:[  5] fit: 0.80994 | delta: 7.3e-03 | secs:

INFO:RESCAL:[  6] fit: 0.81370 | delta: 2.8e-03 | secs: 0.03098
INFO:RESCAL:[  7] fit: 0.81502 | delta: 1.3e-03 | secs: 0.02997
INFO:RESCAL:[  8] fit: 0.81570 | delta: 6.8e-04 | secs: 0.03049
INFO:RESCAL:Train Fold 7
INFO:RESCAL:[  0] fit: 0.54798 | delta: 5.5e-01 | secs: 0.02194
INFO:RESCAL:[  1] fit: 0.69454 | delta: 1.5e-01 | secs: 0.02866
INFO:RESCAL:[  2] fit: 0.75430 | delta: 6.0e-02 | secs: 0.02394
INFO:RESCAL:[  3] fit: 0.78586 | delta: 3.2e-02 | secs: 0.03092
INFO:RESCAL:[  4] fit: 0.80068 | delta: 1.5e-02 | secs: 0.04485
INFO:RESCAL:[  5] fit: 0.80706 | delta: 6.4e-03 | secs: 0.05186
INFO:RESCAL:[  6] fit: 0.80999 | delta: 2.9e-03 | secs: 0.02992
INFO:RESCAL:[  7] fit: 0.81154 | delta: 1.6e-03 | secs: 0.01995
INFO:RESCAL:[  8] fit: 0.81248 | delta: 9.3e-04 | secs: 0.02593
INFO:RESCAL:Test Fold 7
INFO:RESCAL:[  0] fit: 0.56664 | delta: 5.7e-01 | secs: 0.02693
INFO:RESCAL:[  1] fit: 0.70056 | delta: 1.3e-01 | secs: 0.03490
INFO:RESCAL:[  2] fit: 0.75943 | delta: 5.9e-02 | secs:

PR AUC training/test mean: 0.907/0.922
PR AUC training/test standard deviation: 0.024/0.034


# Benchmark 2: UMLS dataset

In [16]:
# Benchmark 2: create a fresh model and load the UMLS tensor.
model2 = RESCALModel()
# os.path.join picks the separator of the host OS, so the notebook runs on
# Windows, Linux and macOS alike (hard-coded '\\' only works on Windows).
benchmark2_path = os.path.join(os.getcwd(), 'benchmark2', 'rescal_input_umls.mat')
# NOTE(review): 'Rs' is presumably the name of the tensor variable inside the
# .mat file -- confirm against RESCALModel.read_dataset.
X2 = model2.read_dataset(benchmark2_path, 'Rs')

## Tensor factorization with $r=50$

In [17]:
# Factorize the benchmark-2 tensor with the notebook-wide rank `r` (defined in
# the "Rank of factorization" cell) rather than repeating the literal 50, so
# every benchmark is guaranteed to use the same rank.
A2, R2 = model2.factorize(r)
# Show the entity factor and the first predicate's interaction matrix.
print(A2)
print()
print(R2[0])

INFO:RESCAL:[  0] fit: 0.81603 | delta: 8.2e-01 | secs: 0.08390
INFO:RESCAL:[  1] fit: 0.85399 | delta: 3.8e-02 | secs: 0.07124
INFO:RESCAL:[  2] fit: 0.86156 | delta: 7.6e-03 | secs: 0.06571
INFO:RESCAL:[  3] fit: 0.86400 | delta: 2.4e-03 | secs: 0.04893
INFO:RESCAL:[  4] fit: 0.86492 | delta: 9.3e-04 | secs: 0.04921


[[ 0.37251859 -0.17126244 -0.25438756 ... -0.08340365  0.17627114
   0.80348998]
 [-0.03971887  0.15285085 -0.09014183 ... -0.09636373  0.05674021
   0.08959834]
 [-0.08183783 -0.38663234  0.41196811 ... -0.21496444  0.20525317
   0.08685161]
 ...
 [-0.27017881  0.03368626  0.15856518 ... -0.14769298 -0.20885229
   0.21603422]
 [-0.2520424   0.09279751  0.26025802 ... -0.19110763 -0.1897917
   0.29220382]
 [-0.61129318 -0.15969682 -0.17140513 ... -0.32530941 -0.05231979
   0.38937459]]

[[ 0.00370207  0.00137972  0.00061588 ... -0.01362687 -0.00347582
   0.0023683 ]
 [ 0.00123718  0.00047669  0.00013656 ... -0.00491878 -0.00131598
   0.00096254]
 [ 0.00067218  0.00011997 -0.0004416  ... -0.00020739 -0.0003865
   0.00013157]
 ...
 [-0.00595686 -0.0028831  -0.0015314  ...  0.05252767  0.0126729
  -0.01126272]
 [-0.00111131 -0.00073727 -0.00085988 ...  0.01321018  0.00278032
  -0.00272735]
 [ 0.00050812  0.00047089  0.00043859 ... -0.01012903 -0.00232156
   0.00211033]]


## Evaluation

In [18]:
# Evaluate benchmark 2; evaluate() returns four values that are unpacked as
# (train mean, train std, test mean, test std) of the PR AUC.
scores2 = model2.evaluate()
mean_train2, std_train2, mean_test2, std_test2 = scores2

# Report training/test figures side by side, three decimals each.
mean_pair2 = (mean_train2, mean_test2)
print("PR AUC training/test mean: %.3f/%.3f" % mean_pair2)
std_pair2 = (std_train2, std_test2)
print("PR AUC training/test standard deviation: %.3f/%.3f" % std_pair2)

INFO:RESCAL:Train Fold 0
INFO:RESCAL:[  0] fit: 0.81054 | delta: 8.1e-01 | secs: 0.05029
INFO:RESCAL:[  1] fit: 0.84955 | delta: 3.9e-02 | secs: 0.06007
INFO:RESCAL:[  2] fit: 0.85708 | delta: 7.5e-03 | secs: 0.05275
INFO:RESCAL:[  3] fit: 0.85945 | delta: 2.4e-03 | secs: 0.06152
INFO:RESCAL:[  4] fit: 0.86032 | delta: 8.7e-04 | secs: 0.05792
INFO:RESCAL:Test Fold 0
INFO:RESCAL:[  0] fit: 0.81107 | delta: 8.1e-01 | secs: 0.04568
INFO:RESCAL:[  1] fit: 0.85074 | delta: 4.0e-02 | secs: 0.05027
INFO:RESCAL:[  2] fit: 0.85862 | delta: 7.9e-03 | secs: 0.04829
INFO:RESCAL:[  3] fit: 0.86117 | delta: 2.5e-03 | secs: 0.04588
INFO:RESCAL:[  4] fit: 0.86211 | delta: 9.5e-04 | secs: 0.04129
INFO:RESCAL:Train Fold 1
INFO:RESCAL:[  0] fit: 0.81174 | delta: 8.1e-01 | secs: 0.03840
INFO:RESCAL:[  1] fit: 0.85046 | delta: 3.9e-02 | secs: 0.05270
INFO:RESCAL:[  2] fit: 0.85805 | delta: 7.6e-03 | secs: 0.04882
INFO:RESCAL:[  3] fit: 0.86052 | delta: 2.5e-03 | secs: 0.05300
INFO:RESCAL:[  4] fit: 0.86147

PR AUC training/test mean: 0.432/0.495
PR AUC training/test standard deviation: 0.135/0.120


# Benchmark 3: human disease-symptoms data resource

In [19]:
# Benchmark 3: create a fresh model and load the disease-symptoms tensor.
model3 = RESCALModel()
# os.path.join picks the separator of the host OS, so the notebook runs on
# Windows, Linux and macOS alike (hard-coded '\\' only works on Windows).
benchmark3_path = os.path.join(os.getcwd(), 'benchmark3', 'rescal_input_diseases.mat')
# NOTE(review): 'K' is presumably the name of the tensor variable inside the
# .mat file (it differs from the 'Rs' key used by benchmarks 1 and 2) --
# confirm against RESCALModel.read_dataset.
X3 = model3.read_dataset(benchmark3_path, 'K')

## Tensor factorization with $r=50$

In [20]:
# Factorize the benchmark-3 tensor with the notebook-wide rank `r` (defined in
# the "Rank of factorization" cell) rather than repeating the literal 50, so
# every benchmark is guaranteed to use the same rank.
A3, R3 = model3.factorize(r)
# Show the entity factor and the first predicate's interaction matrix.
print(A3)
print()
print(R3[0])

INFO:RESCAL:[  0] fit: 0.93453 | delta: 9.3e-01 | secs: 0.84738
INFO:RESCAL:[  1] fit: 0.93513 | delta: 6.0e-04 | secs: 0.86365


[[ 0.00446232  0.0089613   0.0283861  ...  0.00726947  0.01805538
   0.00186346]
 [-0.19432559 -0.10615873 -0.32937182 ...  0.38323471  0.26551574
   0.40343787]
 [ 0.21909547  0.10338185 -0.19540799 ... -0.03775649  0.56021673
   0.34375652]
 ...
 [-0.2579827  -0.24322704 -0.4842665  ...  0.2944431   0.48326003
   0.40817303]
 [-0.09524507  0.17159799  0.33451489 ...  0.08324661 -0.01605023
   0.3603542 ]
 [ 0.08466193  0.39363298 -0.05904715 ...  0.43346224 -0.5798173
   0.28820235]]

[[-0.0970226  -0.07249207 -0.25100448 ... -0.11536157 -0.09747333
  -0.2676122 ]
 [-0.07249207 -0.0516804  -0.14992046 ... -0.00794668 -0.01766966
  -0.05265964]
 [-0.25100448 -0.14992046 -0.50351671 ...  0.01315085 -0.00633601
  -0.12328693]
 ...
 [-0.11536157 -0.00794668  0.01315085 ...  0.58452327  0.39274157
   0.13546583]
 [-0.09747333 -0.01766966 -0.00633601 ...  0.39274157  0.41411803
   0.31993644]
 [-0.2676122  -0.05265964 -0.12328693 ...  0.13546583  0.31993644
   1.61585071]]


## Evaluation

In [21]:
# Evaluate benchmark 3; evaluate() returns four values that are unpacked as
# (train mean, train std, test mean, test std) of the PR AUC.
scores3 = model3.evaluate()
mean_train3, std_train3, mean_test3, std_test3 = scores3

# Report training/test figures side by side, three decimals each.
mean_pair3 = (mean_train3, mean_test3)
print("PR AUC training/test mean: %.3f/%.3f" % mean_pair3)
std_pair3 = (std_train3, std_test3)
print("PR AUC training/test standard deviation: %.3f/%.3f" % std_pair3)

INFO:RESCAL:Train Fold 0
INFO:RESCAL:[  0] fit: 0.87640 | delta: 8.8e-01 | secs: 0.82113
INFO:RESCAL:[  1] fit: 0.87855 | delta: 2.2e-03 | secs: 0.84390
INFO:RESCAL:[  2] fit: 0.87889 | delta: 3.4e-04 | secs: 0.80598
INFO:RESCAL:Test Fold 0
INFO:RESCAL:[  0] fit: 0.90340 | delta: 9.0e-01 | secs: 0.81347
INFO:RESCAL:[  1] fit: 0.90415 | delta: 7.5e-04 | secs: 0.82185
INFO:RESCAL:Train Fold 1
INFO:RESCAL:[  0] fit: 0.87614 | delta: 8.8e-01 | secs: 0.78836
INFO:RESCAL:[  1] fit: 0.87843 | delta: 2.3e-03 | secs: 0.81283
INFO:RESCAL:[  2] fit: 0.87881 | delta: 3.8e-04 | secs: 0.82484
INFO:RESCAL:Test Fold 1
INFO:RESCAL:[  0] fit: 0.90338 | delta: 9.0e-01 | secs: 0.82386
INFO:RESCAL:[  1] fit: 0.90412 | delta: 7.4e-04 | secs: 0.80010
INFO:RESCAL:Train Fold 2
INFO:RESCAL:[  0] fit: 0.87651 | delta: 8.8e-01 | secs: 0.79586
INFO:RESCAL:[  1] fit: 0.87838 | delta: 1.9e-03 | secs: 0.83276
INFO:RESCAL:[  2] fit: 0.87870 | delta: 3.3e-04 | secs: 0.81482
INFO:RESCAL:Test Fold 2
INFO:RESCAL:[  0] fit

PR AUC training/test mean: 0.756/0.755
PR AUC training/test standard deviation: 0.003/0.002
