In [17]:
import os 
from rescal_model import RESCALModel
import sys

# Benchmark 1: Alyawarra kinship data

In [18]:
# Load the Alyawarra kinship benchmark into a fresh model.
# The path is assembled with os.path.join instead of hard-coded '\\'
# separators so the notebook also runs on non-Windows systems; on Windows
# the resulting path is the same as before.
model1 = RESCALModel()
X = model1.read_dataset(
    os.path.join(os.getcwd(), 'benchmark1', 'rescal_input_alyawarradata.mat'),
    'Rs',  # presumably the variable name stored inside the .mat file -- confirm against read_dataset
)

## Number of predicates ($m$)

In [19]:
# X is indexed per predicate, so its length is the number of predicates m.
n_predicates = len(X)
print(n_predicates)

26


## Shape of each slice of tensor ($n \times n$)

In [20]:
# Inspect the dimensions of the first tensor slice.
first_slice = X[0]
print(first_slice.shape)

(104, 104)


## Rank of factorization ($r$)

In [21]:
# Rank r of the factorization; passed to model1.factorize(r) in the next code cell.
r = 50

## Tensor factorization with $r=50$
RESCAL factorizes each slice $X_k$ of the tensor $X$ into two matrices, $A$ and $R_k$, using an Alternating Least Squares (ALS) approach:

$X_k \approx A R_k A^T, \quad \text{for } k = 1, 2, \ldots, m$

In [22]:
# Factorize the benchmark-1 tensor at rank r and split the result into the
# entity matrix (A1) and the per-predicate matrices (R1).
factorization1 = model1.factorize(r)
A1, R1 = factorization1

INFO:RESCAL:[  0] fit: 0.58176 | delta: 5.8e-01 | secs: 0.01361
INFO:RESCAL:[  1] fit: 0.70900 | delta: 1.3e-01 | secs: 0.03702
INFO:RESCAL:[  2] fit: 0.76527 | delta: 5.6e-02 | secs: 0.02593
INFO:RESCAL:[  3] fit: 0.79524 | delta: 3.0e-02 | secs: 0.02454
INFO:RESCAL:[  4] fit: 0.80874 | delta: 1.4e-02 | secs: 0.02458
INFO:RESCAL:[  5] fit: 0.81426 | delta: 5.5e-03 | secs: 0.01963
INFO:RESCAL:[  6] fit: 0.81657 | delta: 2.3e-03 | secs: 0.02892
INFO:RESCAL:[  7] fit: 0.81764 | delta: 1.1e-03 | secs: 0.02493
INFO:RESCAL:[  8] fit: 0.81820 | delta: 5.6e-04 | secs: 0.03092


## Matrix $A$ contains the latent component representation of the entities in the domain

In [23]:
# Show the latent-component matrix A for benchmark 1.
entity_factors1 = A1
print(entity_factors1)

[[-0.01368813  0.1812085  -0.18251201 ... -0.15812467  0.05151897
  -0.31931646]
 [ 0.10554886  0.00871292 -0.34205654 ... -0.12527772  0.04689308
  -0.4163375 ]
 [-0.02269086 -0.16493713  0.19580615 ...  0.06744303 -0.26713003
  -0.30351533]
 ...
 [-0.07166691  0.06640762 -0.34817091 ...  0.30826677  0.03729921
  -0.29186425]
 [ 0.19305299 -0.12509444  0.07845929 ... -0.0055975   0.05294014
  -0.46705561]
 [-0.10595755 -0.06277095 -0.30730867 ... -0.1500461  -0.19493177
  -0.38445873]]


## Shape of $A$ ($n \times r$)

In [24]:
# Dimensions of A1.
a1_shape = A1.shape
print(a1_shape)

(104, 50)


## Matrix $R_k$ models the interactions of the latent components in the k-th predicate

In [25]:
# Show the interaction matrix R_k of the first predicate (k = 0).
first_core1 = R1[0]
print(first_core1)

[[-0.08571054  0.05569732  0.04036777 ...  0.00672771  0.02540543
  -0.01749452]
 [ 0.05353351 -0.11611896  0.08703989 ... -0.02505809 -0.06562603
  -0.0952362 ]
 [ 0.00938007  0.09275408 -0.01627604 ... -0.06460518  0.05598936
  -0.00761776]
 ...
 [ 0.01877111  0.0017941  -0.06267289 ... -0.00418809  0.00012907
   0.03389138]
 [-0.00380127 -0.04715767  0.0184775  ...  0.01011285 -0.03646143
  -0.07919764]
 [-0.0283129  -0.09220294  0.00376541 ...  0.03015159 -0.0637244
   0.12118211]]


## Shape of $R_k$ ($r \times r$)

In [26]:
# Dimensions of the first predicate's R_k matrix.
first_core1_shape = R1[0].shape
print(first_core1_shape)

(50, 50)


## Save matrices to txt files and load them back

In [27]:
# Persist the benchmark-1 model under ./benchmark1 and read it back.
# The directory is built once with os.path.join (instead of repeating a
# Windows-only '\\benchmark1' suffix) so save and load are guaranteed to use
# the same, platform-independent location.
benchmark1_dir = os.path.join(os.getcwd(), 'benchmark1')
model1.save_model(benchmark1_dir)
A1_load, R1_load = model1.load_model(benchmark1_dir)
# NOTE(review): in the recorded output the reloaded A1_load / R1_load[0]
# values differ from the A1 / R1[0] printed in the earlier cells -- confirm
# whether save_model/load_model round-trips the factors of the last
# factorize() call or whether the cells were executed against different runs.
print(A1_load)
print()
print(R1_load[0])

[[-0.12153519 -0.13431805 -0.19294858 ...  0.12244789 -0.13448148
  -0.33652847]
 [ 0.14288304 -0.0063402  -0.02964552 ... -0.0736359  -0.05171589
  -0.43410974]
 [ 0.43226877 -0.00052827 -0.02063994 ... -0.13428839  0.06117607
  -0.2815986 ]
 ...
 [ 0.13453518 -0.09481617  0.34942655 ... -0.31441742 -0.04371322
  -0.29731147]
 [ 0.20900997  0.06922798  0.01551271 ... -0.15134335 -0.06948929
  -0.46052085]
 [ 0.07244612  0.12880857 -0.37725693 ...  0.22373262  0.13053774
  -0.3890556 ]]

[[-0.09660659 -0.06167038 -0.02808791 ... -0.00770328 -0.00546235
   0.01177473]
 [-0.05963265 -0.13919658  0.05874502 ... -0.01654981 -0.05567043
   0.07856133]
 [-0.00152283  0.01936553 -0.02854187 ... -0.05287543  0.03139199
  -0.03258019]
 ...
 [-0.02092981  0.01183785 -0.0569907  ...  0.05211154 -0.00891527
  -0.05509378]
 [-0.0094615  -0.04386755  0.02568944 ... -0.00098676 -0.03354644
   0.08492345]
 [-0.00448898  0.0927219  -0.03584417 ... -0.04787778  0.07619653
   0.12584558]]


## Evaluation: 10-fold cross validation
For both the training and the test data, compute the mean and standard deviation of the area under the precision-recall curve (PR AUC).

In [28]:
# Run the model's evaluation and report the mean and standard deviation of
# PR AUC on the training and test data.
cv_stats1 = model1.evaluate()
mean_train1, std_train1, mean_test1, std_test1 = cv_stats1
print("PR AUC training/test mean: %.3f/%.3f" % (mean_train1, mean_test1))
print("PR AUC training/test standard deviation: %.3f/%.3f" % (std_train1, std_test1))

INFO:RESCAL:Train Fold 0
INFO:RESCAL:[  0] fit: 0.57700 | delta: 5.8e-01 | secs: 0.03125
INFO:RESCAL:[  1] fit: 0.70012 | delta: 1.2e-01 | secs: 0.02449
INFO:RESCAL:[  2] fit: 0.75648 | delta: 5.6e-02 | secs: 0.02612
INFO:RESCAL:[  3] fit: 0.78706 | delta: 3.1e-02 | secs: 0.02471
INFO:RESCAL:[  4] fit: 0.80150 | delta: 1.4e-02 | secs: 0.02448
INFO:RESCAL:[  5] fit: 0.80774 | delta: 6.2e-03 | secs: 0.02590
INFO:RESCAL:[  6] fit: 0.81044 | delta: 2.7e-03 | secs: 0.02493
INFO:RESCAL:[  7] fit: 0.81170 | delta: 1.3e-03 | secs: 0.02478
INFO:RESCAL:[  8] fit: 0.81236 | delta: 6.6e-04 | secs: 0.02783
INFO:RESCAL:Test Fold 0
INFO:RESCAL:[  0] fit: 0.56979 | delta: 5.7e-01 | secs: 0.02394
INFO:RESCAL:[  1] fit: 0.69855 | delta: 1.3e-01 | secs: 0.02492
INFO:RESCAL:[  2] fit: 0.75718 | delta: 5.9e-02 | secs: 0.03488
INFO:RESCAL:[  3] fit: 0.78819 | delta: 3.1e-02 | secs: 0.02595
INFO:RESCAL:[  4] fit: 0.80328 | delta: 1.5e-02 | secs: 0.02695
INFO:RESCAL:[  5] fit: 0.80992 | delta: 6.6e-03 | secs:

INFO:RESCAL:[  5] fit: 0.81149 | delta: 5.9e-03 | secs: 0.02194
INFO:RESCAL:[  6] fit: 0.81393 | delta: 2.4e-03 | secs: 0.02039
INFO:RESCAL:[  7] fit: 0.81504 | delta: 1.1e-03 | secs: 0.02735
INFO:RESCAL:[  8] fit: 0.81563 | delta: 5.9e-04 | secs: 0.02589
INFO:RESCAL:Train Fold 7
INFO:RESCAL:[  0] fit: 0.57137 | delta: 5.7e-01 | secs: 0.01950
INFO:RESCAL:[  1] fit: 0.69863 | delta: 1.3e-01 | secs: 0.02098
INFO:RESCAL:[  2] fit: 0.75581 | delta: 5.7e-02 | secs: 0.02464
INFO:RESCAL:[  3] fit: 0.78551 | delta: 3.0e-02 | secs: 0.02158
INFO:RESCAL:[  4] fit: 0.80047 | delta: 1.5e-02 | secs: 0.02179
INFO:RESCAL:[  5] fit: 0.80791 | delta: 7.4e-03 | secs: 0.02547
INFO:RESCAL:[  6] fit: 0.81123 | delta: 3.3e-03 | secs: 0.01993
INFO:RESCAL:[  7] fit: 0.81273 | delta: 1.5e-03 | secs: 0.02369
INFO:RESCAL:[  8] fit: 0.81348 | delta: 7.5e-04 | secs: 0.02191
INFO:RESCAL:Test Fold 7
INFO:RESCAL:[  0] fit: 0.56859 | delta: 5.7e-01 | secs: 0.03874
INFO:RESCAL:[  1] fit: 0.70089 | delta: 1.3e-01 | secs:

PR AUC training/test mean: 0.916/0.921
PR AUC training/test standard deviation: 0.030/0.030


# Benchmark 2: UMLS dataset

In [29]:
# Load the UMLS benchmark into a fresh model.
# os.path.join replaces the hard-coded '\\' separators so the path resolves
# on every platform; on Windows the resulting path is unchanged.
model2 = RESCALModel()
X2 = model2.read_dataset(
    os.path.join(os.getcwd(), 'benchmark2', 'rescal_input_umls.mat'),
    'Rs',  # presumably the variable name stored inside the .mat file -- confirm against read_dataset
)

## Tensor factorization with $r=50$

In [30]:
# Factorize the UMLS tensor at rank 50, then print A2 followed by a blank
# line and the R_k matrix of the first predicate.
factorization2 = model2.factorize(50)
A2, R2 = factorization2
print(A2, end="\n\n")
print(R2[0])

INFO:RESCAL:[  0] fit: 0.81603 | delta: 8.2e-01 | secs: 0.07508
INFO:RESCAL:[  1] fit: 0.85399 | delta: 3.8e-02 | secs: 0.05726
INFO:RESCAL:[  2] fit: 0.86156 | delta: 7.6e-03 | secs: 0.05784
INFO:RESCAL:[  3] fit: 0.86400 | delta: 2.4e-03 | secs: 0.05490
INFO:RESCAL:[  4] fit: 0.86492 | delta: 9.3e-04 | secs: 0.04905


[[-0.37251859 -0.17126244 -0.25438756 ...  0.08340365  0.17627114
  -0.80348998]
 [ 0.03971887  0.15285085 -0.09014183 ...  0.09636373  0.05674021
  -0.08959834]
 [ 0.08183783 -0.38663234  0.41196811 ...  0.21496444  0.20525317
  -0.08685161]
 ...
 [ 0.27017881  0.03368626  0.15856518 ...  0.14769298 -0.20885229
  -0.21603422]
 [ 0.2520424   0.09279751  0.26025802 ...  0.19110763 -0.1897917
  -0.29220382]
 [ 0.61129318 -0.15969682 -0.17140513 ...  0.32530941 -0.05231979
  -0.38937459]]

[[ 0.00370207 -0.00137972 -0.00061588 ... -0.01362687  0.00347582
   0.0023683 ]
 [-0.00123718  0.00047669  0.00013656 ...  0.00491878 -0.00131598
  -0.00096254]
 [-0.00067218  0.00011997 -0.0004416  ...  0.00020739 -0.0003865
  -0.00013157]
 ...
 [-0.00595686  0.0028831   0.0015314  ...  0.05252767 -0.0126729
  -0.01126272]
 [ 0.00111131 -0.00073727 -0.00085988 ... -0.01321018  0.00278032
   0.00272735]
 [ 0.00050812 -0.00047089 -0.00043859 ... -0.01012903  0.00232156
   0.00211033]]


## Evaluation

In [31]:
# Same evaluate() call as for benchmark 1: report the mean and standard
# deviation of PR AUC on the training and test data.
cv_stats2 = model2.evaluate()
mean_train2, std_train2, mean_test2, std_test2 = cv_stats2
print("PR AUC training/test mean: %.3f/%.3f" % (mean_train2, mean_test2))
print("PR AUC training/test standard deviation: %.3f/%.3f" % (std_train2, std_test2))

INFO:RESCAL:Train Fold 0
INFO:RESCAL:[  0] fit: 0.81333 | delta: 8.1e-01 | secs: 0.05435
INFO:RESCAL:[  1] fit: 0.85148 | delta: 3.8e-02 | secs: 0.05680
INFO:RESCAL:[  2] fit: 0.85891 | delta: 7.4e-03 | secs: 0.05952
INFO:RESCAL:[  3] fit: 0.86132 | delta: 2.4e-03 | secs: 0.06717
INFO:RESCAL:[  4] fit: 0.86228 | delta: 9.6e-04 | secs: 0.06782
INFO:RESCAL:Test Fold 0
INFO:RESCAL:[  0] fit: 0.81664 | delta: 8.2e-01 | secs: 0.04751
INFO:RESCAL:[  1] fit: 0.85315 | delta: 3.7e-02 | secs: 0.07053
INFO:RESCAL:[  2] fit: 0.86036 | delta: 7.2e-03 | secs: 0.07322
INFO:RESCAL:[  3] fit: 0.86271 | delta: 2.4e-03 | secs: 0.06698
INFO:RESCAL:[  4] fit: 0.86363 | delta: 9.1e-04 | secs: 0.05695
INFO:RESCAL:Train Fold 1
INFO:RESCAL:[  0] fit: 0.81243 | delta: 8.1e-01 | secs: 0.03813
INFO:RESCAL:[  1] fit: 0.85081 | delta: 3.8e-02 | secs: 0.05225
INFO:RESCAL:[  2] fit: 0.85848 | delta: 7.7e-03 | secs: 0.06708
INFO:RESCAL:[  3] fit: 0.86098 | delta: 2.5e-03 | secs: 0.05690
INFO:RESCAL:[  4] fit: 0.86193

PR AUC training/test mean: 0.489/0.480
PR AUC training/test standard deviation: 0.118/0.141


# Benchmark 3: human disease-symptoms data resource

In [32]:
# Load the disease-symptoms benchmark into a fresh model.
# os.path.join replaces the hard-coded '\\' separators so the path resolves
# on every platform; on Windows the resulting path is unchanged.
model3 = RESCALModel()
X3 = model3.read_dataset(
    os.path.join(os.getcwd(), 'benchmark3', 'rescal_input_diseases.mat'),
    'K',  # differs from the 'Rs' used for benchmarks 1 and 2; presumably the variable name inside this .mat file -- confirm
)

## Tensor factorization with $r=50$

In [33]:
# Factorize the disease-symptoms tensor at rank 50, then print A3, a blank
# line, and the R_k matrix of the first predicate.
factorization3 = model3.factorize(50)
A3, R3 = factorization3
print(A3, end="\n\n")
print(R3[0])

INFO:RESCAL:[  0] fit: 0.93453 | delta: 9.3e-01 | secs: 0.60430
INFO:RESCAL:[  1] fit: 0.93513 | delta: 6.0e-04 | secs: 0.59885


[[ 0.00446232 -0.0089613  -0.0283861  ... -0.00726947 -0.01805538
  -0.00186346]
 [-0.19432559  0.10615873  0.32937182 ... -0.38323471 -0.26551574
  -0.40343787]
 [ 0.21909547 -0.10338185  0.19540799 ...  0.03775649 -0.56021673
  -0.34375652]
 ...
 [-0.2579827   0.24322704  0.4842665  ... -0.2944431  -0.48326003
  -0.40817303]
 [-0.09524507 -0.17159799 -0.33451489 ... -0.08324661  0.01605023
  -0.3603542 ]
 [ 0.08466193 -0.39363298  0.05904715 ... -0.43346224  0.5798173
  -0.28820235]]

[[-0.0970226   0.07249207  0.25100448 ...  0.11536157  0.09747333
   0.2676122 ]
 [ 0.07249207 -0.0516804  -0.14992046 ... -0.00794668 -0.01766966
  -0.05265964]
 [ 0.25100448 -0.14992046 -0.50351671 ...  0.01315085 -0.00633601
  -0.12328693]
 ...
 [ 0.11536157 -0.00794668  0.01315085 ...  0.58452327  0.39274157
   0.13546583]
 [ 0.09747333 -0.01766966 -0.00633601 ...  0.39274157  0.41411803
   0.31993644]
 [ 0.2676122  -0.05265964 -0.12328693 ...  0.13546583  0.31993644
   1.61585071]]


## Evaluation

In [34]:
# Same evaluate() call as for the other benchmarks: report the mean and
# standard deviation of PR AUC on the training and test data.
cv_stats3 = model3.evaluate()
mean_train3, std_train3, mean_test3, std_test3 = cv_stats3
print("PR AUC training/test mean: %.3f/%.3f" % (mean_train3, mean_test3))
print("PR AUC training/test standard deviation: %.3f/%.3f" % (std_train3, std_test3))

INFO:RESCAL:Train Fold 0
INFO:RESCAL:[  0] fit: 0.87639 | delta: 8.8e-01 | secs: 0.55189
INFO:RESCAL:[  1] fit: 0.87830 | delta: 1.9e-03 | secs: 0.57327
INFO:RESCAL:[  2] fit: 0.87868 | delta: 3.8e-04 | secs: 0.55382
INFO:RESCAL:Test Fold 0
INFO:RESCAL:[  0] fit: 0.90329 | delta: 9.0e-01 | secs: 0.56498
INFO:RESCAL:[  1] fit: 0.90411 | delta: 8.2e-04 | secs: 0.56917
INFO:RESCAL:Train Fold 1
INFO:RESCAL:[  0] fit: 0.87601 | delta: 8.8e-01 | secs: 0.54882
INFO:RESCAL:[  1] fit: 0.87825 | delta: 2.2e-03 | secs: 0.55421
INFO:RESCAL:[  2] fit: 0.87867 | delta: 4.2e-04 | secs: 0.54397
INFO:RESCAL:Test Fold 1
INFO:RESCAL:[  0] fit: 0.90330 | delta: 9.0e-01 | secs: 0.54614
INFO:RESCAL:[  1] fit: 0.90409 | delta: 7.9e-04 | secs: 0.56355
INFO:RESCAL:Train Fold 2
INFO:RESCAL:[  0] fit: 0.87628 | delta: 8.8e-01 | secs: 0.53786
INFO:RESCAL:[  1] fit: 0.87835 | delta: 2.1e-03 | secs: 0.53902
INFO:RESCAL:[  2] fit: 0.87876 | delta: 4.1e-04 | secs: 0.53811
INFO:RESCAL:Test Fold 2
INFO:RESCAL:[  0] fit

PR AUC training/test mean: 0.757/0.756
PR AUC training/test standard deviation: 0.001/0.001
