## NCMF
Example of running the "NCMF" module

Before running this notebook, perform the following steps:

```
cd ../src/
cp -rpf models_hadamard.py models.py
cp -prf loss_hadamard.py loss.py
```

After the execution of this notebook, switch the files back.

#### *User inputs*

In [1]:
sample_no = 1
data_dir = "../../datasets/NCMF/"
dataset_name = "PolyP3"

In [2]:
! mkdir -p {data_dir}/{dataset_name}/{sample_no}

#### *Loading all necessary modules*

In [3]:
import sys
sys.path.append("..")

In [4]:
import pprint
import numpy as np
import pickle as pkl
import time
import itertools
import os
import pprint
from datetime import datetime

In [5]:
from src.ncmf import ncmf

In [6]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

In [7]:
import torch
torch.manual_seed(0)

<torch._C.Generator at 0x7fddb3415390>

In [8]:
pp = pprint.PrettyPrinter()

#### *Instantiating the NCMF model with the specified hyper-parameters*

In [9]:
# Setting hyperparameters
num_epochs = 15
batch_size = int(2048*1e6)# MIMIC 2048
weight_decay = 1e-3 # MIMIC 0.5
learning_rate = 1e-6 # MIMIC 1e-6
convergence_threshold = 1e-5 # MIMIC -1e-3
entity_matrices = ['X0', 'X1', 'X2'] # for Polypharmacy, MIMIC, simulated
#entity_matrices = ['X1', 'X6'] # for PubMed
matrix_types = {
    "binary": ["X0", "X1", "X2"],
    "real": []
}

In [10]:
ncmf_model = ncmf(sample_no, data_dir, dataset_name, matrix_types, num_epochs, learning_rate, weight_decay, convergence_threshold, batch_size, batch_size, entity_matrices, autoencoder_k=200, autoencoder_act_f = "sigma", reconstructor_act_f = "sigma")

#### *Fitting... *
- Performs the input transformation and network construction
- (Pre-trains and) trains the model to obtain the entity representations
- Reconstruct the input matrices using the entity representations obtained

In [11]:
start_time = datetime.now()

In [12]:
ncmf_model.fit()

Mapping node ids to matrix indices...
Splitting training and validation links...
3030
Loading matrices and masks...
Loading X0
Number of batches = 1
Number of batches = 1
Loading X1
Number of batches = 1
Number of batches = 1
Loading X2
Number of batches = 1
Number of batches = 1
To reconstruct X0
dim:0; e0
X0 e0 row
X0 e0 col
X1 e0 row
dim:1; e0
X0 e0 row
X0 e0 col
X1 e0 row
To reconstruct X1
dim:0; e0
X0 e0 row
X0 e0 col
X1 e0 row
dim:1; e1
X1 e1 col
X2 e1 row
X2 e1 col
To reconstruct X2
dim:0; e1
X1 e1 col
X2 e1 row
X2 e1 col
dim:1; e1
X1 e1 col
X2 e1 row
X2 e1 col
Preparing autoencoders' configurations...
Preparing reconstructors' configurations...
Preparing fusions' configurations...
Initialising autoencoders...
Initialising reconstructors...
Initialising fusions...
Retreive Embedding
ZINB
ZINB
ZINB
====> Epoch 0: Average Train Loss: 12.1722988 | Train RMSE: 2.3299670 | Average Valid Loss: 1.4261483 | Valid RMSE: 2.5842800 | beta: 0.0
Retreive Embedding
ZINB
ZINB
ZINB
====> Epoch 

In [13]:
end_time = datetime.now()
runtime = end_time - start_time
runtime_seconds = runtime.total_seconds()
print(f"Total runtime = {runtime_seconds}")

Total runtime = 23.400121


In [14]:
ncmf_model.net.autoencoders

ModuleDict(
  (X0): ModuleDict(
    (row): VariationalAutoencoder(
      (encoder): VariationalEncoder(
        (hidden_layers): Sequential(
          (0): Linear(in_features=645, out_features=1024, bias=True)
          (1): Sigmoid()
        )
        (bottleneck_layers): ModuleDict(
          (mu): Linear(in_features=1024, out_features=200, bias=True)
          (logvar): Linear(in_features=1024, out_features=200, bias=True)
        )
      )
      (decoder): VariationalDecoder(
        (hidden_layers): Sequential(
          (0): Linear(in_features=200, out_features=1024, bias=True)
          (1): Sigmoid()
        )
        (output_layers): ModuleDict(
          (M_bar): Linear(in_features=1024, out_features=645, bias=True)
          (Theta): Linear(in_features=1024, out_features=645, bias=True)
          (Pi): Linear(in_features=1024, out_features=645, bias=True)
        )
      )
    )
    (col): VariationalAutoencoder(
      (encoder): VariationalEncoder(
        (hidden_layers): 

In [18]:
ncmf_model.net.reconstructors

ModuleDict(
  (X0): NMF_hadamard(
    (activations_available): ModuleDict(
      (relu): ReLU()
      (lrelu): LeakyReLU(negative_slope=0.01)
      (selu): SELU()
      (sigma): Sigmoid()
      (tanh): Tanh()
    )
    (actf): Sigmoid()
    (fc1): Linear(in_features=200, out_features=150, bias=True)
    (fc2): Linear(in_features=150, out_features=100, bias=True)
    (fc31): Linear(in_features=100, out_features=1, bias=True)
    (fc32): Linear(in_features=100, out_features=1, bias=True)
    (fc33): Linear(in_features=100, out_features=1, bias=True)
  )
  (X1): NMF_hadamard(
    (activations_available): ModuleDict(
      (relu): ReLU()
      (lrelu): LeakyReLU(negative_slope=0.01)
      (selu): SELU()
      (sigma): Sigmoid()
      (tanh): Tanh()
    )
    (actf): Sigmoid()
    (fc1): Linear(in_features=200, out_features=150, bias=True)
    (fc2): Linear(in_features=150, out_features=100, bias=True)
    (fc31): Linear(in_features=100, out_features=1, bias=True)
    (fc32): Linear(in_feat

#### Reconstruction using NMF network

In [14]:
repr_folder = f"../../datasets/NCMF/PolyP3/"
emb_file = f"emb_sample_{sample_no}.dat"

In [15]:
def load(emb_file_path):
    emb_dict = {}
    with open(emb_file_path, 'r') as emb_file:
        for i, line in enumerate(emb_file):
            if i == 0:
                train_para = line[:-1]
            else:
                index, emb = line[:-1].split('\t')
                emb_dict[index] = np.array(emb.split()).astype(np.float32)

    return train_para, emb_dict

In [16]:
train_para, emb_dict = load(repr_folder + emb_file)

In [17]:
drug_repr = []
protein_repr= []
for i in range(0, 645):
    drug_repr.append(emb_dict[str(i)])
for i in range(645, 645 + 837):
    protein_repr.append(emb_dict[str(i)])
drug_repr = np.array(drug_repr)
protein_repr = np.array(protein_repr)
print(drug_repr.shape, protein_repr.shape)

(645, 200) (837, 200)


In [18]:
ncmf_model.net.reconstructors["X1"]

NMF_hadamard(
  (activations_available): ModuleDict(
    (relu): ReLU()
    (lrelu): LeakyReLU(negative_slope=0.01)
    (selu): SELU()
    (sigma): Sigmoid()
    (tanh): Tanh()
  )
  (actf): Sigmoid()
  (fc1): Linear(in_features=200, out_features=150, bias=True)
  (fc2): Linear(in_features=150, out_features=100, bias=True)
  (fc31): Linear(in_features=100, out_features=1, bias=True)
  (fc32): Linear(in_features=100, out_features=1, bias=True)
  (fc33): Linear(in_features=100, out_features=1, bias=True)
)

In [19]:
recon_x1 = np.zeros((645, 837))
device = f'cuda:0' if torch.cuda.is_available() else 'cpu'
for i in range(0, 645):
    for j in range(645, 645 + 837):
        row_emb = torch.Tensor(drug_repr[i]).to(device).unsqueeze(dim=0)
        col_emb = torch.Tensor(protein_repr[j - 645]).to(device).unsqueeze(dim=0)
        recon_x1[i][j - 645] = ncmf_model.net.reconstructors["X1"](row_emb, col_emb)[0].cpu().detach().item()

In [20]:
recon_x1

array([[1.58801818, 1.58802187, 1.58801639, ..., 1.58806133, 1.58808243,
        1.58807898],
       [1.58800483, 1.5880177 , 1.58801198, ..., 1.58806109, 1.58808053,
        1.58807671],
       [1.58804679, 1.58805394, 1.58804333, ..., 1.58808136, 1.58810747,
        1.58809447],
       ...,
       [1.5879972 , 1.58800673, 1.58800387, ..., 1.58805275, 1.58806705,
        1.58806491],
       [1.58800149, 1.58800983, 1.58800912, ..., 1.58804977, 1.58807063,
        1.58806586],
       [1.58799672, 1.58800733, 1.58800507, ..., 1.58805501, 1.58807564,
        1.5880692 ]])

In [21]:
np.unique(recon_x1)

array([1.58787811, 1.58788061, 1.58788073, ..., 1.58813238, 1.58813488,
       1.58813667])

#### Validation set evaluation

In [22]:
import pandas as pd
import numpy as np

In [23]:
ncmf_recon_file = f"../../datasets/NCMF/PolyP3/{sample_no}/X1.npy"
ncmf_x1 = np.load(ncmf_recon_file, allow_pickle=True)
print(ncmf_x1.shape)

(645, 837)


In [24]:
ncmf_x1

array([[1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5878841,
        1.5878841],
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5878841,
        1.5882717],
       [1.5878841, 1.5882717, 1.5882717, ..., 1.5878841, 1.5882717,
        1.5882717],
       ...,
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5878841,
        1.5878841],
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5882717,
        1.5882717],
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5882717,
        1.5878841]], dtype=float32)

In [25]:
!pip install openpyxl



In [26]:
test_x1 = pd.read_csv(f"../../datasets/NCMF/PolyP3/sampled{sample_no}_link.dat.test", sep="\t", header=None)
test_x1.columns = ["left", "right", "value"]
test_x1

Unnamed: 0,left,right,value
0,89,964,0
1,315,718,0
2,375,1230,0
3,170,1476,0
4,467,900,0
...,...,...,...
107968,346,1457,0
107969,37,1140,0
107970,384,1448,0
107971,358,906,0


In [27]:
val_x1 = test_x1.iloc[0:test_x1.shape[0]//2]
val_x1

Unnamed: 0,left,right,value
0,89,964,0
1,315,718,0
2,375,1230,0
3,170,1476,0
4,467,900,0
...,...,...,...
53981,600,1412,0
53982,144,711,0
53983,153,1145,0
53984,295,905,0


In [28]:
val_x1["value"].value_counts()

0    52503
1     1483
Name: value, dtype: int64

In [29]:
test_x1["value"].value_counts()

0    104943
1      3030
Name: value, dtype: int64

In [30]:
def get_value(row, array):
    i = int(row["left"])
    j = int(row["right"]) - 645
    return array[i][j]

In [31]:
val_x1["ncmf_recon"] = val_x1.apply(lambda row: get_value(row, ncmf_x1), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [32]:
val_x1["ncmf_recon_nmf"] = val_x1.apply(lambda row: get_value(row, recon_x1), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [33]:
y_true = list(val_x1["value"])

In [34]:
from sklearn.metrics import mean_squared_error
print("NCMF- validation set MSE")
print(mean_squared_error(y_true, list(val_x1["ncmf_recon"])))

NCMF- validation set MSE
2.461868363677383


In [35]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, f1_score, precision_score, recall_score
auroc = roc_auc_score(y_true, list(val_x1["ncmf_recon"]))
precision, recall, thresholds = precision_recall_curve(y_true, list(val_x1["ncmf_recon"]))
auprc = auc(recall, precision)
print("Validation NCMF - from reconstructed file")
print(f"AUROC = {auroc}")
print(f"AUPRC = {auprc}")

Validation NCMF - from reconstructed file
AUROC = 0.5494019870989872
AUPRC = 0.18600926268039078


In [36]:
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc, f1_score, precision_score, recall_score
auroc = roc_auc_score(y_true, list(val_x1["ncmf_recon_nmf"]))
precision, recall, thresholds = precision_recall_curve(y_true, list(val_x1["ncmf_recon_nmf"]))
auprc = auc(recall, precision)
print("Validation NCMF - from Urm * Ucm through NMF")
print(f"AUROC = {auroc}")
print(f"AUPRC = {auprc}")

Validation NCMF - from Urm * Ucm through NMF
AUROC = 0.44940037398755583
AUPRC = 0.02607387180368061


In [37]:
test_x1["ncmf_recon_nmf"] = test_x1.apply(lambda row: get_value(row, recon_x1), axis = 1)
y_true_test = list(test_x1["value"])
auroc = roc_auc_score(y_true_test, list(test_x1["ncmf_recon_nmf"]))
precision, recall, thresholds = precision_recall_curve(y_true_test, list(test_x1["ncmf_recon_nmf"]))
auprc = auc(recall, precision)
print("Test NCMF from Urm * Ucm -> NMF")
print(f"AUROC = {auroc}")
print(f"AUPRC = {auprc}")

Test NCMF from Urm * Ucm -> NMF
AUROC = 0.4473227584900796
AUPRC = 0.026295776659951694


In [38]:
ncmf_x0 = np.load(f"../../datasets/NCMF/PolyP3/{sample_no}/X0.npy", allow_pickle=True)
ncmf_x0

array([[1.7261585, 1.7261585, 1.7261585, ..., 1.7261585, 1.7261585,
        1.7261585],
       [1.7261585, 1.7261585, 1.7261585, ..., 1.7261585, 1.7261585,
        1.7261585],
       [1.7261585, 1.7261585, 1.7261585, ..., 1.7261585, 1.7261585,
        1.7261585],
       ...,
       [1.7261585, 1.7261585, 1.7261585, ..., 1.7261585, 1.7261585,
        1.7261585],
       [1.7261585, 1.7261585, 1.7261585, ..., 1.7261585, 1.7261585,
        1.7261585],
       [1.7261585, 1.7261585, 1.7261585, ..., 1.7261585, 1.7261585,
        1.7261585]], dtype=float32)

In [39]:
# # making it binary
# ncmf_x0_mean = np.mean(np.unique(ncmf_x0))
# binary_ncmf_x0 = (ncmf_x0 > ncmf_x0_mean).astype(int)
# binary_ncmf_x0

In [40]:
ncmf_x1 = np.load(f"../../datasets/NCMF/PolyP3/{sample_no}/X1.npy", allow_pickle=True)
ncmf_x1

array([[1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5878841,
        1.5878841],
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5878841,
        1.5882717],
       [1.5878841, 1.5882717, 1.5882717, ..., 1.5878841, 1.5882717,
        1.5882717],
       ...,
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5878841,
        1.5878841],
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5882717,
        1.5882717],
       [1.5878841, 1.5878841, 1.5878841, ..., 1.5878841, 1.5882717,
        1.5878841]], dtype=float32)

In [41]:
# # making it binary
# ncmf_x1_mean = np.mean(np.unique(ncmf_x1))
# binary_ncmf_x1 = (ncmf_x1 > ncmf_x1_mean).astype(int)
# binary_ncmf_x1

In [42]:
ncmf_x2 = np.load(f"../../datasets/NCMF/PolyP3/{sample_no}/X2.npy", allow_pickle=True)
ncmf_x2

array([[1.5103683, 1.5099996, 1.5099996, ..., 1.5099996, 1.5103683,
        1.5103683],
       [1.5099996, 1.5099996, 1.5099996, ..., 1.5099996, 1.5099996,
        1.5103683],
       [1.5099996, 1.5099996, 1.5099996, ..., 1.5099996, 1.5099996,
        1.5099996],
       ...,
       [1.5099996, 1.5099996, 1.5099996, ..., 1.5103683, 1.5103683,
        1.5103683],
       [1.5103683, 1.5099996, 1.5099996, ..., 1.5103683, 1.5103683,
        1.5103683],
       [1.5103683, 1.5103683, 1.5099996, ..., 1.5103683, 1.5103683,
        1.5103683]], dtype=float32)

In [43]:
# # making it binary
# ncmf_x2_mean = np.mean(np.unique(ncmf_x2))
# binary_ncmf_x2 = (ncmf_x2 > ncmf_x2_mean).astype(int)
# binary_ncmf_x2

In [44]:
x0 = pd.read_csv(f"../../datasets/NCMF/PolyP3/drug-drug.csv", header=None)
x0

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,635,636,637,638,639,640,641,642,643,644
0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
642,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [45]:
x1 = pd.read_csv(f"../../datasets/NCMF/PolyP3/drug-protein.csv", header=None)
x1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,827,828,829,830,831,832,833,834,835,836
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
642,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
643,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
x2 = pd.read_csv(f"../../datasets/NCMF/PolyP3/protein-protein.csv", header=None)
x2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,827,828,829,830,831,832,833,834,835,836
0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
834,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
835,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [47]:
from sklearn.metrics import mean_squared_error
print("MSE on full matrix - based on reconstruction from files")
print(mean_squared_error(x0.values, ncmf_x0))
print(mean_squared_error(x1.values, ncmf_x1))
print(mean_squared_error(x2.values, ncmf_x2))

MSE on full matrix - based on reconstruction from files
2.2274287804271378
2.4588642290369482
2.2222189241723718


In [48]:
# print("MSE on full matrix - based on reconstruction from files - binarized")
# print(mean_squared_error(x0.values, binary_ncmf_x0))
# print(mean_squared_error(x1.values, binary_ncmf_x1))
# print(mean_squared_error(x2.values, binary_ncmf_x2))

In [49]:
# val_x1["binary_ncmf_recon"] = val_x1.apply(lambda row: get_value(row, binary_ncmf_x1), axis = 1)
# auroc = roc_auc_score(y_true, list(val_x1["binary_ncmf_recon"]))
# precision, recall, thresholds = precision_recall_curve(y_true, list(val_x1["binary_ncmf_recon"]))
# auprc = auc(recall, precision)
# print("NCMF Validation - comparison against binarized results")
# print(f"AUROC = {auroc}")
# print(f"AUPRC = {auprc}")
# print(f"Precision = {precision_score(y_true, list(val_x1['binary_ncmf_recon']))}")
# print(f"Recall = {recall_score(y_true, list(val_x1['binary_ncmf_recon']))}")
# print(f"F1 score = {f1_score(y_true, list(val_x1['binary_ncmf_recon']))}")