In [3]:
import warnings
warnings.filterwarnings('ignore')

import pickle
import numpy as np
import scipy.sparse as sp
from scipy.sparse import load_npz

import torch

from cell.utils import link_prediction_performance
from cell.cell import Cell, EdgeOverlapCriterion, LinkPredictionCriterion
from cell.graph_statistics import compute_graph_statistics

# load data

In [9]:
train_graph = load_npz('./data/covid_19/covid_19_sp.npz')

In [15]:
filename = './data/covid_19/test_dataset/test_idx.pkl'
test_data_idx = None
with open(filename, 'rb') as f:
    [test_data_idx] = pickle.load(f)
print(len(test_data_idx))

408


In [16]:
test_data_idx[:3]

[(5521, 10263, 0), (5296, 7344, 0), (2169, 8299, 1)]

In [6]:
train_graph

<23587x23587 sparse matrix of type '<class 'numpy.float64'>'
	with 576540 stored elements in Compressed Sparse Row format>

In [7]:
dense_adj = train_graph.todense()

In [18]:
print(dense_adj[5521, 10263], dense_adj[10263, 5521])

2.0 2.0


In [19]:
dense_adj[dense_adj > 1] = 1
print(dense_adj[5521, 10263], dense_adj[10263, 5521])

1.0 1.0


In [33]:
ones = []
zeros = []
val_ones = []
val_zeros = []
for (i, j, val) in test_data_idx[258:]:
    if val == 1:
        ones.append([i, j])
    else:
        zeros.append([i, j])
for (i, j, val) in test_data_idx[:258]:
    if val == 1:
        if len(val_ones) == 86:
            continue
        val_ones.append([i, j])
    else:
        val_zeros.append([i, j])

In [34]:
print(len(val_ones), len(val_zeros))

86 86


In [23]:
for (i, j, val) in test_data_idx:
    dense_adj[i, j] = 0
    dense_adj[j, i] = 0

In [24]:
from scipy.sparse.csgraph import csgraph_from_dense
train_graph = csgraph_from_dense(dense_adj)

In [25]:
train_graph

<23587x23587 sparse matrix of type '<class 'numpy.float64'>'
	with 575724 stored elements in Compressed Sparse Row format>

In [26]:
import random
seed = 1111
test_size = 37
test_ones = random.Random(seed).sample(ones, test_size)
test_zeros = random.Random(seed).sample(zeros, test_size)

In [27]:
symmetrize = lambda x: np.row_stack((x, np.column_stack((x[:, 1], x[:, 0]))))

In [35]:
test_ones = np.array(test_ones)
test_zeros = np.array(test_zeros)
val_ones = np.array(val_ones)
val_zeros = np.array(val_zeros)

test_ones = symmetrize(test_ones)
test_zeros = symmetrize(test_zeros)
val_ones = symmetrize(val_ones)
val_zeros = symmetrize(val_zeros)
print(len(test_ones), len(test_zeros), len(val_ones), len(val_zeros))

148 148 172 172


In [48]:
model = Cell(A=train_graph,
             H=9,
             callbacks=[LinkPredictionCriterion(invoke_every=2,
                                                val_ones=test_ones,
                                                val_zeros=test_zeros,
                                                max_patience=200)])

In [49]:
# train model 
model.train(steps=100,
            optimizer_fn=torch.optim.Adam,
            optimizer_args={'lr': 0.05,
                            'weight_decay': 1e-6})

Step:   2/100 Loss: 10.06835 ROC-AUC Score: 0.530 Average Precision: 0.521 Total-Time: 72
Step:   4/100 Loss: 10.02898 ROC-AUC Score: 0.526 Average Precision: 0.508 Total-Time: 148


KeyboardInterrupt: 

In [43]:
link_prediction_performance(scores_matrix=model._scores_matrix, val_ones=test_ones, val_zeros=test_zeros)

(0.5471146822498174, 0.5737520667710783)