In [5]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import networkx as nx
import scipy
%matplotlib inline

In [2]:
import pickle

# Large default canvas for every figure produced in this notebook.
plt.rcParams['figure.figsize'] = [15, 10]
sns.set_style('whitegrid')
# Text sizes are applied after the seaborn style, in the same order as before.
plt.rcParams.update({
    'font.size': 20.0,
    'xtick.labelsize': 20.0,
    'ytick.labelsize': 20.0,
})

In [3]:
%load_ext autoreload
%autoreload 2


In [None]:
import graph_learning_utils as gl

# Occlusion model 

We are given a ground truth $A^s$ through an unbiased sampling procedure from the base PPI. 

We generate some number of occluded matrices $P_i$ by knocking out certain edges of $P$, then sample from each of those biased versions to obtain the observed sample matrices.

Run the algorithm, then ask: 

* How many edges can we recover?

We can use the usual binary classification metrics (precision, recall, F1, etc.).

## Import data

In [6]:
# Load the pre-built adjacency matrix, stored on disk as a SciPy sparse .npz file.
# NOTE(review): "9606" in the filename is presumably the NCBI taxonomy ID for
# human -- confirm against whatever script produced this file.
sparse_matrix = scipy.sparse.load_npz('data/adj_matrix_sparse_restricted_9606.npz')

In [7]:
sparse_matrix

<11916x11916 sparse matrix of type '<class 'numpy.float64'>'
	with 5963604 stored elements in Compressed Sparse Row format>

In [8]:
# Densify the sparse matrix so it can be indexed and modified as a plain array.
# For the 11916 x 11916 float64 matrix reported above, the dense form needs
# roughly 1.1 GB of memory.
human_ppi_ground_truth = sparse_matrix.toarray()

In [11]:
# Sanity check: the matrix minus its transpose should be zero everywhere
# (within np.allclose's default tolerances), i.e. the matrix is symmetric.
np.allclose(human_ppi_ground_truth - human_ppi_ground_truth.T, 0.0)

True

In [12]:
# Rescale by the largest entry so that the maximum value becomes 1.
human_ppi_ground_truth = human_ppi_ground_truth / human_ppi_ground_truth.max()

In [14]:
def gen_occluded_p(P, frac_to_occlude=0.01, rng=None):
    """Return a copy of ``P`` in which a random subset of nodes is occluded.

    A node is occluded by zeroing its whole row and its whole column, which
    removes every entry that involves that node.

    Parameters
    ----------
    P : numpy.ndarray
        2-D matrix indexed by node on both axes (assumed square -- the same
        index is used for rows and columns). ``P`` itself is NOT modified;
        the occlusion is applied to a copy.
    frac_to_occlude : float, default 0.01
        Fraction of the ``P.shape[0]`` nodes to occlude. The number of nodes
        is ``int(frac_to_occlude * n)``, i.e. truncated toward zero.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global ``np.random``
        state is used, so ``np.random.seed`` still controls the draw.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as ``P`` whose selected rows and
        columns are all zero.
    """
    sampler = np.random if rng is None else rng
    n = P.shape[0]
    num_to_occlude = int(frac_to_occlude * n)
    # Draw distinct node indices; an empty draw leaves the copy untouched.
    occluded_indices = sampler.choice(n, size=num_to_occlude, replace=False)

    # Work on a copy: mutating the caller's array made repeated calls on the
    # same input accumulate occlusions and return the very same object.
    occluded = P.copy()
    occluded[occluded_indices, :] = 0
    occluded[:, occluded_indices] = 0
    return occluded

In [15]:
m = 5  # number of occluded variants to generate

# Hand every call its own fresh copy of the ground truth. Sharing a single
# array across calls lets each occlusion pile onto the previous ones and makes
# all list entries refer to the same matrix, so the m variants would not be
# independent and the ground truth copy would be destroyed.
occluded_ground_truths = [
    gen_occluded_p(human_ppi_ground_truth.copy()) for _ in range(m)
]

In [16]:
# The validation sample is drawn from the un-occluded ground truth.
validation_mat = gl.gen_sample_mat(human_ppi_ground_truth)
# One sample matrix per occluded variant, in the same order as the variants.
sample_mats = list(map(gl.gen_sample_mat, occluded_ground_truths))

KeyboardInterrupt: 

In [None]:
# Random starting point sized by the number of sample matrices (m).
# NOTE(review): presumably eta holds one weight per sample matrix -- confirm
# against graph_learning_utils.
eta_init = gl.generate_random_eta(m)
result = gl.run_scipy_minimize(
    eta_init,
    sample_mats,
    validation_mat,
    delta=0.01,
    verbose=True,
)