In [1]:
import pathlib

import hetmatpy.hetmat
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.sparse
import seaborn as sns
import sklearn.metrics
import tqdm

import analysis

%matplotlib inline

In [2]:
# Restart probability handed to analysis.all_pairs_rwr for every RWR
# computation in this notebook (real network and permuted networks alike).
restart_prob = 0.25

In [3]:
# Directory that receives every array written by this notebook; it is
# created up front (together with any missing parents) so later saves work.
data_path = pathlib.Path('../data/edge_predict/')
data_path.mkdir(exist_ok=True, parents=True)

# On-disk HetMat that provides the 'PpP' / 'PiP' adjacency matrices and the
# permuted networks used below.
hetmat = hetmatpy.hetmat.HetMat('../data/ppi_hetmat/')

### 1. Compute edge priors

In [4]:
%%time
sum_perm_edges = None

for name, permat in hetmat.permutations.items():
    source, target, perm_adj = permat.metaedge_to_adjacency_matrix('PpP', dense_threshold=0)
    
    if sum_perm_edges is None:
        sum_perm_edges = perm_adj
    else:
        sum_perm_edges += perm_adj

edge_prior = sum_perm_edges / len(hetmat.permutations)

np.save(data_path.joinpath('prior_edge.npy'), edge_prior)

CPU times: user 23.5 s, sys: 16.4 s, total: 39.9 s
Wall time: 39.9 s


### 2. Compute RWR on the network

In [5]:
%%time

source, target, pruned_matrix = hetmat.metaedge_to_adjacency_matrix('PpP', dense_threshold=1)
ppi_rwr = analysis.all_pairs_rwr(pruned_matrix, restart_prob)

np.save(data_path.joinpath('PpP_RWR.npy'), ppi_rwr)

CPU times: user 11.9 s, sys: 139 ms, total: 12 s
Wall time: 12 s


### 3. Compute RWR on permuted networks

For speed, this step can be restricted to a subset of the permuted networks (e.g. 100 of the 1000) by lowering `subset_size`; the run below uses 1000.

In [6]:
# Upper bound on how many permuted networks are used for the RWR prior.
subset_size = 1000

# Materialise the selection first: slicing silently returns fewer than
# `subset_size` items when fewer permutations exist, and the mean below must
# be normalised by the number of networks actually processed.
perm_subset = list(hetmat.permutations.items())[:subset_size]
if not perm_subset:
    raise ValueError('No permuted networks selected; cannot compute the RWR prior')

sum_perm_rwr = None
for name, permat in tqdm.tqdm(perm_subset):
    source, target, perm_adj = permat.metaedge_to_adjacency_matrix('PpP', dense_threshold=1)
    rwr_mat = analysis.all_pairs_rwr(perm_adj, restart_prob)

    # The first RWR matrix seeds the running sum; later ones are added to it.
    if sum_perm_rwr is None:
        sum_perm_rwr = rwr_mat
    else:
        sum_perm_rwr += rwr_mat

# Mean RWR score per node pair over the permuted networks that were processed.
mean_rwr = sum_perm_rwr / len(perm_subset)
np.save(data_path.joinpath('prior_RWR.npy'), mean_rwr)

100%|██████████| 1000/1000 [1:47:14<00:00,  6.43s/it]


### 4. Flatten features and labels and scale features

In [7]:
# Flatten the matrices computed in the earlier cells into 1-D vectors.
prior = edge_prior.flatten()
perm_rwr = mean_rwr.flatten()
rwr = ppi_rwr.flatten()

# metaedge_to_adjacency_matrix returns a 3-tuple whose last item is the
# matrix; only that item is needed here, flattened the same way.
edge = hetmat.metaedge_to_adjacency_matrix('PpP', dense_threshold=0)[-1].flatten()
original_edge = hetmat.metaedge_to_adjacency_matrix('PiP', dense_threshold=0)[-1].flatten()

In [8]:
# Bundle all flattened vectors into one compressed archive; each dictionary
# key becomes the name of the corresponding array inside the .npz file.
np.savez_compressed(
    data_path / 'pruned_features.npz',
    **{
        'original': original_edge,
        'pruned_edge': edge,
        'rwr': rwr,
        'p_rwr': perm_rwr,
        'edge_prior': prior,
    },
)