In [1]:
import sys

import hetmatpy.hetmat
import networkx as nx
import numpy as np
import pandas as pd
import sklearn.metrics
import tqdm
import tqdm.auto
import xswap

# Make the parent directory importable before loading the project-local module.
sys.path.insert(0, '../')

import analysis

In [2]:
# Handle on the on-disk HetMat directory; the path is relative to this notebook.
hetmat = hetmatpy.hetmat.HetMat('../../data/task1/hetionet-v1.0.hetmat/')

# Metaedge abbreviation whose adjacency matrix is analysed below
# (presumably Disease-resembles-Disease in Hetionet -- TODO confirm).
metaedge = 'DrD'
# Used both as the number of permuted networks scored in the permutation loop
# and as the second argument to xswap.prior.compute_xswap_priors.
num_perms = 100

In [3]:
# Load the matrix for the configured metaedge and turn it into an edge list.
# NOTE(review): the positional False passed to matrix_to_edges / map_str_edges
# is kept as in the original call; confirm its meaning against the xswap docs.
_, _, mat = hetmat.metaedge_to_adjacency_matrix(metaedge, dense_threshold=0)
edges = xswap.network_formats.matrix_to_edges(mat, False)

# Re-index the edges; `mapping` holds the assigned ids, and the largest id
# determines the side length of the square matrix built next.
mapped_edges, mapping, _ = xswap.preprocessing.map_str_edges(edges, False)
num_nodes = 1 + max(mapping.values())
matrix_shape = (num_nodes, num_nodes)
mapped_mat = xswap.network_formats.edges_to_matrix(
    mapped_edges, True, matrix_shape, bool, sparse=True)

# One row per ordered (source, target) pair, in row-major order so that it
# lines up with the flattened feature matrices computed below.
node_ids = np.arange(num_nodes)
df = pd.DataFrame({
    'source': np.repeat(node_ids, num_nodes),
    'target': np.tile(node_ids, num_nodes),
    'edge': mapped_mat.toarray().flatten(),
})

# Entry (i, j) is row-sum(i) + column-sum(j) of the adjacency matrix.
row_sums = mapped_mat.sum(axis=1)
col_sums = mapped_mat.sum(axis=0)
degree_matrix = (
    np.repeat(row_sums, mapped_mat.shape[1], axis=1)
    + np.repeat(col_sums, mapped_mat.shape[0], axis=0)
)

# Link-prediction features from the project-local `analysis` module,
# each flattened to align with the rows of `df`.
adamic = analysis.adamic_adar_index(mapped_mat).flatten()
jaccard = np.array(analysis.jaccard(mapped_mat, degree_matrix)).flatten()
preferential = analysis.preferential_attachment_index(mapped_mat).flatten()
resource = analysis.resource_allocation_index(mapped_mat).flatten()
rwr_mat = analysis.invertible_rwr(mapped_mat.toarray(), 0.25)
rwr = np.array(rwr_mat).flatten()

# XSwap prior computed with `num_perms` permutations; only the
# 'xswap_prior' column of the returned frame is used as a feature.
prior_df = xswap.prior.compute_xswap_priors(
    mapped_edges,
    num_perms,
    shape=(num_nodes, num_nodes),
    allow_self_loops=False,
    allow_antiparallel=False,
)
edge_prior = prior_df['xswap_prior'].values

full_features_df = df.assign(
    adamic=adamic,
    jaccard=jaccard,
    preferential=preferential,
    resource=resource,
    rwr=rwr,
    edge_prior=edge_prior,
)

# AUROC of every feature against the edge indicator of the real network.
feature_names = ['adamic', 'jaccard', 'preferential', 'resource', 'rwr', 'edge_prior']
row = {}
for feature_name in feature_names:
    row[feature_name] = sklearn.metrics.roc_auc_score(
        full_features_df['edge'], full_features_df[feature_name])
row['metaedge'] = metaedge

unpermuted_auroc = [row]

In [4]:
# Score the same features on `num_perms` permuted networks.
#
# Labels are NOT recomputed: `df['edge']` still holds the edge indicator of the
# original network, so each AUROC measures how well features derived from a
# permuted network predict the real edges.

# Feature columns scored on every permutation (loop-invariant, so built once).
feature_names = ['adamic', 'jaccard', 'preferential', 'resource', 'rwr', 'edge_prior']

auroc_values = list()

# Each iteration permutes the output of the previous one, so the permutations
# are chained rather than each starting from the original edge list.
new_edges = mapped_edges.copy()
# tqdm.auto.trange replaces tqdm.tnrange, which is deprecated and emits a
# TqdmDeprecationWarning on current tqdm releases.
for i in tqdm.auto.trange(num_perms):
    new_edges, _ = xswap.permute_edge_list(new_edges, allow_self_loops=False, 
                                           allow_antiparallel=False)
    perm_mat = xswap.network_formats.edges_to_matrix(
        new_edges, True, (num_nodes, num_nodes), bool, sparse = True)
    
    adamic = analysis.adamic_adar_index(perm_mat).flatten()
    # `degree_matrix` was computed from the unpermuted matrix and is reused here.
    # NOTE(review): presumably valid because the permutation preserves node
    # degree -- confirm against the xswap documentation.
    jaccard = analysis.jaccard(perm_mat, degree_matrix)
    jaccard = np.array(jaccard).flatten()
    preferential = analysis.preferential_attachment_index(perm_mat).flatten()
    resource = analysis.resource_allocation_index(perm_mat).flatten()
    rwr_mat = analysis.invertible_rwr(perm_mat.toarray(), 0.25)
    rwr = np.array(rwr_mat).flatten()
    
    # Prior recomputed from the current permuted edge list; only the
    # 'xswap_prior' column of the returned frame is used.
    edge_prior = xswap.prior.compute_xswap_priors(new_edges, num_perms, shape=(num_nodes, num_nodes),
                                                  allow_self_loops=False, allow_antiparallel=False)
    edge_prior = edge_prior['xswap_prior'].values


    # Attach the permuted-network features to the original (source, target, edge) rows.
    full_features_df = (
        df
        .assign(
            adamic = adamic,
            jaccard = jaccard,
            preferential = preferential,
            resource = resource,
            rwr = rwr,
            edge_prior = edge_prior,
        )
    )

    row = {
        feature: sklearn.metrics.roc_auc_score(full_features_df['edge'], full_features_df[feature])
        for feature in feature_names
    }
    row['metaedge'] = metaedge
    
    auroc_values.append(row)

HBox(children=(IntProgress(value=0), HTML(value='')))




In [5]:
# Fix the column order (metaedge first, then one column per feature) and
# persist the AUROC values of the unpermuted network.
unperm_columns = [
    'metaedge',
    'adamic',
    'jaccard',
    'preferential',
    'resource',
    'rwr',
    'edge_prior',
]
unpermuted_auroc_df = pd.DataFrame(unpermuted_auroc, columns=unperm_columns)

unpermuted_auroc_df.to_csv('../../data/unperm_auroc_values.csv', index=False)

unpermuted_auroc_df.head()

Unnamed: 0,metaedge,adamic,jaccard,preferential,resource,rwr,edge_prior
0,DrD,0.928981,0.839067,0.779937,0.933145,0.987947,0.775451


In [6]:
# One row per permuted network; fix the column order (metaedge first, then
# one column per feature) and persist the AUROC values.
perm_columns = [
    'metaedge',
    'adamic',
    'jaccard',
    'preferential',
    'resource',
    'rwr',
    'edge_prior',
]
perm_auroc_df = pd.DataFrame(auroc_values, columns=perm_columns)

perm_auroc_df.to_csv('../../data/perm_auroc_values.csv', index=False)

perm_auroc_df.head()

Unnamed: 0,metaedge,adamic,jaccard,preferential,resource,rwr,edge_prior
0,DrD,0.70109,0.552461,0.779937,0.695145,0.72429,0.775938
1,DrD,0.711461,0.570694,0.779937,0.706222,0.727296,0.774621
2,DrD,0.715702,0.570046,0.779937,0.711213,0.731655,0.774704
3,DrD,0.702294,0.553095,0.779937,0.697556,0.734357,0.775804
4,DrD,0.704972,0.564396,0.779937,0.699838,0.72955,0.773119
