In [1]:
import sys

import hetmatpy.hetmat
import networkx as nx
import numpy as np
import pandas as pd
import sklearn.metrics
import tqdm
import tqdm.auto
import xswap

sys.path.insert(0, '../')

import analysis

In [2]:
# Number of permutations: passed to xswap.prior.compute_xswap_priors when computing
# the edge prior, and used as the number of permuted networks scored in the loop below.
num_perms = 100

In [3]:
# Read the xz-compressed, tab-separated PPI edge table.
edges_df = pd.read_csv('../../data/task3/2.edges/ppi.tsv.xz', compression='xz', sep='\t')

# Keep only rows flagged train == 1 and turn each (id_a, id_b) row into a tuple.
train_id_pairs = edges_df.query('train == 1')[['id_a', 'id_b']].values
edges = [tuple(pair) for pair in train_id_pairs]

# Let xswap remap the node identifiers; the third return value is not used here.
edges, mapping, _ = xswap.preprocessing.map_str_edges(edges, bipartite=False)

# One more than the largest node index appearing in any mapped edge.
num_nodes = max(max(edge) for edge in edges) + 1

In [4]:
# Build per-node-pair features on the unpermuted training network, save them, and
# compute one AUROC per feature against the network's own edges.
#
# NOTE: every array below has num_nodes**2 entries, so this cell is memory- and
# time-hungry; the dense matrix and the degree grids are therefore built only once.

# Sparse adjacency matrix of the training edges. The second positional argument (True)
# is presumably xswap's add_reverse_edges flag -- TODO confirm against the xswap docs.
mat = xswap.network_formats.edges_to_matrix(edges, True, (num_nodes, num_nodes), sparse=True)

# Dense copy of the adjacency matrix, reused for the 'edge' column and the RWR feature.
dense_mat = mat.toarray()

# Row sums repeated across columns and column sums repeated across rows, each giving a
# (num_nodes, num_nodes) grid; their sum is handed to analysis.jaccard below.
source_degree_grid = np.repeat(mat.sum(axis=1), mat.shape[1], axis=1)
target_degree_grid = np.repeat(mat.sum(axis=0), mat.shape[0], axis=0)
degree_matrix = source_degree_grid + target_degree_grid

# One row per (source, target) pair in row-major order: source varies slowest.
node_ids = np.arange(num_nodes)
df = pd.DataFrame({
    'source': np.repeat(node_ids, num_nodes),
    'target': np.tile(node_ids, num_nodes),
    'edge': dense_mat.flatten(),
    # np.asarray first: flattening a np.matrix would keep it two-dimensional.
    'source_degree': np.asarray(source_degree_grid).flatten(),
    'target_degree': np.asarray(target_degree_grid).flatten(),
})

# Pairwise features from the project-local `analysis` module, flattened to match df's rows.
adamic = analysis.adamic_adar_index(mat).flatten()
jaccard = np.array(analysis.jaccard(mat, degree_matrix)).flatten()
preferential = analysis.preferential_attachment_index(mat).flatten()
resource = analysis.resource_allocation_index(mat).flatten()
# 0.25 is the second argument of invertible_rwr -- presumably the restart probability;
# TODO confirm in analysis.py.
rwr_mat = analysis.invertible_rwr(dense_mat, 0.25)
rwr = np.array(rwr_mat).flatten()

# XSwap edge prior for every node pair.
# NOTE(review): this assumes the rows returned by compute_xswap_priors are in the same
# row-major (source, target) order as df -- confirm before trusting the alignment.
edge_prior = xswap.prior.compute_xswap_priors(edges, num_perms, shape=(num_nodes, num_nodes),
                                              allow_self_loops=True, allow_antiparallel=False)
edge_prior = edge_prior['xswap_prior'].values

full_features_df = (
    df
    .assign(
        adamic = adamic,
        jaccard = jaccard,
        preferential = preferential,
        resource = resource,
        rwr = rwr,
        edge_prior = edge_prior,
    )
)

full_features_df.to_csv('../../data/unperm_feature_values.csv', index=False)

# Single-row table: AUROC of each feature when used to rank the network's own edges.
feature_columns = ['adamic', 'jaccard', 'preferential', 'resource', 'rwr', 'edge_prior']
unpermuted_auroc_df = pd.DataFrame([{
    feature: sklearn.metrics.roc_auc_score(full_features_df['edge'], full_features_df[feature])
    for feature in feature_columns
}])

unpermuted_auroc_df.to_csv('../../data/unperm_auroc_values.csv', index=False)

unpermuted_auroc_df.head()

KeyboardInterrupt: 

In [None]:
# For each of num_perms successive permutations of the training network, recompute the
# topological features on the permuted network and record their AUROC against the
# edges of the unpermuted network.
auroc_values = list()

# The labels never change between iterations, so look them up once.
true_edges = full_features_df['edge']

# Each iteration permutes the previous iteration's edge list rather than the original.
new_edges = edges.copy()
for i in tqdm.auto.trange(num_perms):
    # The second return value of permute_edge_list is not used here.
    new_edges, _ = xswap.permute_edge_list(new_edges, allow_self_loops=True,
                                           allow_antiparallel=False)
    # NOTE(review): the fourth positional argument (bool) differs from the unpermuted
    # `mat`, which relies on xswap's default there -- confirm the analysis helpers
    # give comparable results for both.
    perm_mat = xswap.network_formats.edges_to_matrix(
        new_edges, True, (num_nodes, num_nodes), bool, sparse = True)

    # Features are built straight into the dict so the notebook-level `jaccard` and
    # `rwr_mat` computed for the unpermuted network are not overwritten.
    # NOTE(review): `degree_matrix` comes from the unpermuted network; reusing it
    # presumes the permutation leaves node degrees unchanged -- confirm.
    features_dict = {
        'adamic': analysis.adamic_adar_index(perm_mat).flatten(),
        'jaccard': np.array(analysis.jaccard(perm_mat, degree_matrix)).flatten(),
        'preferential': analysis.preferential_attachment_index(perm_mat).flatten(),
        'resource': analysis.resource_allocation_index(perm_mat).flatten(),
        'rwr': np.array(analysis.invertible_rwr(perm_mat.toarray(), 0.25)).flatten(),
    }

    # One AUROC per feature for this permutation.
    row = {
        feature: sklearn.metrics.roc_auc_score(true_edges, values)
        for feature, values in features_dict.items()
    }
    auroc_values.append(row)

In [None]:
# Gather the per-permutation AUROC rows into a table with a fixed column order,
# write it to disk, and preview the first rows.
perm_feature_names = ['adamic', 'jaccard', 'preferential', 'resource', 'rwr']
perm_auroc_df = pd.DataFrame(auroc_values, columns=perm_feature_names)

perm_auroc_df.to_csv('../../data/perm_auroc_values.csv', index=False)

perm_auroc_df.head()