In [1]:
import hetmatpy.hetmat
import networkx as nx
import numpy as np
import pandas as pd
import xswap

In [2]:
def biadjacency_to_adjacency(biadjacency):
    """Expand a biadjacency matrix into a square bipartite adjacency matrix.

    For an ``(m, n)`` input ``B`` the result is the ``(m + n, m + n)`` block
    matrix ``[[0, B], [B.T, 0]]``: row nodes occupy indices ``0..m-1`` and
    column nodes occupy indices ``m..m+n-1``.

    Parameters
    ----------
    biadjacency : numpy.ndarray
        Two-dimensional array with one row per source node and one column
        per target node.

    Returns
    -------
    numpy.ndarray
        Symmetric adjacency matrix with the same dtype as the input.
    """
    n_rows, n_cols = biadjacency.shape
    n_nodes = n_rows + n_cols
    # Start from an all-zero square matrix and fill the two off-diagonal blocks.
    adjacency = np.zeros((n_nodes, n_nodes), dtype=biadjacency.dtype)
    adjacency[:n_rows, n_rows:] = biadjacency
    adjacency[n_rows:, :n_rows] = biadjacency.T
    return adjacency

In [3]:
# Open the on-disk Hetionet v1.0 HetMat.
hetmat = hetmatpy.hetmat.HetMat('../../data/task1/hetionet-v1.0.hetmat/')

# Only the matrix itself is needed; the first two returned values are discarded.
_, _, biadjacency = hetmat.metaedge_to_adjacency_matrix('DrD', dense_threshold=0)

# The DrD matrix is handed to networkx directly as the adjacency matrix,
# i.e. without the bipartite expansion done by biadjacency_to_adjacency.
adj_mat = biadjacency
G = nx.from_numpy_array(adj_mat)

# Enumerate every (row, column) cell of the matrix in row-major order:
# source_ids repeats each row index once per column, target_ids cycles
# through the column indices once per row.
n_rows, n_cols = biadjacency.shape
source_ids = np.repeat(np.arange(n_rows), n_cols)
target_ids = np.tile(np.arange(n_cols), n_rows)

In [4]:
# Candidate node pairs: every (row, column) cell of the matrix, in row-major
# order. The matrix is used directly as the graph adjacency, so target ids are
# passed unchanged; if the graph were instead built from the bipartite
# expansion of the matrix, they would need an offset of biadjacency.shape[0].
node_pairs = list(zip(source_ids, target_ids))

# Map each feature name to a generator of (id_a, id_b, score) tuples.
# NOTE(review): adamic_adar_index is intentionally not computed here; it was
# disabled in the original analysis. Confirm the reason before re-enabling it.
feature_to_generator = {
    'resource_allocation_index': nx.link_prediction.resource_allocation_index(
        G, node_pairs),
    'jaccard_coefficient': nx.link_prediction.jaccard_coefficient(
        G, node_pairs),
    'preferential_attachment': nx.link_prediction.preferential_attachment(
        G, node_pairs),
}

# Long format: one (id_a, id_b, value, feature) record per pair and feature.
rows = [
    row + (feature_name,)
    for feature_name, generator in feature_to_generator.items()
    for row in generator
]

# Wide format: one row per node pair and one column per feature. The edge
# label is looked up by (id_a, id_b) rather than attached positionally, so it
# stays correct regardless of how pivot_table orders or filters the rows.
features_df = (
    pd.DataFrame(rows, columns=['id_a', 'id_b', 'value', 'feature'])
    .pivot_table(index=['id_a', 'id_b'], columns='feature', values='value')
    .reset_index()
    .rename_axis(None, axis=1)
    .assign(
        edge=lambda df: biadjacency[df['id_a'].to_numpy(), df['id_b'].to_numpy()],
    )
)

# Sanity checks: no candidate pair was dropped by the pivot, and the rows are
# in the same row-major order as the matrix.
assert len(features_df) == biadjacency.size, 'pivot dropped or duplicated node pairs'
assert np.array_equal(features_df['edge'].to_numpy(), biadjacency.flatten()), (
    'feature rows are not in row-major matrix order'
)

In [5]:
# Persist the per-pair feature table (without the positional index column),
# then preview the first rows as the cell output.
features_path = '../../data/fig2.feature_degree/DrD.csv'
features_df.to_csv(features_path, index=False)

features_df.head()

Unnamed: 0,id_a,id_b,jaccard_coefficient,preferential_attachment,resource_allocation_index,edge
0,0,0,1.0,4.0,0.642857,False
1,0,1,0.0,12.0,0.0,False
2,0,2,0.0,12.0,0.0,False
3,0,3,0.0,6.0,0.0,False
4,0,4,0.0,0.0,0.0,False
