# Statistics for computing the prior probability of a hetnet edge under XSwap permutation

Background and discussion: https://github.com/greenelab/hetmech/issues/134

In [1]:
import collections

import numpy
import pandas
import scipy.special

import hetmech.hetmat

## Functions

In [2]:
def get_wedge_number(degrees):
    """
    Return the total wedge count over nodes with the given degrees.

    Each node contributes "degree choose 2" wedges; the contributions
    of all nodes in `degrees` are added together. An empty iterable
    yields 0.
    """
    total = 0
    for node_degree in degrees:
        # exact=True requests integer (not floating point) evaluation
        total += scipy.special.comb(node_degree, 2, exact=True)
    return total


def get_xswap_metaedge_df(hetmat):
    """
    Get a dataframe with metaedge summary information. Includes
    statistics for analytically computing the probability of an edge
    existing in XSwap permutations. See
    https://github.com/greenelab/hetmech/issues/134#issuecomment-425933781.

    HetMat analog to
    https://github.com/dhimmel/hetio-fork/blob/ae2d0bce46e7137ae8812e99ce0e8301f8b7fa53/hetio/stats.py#L94-L105

    `hetmat` must be a hetmech.hetmat.HetMat instance.

    Returns a pandas.DataFrame with one row per metaedge (inverted
    metaedges excluded), sorted by the 'metaedge' column. Columns:
    metaedge, abbreviation, n_edges, n_connected_source_nodes,
    n_connected_target_nodes, n_source_wedges, n_target_wedges,
    n_wedges, n_valid_xswaps. When the metagraph has no metaedges, an
    empty dataframe with these columns is returned.
    """
    assert isinstance(hetmat, hetmech.hetmat.HetMat)
    # Declared up front so that the dataframe has its columns (and can be
    # sorted) even when there are no metaedges to summarize.
    columns = [
        'metaedge',
        'abbreviation',
        'n_edges',
        'n_connected_source_nodes',
        'n_connected_target_nodes',
        'n_source_wedges',
        'n_target_wedges',
        'n_wedges',
        'n_valid_xswaps',
    ]
    rows = list()
    metaedges = list(hetmat.metagraph.get_edges(exclude_inverts=True))
    for metaedge in metaedges:
        # Metaedge information
        row = collections.OrderedDict()
        row['metaedge'] = metaedge.get_unicode_str()
        row['abbreviation'] = metaedge.get_abbrev()
        # Metaedge edges
        source_ids, target_ids, matrix = hetmat.metaedge_to_adjacency_matrix(metaedge)
        row['n_edges'] = matrix.sum()
        # Per-node degrees: row sums for source nodes, column sums for
        # target nodes, flattened to one-dimensional arrays.
        source_degrees = numpy.array(matrix.sum(axis=1).flat)
        target_degrees = numpy.array(matrix.sum(axis=0).flat)
        # Number of connected source and target nodes (degree above zero).
        # count_nonzero counts in compiled code instead of iterating the
        # boolean array element by element with the builtin sum.
        row['n_connected_source_nodes'] = numpy.count_nonzero(source_degrees > 0)
        row['n_connected_target_nodes'] = numpy.count_nonzero(target_degrees > 0)
        # XSwap prior probability statistics (https://git.io/fxkcp)
        row['n_source_wedges'] = get_wedge_number(source_degrees)
        row['n_target_wedges'] = get_wedge_number(target_degrees)
        row['n_wedges'] = row['n_source_wedges'] + row['n_target_wedges']
        # All unordered pairs of edges, minus the pairs counted as wedges.
        row['n_valid_xswaps'] = scipy.special.comb(row['n_edges'], 2, exact=True) - row['n_wedges']
        rows.append(row)
    metaedge_df = pandas.DataFrame(rows, columns=columns).sort_values('metaedge')
    return metaedge_df

## Execution

In [3]:
# Read Hetionet v1.0 from its HetMat directory.
# NOTE(review): the path is relative, so it presumably resolves against the
# notebook's working directory — confirm before running from elsewhere.
hetmat = hetmech.hetmat.HetMat('../../data/hetionet-v1.0.hetmat/')

In [4]:
# Compute the per-metaedge XSwap statistics and preview the first three rows
metaedge_df = get_xswap_metaedge_df(hetmat)
metaedge_df.head(3)

Unnamed: 0,metaedge,abbreviation,n_edges,n_connected_source_nodes,n_connected_target_nodes,n_source_wedges,n_target_wedges,n_wedges,n_valid_xswaps
0,Anatomy–downregulates–Gene,AdG,102240,36,15097,173440264,493897,173934161,5052523519
1,Anatomy–expresses–Gene,AeG,526407,241,18094,2290279787,10749138,2301028925,136250872696
2,Anatomy–upregulates–Gene,AuG,97848,36,15929,149352969,359661,149712630,4637353998


In [5]:
# Write as TSV (tab-separated, without the dataframe index column)
metaedge_df.to_csv('hetionet-v1.0-metaedge-xswap-stats.tsv', sep='\t', index=False)