In [1]:
%load_ext autoreload
%autoreload 2

import collections
import time

import numpy as np
import pandas as pd
import pytest
import tqdm

import hetmech.degree_weight
import hetmech.hetmat

In [2]:
# One independent HetMat instance per DWWC implementation, so that there is no
# cache sharing between them.
hetmat, hetmat_rec, hetmat_chain = (
    hetmech.hetmat.HetMat('../data/hetionet-v1.0.hetmat/') for _ in range(3)
)

In [3]:
# Rephetio metapaths: Compound-to-Disease metapaths up to length 4, leaving out
# those categorized as 'long_repeat' or 'other'.
metapaths = [
    metapath
    for metapath in hetmat.metagraph.extract_metapaths('Compound', 'Disease', max_length=4)
    if hetmech.degree_weight.categorize(metapath) not in {'long_repeat', 'other'}
]
len(metapaths)

1172

In [4]:
def equal_outputs(metapath, dense_threshold=0, dtype=np.float64):
    """
    Compute DWWC using all three functions, and check that they give the same output.

    Each implementation runs against its own module-level HetMat instance
    (`hetmat_rec`, `hetmat_chain`, `hetmat`).

    Parameters
    ----------
    metapath
        Metapath passed unchanged to each DWWC function.
    dense_threshold
        Passed unchanged to each DWWC function.
    dtype
        Passed unchanged to each DWWC function.

    Returns
    -------
    list of float
        Elapsed seconds, ordered as [recursive, chain, original].

    Notes
    -----
    A mismatch between implementations is not raised: the offending metapath
    is printed instead, so that a loop over many metapaths runs to completion.
    """
    # Largest absolute elementwise difference tolerated between implementations.
    # A relative tolerance is meaningless here because the expected value is 0.
    tolerance = 1e-7

    # perf_counter is a monotonic, high-resolution clock intended for intervals
    start = time.perf_counter()

    # Recursive
    row_rec, col_rec, dwwc_rec = hetmech.degree_weight.dwwc_recursive(
        hetmat_rec, metapath, dense_threshold=dense_threshold, dtype=dtype)
    rec_done = time.perf_counter()

    # Chain ordering
    row_chain, col_chain, dwwc_chain = hetmech.degree_weight.dwwc_chain(
        hetmat_chain, metapath, dense_threshold=dense_threshold, dtype=dtype)
    chain_done = time.perf_counter()

    # Original DWWC method
    row_original, col_original, dwwc_original = hetmech.degree_weight.dwwc(
        hetmat, metapath, dense_threshold=dense_threshold, dtype=dtype)
    original_done = time.perf_counter()

    times = [rec_done - start, chain_done - rec_done, original_done - chain_done]

    # Ensure identical outputs
    try:
        # Row and column labels must match the original implementation exactly
        assert (row_rec == row_original) and (row_chain == row_original) and \
            (col_rec == col_original) and (col_chain == col_original)
        # Matrix values must agree within the same absolute tolerance
        assert abs(dwwc_rec - dwwc_original).max() == pytest.approx(0, abs=tolerance)
        assert abs(dwwc_chain - dwwc_original).max() == pytest.approx(0, abs=tolerance)
    except AssertionError:
        # Report the mismatch without aborting the benchmark
        print(metapath)
    return times

In [5]:
# Benchmark every metapath; each entry is the [recursive, chain, original]
# timing list returned by equal_outputs.
all_times = [
    equal_outputs(metapath, dense_threshold=1)
    for metapath in tqdm.tqdm(metapaths)
]

100%|██████████| 1172/1172 [06:34<00:00,  2.97it/s]


In [6]:
# One row per metapath and one column per implementation; the column order
# matches the order of the timing list returned by equal_outputs.
df = pd.DataFrame(all_times, columns=['recursive', 'chain', 'original'])

In [7]:
# Preview the first rows of the per-metapath timings
df.head()

Unnamed: 0,recursive,chain,original
0,0.013497,0.009822,0.010345
1,0.005449,0.004633,0.004521
2,0.03011,0.02754,0.026869
3,0.019711,0.017824,0.017884
4,0.016536,0.015252,0.01478


In [8]:
# Mean elapsed time per metapath for each implementation
df.mean()

recursive    0.080484
chain        0.061970
original     0.191609
dtype: float64

In [9]:
# Total elapsed time over all metapaths for each implementation
df.sum()

recursive     94.327582
chain         72.628601
original     224.566000
dtype: float64

In [10]:
from scipy import stats

In [11]:
# Paired t-test on per-metapath timings: recursive vs. original
stats.ttest_rel(df['recursive'], df['original'])

Ttest_relResult(statistic=-13.066339983865676, pvalue=1.6033049959631514e-36)

In [12]:
# Paired t-test on per-metapath timings: chain vs. original
stats.ttest_rel(df['chain'], df['original'])

Ttest_relResult(statistic=-15.054544561165635, pvalue=5.914702926397875e-47)

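## Example of a typically long computation

Each DWWC implementation is timed on the metapath `CbGeAeGaD`, once with `dense_threshold=0` and once with `dense_threshold=1`.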

In [13]:
%%timeit

# Original DWWC with dense_threshold=0
_ = hetmech.degree_weight.dwwc(hetmat, 'CbGeAeGaD', dense_threshold=0)

2.81 s ± 30.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit

# Original DWWC with dense_threshold=1
_ = hetmech.degree_weight.dwwc(hetmat, 'CbGeAeGaD', dense_threshold=1)

2.54 s ± 2.95 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
%%timeit

# Recursive DWWC with dense_threshold=0
# NOTE(review): this runs on the shared `hetmat`, not the dedicated `hetmat_rec`
# instance — confirm that previously cached results do not skew the timing.
_ = hetmech.degree_weight.dwwc_recursive(hetmat, 'CbGeAeGaD', dense_threshold=0)

2.08 s ± 16.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [16]:
%%timeit

# Recursive DWWC with dense_threshold=1
# NOTE(review): this runs on the shared `hetmat`, not the dedicated `hetmat_rec`
# instance — confirm that previously cached results do not skew the timing.
_ = hetmech.degree_weight.dwwc_recursive(hetmat, 'CbGeAeGaD', dense_threshold=1)

356 ms ± 2.69 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit

# Chain-ordered DWWC with dense_threshold=0
# NOTE(review): this runs on the shared `hetmat`, not the dedicated `hetmat_chain`
# instance — confirm that previously cached results do not skew the timing.
_ = hetmech.degree_weight.dwwc_chain(hetmat, 'CbGeAeGaD', dense_threshold=0)

2.09 s ± 19.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
%%timeit

# Chain-ordered DWWC with dense_threshold=1
# NOTE(review): this runs on the shared `hetmat`, not the dedicated `hetmat_chain`
# instance — confirm that previously cached results do not skew the timing.
_ = hetmech.degree_weight.dwwc_chain(hetmat, 'CbGeAeGaD', dense_threshold=1)

294 ms ± 905 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
