In [2]:
%load_ext autoreload
%autoreload 2

import collections
import time

import numpy as np
import pandas as pd
import pytest
import tqdm

import hetmech.degree_weight
import hetmech.hetmat

In [3]:
# Open the HetMat stored at this path (relative to the notebook's working directory).
# Later cells read this notebook-level `hetmat` as a global.
hetmat = hetmech.hetmat.HetMat('../data/hetionet-v1.0.hetmat/')

In [4]:
# Collect every Compound -> Disease metapath of length <= 4, keeping only
# those whose category is neither 'long_repeat' nor 'other'.
metapaths = []
for metapath in hetmat.metagraph.extract_metapaths('Compound', 'Disease', max_length=4):
    if hetmech.degree_weight.categorize(metapath) not in {'long_repeat', 'other'}:
        metapaths.append(metapath)
len(metapaths)

1172

In [5]:
def equal_outputs(metapath, dense_threshold=0, dtype=np.float64):
    """
    Time the three DWWC implementations on one metapath and cross-check them.

    Calls `dwwc_recursive`, `dwwc_chain` and `dwwc` from
    `hetmech.degree_weight`, in that order, on the notebook-level `hetmat`.
    The recursive and chain results are compared with the original `dwwc`
    result. If any comparison fails, the metapath is printed and execution
    continues, so a sweep over many metapaths is not aborted by one mismatch.

    Parameters
    ----------
    metapath
        Metapath forwarded unchanged to each implementation.
    dense_threshold
        Forwarded unchanged to each implementation.
    dtype
        Forwarded unchanged to each implementation.

    Returns
    -------
    list of float
        Elapsed seconds, ordered as [recursive, chain, original].
    """
    kwargs = dict(dense_threshold=dense_threshold, dtype=dtype)

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # short durations; time.time() is wall-clock and can jump.
    start = time.perf_counter()
    row_rec, col_rec, dwwc_rec = hetmech.degree_weight.dwwc_recursive(hetmat, metapath, **kwargs)
    rec_done = time.perf_counter()
    row_chain, col_chain, dwwc_chain = hetmech.degree_weight.dwwc_chain(hetmat, metapath, **kwargs)
    chain_done = time.perf_counter()
    row_original, col_original, dwwc_original = hetmech.degree_weight.dwwc(hetmat, metapath, **kwargs)
    original_done = time.perf_counter()
    times = [rec_done - start, chain_done - rec_done, original_done - chain_done]

    # The largest element-wise difference is compared against zero, so only an
    # absolute tolerance is meaningful: a relative tolerance scales with the
    # expected value and therefore vanishes at 0. Both alternative
    # implementations are held to the same tolerance.
    near_zero = pytest.approx(0, abs=1e-7)
    outputs_match = (
        row_rec == row_original
        and row_chain == row_original
        and col_rec == col_original
        and col_chain == col_original
        and abs(dwwc_rec - dwwc_original).max() == near_zero
        and abs(dwwc_chain - dwwc_original).max() == near_zero
    )
    if not outputs_match:
        # Report the offending metapath but keep going (best-effort sweep).
        print(metapath)
    return times

In [6]:
# One [recursive, chain, original] timing triple per metapath, in the same
# order as `metapaths`; tqdm shows progress over the sweep.
all_times = list()
for metapath in tqdm.tqdm(metapaths):
    all_times.append(equal_outputs(metapath, dense_threshold=1))

100%|██████████| 1172/1172 [08:37<00:00,  2.27it/s]


In [7]:
# One row per timed metapath; the column order matches the list returned by equal_outputs.
df = pd.DataFrame(all_times, columns=['recursive', 'chain', 'original'])

In [11]:
# Preview the first rows of the timing table (elapsed seconds per implementation).
df.head()

Unnamed: 0,recursive,chain,original
0,0.012481,0.005083,0.00486
1,0.00592,0.006678,0.005424
2,0.010085,0.010011,0.010484
3,0.012289,0.011549,0.011183
4,0.012842,0.011379,0.011887


In [8]:
from scipy import stats

In [33]:
# Mean elapsed seconds per implementation across all timed metapaths.
df.mean()

recursive    0.192078
chain        0.060468
original     0.186719
dtype: float64

In [9]:
# Related-samples t-test: each row holds timings of the same metapath under
# both implementations, so the recursive and original columns are paired.
stats.ttest_rel(df['recursive'], df['original'])

Ttest_relResult(statistic=34.77167885366179, pvalue=1.4490807657665431e-182)

In [10]:
# Related-samples t-test: each row holds timings of the same metapath under
# both implementations, so the chain and original columns are paired.
stats.ttest_rel(df['chain'], df['original'])

Ttest_relResult(statistic=-14.92654217431387, pvalue=2.979717495601953e-46)

## Example of a typically long computation

In [27]:
%%timeit

# Original dwwc on metapath 'CbGeAeGaD' with dense_threshold=0; the result is discarded, only the run time matters.
_ = hetmech.degree_weight.dwwc(hetmat, 'CbGeAeGaD', dense_threshold=0)

2.76 s ± 21.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [28]:
%%timeit

# Original dwwc on metapath 'CbGeAeGaD' with dense_threshold=1; the result is discarded, only the run time matters.
_ = hetmech.degree_weight.dwwc(hetmat, 'CbGeAeGaD', dense_threshold=1)

2.54 s ± 6.85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [29]:
%%timeit

# dwwc_recursive on metapath 'CbGeAeGaD' with dense_threshold=0; the result is discarded, only the run time matters.
_ = hetmech.degree_weight.dwwc_recursive(hetmat, 'CbGeAeGaD', dense_threshold=0)

3.01 s ± 161 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [30]:
%%timeit

# dwwc_recursive on metapath 'CbGeAeGaD' with dense_threshold=1; the result is discarded, only the run time matters.
_ = hetmech.degree_weight.dwwc_recursive(hetmat, 'CbGeAeGaD', dense_threshold=1)

2.6 s ± 19.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [31]:
%%timeit

# dwwc_chain on metapath 'CbGeAeGaD' with dense_threshold=0; the result is discarded, only the run time matters.
_ = hetmech.degree_weight.dwwc_chain(hetmat, 'CbGeAeGaD', dense_threshold=0)

2.15 s ± 49.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
%%timeit

# dwwc_chain on metapath 'CbGeAeGaD' with dense_threshold=1; the result is discarded, only the run time matters.
_ = hetmech.degree_weight.dwwc_chain(hetmat, 'CbGeAeGaD', dense_threshold=1)

330 ms ± 26.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
