# Compute DWPCs for all metapaths

In [1]:
import csv
import itertools
import numpy
import pandas
import pathlib
import scipy.sparse
import tqdm

import hetmech.degree_weight
from hetmech.hetmat import HetMat
import hetmech.matrix
import hetmech.degree_group
import pipeline

In [2]:
# Open the on-disk HetMat stored at this path (relative to the current working directory).
hetmat = HetMat('../../data/hetionet-v1.0.hetmat/')
# Make hetmech's recursive DWWC implementation the module-wide default method.
hetmech.degree_weight.default_dwwc_method = hetmech.degree_weight.dwwc_recursive

All metapaths up to length 2, saving path count (PC) and DWPC matrices to disk, for the actual hetnet.

In [3]:
# Ask the metagraph for metapaths of length <= 2 for the 'Disease' metanode
# (presumably metapaths starting from Disease -- confirm against extract_metapaths).
metapaths = hetmat.metagraph.extract_metapaths('Disease', max_length=2)
# Display how many metapaths were extracted.
len(metapaths)

88

In [4]:
# Replace the metapath list with the single metapath 'GpBPpGiG', so every
# later loop over `metapaths` processes only this one.
metapaths = [hetmat.metagraph.get_metapath('GpBPpGiG')]

### Compute path counts

In [5]:
%%time
# Compute and save the damping=0 matrix (stored as uint64) for each metapath.
# Only the side effect of compute_save_dwpc is wanted here, so the returned
# matrix is dropped right away instead of lingering until the next iteration.
for metapath in tqdm.tqdm(metapaths):
    rows, cols, path_counts = pipeline.compute_save_dwpc(
        hetmat,
        metapath,
        damping=0,
        dtype='uint64',
    )
    del path_counts

100%|██████████| 1/1 [00:16<00:00, 16.01s/it]

CPU times: user 15.4 s, sys: 585 ms, total: 16 s
Wall time: 16 s





### Compute DWPCs

In [6]:
%%time
# Mean of each metapath's damping=0.5 DWPC matrix on `hetmat`,
# keyed by the tuple (metapath, 'dwpc', 0.5).
mean_dwpcs = {}
for metapath in tqdm.tqdm(metapaths):
    rows, cols, dwpcs = pipeline.compute_save_dwpc(
        hetmat,
        metapath,
        damping=0.5,
    )
    mean_dwpcs[metapath, 'dwpc', 0.5] = dwpcs.mean()
    # Drop the matrix before the next iteration allocates another one.
    del dwpcs

100%|██████████| 1/1 [00:26<00:00, 26.22s/it]

CPU times: user 24.1 s, sys: 2.14 s, total: 26.2 s
Wall time: 26.2 s





### Generate DGP files
One file per metapath per permutation

In [7]:
# For every permuted hetmat, write one degree-grouped TSV per metapath.
# Files that already exist are left untouched, so the cell can be re-run
# cheaply after an interruption.
for name, permat in tqdm.tqdm(hetmat.permutations.items()):
    # Attach a path-count cache (allocate_GB=16) only while this permutation
    # is being processed.
    permat.path_counts_cache = hetmech.hetmat.PathCountPriorityCache(permat, allocate_GB=16)
    try:
        for metapath in metapaths:
            path = permat.directory.joinpath(
                'degree-grouped-path-counts', 'dwpc-0.5', f'{metapath}.tsv')
            if path.exists():
                continue
            degree_grouped_df = hetmech.degree_group.single_permutation_degree_group(
                permat, metapath, dwpc_mean=mean_dwpcs[(metapath, 'dwpc', 0.5)], damping=0.5)
            path.parent.mkdir(parents=True, exist_ok=True)
            degree_grouped_df.to_csv(path, sep='\t')
    finally:
        # Detach the cache even when a metapath fails; in a long-lived kernel
        # the cache would otherwise stay attached to the permutation.
        permat.path_counts_cache = None

100%|██████████| 145/145 [00:00<00:00, 21547.35it/s]


### Create multi-permutation DGP summary metrics.
One file per metapath

In [8]:
# Write one multi-permutation summary TSV per metapath, skipping any metapath
# whose summary file is already on disk.
dgp_dir = hetmat.directory.joinpath(
    'adjusted-path-counts', 'dwpc-0.5', 'degree-grouped-permutations')
for metapath in metapaths:
    path = dgp_dir.joinpath(f'{metapath}.tsv')
    if path.exists():
        continue
    degree_stats_df = hetmech.degree_group.summarize_degree_grouped_permutations(
        hetmat, metapath, damping=0.5)
    path.parent.mkdir(parents=True, exist_ok=True)
    degree_stats_df.to_csv(path, sep='\t')

### Combine DWPC with DGP.
One file per metapath

In [9]:
# Load the degree-grouped permutation summary for the first metapath and
# index it by (source_degree, target_degree) for direct lookup per node pair.
path = hetmat.directory.joinpath(
    'adjusted-path-counts',
    'dwpc-0.5',
    'degree-grouped-permutations',
    f'{metapaths[0]}.tsv',
)
dgp_df = pandas.read_csv(path, sep='\t')
degree_columns = ['source_degree', 'target_degree']
dgp_dict = dgp_df.set_index(degree_columns).to_dict(orient='index')

# Generator of per-pair rows for the same metapath; consumed by the next cell.
dwpc_gen = hetmech.degree_group.dwpc_to_degrees(hetmat, metapaths[0])

In [None]:
%%time
# Write the adjusted DWPCs for the first metapath to a TSV file.
# Each row from `dwpc_gen` is joined with the permutation summary for its
# (source_degree, target_degree) pair, then given two derived columns:
#   r-dwpc = dwpc - mean
#   z-dwpc = r-dwpc / sd
# Rows with a zero DWPC or with z-dwpc below 0.5 are not written.
path = hetmat.directory.joinpath('adjusted-path-counts', 'dwpc-0.5',
                                 'adjusted-dwpcs', f'{metapaths[0]}-nonzero.tsv')
# Create the output directory if needed, as the other output cells do.
path.parent.mkdir(parents=True, exist_ok=True)

fieldnames = [
    'source_id', 'source_name', 'target_name', 'target_id', 'source_degree', 'target_degree',
    'dwpc', 'path-count', 'n', 'nnz', 'n_perms', 'mean', 'sd', 'r-dwpc', 'z-dwpc']
# Columns stored as float32 to keep the written values short.
float_columns = ('dwpc', 'r-dwpc', 'mean', 'sd', 'z-dwpc')

# newline='' lets the csv module control line endings itself.
with open(path, 'w', newline='') as csv_file:
    # extrasaction='ignore': the summary table may carry columns that are not
    # part of the output (e.g. a saved index column); skip them rather than fail.
    writer = csv.DictWriter(
        csv_file, delimiter='\t', fieldnames=fieldnames, extrasaction='ignore')
    writer.writeheader()
    for row in dwpc_gen:
        # Save only nonzero DWPC rows
        if row['dwpc'] == 0:
            continue
        row.update(dgp_dict[(row['source_degree'], row['target_degree'])])
        row['r-dwpc'] = row['dwpc'] - row['mean']
        # NOTE(review): an sd of 0 is not handled here -- confirm whether it
        # can occur for a degree pair and what the z-score should be then.
        row['z-dwpc'] = row['r-dwpc'] / row['sd']
        # Only save rows above z-threshold
        if row['z-dwpc'] < 0.5:
            continue
        # Convert only the float columns and keep every other column, so the
        # identifier and degree columns are written instead of left blank.
        for column in float_columns:
            row[column] = numpy.float32(row[column])
        writer.writerow(row)