# Permute Hetnets for Interpreting Compressed Latent Spaces

Modified from @dhimmel - https://github.com/dhimmel/integrate/blob/master/permute.ipynb

Generate several randomly permuted hetnets to serve as a null distribution. The permutations preserve node degree but randomize the connections between nodes. See [Himmelstein et al. 2017](https://doi.org/10.7554/eLife.26726) for more details.

In [1]:
import os
import pandas as pd

import hetio.readwrite
import hetio.permute

In [2]:
%%time
# Load the unpermuted hetnet; it is the starting point for the permutations
# generated further down. The path is relative to the working directory.
hetnet_path = os.path.join(
    'hetnets',
    'interpret_hetnet.json.bz2',
)
graph = hetio.readwrite.read_graph(hetnet_path)

CPU times: user 1min 18s, sys: 2.72 s, total: 1min 21s
Wall time: 1min 23s


In [3]:
# Permutation settings, selected as a result of
# `scripts/evaluate-permutations.ipynb`.

# Handed to `hetio.permute.permute_graph` as its `multiplier` argument
# (presumably scales how many edge swaps are attempted -- confirm in hetio docs).
num_swaps = 4

# How many permuted hetnets to generate and write to disk.
num_permuted_hetnets = 5

In [4]:
%%time
# Generate `num_permuted_hetnets` permuted hetnets, writing each one to disk and
# collecting the per-permutation statistics returned by hetio in `stat_dfs`.

# Create the output directory up front: the run takes hours, so a missing
# directory should not surface only when the first permuted hetnet is written.
permuted_dir = os.path.join('hetnets', 'permuted')
os.makedirs(permuted_dir, exist_ok=True)

stat_dfs = list()

# Each permutation starts from the previous permuted graph rather than from the
# original, so the permutations are chained.
permuted_graph = graph

# Permutations are numbered from 1. The number doubles as the random seed, the
# value of the `permutation` column, and the suffix of the output filename.
for i in range(1, num_permuted_hetnets + 1):
    print('Starting permutation', i)
    permuted_graph, stats = hetio.permute.permute_graph(permuted_graph,
                                                        multiplier=num_swaps,
                                                        seed=i)
    # Tag the stats with the permutation they belong to before collecting them
    stat_df = pd.DataFrame(stats)
    stat_df['permutation'] = i
    stat_dfs.append(stat_df)
    perm_path = os.path.join(permuted_dir,
                             'interpret_hetnet_perm-{}.json.bz2'.format(i))
    hetio.readwrite.write_graph(permuted_graph, perm_path)

Starting permutation 1
Starting permutation 2
Starting permutation 3
Starting permutation 4
Starting permutation 5
CPU times: user 4h 10min 45s, sys: 20.8 s, total: 4h 11min 6s
Wall time: 4h 24min 49s


In [5]:
# Combine the per-permutation statistics into a single table and save it as a
# tab-separated file, without the index and with floats formatted as '%.5g'.
stat_path = os.path.join('hetnets', 'permuted', 'stats.tsv')
stat_df = pd.concat(stat_dfs)
stat_df.to_csv(
    stat_path,
    sep='\t',
    index=False,
    float_format='%.5g',
)