In [1]:
import itertools

import hetmatpy.hetmat
import numpy
import numpy as np
import pandas
import scipy.sparse
import xswap

### 1. Generate 1000 permutations

In [2]:
class xswap_hetmat(hetmatpy.hetmat.HetMat):
    """HetMat subclass whose ``permute_graph`` shuffles edges with ``xswap``."""

    def permute_graph(self, num_new_permutations=None, namer=None, start_from=None,
                      multiplier=10, seed=0):
        """
        Generate and save permutations of the HetMat adjacency matrices.

        Each permutation is produced by running ``xswap.permute_edge_list`` on
        every metaedge (inverse metaedges excluded) of the starting HetMat and
        saving the resulting matrices into a newly initialized permutation
        directory.

        Parameters
        ----------
        num_new_permutations : int
            The number of new, permuted HetMats to generate. Required: the
            ``None`` default is only a placeholder and raises ``TypeError``.
        namer : generator
            Yields the names of new permutations. Cannot pass names of existing
            permutations. Defaults to '001', '002', ... counting up from 1.
        start_from : str or HetMat
            Permutation (name or HetMat) to use as starting point; ``None``
            starts from this HetMat. For multiple permutations, the first
            permutation starts from start_from, and future permutations
            continue from the previous one.
        multiplier : int
            Forwarded to ``xswap.permute_edge_list``; controls how many swap
            attempts are made (presumably multiplier * number of edges -
            confirm against the xswap documentation).
        seed : int
            Random seed for the first permutation. It is incremented by one
            after every permutation; all metaedges of one permutation share
            the same seed.

        Returns
        -------
        pandas.DataFrame
            One row per (permutation, metaedge) holding the statistics reported
            by xswap plus 'metaedge', 'abbrev' and 'permutation' columns. Empty
            when nothing was permuted.

        Raises
        ------
        TypeError
            If num_new_permutations is None.
        """
        if num_new_permutations is None:
            # Fail before any permutation directory is created, with a clearer
            # message than the one range(None) would produce.
            raise TypeError('num_new_permutations must be an int, not None')

        if namer is None:
            # If no namer given, name new permutations '001', '002', ...
            namer = (f'{x:03}' for x in itertools.count(start=1))

        # The set of metaedges does not change between permutations.
        metaedges = list(self.metagraph.get_edges(exclude_inverts=True))

        stat_dfs = list()
        for _ in range(num_new_permutations):
            permutation_name = next(namer)
            new_hetmat = hetmatpy.hetmat.initialize_permutation_directory(self, permutation_name)

            # Resolve the starting point to a HetMat object.
            if start_from is None:
                start_from = self
            elif isinstance(start_from, str):
                start_from = self.permutations[start_from]
            assert isinstance(start_from, hetmatpy.hetmat.HetMat)

            for metaedge in metaedges:
                # Only the matrix is needed; the row/column labels are unused.
                _rows, _cols, original_matrix = start_from.metaedge_to_adjacency_matrix(
                    metaedge, dense_threshold=1)
                original_matrix_coo = scipy.sparse.coo_matrix(original_matrix)
                edges = list(zip(original_matrix_coo.row, original_matrix_coo.col))
                permuted_edges, stats = xswap.permute_edge_list(
                    edges, allow_antiparallel=False, allow_self_loops=False, multiplier=multiplier,
                    seed=seed
                )
                assert permuted_edges != edges, f'permutation left {metaedge} unchanged'

                # Hand coo_matrix an explicit (row, col) pair instead of a bare
                # zip iterator; every permuted edge gets weight 1.
                permuted_row, permuted_col = zip(*permuted_edges)
                permuted_matrix_coo = scipy.sparse.coo_matrix(
                    (numpy.ones(len(permuted_edges)), (permuted_row, permuted_col)),
                    shape=original_matrix.shape)
                permuted_matrix = scipy.sparse.csc_matrix(permuted_matrix_coo)
                path = new_hetmat.get_edges_path(metaedge, file_format=None)
                hetmatpy.hetmat.save_matrix(permuted_matrix, path)

                stat_df = pandas.DataFrame([stats])
                stat_df['metaedge'] = metaedge
                stat_df['abbrev'] = metaedge.get_abbrev()
                stat_df['permutation'] = permutation_name
                stat_dfs.append(stat_df)

            # Register the new permutation, then chain the next one from it.
            self.permutations[permutation_name] = new_hetmat
            start_from = permutation_name
            seed += 1

        if not stat_dfs:
            # pandas.concat raises on an empty list (zero permutations requested
            # or a metagraph without metaedges).
            return pandas.DataFrame()
        return pandas.concat(stat_dfs)

In [3]:
%%time

# Zero-padded permutation names: '0001', '0002', ..., '1000'.
permutation_names = (f'{index:04d}' for index in range(1, 1001))

# HetMat rooted at the PPI data directory, using the xswap-based permuter.
hetmat = xswap_hetmat('../data/ppi_hetmat/')

# Generate the permutations and keep the per-metaedge swap statistics.
permutation_info_df = hetmat.permute_graph(
    namer=permutation_names,
    num_new_permutations=1000,
)

CPU times: user 2min 25s, sys: 1.38 s, total: 2min 26s
Wall time: 2min 27s


In [4]:
# Preview the first five rows of the permutation statistics.
permutation_info_df.head(n=5)

Unnamed: 0,duplicate,excluded,same_edge,self_loop,swap_attempts,undir_duplicate,metaedge,abbrev,permutation
0,3841,0,13,278,128750,4051,Protein - interacts - Protein,PiP,1
0,2571,0,9,255,102860,2690,Protein - interacts-pruned - Protein,PpP,1
0,3846,0,10,300,128750,4036,Protein - interacts - Protein,PiP,2
0,2527,0,14,210,102860,2842,Protein - interacts-pruned - Protein,PpP,2
0,3889,0,16,348,128758,4162,Protein - interacts - Protein,PiP,3
