In [1]:
import os
import warnings
import copy

import uuid
import random
from tqdm import tqdm

import networkx as nx

import itertools
import math
import pandas as pd
import numpy as np
from scipy.stats import skewnorm

import sys
sys.path.insert(1, '../../scripts/')
from simulation.graphs import graph_generator as gg_
from simulation import utils



In [2]:
def weight_bias(n, skew):
    '''Build the weights (scaled by 100) used to distribute entries across n bins.

    n: int
        Number of bins to weight
    skew: float
        Extent to which the weights are skewed. 0 gives every bin the same weight, 1 puts
        the entire weight on the last bin, and any other value weights the bins with a
        skew-normal density (a = 1) located at n with scale (1 - skew)*n, normalized to sum to 1.

    Returns a numpy array with one weight per bin, multiplied by 100.
    '''
    uniform = [1/n]*n
    n_bins = len(uniform)

    # evenly distributed
    if skew == 0:
        return np.array(uniform)*100

    # everything in the last bin (integer array, as no fractional weights are involved)
    if skew == 1:
        one_hot = np.array([0]*n_bins)
        one_hot[-1] = 1
        return one_hot*100

    # intermediate skew: evaluate the skew-normal density on an evenly spaced grid
    grid = np.linspace(0, n_bins, n_bins)
    density = skewnorm.pdf(grid, a = 1, loc = n_bins, scale = (1 - skew/1)*n_bins)
    return (density/sum(density))*100

class LR():
    '''Object to store metadata and relevant information for the ligand-receptor dimension of the tensor
    (for internal use)

    '''
    def __init__(self, B, ligands, receptors, edge_list, network_type = None, alpha = None, 
                 fit = None, comp = None, p = None):
        '''Initialize

        Parameters
        ----------

        B: nx.Graph
            a undirected bipartite network representing PPI between ligands and receptors (direction would always be L-->R)
        ligands: list 
            ligand IDs for each protein
        receptors: list
            receptor IDs for each protein
        edge_list: list
            each entry is a tuple representing a potential interaction between a ligand-receptor pair, ligands on 0 index of each tuple
        network_type: str
            "scale-free" indicates scale-free network, "normal" indicates a normal degree distribution;
            when None, the network is labelled as user-specified
        alpha: float
            scale-free exponent for network degree distribution (recommended 2<alpha<3)
        p: float
            probability of adding an edge when using network_type option = 'normal'
        fit: igraph.FittedPowerLaw
            scale-free network parameters for B_ig (p-value from Kolmogrov-Smirnov test)
        comp: pd.DataFrame or None
            summary of differences in network properties between  bipartite network and similar Barabasi network
        '''
        if network_type is None:
            self.network_type = 'user-speficied'
        else:
            self.network_type = network_type
        self.B = B
        self.ligands = ligands
        self.receptors = receptors
        self.edge_list = edge_list
        self.alpha = alpha
        self.fit = fit
        self.comp = comp
        self.p = p

    def generate_metadata(self, n_LR_cats = {2: 0}, cat_skew = 0):
        '''Generate metadata groupings for the L-R pairs. Categories are defined as distinct types of 
        metadata associated with the LR pair, e.g. "signaling pathway". Subcategories are
        the associated labels within a category, e.g. "growth" and "inflammation" within the "signaling pathway" category.

        Note: For skew, 0 means evenly distributed, 1 means all LR pairs fall into a single subcategory. 

        n_LR_cats: dict
            The length of the dictionary represents the total number of categories associated with the LR
            (currently at most one). Each key is an integer representing the number of subcategories for 
            the particular category. Each value is a float [0,1] indicating the skew of distribution of 
            LRs across subcategories within each category. 
        cat_skew: float [0,1]
            Skew of distribution of LRs across categories. Currently unused: the category weights are
            always computed with a skew of 0.

        Populates self.LR_metadata, a pd.DataFrame with columns 'LR_id', 'category' and 'subcategory'
        (one row per entry of self.edge_list).

        Raises ValueError if more than one category is requested.
        '''
        if len(n_LR_cats) > 1:
            raise ValueError('Currently, only one metadata category can be considered')

        # group each LR into the categories above
        # generate categories (random IDs)
        LR_categories = [str(uuid.uuid4()).split('-')[-1] for _ in range(len(n_LR_cats))]
        cat_bias = weight_bias(n = len(LR_categories), skew = 0)
        self.LR_metadata = pd.DataFrame(data = {'LR_id': self.edge_list, 
                        'category': random.choices(population = LR_categories, weights=cat_bias, 
                                                  k=len(self.edge_list))})

        # generate subcategories
        # the column is created with object dtype so that the string labels assigned below
        # do not require upcasting a float column (deprecated/rejected by recent pandas)
        self.LR_metadata['subcategory'] = pd.Series(float('nan'), index = self.LR_metadata.index, 
                                                    dtype = object)
        for category, (n_subcat, subcat_skew) in zip(LR_categories, n_LR_cats.items()):
            sub = self.LR_metadata[self.LR_metadata.category == category]
            subcat_bias = weight_bias(n = n_subcat, skew = subcat_skew)
            subcategories = [str(uuid.uuid4()).split('-')[-1] for _ in range(n_subcat)]
            self.LR_metadata.loc[sub.index, 'subcategory'] = random.choices(population = subcategories, 
                           weights=subcat_bias, k=sub.shape[0])

class CCI_MD():
    '''Generate the CCI network for the tensor slice at time point 0'''

    def cci_network(self, n_cells, directional = True):
        '''Initialize the cell-cell interaction network.

        n_cells: int
            the total number of cells to simulate; all cell-cell pairs will have a potential interaction, but only
            those that actually interact will have a score > 0 in the tensor slice
        directional: bool
            whether cell-cell interactions are directional (tuple of cell (A,B) indicates interaction from A-->B) or 
            not

        Populates self.cell_ids (list of random cell IDs) and self.cell_interactions (list of cell ID pairs:
        ordered pairs if directional, unordered pairs otherwise).
        '''
        # generate random cell ids
        self.cell_ids = [str(uuid.uuid4()).split('-')[-1] for _ in range(n_cells)]
        if directional:
            self.cell_interactions = list(itertools.permutations(self.cell_ids, 2))
        else:
            self.cell_interactions = list(itertools.combinations(self.cell_ids, 2))

    def generate_metadata(self, n_cell_cats = {2: 0}, cat_skew = 0, 
                         remove_homotypic = None):
        '''Generate metadata groupings for the cells (individual). Categories are defined as distinct types of 
        metadata associated with the cell or protein, e.g. "cell type" and "cell cycle phase". Subcategories are
        the associated labels within a category, e.g. "T-cell" and "dendritic cell" within the "cell type" category.

        Must be called after cci_network. Note: For skew, 0 means evenly distributed, 1 means all cells fall 
        into a single subcategory. 

        n_cell_cats: dict
            The length of the dictionary represents the total number of categories associated with the cell
            (currently at most one). Each key is an integer representing the number of subcategories for 
            the particular category. Each value is a float [0,1] indicating the skew of distribution of 
            cells across subcategories within each category. 
        cat_skew: float [0,1]
            Skew of distribution of cells across categories. Currently unused: the category weights are
            always computed with a skew of 0.
        remove_homotypic: int or None
            whether to remove homotypic interactions between cells by cell category; how many categories to consider; 
            must be <= the number of categories present in the metadata. None or 0 keeps all interactions.

        Populates self.cell_metadata (pd.DataFrame with columns 'cell_id', 'category', 'subcategory') and
        removes the self.cell_ids attribute. When homotypic interactions are removed, self.cell_interactions
        is filtered (original order preserved) and cells without any remaining interaction are dropped 
        from self.cell_metadata.

        Raises ValueError if more than one category is requested or remove_homotypic exceeds the 
        number of categories.
        '''
        if len(n_cell_cats) > 1:
            raise ValueError('Currently, only one metadata category can be considered')
        # remove_homotypic defaults to None, which cannot be compared with an integer
        if remove_homotypic is not None and remove_homotypic > len(n_cell_cats):
            raise ValueError('The value for "remove_homotypic" cannot be larger than the total number of categories associated with the cells')

        # group each cell into the categories above
        # generate categories (random IDs)
        cell_categories = [str(uuid.uuid4()).split('-')[-1] for _ in range(len(n_cell_cats))]
        cat_bias = weight_bias(n = len(cell_categories), skew = 0)
        cell_metadata = pd.DataFrame(data = {'cell_id': self.cell_ids, 
                        'category': random.choices(population = cell_categories, weights=cat_bias, 
                                                  k=len(self.cell_ids))})

        # generate subcategories
        # the column is created with object dtype so that the string labels assigned below
        # do not require upcasting a float column (deprecated/rejected by recent pandas)
        cell_metadata['subcategory'] = pd.Series(float('nan'), index = cell_metadata.index, dtype = object)
        for category, (n_subcat, subcat_skew) in zip(cell_categories, n_cell_cats.items()):
            sub = cell_metadata[cell_metadata.category == category]
            subcat_bias = weight_bias(n = n_subcat, skew = subcat_skew)
            subcategories = [str(uuid.uuid4()).split('-')[-1] for _ in range(n_subcat)]
            cell_metadata.loc[sub.index, 'subcategory'] = random.choices(population = subcategories, 
                           weights=subcat_bias, k=sub.shape[0])

        # the plain ID list is superseded by the metadata table
        self.cell_metadata = cell_metadata
        del self.cell_ids

        if remove_homotypic is not None and remove_homotypic > 0: # remove homotypic interactions of a given category
            print('Remove homotypic cell interactions for {} categories'.format(remove_homotypic))
            to_remove = set()
            for idx, category in enumerate(cell_categories):
                if idx >= remove_homotypic:
                    break
                sub = self.cell_metadata[self.cell_metadata.category == category]
                # cell ID --> subcategory lookup for the cells of this category
                subcat_of = dict(zip(sub.cell_id, sub.subcategory))

                # a pair is homotypic when both cells belong to the category and share a subcategory
                for ccp in self.cell_interactions:
                    if ccp[0] in subcat_of and ccp[1] in subcat_of and subcat_of[ccp[0]] == subcat_of[ccp[1]]:
                        to_remove.add(ccp)

            # remove homotypic interactions as identified above, keeping the original pair order
            self.cell_interactions = [ccp for ccp in self.cell_interactions if ccp not in to_remove]

            # filter out any cells that no longer are present 
            remaining = set(itertools.chain.from_iterable(self.cell_interactions))
            keep = self.cell_metadata.cell_id.isin(list(remaining))
            self.cell_metadata = self.cell_metadata[keep].reset_index(drop = True)

In [3]:
def fold_change_pattern(initial_value):
    '''The maximum change in the average LR score given the starting value.

    Starting values above 0.5 are mirrored around 0.5 and yield a negative change (a decrease);
    all other values yield a positive change. The magnitude is the (mirrored) starting value itself 
    when it is at least 0.2, and 0.2 otherwise (up to floating point rounding).
    '''
    mirrored = initial_value > 0.5
    if mirrored:
        # reflect around 0.5 so that high starting values decrease by the same rule
        initial_value = 0.5 - (initial_value - 0.5)

    # doubling for values >= 0.2, fixed step of 0.2 for smaller ones
    target = 2*initial_value if initial_value >= 0.2 else initial_value + 0.2
    magnitude = target - initial_value

    return -magnitude if mirrored else magnitude

def linear(x, n_conditions):
    '''Evenly spaced values from the initial value x[1] to x[1] + x[0] (the change), one per condition.'''
    change, start = x[0], x[1]
    ramp = np.linspace(start, start + change, n_conditions)
    return [value for value in ramp]

def pulse(x, n_conditions):
    '''Flat profile at the initial value x[1] with a central pulse of height x[0] (the change).

    The middle condition (the two middle conditions when n_conditions is even) is set to
    initial value + change. With 5 or more conditions, the conditions directly flanking the 
    middle are set to initial value + half of the change.
    '''
    delta = x[0]
    baseline = x[1]

    # central position(s) of the pulse
    half, is_odd = divmod(n_conditions, 2)
    peak_idx = [half] if is_odd else [half - 1, half]

    profile = [baseline for _ in range(n_conditions)]
    for idx in peak_idx:
        profile[idx] = baseline + delta

    # shoulders on either side of the peak, only when there is room for them
    if n_conditions >= 5:
        for idx in (peak_idx[0] - 1, peak_idx[-1] + 1):
            profile[idx] = baseline + (delta*0.5)
    return profile

def oscillate(x, n_conditions):
    '''Repeating rise-and-fall profile between the initial value x[1] and x[1] + x[0] (the change).

    One cycle is [initial, midpoint, initial + change, midpoint]; it is repeated until n_conditions
    values have been produced. With 3 or fewer conditions a single pulse is returned instead.
    '''
    osc_period = 3
    if n_conditions <= 3:
        return pulse(x, n_conditions)

    # rising half of the cycle, followed by the midpoint again on the way back down
    rising = list(np.linspace(x[1], x[1] + x[0], osc_period))
    one_cycle = rising + [rising[1]]

    waveform = []
    for value in itertools.cycle(one_cycle):
        waveform.append(value)
        if len(waveform) >= n_conditions:
            break
    return waveform

# dispatch table: pattern name --> function building the per-condition average scores
pattern_mapper = {'linear': linear, 'pulse': pulse, 'oscillate': oscillate}

def generate_pattern(x, n_conditions):
    '''Build the per-condition average scores for one pattern specification.

    x: sequence (e.g. list or a row pd.Series)
        first entry is the pattern name (a key of pattern_mapper); the remaining entries 
        (change, initial value) are passed on to the pattern function
    n_conditions: int
        number of conditions to generate values for

    Returns the output of the selected pattern function. Raises KeyError for an unknown pattern name.
    '''
    # work on a plain list: integer indexing of a string-labelled pd.Series relies on a
    # positional fallback that pandas has deprecated
    values = list(x)
    pattern = values[0]
    return pattern_mapper[pattern](values[1:], n_conditions)

class Simulate():
    '''Simulate ligand-receptor (LR) scores between cell pairs across conditions.

    Typical use: build the LR network (LR_network), attach cell metadata (set self.cci to a CCI_MD 
    object with generated metadata), choose the metadata groups that follow a pattern across 
    conditions (generate_tensor_md) and generate one tensor slice per condition (generate_tensor).
    '''
    def __init__(self):
        '''Initialize self

        '''
        self.cci = None

    def LR_network(self, network_type = None, B = None, subset = False, **params):
        '''
        Simulates a PPI network of *potential* ligand-receptor interactions, or extracts information
        from a user provided network. Defines one tensor dimension
        Caveats: for a scale-free network, the number of ligands = the number of receptors
                 for either network, there may be disconnected edges depending on "p"

        Parameters
        ----------
        network_type: str
             "scale-free" to generate a scale-free network or "normal" to generate a network with a normal degree distribution
        B: nx.Graph
            a user provided undirected, unweighted bipartite network. Assumes in B.nodes, ligands are listed
            before receptors. Takes precedence over network_type.
        subset: bool
            if B is provided and subset is true, this will take a random subset of the network, dropping disconnected nodes
            (of a specified size, specified in params)
        **params: dict (keys for each option specified below)
            the required parameters for generating a bipartite, undirected random network either scale-free or not.

            network_type = scale-free: keys - nodes, degrees, alpha, edges (see graphs.graph_generator.bipartite_sf for description);
                alpha defaults to 2 and edges is only forwarded when provided
            network_type = normal: keys - exactly n_ligands, n_receptors, p analogous to n,m,p in nx.bipartite.random_graph
            B != None: keys - n_ligands as described above
            subset = True: keys - 
                n_ligands as described above
                'subset_size' a value between (0,1) indicating the proportional
                size of the subset (by nodes) compared to the network
                'subset_type' either 'edges' or 'nodes' indicating whether to subset by removing nodes or edges
                (edges recommended because they maintain the scale-free property)

        Returns
        ----------
        self.LR: 
            populates LR object, key outputs outlined here
        self.LR.B: nx.Graph
            undirected bipartite graph with specified degree distribution (power or normal), or user specified B
            disconnected nodes are removed
        self.LR.edge_list: list
            each entry is a tuple representing a potential interaction between a ligand-receptor pair, ligands on 0 index of each tuple

        Raises
        ----------
        ValueError
            if required params are missing or neither a valid network_type nor B is given
        '''
        gg = gg_() # return networkx object for graphs
        user = False
        if B is not None: #untested
            user = True
            # properties checked when calling gg.nx_to_edgelist
            if network_type is not None:
                warnings.warn('You have specified a network type and provided a network, B will take priority over network type')
            if 'n_ligands' not in params:
                raise ValueError('For a provided B, you must specify n_ligands in params')

            if subset:
                if 'subset_size' not in params or 'subset_type' not in params:
                    raise ValueError('To subset B, you must provide a desired subset_size and subset_type')
                if params['subset_type'] == 'edges':
                    B = gg.subset_edges(B, subset_size = params['subset_size'], drop = True)
                elif params['subset_type'] == 'nodes': 
                    B = gg.subset_nodes(B, subset_size = params['subset_size'], drop = True)
                else:
                    raise ValueError("The subset_type param must be either 'edges' or 'nodes'")

        elif network_type == 'scale-free': 
            if 'degrees' not in params or 'nodes' not in params:
                raise ValueError('Must specify degrees and nodes in **params')
            if 'alpha' not in params: 
                params['alpha'] = 2 # also default in gg obj, didn't make it a **kwarg
            sf_kwargs = {'nodes': params['nodes'], 'degrees': params['degrees'], 'alpha': params['alpha']}
            if 'edges' in params:
                # forward the user-provided value (not the literal key name)
                sf_kwargs['edges'] = params['edges']
            B, node_groups, fit, comp = gg.bipartite_sf(**sf_kwargs)
            B = B['nx']
            params['n_ligands'] = params['nodes'] # same no. of ligands and receptors
        elif network_type == 'normal':
            if sorted(params) != ['n_ligands', 'n_receptors', 'p']:
                raise ValueError('Must specify n_ligands, n_receptors, and p in **params')
            B = nx.bipartite.random_graph(params['n_ligands'], params['n_receptors'], params['p'])
        else:
            raise ValueError('Must specify an appropriate network_type or provide a network B')

        B, edge_list, ng = gg.nx_to_edgelist(B, params['n_ligands']) # formatting/extract info

        # store PPI information in LR()
        if user:
            self.LR = LR(B, ng['1'], ng['2'], edge_list)
        elif network_type == 'scale-free':
            self.LR = LR(B, ng['1'], ng['2'], edge_list, network_type = network_type, 
                         alpha = params['alpha'], fit = fit, comp = comp)
        elif network_type == 'normal':
            self.LR = LR(B, ng['1'], ng['2'], edge_list, network_type = network_type, p = params['p'])

    def emulate_sf_network(self, G):
        '''Emulate a user-provided (recommended scale-free) network for L-R pair tensors dimension

        Parameters
        ----------
        G: nx.Graph
            user-provided network (recommended scale-free)

        Returns
        ---------
        G2: nx.Graph
            random bipartite scale-free network built using G's properties

        '''
        gg = gg_()
        fit = gg.power_fit(G)
        if fit.p < 0.05:
            warnings.warn('Input network is not scale-free')
        print('----Simulated network------')

        G2, node_groups, fit2, comp = gg.bipartite_sf(nodes = round(len(G.nodes)), # should be 1/2 the nodes, but many are disconnected 
                                 degrees = np.median([i[1] for i in G.degree]),
                                 alpha = fit.alpha, edges = len(G.edges),
                                 check_properties = True, compare_barabasi = False)
        G2 = G2['nx']
        gg.drop_disconnected_nodes(G2)
        return G2

    def generate_tensor_md(self, n_patterns, n_conditions, patterns = ['pulse', 'linear', 'oscillate']):
        '''Generates cell-LR metadata pairs for tensor slices.

        Requires self.LR (with LR_metadata generated) and self.cci (with cell_metadata generated).

        Parameters
        ----------
        n_patterns: int (> 0)
            the number of CC - LR metadata pairs for which to form distinct interactions 
            the remaining background will default to 0, with noise increasing this value
            the groups with distinct interactions will each have distinct average values spaced b/w (0,1]
        n_conditions: int (> 2)
            the number of conditions across which to generate tensor slices; smaller values are
            replaced by 3 with a warning
        patterns: list
            list of strings, each of which should be included as a potential pattern for a given cell 
            metadata - LR metadata pair. Options: ['pulse', 'linear', 'oscillate']. None selects all options.
            The provided list is not modified.

        Returns
        ---------
        self.clrm: pd.DataFrame
            a list of metadata CC-LR pairs for which patterns of scores will change across conditions
            alongside the expected average score for each condition. The 'ts_coordinates' column holds,
            for each pair, [row positions, column positions] of the tensor slice block (LR pairs of the
            LR subcategory x cell pairs of the cell subcategory pair) that follows the pattern.
        self.ts_frame: pd.DataFrame
            empty template of a tensor slice (rows: LR pairs, columns: cell-cell pairs)
        '''
        #checks------------------------------------------------------------------------------------------------
        if n_conditions <= 2:
            warnings.warn('At least 3 conditions are required; setting n_conditions to 3')
            n_conditions = 3
        self.n_conditions = n_conditions

        allowed_patterns = ['pulse', 'linear', 'oscillate']
        if patterns is not None:
            if len(set(patterns).difference(allowed_patterns)) > 0:
                raise ValueError('Patterns can only include: ' + ', '.join(allowed_patterns))
            # work on a copy: the list is shuffled below and must not alter the caller's
            # list nor the shared default argument
            patterns = list(patterns)
            if len(patterns) == 0:
                raise ValueError('Patterns can only include: ' + ', '.join(allowed_patterns))
        else:
            patterns = allowed_patterns        
        #------------------------------------------------------------------------------------------------

        # all possible LR groups that have patterns of expression
        lr_subcats = self.LR.LR_metadata.subcategory.unique()
        lr_group = list()
        for size in range(1, len(lr_subcats) + 1):
            lr_group += list(itertools.combinations(lr_subcats, size))

        # all possible cell groups that have patterns of expression
        ccat_map = dict(zip(self.cci.cell_metadata.cell_id, self.cci.cell_metadata.subcategory))
        ccati = [(ccat_map[ci[0]], ccat_map[ci[1]]) for ci in self.cci.cell_interactions]
        ccati = pd.Series(ccati).unique()

        # all possible groups of cell metadata - LR metadata pairs
        pairs = list(itertools.product(ccati, lr_group))
        clrm = pd.DataFrame({'cell_subcat': [pair[0] for pair in pairs], 
                             'LR_subcat': [pair[1] for pair in pairs]}, 
                            columns = ['cell_subcat', 'LR_subcat'])

        if n_patterns > clrm.shape[0]:
            warnings.warn('More patterns than possible specificed, setting to maximum possible: {}'.format(clrm.shape[0]))
            n_patterns = clrm.shape[0]
        if n_patterns < 1:
            raise ValueError('n_patterns must be at least 1 and at least one cell-LR metadata pair must exist')

        # randomly choose the pairs that will follow a pattern (kept in their original order)
        chosen = sorted(random.sample(clrm.index.tolist(), k = n_patterns))
        clrm = clrm.loc[chosen].reset_index(drop = True)
        # only the first LR subcategory of each group is used
        clrm['LR_subcat'] = clrm['LR_subcat'].apply(lambda x: x[0])

        self.clrm = clrm
        self.n_patterns = n_patterns
        self.ts_frame = pd.DataFrame(columns = self.cci.cell_interactions, index = self.LR.edge_list)

        # sort metadata categories along the tensor slice axes
        LR_map = dict(zip(self.LR.LR_metadata.LR_id, self.LR.LR_metadata.subcategory))
        lrcats = [LR_map[lri] for lri in self.ts_frame.index]
        ccats = [(ccat_map[ci[0]], ccat_map[ci[1]]) for ci in self.ts_frame.columns]

        # get tensor slice coordinates for CC-LR pairs with expected patterns:
        # [row positions (LR pairs), column positions (cell-cell pairs)] of the full block
        def get_coords(i):
            lr_target = self.clrm.loc[i, 'LR_subcat']
            cc_target = self.clrm.loc[i, 'cell_subcat']
            rows = tuple(k for k, cat in enumerate(lrcats) if cat == lr_target)
            cols = tuple(k for k, cat in enumerate(ccats) if cat == cc_target)
            return [rows, cols]
        self.clrm['ts_coordinates'] = [get_coords(i) for i in self.clrm.index]

        # initial value: evenly spaced in (0, 1]
        self.clrm['0'] = [(k + 1)/self.n_patterns for k in range(self.n_patterns)]

        # patterns over time
        ap = list()
        for _ in range(math.ceil(self.n_patterns/len(patterns))):
            random.shuffle(patterns)
            ap += patterns
        self.clrm.insert(3, 'pattern', ap[:self.n_patterns])

        condition_cols = [str(i) for i in range(self.n_conditions)]
        self.clrm = pd.concat([self.clrm,
                  pd.DataFrame(index = self.clrm.index, columns = condition_cols[1:])], axis = 1)
        self.clrm.insert(3, 'change', self.clrm['0'].apply(fold_change_pattern))

        # apply patterns to get averages across conditions
        self.clrm[condition_cols] = self.clrm[['pattern', 'change', '0']].apply(generate_pattern, args = (self.n_conditions,), axis = 1).tolist()

    def generate_tensor(self, noise, binary = False):
        '''Generates the tensor.

        Requires generate_tensor_md to have been called first.

        Parameters
        ----------
        noise: float [0,1]
            extent from which to perturb scores from the expected average value, including background
        binary: bool
            whether to have scores be continuous b/w [0,1] (False) or binary (True). Binary scoring not currently 
            implemented: True triggers a warning and continuous scores are generated

        Returns
        ---------
        self.ts: dictionary
            keys are labels for each condition (0 through n_conditions-1). Values are tensor slices with
            columns as cell-cell pairs and rows as ligand-receptor pairs
        '''
        if binary:
            warnings.warn('Only continuous scoring is currently implemented')
            binary = False

        conditions = [str(i) for i in range(self.n_conditions)]
        shape = self.ts_frame.shape

        # slices are built as float arrays and wrapped into data frames at the end
        slices = dict()

        # generate the background
        print('Generate background noise')
        if noise == 0:
            for cond in conditions:
                slices[cond] = np.zeros(shape, dtype = float)
        else:
            # background will have largest average = minimum across all conditions
            min_val = self.clrm[conditions].min().min()
            scale = min_val/np.array([utils.piecewise_fit(min_val, *utils.fit_params)])[0]
            for cond in tqdm(conditions):
                vals = utils.get_truncated_normal(n = shape[0]*shape[1], 
                                                  sd = noise*min_val, mean = 0)*scale
                slices[cond] = np.array(vals, dtype = float).reshape(shape)

        # add the scores of the CC-LR metadata pairs that follow a pattern
        for cond in tqdm(conditions):
            for i in self.clrm.index:
                avg_val = self.clrm.loc[i, cond]
                rows, cols = self.clrm.loc[i, 'ts_coordinates']
                if len(rows) == 0 or len(cols) == 0:
                    continue
                block = np.ix_(rows, cols) # every LR pair x cell pair combination of the group

                if noise == 0:
                    slices[cond][block] = avg_val
                else:
                    vals = utils.get_truncated_normal(n = len(rows)*len(cols), 
                                                      sd = noise*avg_val, mean = avg_val)
                    slices[cond][block] = np.asarray(vals, dtype = float).reshape(len(rows), len(cols))

        self.ts = {cond: pd.DataFrame(slices[cond], index = self.ts_frame.index, 
                                      columns = self.ts_frame.columns) for cond in conditions}

    def copy(self):
        '''Return a deep copy of the simulation object.'''
        return copy.deepcopy(self)

In [4]:
# initialize one simulation object per network type
sim_sf = Simulate() 
sim_norm = Simulate()

# simulate a randomly connected ligand-receptor network (potential interactions)
sim_sf.LR_network(network_type = 'scale-free', **{'nodes': 1000, 'degrees': 3, 'alpha': 2}) #scale-free
sim_norm.LR_network(network_type = 'normal', **{'n_ligands': 500, 'n_receptors': 500, 'p': 0.5}) # normally distributed
# from here on proceed with the scale-free network
sim = sim_sf

# LR metadata: 1 category with 3 subcategories and 0 skew
sim.LR.generate_metadata(n_LR_cats = {3: 0}, cat_skew = 0)

# cell metadata
cci = CCI_MD()
cci.cci_network(n_cells = 50, directional = False)
# generate 1 metadata category with 3 subcategories and 0 skew; the overall skew of categories is 0
# homotypic interactions (same subcategory) are removed for that category
cci.generate_metadata(n_cell_cats = {3: 0}, cat_skew = 0, remove_homotypic = 1)
# add metadata to simulation object
sim.cci = cci

# generate n_patterns metadata groups of CC-LR pairs that change across n_conditions
# these changes can either be linear, oscillating, or a pulse
sim.generate_tensor_md(n_patterns = 4, n_conditions = 12, patterns = ['pulse', 'linear', 'oscillate'])

# generate the tensor slices (one per condition) with continuous LR scores and baseline noise
sim.generate_tensor(noise = 0.05, binary = False)




/home/hratch/Projects/cci_dt/notebooks/simulation/tmp_57_bao7_bipartite_sf.csv
Generate undirected, bipartite, scale-free graph
Check network properties
All properties are as expected




Remove homotypic cell interactions for 1 categories


  0%|          | 0/12 [00:00<?, ?it/s]

Generate background noise


100%|██████████| 12/12 [00:05<00:00,  2.30it/s]
100%|██████████| 12/12 [00:00<00:00, 267.40it/s]


In [None]:
# Senders are rows and receivers are columns; each slice of the 3D tensor is an LR pair,
# and the fourth tensor dimension is the condition.