In [None]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from functions import intercell_networks, utility_functions
import importlib
from IPython.display import display
from joblib import Parallel, delayed
import itertools

In [None]:
# Data folders, all relative to the notebook location:
#   raw_data_dir      - input tables read as-is (e.g. tissue_match.csv)
#   gene_exp_dir      - per-dataset gene expression records
#   org_pairs_dir     - organotropism pairs
#   intercell_net_dir - intercellular graphs, networks and network stats
raw_data_dir, gene_exp_dir, org_pairs_dir, intercell_net_dir = (
    '../data/raw/',
    '../data/processed/gene_expression/',
    '../data/processed/organotropism_pairs/',
    '../data/processed/intercell_networks/',
)

In [None]:
# Labels that drive every loop in this notebook.
metastasis_datasets = [
    'autopsy',
    'hcmdb',
]
# Tissue expression sources; each one is also a sub-folder of gene_exp_dir.
tissue_datasets = [
    'gtex',
    'consensus',
]
# Interaction graph variants: every interaction vs. the curated subset.
network_types = [
    'all',
    'curated',
]

# Tissue labels & match

**Tissue id for file naming**
* We will set an index column to create an integer id for each tissue.
* The id is dependent on the tissue database so the same number might not correspond to a similar tissue in both databases.
* This will allow us to name the intercellular network files in a simple and unambiguous way.

In [None]:
# Load the tissue matching table. reset_index() turns the row number into an
# 'index' column (the integer tissue id) before 'tissue' becomes the index.
tissues = (
    pd.read_csv(raw_data_dir+'tissue_match.csv')
    .reset_index()
    .set_index('tissue')
)
tissues.head()

In [None]:
# Two intercellular interaction datasets are used, keyed by network type:
#   'all'     - all interactions            (intercell_graph.csv)
#   'curated' - manually curated only       (intercell_curated_graph.csv)

intercell_graph = {}
for net_type in network_types:
    # the 'all' graph has no network-type infix in its file name
    label = '_' if net_type == 'all' else f'_{net_type}_'

    intercell_graph[net_type] = pd.read_csv(intercell_net_dir+f'intercell{label}graph.csv')
    print(f'{net_type}: {intercell_graph[net_type].shape[0]} interactions')

# Computing intercellular interactions between tissue pairs
This part of our work is the first step towards building tissue-specific PPI networks. Our hypothesis is that intercellular interactions established between metastasizing cells and the cells of the pre-metastatic niche are essential for metastasis development. We therefore expect to find more intercellular interactions between organotropism pairs than between control pairs.

Workflow:
* import tissue pairs, gene expression calls, intercell interactions data
* select intercell interactions genes
* compute number of intercell interactions between tissue pairs using expression calls
* compute weighted (normalized by the max value) intercell interactions between tissue pairs 

# Intercellular interactions networks with gene calls

## Ungrouped Tissue Networks

In [None]:
# Build the intercellular networks for ungrouped tissues, one output
# directory per (network type, tissue dataset) combination.

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):
    # gene x tissue matrix of the selected expression call
    calls = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/records.csv')
        .pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    )
    # keep positive calls only: every value other than 1 becomes NaN
    calls = calls.where(calls==1)

    for net_type in tqdm(network_types, desc='network_type'):

        directory = intercell_net_dir+f'{net_type}/{tissue_dataset}/ungrouped'
        # presumably creates the directory when missing -- confirm in utility_functions
        utility_functions.check_dir(directory)

        # integer tissue id plus this dataset's tissue label, unmatched tissues removed
        dataset_tissues = tissues[['index', tissue_dataset]].dropna()

        intercell_networks.build_intercell_networks(
            dataset_tissues,
            calls,
            intercell_graph[net_type],
            tissue_column=tissue_dataset,
            directory=directory,
            sep='_'
        )

## Grouped Tissue Networks

In [None]:
# Build the intercellular networks for grouped tissues, one output
# directory per (network type, tissue dataset) combination.

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):
    # gene x tissue matrix of the selected expression call (grouped records)
    calls = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
        .pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    )
    # keep positive calls only: every value other than 1 becomes NaN
    calls = calls.where(calls==1)

    for net_type in tqdm(network_types, desc='network_type'):

        directory = intercell_net_dir+f'{net_type}/{tissue_dataset}/grouped'
        # presumably creates the directory when missing -- confirm in utility_functions
        utility_functions.check_dir(directory)

        intercell_networks.build_grouped_intercell_networks(
            calls,
            intercell_graph[net_type],
            directory=directory,
            sep='-'
        )

## Controlled comparison network stats
In the controlled comparison we will not be using the grouped tissue networks, since each tissue/organ appears the same number of times in the organotropism and control groups. That means the number of sub-tissues is balanced between groups and does not skew the results.

### Compute number of intercellular interactions for each tissue pair

In [None]:
# Count the intercellular interactions of every tissue pair, reading the
# ungrouped networks written for each (network type, tissue dataset).

stats_records = []

for net_type in tqdm(network_types, desc='network_type'):
    for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

        directory = intercell_net_dir+f'{net_type}/{tissue_dataset}/ungrouped'
        # labels passed along so each record can be traced to its origin
        labels = [
            ('interactions', net_type),
            ('tissue_dataset', tissue_dataset)
        ]
        stats_records.extend(
            intercell_networks.compute_intercell_interactions(
                directory, extra_labels=labels)
        )

network_stats = pd.DataFrame(stats_records)
network_stats.head(2)

In [None]:
# Separate the undirected and directed views of the stats table.
# Removing the direction-specific columns leaves repeated rows in the
# undirected view, hence the drop_duplicates.
directed_only_columns = ['directed_interactions', 'direction']
network_stats_undir = (
    network_stats
    .drop(columns=directed_only_columns)
    .drop_duplicates(ignore_index=True)
)
network_stats_dir = network_stats.drop(columns='simple_interactions')
network_stats_undir.head(2)

In [None]:
# To cover every ordered tissue pair, build mirrored frames in which the
# cancer and metastasis tissue columns swap names.
swap_tissue_columns = {
    'cancer_tissue':'metastasis_tissue',
    'metastasis_tissue':'cancer_tissue',
}
network_stats_undir_rev = network_stats_undir.copy().rename(columns=swap_tissue_columns)

# directed interactions additionally need their direction label flipped
network_stats_dir_rev = network_stats_dir.copy().rename(columns=swap_tissue_columns)
display(network_stats_dir_rev.head())  # before the flip
was_c_to_m = network_stats_dir_rev['direction'].eq('c_to_m')
network_stats_dir_rev['direction'] = np.where(was_c_to_m, 'm_to_c', 'c_to_m')
network_stats_dir_rev.head()

In [None]:
# Stack each original frame on top of its mirrored counterpart.
undir_frames = [network_stats_undir, network_stats_undir_rev]
dir_frames = [network_stats_dir, network_stats_dir_rev]

network_stats_undir_network = pd.concat(undir_frames, ignore_index=True)
network_stats_dir_network = pd.concat(dir_frames, ignore_index=True)

network_stats_dir_network.head(2)

### Compute Jaccard index for each tissue pair

The Jaccard index, also known as the Jaccard similarity coefficient, is a statistic used for gauging the similarity and diversity of sample sets:
$$
J(C,M)=\frac{|C\cap{M}|}{|C\cup{M}|}=\frac{|C\cap{M}|}{|C|+|M|-|C\cap{M}|},
$$

$|C\cap{M}|:$ number of intercellular interactions between cancer (C) and metastasis (M) tissues

$|C\cup{M}|:$ total number of intercellular interactions that cancer (C) and metastasis (M) tissues can form

#### Undirected jaccard

In [None]:
# Jaccard index of every (cancer, metastasis) tissue pair, undirected
# interactions, ungrouped tissues.
pair_columns = ['cancer_tissue', 'metastasis_tissue', 'simple_interactions']
jaccard_records = []

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue call matrix (zeros are kept here)
    calls = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/records.csv')
        .pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    )

    for net_type in tqdm(network_types, desc='network_type'):

        graph = intercell_graph[net_type]

        # rows of the stats table for this dataset / network type
        in_scope = (
            network_stats_undir_network.tissue_dataset.eq(tissue_dataset)
            & network_stats_undir_network.interactions.eq(net_type)
        )
        unique_pairs = (
            network_stats_undir_network.loc[in_scope, pair_columns]
            .drop_duplicates()
            .values
        )

        for pair in tqdm(unique_pairs, desc='pairs'):

            # first two entries are the tissues, third the observed interaction count
            tissue_pair, intersection = pair[:2], pair[2]
            pair_jaccard = intercell_networks.jaccard_index(
                tissue_pair, calls, graph, intersection)

            jaccard_records.append({
                'cancer_tissue': tissue_pair[0],
                'metastasis_tissue': tissue_pair[1],
                'tissue_dataset': tissue_dataset,
                'interactions': net_type,
                'jaccard': pair_jaccard,
            })

jaccard = pd.DataFrame(jaccard_records)
jaccard.head()

In [None]:
# Attach the Jaccard index to the undirected stats; the left join keeps
# every stats row even when no Jaccard value was computed for it.
jaccard_keys = ['cancer_tissue', 'metastasis_tissue', 'tissue_dataset', 'interactions']
undir_stats_jaccard = network_stats_undir_network.merge(
    jaccard, on=jaccard_keys, how='left')
undir_stats_jaccard.head(2)

In [None]:
# Persist the undirected, ungrouped stats (row index not written).
undir_stats_jaccard.to_csv(
    path_or_buf=intercell_net_dir + 'undirected_network_stats.csv',
    index=False,
)

#### Directed jaccard

In [None]:
# Jaccard index of every (cancer, metastasis) tissue pair and direction,
# ungrouped tissues.
pair_columns = ['cancer_tissue', 'metastasis_tissue', 'directed_interactions']
jaccard_records = []

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue call matrix (zeros are kept here)
    calls = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/records.csv')
        .pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    )

    for net_type in tqdm(network_types, desc='network_type'):

        graph = intercell_graph[net_type]

        for direction in tqdm(network_stats_dir_network.direction.unique(), desc='direction'):

            # rows of the stats table for this dataset / network type / direction
            in_scope = (
                network_stats_dir_network.tissue_dataset.eq(tissue_dataset)
                & network_stats_dir_network.interactions.eq(net_type)
                & network_stats_dir_network.direction.eq(direction)
            )
            unique_pairs = (
                network_stats_dir_network.loc[in_scope, pair_columns]
                .drop_duplicates()
                .values
            )

            for pair in tqdm(unique_pairs):
                # first two entries are the tissues, third the observed interaction count
                tissue_pair, intersection = pair[:2], pair[2]
                pair_jaccard = intercell_networks.jaccard_index(
                    tissue_pair, calls, graph, intersection, direction=direction)

                jaccard_records.append({
                    'cancer_tissue': tissue_pair[0],
                    'metastasis_tissue': tissue_pair[1],
                    'tissue_dataset': tissue_dataset,
                    'interactions': net_type,
                    'direction': direction,
                    'jaccard': pair_jaccard,
                })

jaccard = pd.DataFrame(jaccard_records)
jaccard.head()

In [None]:
# Attach the Jaccard index to the directed stats; the left join keeps
# every stats row even when no Jaccard value was computed for it.
jaccard_keys = [
    'cancer_tissue', 'metastasis_tissue', 'tissue_dataset', 'interactions', 'direction']
dir_stats_jaccard = network_stats_dir_network.merge(
    jaccard, on=jaccard_keys, how='left')
dir_stats_jaccard.head(2)

In [None]:
# Persist the directed, ungrouped stats (row index not written).
dir_stats_jaccard.to_csv(
    path_or_buf=intercell_net_dir + 'directed_network_stats.csv',
    index=False,
)

## Cancer-wise Comparison network stats
In the cancer-wise analysis we need to use the grouped tissues. Since we are correlating network stats with frequency of metastasis, organs with several tissues can influence the final result  

### Compute number of intercellular interactions for each tissue pair

In [None]:
# Count the intercellular interactions of every tissue pair, reading the
# grouped networks written for each (network type, tissue dataset).
importlib.reload(intercell_networks)  # pick up edits made to the module

stats_records = []

for net_type in tqdm(network_types, desc='network_type'):
    for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

        directory = f'{intercell_net_dir}{net_type}/{tissue_dataset}/grouped'

        # labels passed along so each record can be traced to its origin
        labels = [
            ('interactions', net_type),
            ('tissue_dataset', tissue_dataset)
        ]
        stats_records.extend(
            intercell_networks.compute_intercell_interactions(
                directory, extra_labels=labels)
        )

network_stats = pd.DataFrame(stats_records)
network_stats.head(2)

In [None]:
# Separate the undirected and directed views of the grouped stats table.
# Removing the direction-specific columns leaves repeated rows in the
# undirected view, hence the drop_duplicates.
directed_only_columns = ['directed_interactions', 'direction']
network_stats_undir = (
    network_stats
    .drop(columns=directed_only_columns)
    .drop_duplicates(ignore_index=True)
)
network_stats_dir = network_stats.drop(columns='simple_interactions')
network_stats_undir.head(2)

In [None]:
# To cover every ordered tissue pair, build mirrored frames in which the
# cancer and metastasis tissue columns swap names.
swap_tissue_columns = {
    'cancer_tissue':'metastasis_tissue',
    'metastasis_tissue':'cancer_tissue',
}
network_stats_undir_rev = network_stats_undir.copy().rename(columns=swap_tissue_columns)

# directed interactions additionally need their direction label flipped
network_stats_dir_rev = network_stats_dir.copy().rename(columns=swap_tissue_columns)
display(network_stats_dir_rev.head())  # before the flip
was_c_to_m = network_stats_dir_rev['direction'].eq('c_to_m')
network_stats_dir_rev['direction'] = np.where(was_c_to_m, 'm_to_c', 'c_to_m')
network_stats_dir_rev.head()

In [None]:
# Stack each original frame on top of its mirrored counterpart.
undir_frames = [network_stats_undir, network_stats_undir_rev]
dir_frames = [network_stats_dir, network_stats_dir_rev]

network_stats_undir_network = pd.concat(undir_frames, ignore_index=True)
network_stats_dir_network = pd.concat(dir_frames, ignore_index=True)

network_stats_dir_network.head(2)

### Compute Jaccard index for each tissue pair

The Jaccard index, also known as the Jaccard similarity coefficient, is a statistic used for gauging the similarity and diversity of sample sets:
$$
J(C,M)=\frac{|C\cap{M}|}{|C\cup{M}|}=\frac{|C\cap{M}|}{|C|+|M|-|C\cap{M}|},
$$

$|C\cap{M}|:$ number of intercellular interactions between cancer (C) and metastasis (M) tissues

$|C\cup{M}|:$ total number of intercellular interactions that cancer (C) and metastasis (M) tissues can form

#### Undirected jaccard

In [None]:
# Jaccard index of every (cancer, metastasis) tissue pair, undirected
# interactions, grouped tissues.
importlib.reload(intercell_networks)  # pick up edits made to the module

pair_columns = ['cancer_tissue', 'metastasis_tissue', 'simple_interactions']
jaccard_records = []

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue call matrix from the grouped records (zeros are kept here)
    calls = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
        .pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    )

    for net_type in tqdm(network_types, desc='network_type'):

        graph = intercell_graph[net_type]

        # rows of the stats table for this dataset / network type
        in_scope = (
            network_stats_undir_network.tissue_dataset.eq(tissue_dataset)
            & network_stats_undir_network.interactions.eq(net_type)
        )
        unique_pairs = (
            network_stats_undir_network.loc[in_scope, pair_columns]
            .drop_duplicates()
            .values
        )

        for pair in tqdm(unique_pairs, desc='pairs'):

            # first two entries are the tissues, third the observed interaction count
            tissue_pair, intersection = pair[:2], pair[2]
            pair_jaccard = intercell_networks.jaccard_index(
                tissue_pair, calls, graph, intersection)

            jaccard_records.append({
                'cancer_tissue': tissue_pair[0],
                'metastasis_tissue': tissue_pair[1],
                'tissue_dataset': tissue_dataset,
                'interactions': net_type,
                'jaccard': pair_jaccard,
            })

jaccard = pd.DataFrame(jaccard_records)
jaccard.head()

In [None]:
# Attach the Jaccard index to the grouped undirected stats; the left join
# keeps every stats row even when no Jaccard value was computed for it.
jaccard_keys = ['cancer_tissue', 'metastasis_tissue', 'tissue_dataset', 'interactions']
undir_stats_jaccard = network_stats_undir_network.merge(
    jaccard, on=jaccard_keys, how='left')
undir_stats_jaccard.head(2)

In [None]:
# Persist the undirected, grouped stats (row index not written).
undir_stats_jaccard.to_csv(
    path_or_buf=intercell_net_dir + 'undirected_grouped_network_stats.csv',
    index=False,
)

#### Directed jaccard

In [None]:
# Jaccard index of every (cancer, metastasis) tissue pair and direction,
# grouped tissues.
importlib.reload(intercell_networks)  # pick up edits made to the module

pair_columns = ['cancer_tissue', 'metastasis_tissue', 'directed_interactions']
jaccard_records = []
for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue call matrix from the grouped records (zeros are kept here)
    calls = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
        .pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    )

    for net_type in tqdm(network_types, desc='network_type'):

        graph = intercell_graph[net_type]

        for direction in tqdm(network_stats_dir_network.direction.unique(), desc='direction'):

            # rows of the stats table for this dataset / network type / direction
            in_scope = (
                network_stats_dir_network.tissue_dataset.eq(tissue_dataset)
                & network_stats_dir_network.interactions.eq(net_type)
                & network_stats_dir_network.direction.eq(direction)
            )
            unique_pairs = (
                network_stats_dir_network.loc[in_scope, pair_columns]
                .drop_duplicates()
                .values
            )

            for pair in tqdm(unique_pairs):
                # first two entries are the tissues, third the observed interaction count
                tissue_pair, intersection = pair[:2], pair[2]
                pair_jaccard = intercell_networks.jaccard_index(
                    tissue_pair, calls, graph, intersection, direction=direction)

                jaccard_records.append({
                    'cancer_tissue': tissue_pair[0],
                    'metastasis_tissue': tissue_pair[1],
                    'tissue_dataset': tissue_dataset,
                    'interactions': net_type,
                    'direction': direction,
                    'jaccard': pair_jaccard,
                })

jaccard = pd.DataFrame(jaccard_records)
jaccard.head()

In [None]:
# Attach the Jaccard index to the grouped directed stats; the left join
# keeps every stats row even when no Jaccard value was computed for it.
jaccard_keys = [
    'cancer_tissue', 'metastasis_tissue', 'tissue_dataset', 'interactions', 'direction']
dir_stats_jaccard = network_stats_dir_network.merge(
    jaccard, on=jaccard_keys, how='left')
dir_stats_jaccard.head(2)

In [None]:
# Persist the directed, grouped stats (row index not written).
dir_stats_jaccard.to_csv(
    path_or_buf=intercell_net_dir + 'directed_grouped_network_stats.csv',
    index=False,
)

# Random intercellular networks with gene calls

We want to keep the same proportion of source/target genes in the random networks. For that we will use 3 distinct intercellular gene pools:
* genes that can be both source and target
* source only genes
* target only genes

Additionally, each gene has a probability of being chosen proportional to the number of times it is expressed in all tissues.

## Undirected interactions

In [None]:
# Random undirected networks (grouped tissues): build the random
# distribution for every tissue pair, merge it with the observed stats and
# store a z-score back in the same stats file.
importlib.reload(intercell_networks)

stats_file = intercell_net_dir+'undirected_grouped_network_stats.csv'
merge_keys = ['cancer_tissue', 'metastasis_tissue', 'tissue_dataset', 'interactions']

# load intercell network stats
network_stats = pd.read_csv(stats_file)

random_network_stats = []

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):
    
    calls = pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
    calls = calls.pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    # Transform zeros in NaN
    calls = calls.where(calls==1)
    
    for net_type in tqdm(network_types, desc='network_type'):

        graph = intercell_graph[net_type]

        stats = intercell_networks.build_random_grouped_intercell_networks( 
            calls,
            graph,
            directed_graph=False,
            weights=True,
            iterations=1000,
            extra_labels=[
                ('interactions', net_type),
                ('tissue_dataset', tissue_dataset)
            ],
            n_jobs=-1
        )

        random_network_stats.extend(stats)

random_network_stats = pd.DataFrame(random_network_stats)

# to get all tissue pairs, we'll create a dataframe where we reverse the tissue order
random_net_rev = random_network_stats.copy().rename({
    'cancer_tissue':'metastasis_tissue',
    'metastasis_tissue':'cancer_tissue',
}, axis=1)

# Concatenate reverse dataframes
random_net_full = pd.concat(
    [random_network_stats, random_net_rev], ignore_index=True)

# This cell overwrites the file it loaded. If it already ran once, the file
# carries the random-network columns and the z-score from that run; merging
# again would yield '_x'/'_y' suffixed duplicates and break the 'mean'/'std'
# lookups below. Drop those stale columns so the cell can be re-run safely.
stale_columns = (
    network_stats.columns
    .intersection(random_net_full.columns.union(['zscore']))
    .difference(merge_keys)
)
network_stats = network_stats.drop(columns=stale_columns)

# merge random_net with normal_net and compute z-score
network_stats_update = pd.merge(
    random_net_full,
    network_stats,
    on=merge_keys)

# compute z-score
network_stats_update['zscore'] =\
    (network_stats_update['simple_interactions']-network_stats_update['mean'])\
        /network_stats_update['std']

# update network stats file excluding the distribution column
network_stats_update.to_csv(
    stats_file, 
    index=False,
    columns=network_stats_update.columns.drop('dist'))

## Directed interactions

In [None]:
# Random directed networks (grouped tissues): build the random distribution
# for every tissue pair and direction, merge it with the observed stats and
# store a z-score back in the same stats file.
importlib.reload(intercell_networks)

stats_file = intercell_net_dir+'directed_grouped_network_stats.csv'
merge_keys = ['cancer_tissue', 'metastasis_tissue', 'tissue_dataset', 'direction', 'interactions']

# load intercell network stats
network_stats = pd.read_csv(stats_file)

random_network_stats = []

for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):
    
    calls = pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
    calls = calls.pivot_table(values='call_0.4_0.9', index='gene_id', columns='tissue')
    # Transform zeros in NaN
    calls = calls.where(calls==1)
    
    for net_type in tqdm(network_types, desc='network_type'):

        graph = intercell_graph[net_type]

        stats = intercell_networks.build_random_grouped_intercell_networks( 
            calls,
            graph,
            directed_graph=True,
            weights=True,
            iterations=1000,
            extra_labels=[
                ('interactions', net_type),
                ('tissue_dataset', tissue_dataset)
            ],
            n_jobs=-1
        )

        random_network_stats.extend(stats)

random_network_stats = pd.DataFrame(random_network_stats)

# to get all tissue pairs, we'll create a dataframe where we reverse the tissue order
random_net_rev = random_network_stats.copy().rename({
    'cancer_tissue':'metastasis_tissue',
    'metastasis_tissue':'cancer_tissue',
}, axis=1)
# swapping the tissues also swaps the direction of the interaction
random_net_rev['direction'] = np.where(random_net_rev['direction']=='c_to_m', 'm_to_c', 'c_to_m')

# Concatenate reverse dataframes
random_net_full = pd.concat(
    [random_network_stats, random_net_rev], ignore_index=True)

# This cell overwrites the file it loaded. If it already ran once, the file
# carries the random-network columns and the z-score from that run; merging
# again would yield '_x'/'_y' suffixed duplicates and break the 'mean'/'std'
# lookups below. Drop those stale columns so the cell can be re-run safely.
stale_columns = (
    network_stats.columns
    .intersection(random_net_full.columns.union(['zscore']))
    .difference(merge_keys)
)
network_stats = network_stats.drop(columns=stale_columns)

# merge random_net with normal_net and compute z-score
network_stats_update = pd.merge(
    random_net_full,
    network_stats,
    on=merge_keys)

# compute z-score
network_stats_update['zscore'] =\
    (network_stats_update['directed_interactions']-network_stats_update['mean'])\
        /network_stats_update['std']

# update network stats file excluding the distribution column
network_stats_update.to_csv(
    stats_file, 
    index=False,
    columns=network_stats_update.columns.drop('dist'))

# Weighted intercellular interactions networks

## Undirected networks

In [None]:
# Weighted undirected networks (ungrouped tissues): expression-weighted
# interaction stats for every tissue pair, saved for both pair orderings.
importlib.reload(intercell_networks)  # pick up edits made to the module

weighted_records = []
for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue matrix of log2 TPM values
    weights = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/records.csv')
        .pivot_table(values='log2_TPM', index='gene_id', columns='tissue')
    )
    # scale every gene (row) by its maximum across tissues.
    # NOTE(review): np.max propagates NaN, so a gene with a missing tissue
    # value ends up with an all-NaN row -- confirm this is intended.
    row_max = np.max(weights.to_numpy(), keepdims=True, axis=1)
    weights = weights/row_max

    for net_type in tqdm(network_types, desc='network_type'):

        labels = [
            ('tissue_dataset', tissue_dataset),
            ('interactions', net_type),
        ]
        weighted_records.extend(
            intercell_networks.weighted_intercell_network(
                weights=weights,
                interactions=intercell_graph[net_type],
                direction=False,
                extra_labels=labels,
            )
        )

network_stats = pd.DataFrame(weighted_records)

# mirrored copy with cancer / metastasis tissues swapped
swap_tissue_columns = {
    'cancer_tissue': 'metastasis_tissue',
    'metastasis_tissue': 'cancer_tissue',
}
network_stats_rev = network_stats.copy().rename(columns=swap_tissue_columns)
network_stats_full = pd.concat([network_stats, network_stats_rev], ignore_index=True)
network_stats_full.to_csv(intercell_net_dir + 'undirected_weighted_network_stats.csv', index=False)

## Directed networks

In [None]:
# Weighted directed networks (ungrouped tissues): expression-weighted
# interaction stats per tissue pair and direction, saved for both orderings.
importlib.reload(intercell_networks)  # pick up edits made to the module

weighted_records = []
for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue matrix of log2 TPM values
    weights = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/records.csv')
        .pivot_table(values='log2_TPM', index='gene_id', columns='tissue')
    )
    # scale every gene (row) by its maximum across tissues.
    # NOTE(review): np.max propagates NaN, so a gene with a missing tissue
    # value ends up with an all-NaN row -- confirm this is intended.
    row_max = np.max(weights.to_numpy(), keepdims=True, axis=1)
    weights = weights/row_max

    for net_type in tqdm(network_types, desc='network_type'):

        labels = [
            ('tissue_dataset', tissue_dataset),
            ('interactions', net_type),
        ]
        weighted_records.extend(
            intercell_networks.weighted_intercell_network(
                weights=weights,
                interactions=intercell_graph[net_type],
                direction=['c_to_m', 'm_to_c'],
                extra_labels=labels,
            )
        )

network_stats = pd.DataFrame(weighted_records)

# mirrored copy: tissues swapped, so the direction label is flipped as well
swap_tissue_columns = {
    'cancer_tissue': 'metastasis_tissue',
    'metastasis_tissue': 'cancer_tissue',
}
network_stats_rev = network_stats.copy().rename(columns=swap_tissue_columns)
was_c_to_m = network_stats_rev['direction'].eq('c_to_m')
network_stats_rev['direction'] = np.where(was_c_to_m, 'm_to_c', 'c_to_m')
network_stats_full = pd.concat([network_stats, network_stats_rev], ignore_index=True)
network_stats_full.to_csv(intercell_net_dir + 'directed_weighted_network_stats.csv', index=False)

# Random weighted intercellular networks (z-score)

## Undirected interactions

In [103]:
# Random weighted undirected networks (grouped tissues): stats of the
# randomised networks for every tissue pair, saved for both pair orderings.
importlib.reload(intercell_networks)  # pick up edits made to the module

random_records = []
for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue matrix of log2 TPM values (grouped records)
    weights = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
        .pivot_table(values='log2_TPM', index='gene_id', columns='tissue')
    )
    # scale every gene (row) by its maximum across tissues.
    # NOTE(review): np.max propagates NaN, so a gene with a missing tissue
    # value ends up with an all-NaN row -- confirm this is intended.
    row_max = np.max(weights.to_numpy(), keepdims=True, axis=1)
    weights = weights/row_max

    for net_type in tqdm(network_types, desc='network_type'):

        labels = [
            ('interactions', net_type),
            ('tissue_dataset', tissue_dataset)
        ]
        random_records.extend(
            intercell_networks.random_grouped_weighted_intercell_networks(
                weights,
                intercell_graph[net_type],
                direction=False,
                iterations=1000,
                n_jobs=-1,
                extra_labels=labels,
            )
        )


network_stats = pd.DataFrame(random_records)

# mirrored copy with cancer / metastasis tissues swapped
swap_tissue_columns = {
    'cancer_tissue': 'metastasis_tissue',
    'metastasis_tissue': 'cancer_tissue',
}
network_stats_rev = network_stats.copy().rename(columns=swap_tissue_columns)
network_stats_full = pd.concat([network_stats, network_stats_rev], ignore_index=True)

# the 'dist' column is left out of the saved file
saved_columns = network_stats_full.columns.drop('dist')
network_stats_full.to_csv(
    intercell_net_dir + 'undirected_grouped_weighted_network_stats.csv',
    index=False,
    columns=saved_columns
)

tissue_dataset:   0%|          | 0/2 [00:00<?, ?it/s]

network_type:   0%|          | 0/2 [00:00<?, ?it/s]

network_type:   0%|          | 0/2 [00:00<?, ?it/s]

## Directed interactions

In [108]:
# Random weighted directed networks (grouped tissues): stats of the
# randomised networks for every tissue pair and direction.
importlib.reload(intercell_networks)  # pick up edits made to the module

random_records = []
for tissue_dataset in tqdm(tissue_datasets, desc='tissue_dataset'):

    # gene x tissue matrix of log2 TPM values (grouped records)
    weights = (
        pd.read_csv(gene_exp_dir+f'{tissue_dataset}/grouped_records.csv')
        .pivot_table(values='log2_TPM', index='gene_id', columns='tissue')
    )
    # scale every gene (row) by its maximum across tissues.
    # NOTE(review): np.max propagates NaN, so a gene with a missing tissue
    # value ends up with an all-NaN row -- confirm this is intended.
    row_max = np.max(weights.to_numpy(), keepdims=True, axis=1)
    weights = weights/row_max

    for net_type in tqdm(network_types, desc='network_type'):

        labels = [
            ('interactions', net_type),
            ('tissue_dataset', tissue_dataset)
        ]
        random_records.extend(
            intercell_networks.random_grouped_weighted_intercell_networks(
                weights,
                intercell_graph[net_type],
                direction=['c_to_m', 'm_to_c'],
                iterations=1000,
                n_jobs=-1,
                extra_labels=labels,
            )
        )


network_stats = pd.DataFrame(random_records)

tissue_dataset:   0%|          | 0/2 [00:00<?, ?it/s]

network_type:   0%|          | 0/2 [00:00<?, ?it/s]

network_type:   0%|          | 0/2 [00:00<?, ?it/s]

In [109]:
# Mirror the directed random-network stats so every ordered tissue pair is
# present, then save them without the 'dist' column.
network_stats_rev = network_stats.copy().rename({
    'cancer_tissue': 'metastasis_tissue',
    'metastasis_tissue': 'cancer_tissue',
}, axis=1)
# Swapping the tissues reverses the interaction, so the direction label must
# be flipped BEFORE concatenating: pd.concat copies the data, and a flip
# applied afterwards would never reach the saved table.
network_stats_rev['direction'] = np.where(network_stats_rev['direction']=='c_to_m', 'm_to_c', 'c_to_m')
network_stats_full = pd.concat([network_stats, network_stats_rev], ignore_index=True)
network_stats_full.to_csv(
    intercell_net_dir + 'directed_grouped_weighted_network_stats.csv',
    index=False,
    columns=network_stats_full.columns.drop('dist')
)