In [1]:
import os
import re
import sys
from pathlib import Path

import pandas as pd
import scanpy as sc
import anndata as ad
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [2]:
spatial_omics_folder = (Path().cwd().parents[0]).absolute() / 'data' / 'spatial_omics_graph'
process_path = (Path().cwd().parents[0]).absolute() / 'data' / 'torch_graph_data'

In [3]:
d_dir = (Path().cwd().parents[0].parents[0]).absolute()
data_dir = d_dir / "09_datasets"

p_dir = (Path().cwd().parents[0]).absolute()

In [4]:
%load_ext autoreload
%autoreload 2

module_path = str(p_dir / "src")

if module_path not in sys.path:
    sys.path.append(module_path)

# Load data

In [5]:
# Save data
save_path = (Path().cwd().parents[0]).absolute() / 'data' / 'adata'

# adata_combined = ad.read_h5ad(save_path / 'all.h5ad')
adata_foll = ad.read_h5ad(save_path / 'foll.h5ad')

  utils.warn_names_duplicates("obs")


In [6]:
name_map = {'0': 'Ki67+ B-cell', '4': 'Ki67+', '5': 'Ki67+ Macrophage', '6':'CD20+ Macrophage', '3':'Plasma Cell', '8':'Other', '11':'Macrophage', '1':'B-cell', '7':'FDC', '18':'Dendritic cell', '13':'Other'}

adata_foll.obs['name'] = (
    adata_foll.obs["merged"]
    .map(lambda x: name_map.get(x, x))
    .astype("category")
)

adata_foll.uns["name_colors"] = sc.pl.palettes.vega_20_scanpy[:len(np.unique(adata_foll.obs['name']))]

# Motifs counts

In [7]:
import multiprocessing
import spatial as sm
import palettable
import sklearn 
import networkx as nx

# Import spatial omics library
import athena as ath
from spatialOmics import SpatialOmics
from matplotlib.colors import ListedColormap, hex2color

heatmap_cmp = palettable.cmocean.diverging.Balance_20.mpl_colormap
heatmap_cmp_r = palettable.cmocean.diverging.Balance_20_r.mpl_colormap

n_cpu = multiprocessing.cpu_count()

from itertools import combinations_with_replacement
from grandiso import find_motifs

def generate_list_motifs(PPI_pairs, n=4):
    '''
    Generate list of possible motifs of size n from PPI pairs
    '''
    
    comb = []
    for i in range(3,n+1):
        comb += list(combinations_with_replacement(PPI_pairs, i))
    return comb 

def count_motifs(network, comb):
    motifs = []
    counts = []
    for c in comb:
        motif=nx.complete_graph(len(c))
        labels = {i:marker for i,marker in enumerate(c)}
        nx.set_node_attributes(motif, labels, "label")

        motifs.append(motif)
        counts.append(len(find_motifs(motif, network)))
        
    return motifs, counts

## Motifs

In [88]:
%%time
counts_all = []
ids = []
conditions = []

# Count motfis
cell_types = list(adata_foll.obs['name'].unique())
comb = generate_list_motifs(cell_types, n=4)

# Plot example of spatial projection
for d in adata_foll.obs.Dataset.unique():
    adata_donor = adata_foll[adata_foll.obs.Dataset == d, :]
    
    # Get spatial omics dataset
    dataset = f'07_{d}'
    path = spatial_omics_folder / f'{dataset}.hdf5'
    spadata = SpatialOmics.from_h5py(path)
    
    # Loop through follicles
    for foll in spadata.spl.Foll:
        print(d, foll)
        adata_f = adata_donor[adata_donor.obs.Foll == foll]
        
        spadata.obs[str(foll)]['name'] = adata_f.obs.name.tolist()
        spadata.obs[str(foll)]['name']  = spadata.obs[str(foll)]['name'].astype('category')
        
        # Added colormap to spadata object
        # we have some overhead here as we need to convert to numeric types for the ATHENA framework
        spl = str(foll)
        spadata.obs[spl]['name_id'] = spadata.obs[spl].groupby('name').ngroup().astype('category')
        
        mask = spadata.masks[str(foll)]['cellmasks']
        
        # generate colormap
        labs = spadata.obs[spl].groupby(['name_id']).head(1)[['name_id', 'name']].set_index('name_id').to_dict()
        cmap = ListedColormap([hex2color(i) for i in adata_f.uns['name_colors']])
        spadata.uns['cmaps'].update({'name_id':cmap})
        spadata.uns['cmap_labels'].update({'name_id': labs['name']})
        
        # Get networks
        network = spadata.G[str(foll)]['contact']
        mappings = dict(zip(spadata.obs[str(foll)].Cell.tolist(), spadata.obs[str(foll)].name.tolist()))
        network.remove_edges_from(list(nx.selfloop_edges(network)))
        nx.set_node_attributes(network, mappings , "label")

        # Get counts
        motifs, counts = count_motifs(network, comb)
        
        counts_all.append(counts)
        ids.append(d+'_'+str(foll))

A11 1
A11 2
A11 3
A11 4
A11 5
A11 6
A11 7
A11 8
A11 9
A11 10
A11 11
A11 12
A11 13
A18 1
A18 2
A18 3
A18 4
A18 5
A18 6
A18 7
A18 8
A18 9
A18 10
A18 11
A18 12
A18 13
A18 14
A18 15
A18 16
A18 17
A18 18
A18 19
A18 20
A18 21
A18 22
A18 23
A18 24
A18 25
A21 1
A21 2
A21 3
A21 4
A21 5
A21 6
A21 7
A21 8
A21 9
A21 10
A21 11
A21 12
A21 13
A21 14
A22 1
A22 2
A22 3
A22 4
A22 5
A22 6
A22 7
A22 8
A22 9
A22 10
A22 11
A22 12
A22 13
A22 14
A22 15
A22 16
A22 17
A22 18
A22 19
A22 20
A22 21
A22 22
A22 23
A22 24
A22 25
A22 26
A22 27
A22 28
A6 1
A6 2
A6 3
A6 4
A6 5
A6 6
A6 7
A6 8
A8 1
A8 2
A8 3
A8 4
A8 5
A8 6
A8 7
A8 8
A8 9
A8 10
A8 11
A8 12
A8 13
A8 14
A8 15
A8 16
A8 17
T18 1
T18 2
T18 3
T18 4
T18 5
T18 6
T18 7
T18 8
T18 9
T18 10
T18 11
T18 12
T18 13
T18 14
T18 15
T18 16
T18 17
T18 18
T18 19
T18 20
T18 21
T18 22
T18 23
T18 24
T18 25
T18 26
T18 27
T18 28
T18 29
T18 30
T18 31
T18 32
T18 33
T18 34
T18 35
T18 36
T18 37
T18 38
T18 39
T18 40
T18 41
T18 42
T18 43
T18 44
T18 45
T18 46
T18 47
T18 48
T18 49
T18 50
T1

In [90]:
df_count_motifs = pd.DataFrame(counts_all, index=ids)
df_count_motifs = df_count_motifs.loc[:, (df_count_motifs != 0).any(axis=0)]
df_labels = pd.DataFrame(comb, columns=['CT1', 'CT2', 'CT3', 'CT4'])

In [91]:
df_count_motifs

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,925,926,927,928,929,930,931,932,933,934
A11_1,18,0,56,40,28,0,0,0,0,2,...,0,0,0,0,0,0,0,0,0,0
A11_2,30,2,36,30,10,0,0,8,0,0,...,0,0,0,0,0,0,0,0,0,0
A11_3,18,0,50,0,2,0,0,0,6,0,...,0,0,0,0,0,0,0,0,0,0
A11_4,192,0,118,0,8,0,0,4,0,0,...,0,0,0,0,0,0,0,0,0,0
A11_5,0,0,8,4,12,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
T8_63,0,0,0,0,2,0,0,6,0,0,...,8,0,0,0,0,0,0,0,0,0
T8_64,0,0,6,2,4,0,2,2,0,0,...,0,0,0,0,0,0,0,0,0,0
T8_65,0,0,0,0,0,0,0,0,0,0,...,0,0,0,2,6,0,0,0,0,0
T8_66,0,0,0,0,2,0,2,4,2,2,...,0,0,0,2,0,0,0,0,0,24


In [92]:
save_path = p_dir / 'data' / 'metadata' /  'motifs.csv'
df_count_motifs.to_csv(save_path, index=False)

save_path = p_dir / 'data' / 'metadata' /  'motifs_labels.csv'
df_labels.to_csv(save_path)

## Permutation motifs search

In [11]:
import sklearn 
from joblib import Parallel, delayed

# Count motfis
cell_types = list(adata_foll.obs['name'].unique())
comb = generate_list_motifs(cell_types, n=4)

def permutation_pval(spadata, comb=comb):
    network = spadata.G[str(foll)]['contact']
    mappings = dict(zip(
        spadata.obs[str(foll)].Cell.tolist(), 
        sklearn.utils.shuffle(spadata.obs[str(foll)].name.tolist())
        ))     
    network.remove_edges_from(list(nx.selfloop_edges(network)))
    nx.set_node_attributes(network, mappings , "label")
    # Get counts
    motifs, counts = count_motifs(network, comb)
    return motifs, counts

In [12]:
mean_perm = []
std_perm = []
ids_perm = []
n_perm = 100

# Count motfis
cell_types = list(adata_foll.obs['name'].unique())
comb = generate_list_motifs(cell_types, n=4)

# Plot example of spatial projection
for d in adata_foll.obs.Dataset.unique():
    adata_donor = adata_foll[adata_foll.obs.Dataset == d, :]
    
    # Get spatial omics dataset
    dataset = f'07_{d}'
    path = spatial_omics_folder / f'{dataset}.hdf5'
    spadata = SpatialOmics.from_h5py(path)
    
    # Loop through follicles
    for foll in spadata.spl.Foll:
        print(d, foll)
        adata_f = adata_donor[adata_donor.obs.Foll == foll]
        
        spadata.obs[str(foll)]['name'] = adata_f.obs.name.tolist()
        spadata.obs[str(foll)]['name']  = spadata.obs[str(foll)]['name'].astype('category')
        
        # Added colormap to spadata object
        # we have some overhead here as we need to convert to numeric types for the ATHENA framework
        spl = str(foll)
        spadata.obs[spl]['name_id'] = spadata.obs[spl].groupby('name').ngroup().astype('category')
        
        mask = spadata.masks[str(foll)]['cellmasks']
        
        # generate colormap
        labs = spadata.obs[spl].groupby(['name_id']).head(1)[['name_id', 'name']].set_index('name_id').to_dict()
        cmap = ListedColormap([hex2color(i) for i in adata_f.uns['name_colors']])
        spadata.uns['cmaps'].update({'name_id':cmap})
        spadata.uns['cmap_labels'].update({'name_id': labs['name']})
        
        # Get networks
        network = spadata.G[str(foll)]['contact']
        mappings = dict(zip(spadata.obs[str(foll)].Cell.tolist(), spadata.obs[str(foll)].name.tolist()))
        network.remove_edges_from(list(nx.selfloop_edges(network)))
        nx.set_node_attributes(network, mappings , "label")

        # Get counts
        motifs, counts = zip(*Parallel(n_jobs=-1)(delayed(permutation_pval)(spadata) for i in range(n_perm))) 
        perm = pd.DataFrame(counts).T
        mean = perm.mean(axis=1).T
        std = perm.std(axis=1).T
        
        mean_perm.append(mean)
        std_perm.append(std)
        ids_perm.append(d+'_'+str(foll))

A11 1
A11 2
A11 3
A11 4
A11 5
A11 6
A11 7
A11 8
A11 9
A11 10
A11 11
A11 12
A11 13
A18 1
A18 2
A18 3
A18 4
A18 5
A18 6
A18 7
A18 8
A18 9
A18 10
A18 11
A18 12
A18 13
A18 14
A18 15
A18 16
A18 17
A18 18
A18 19
A18 20
A18 21
A18 22
A18 23
A18 24
A18 25
A21 1
A21 2
A21 3
A21 4
A21 5
A21 6
A21 7
A21 8
A21 9
A21 10
A21 11
A21 12
A21 13
A21 14
A22 1
A22 2
A22 3
A22 4
A22 5
A22 6
A22 7
A22 8
A22 9
A22 10
A22 11
A22 12
A22 13
A22 14
A22 15
A22 16
A22 17
A22 18
A22 19
A22 20
A22 21
A22 22
A22 23
A22 24
A22 25
A22 26
A22 27
A22 28
A6 1
A6 2
A6 3
A6 4
A6 5
A6 6
A6 7
A6 8
A8 1
A8 2
A8 3
A8 4
A8 5
A8 6
A8 7
A8 8
A8 9
A8 10
A8 11
A8 12
A8 13
A8 14
A8 15
A8 16
A8 17
T18 1
T18 2
T18 3
T18 4
T18 5
T18 6
T18 7
T18 8
T18 9
T18 10
T18 11
T18 12
T18 13
T18 14
T18 15
T18 16
T18 17
T18 18
T18 19
T18 20
T18 21
T18 22
T18 23
T18 24
T18 25
T18 26
T18 27
T18 28
T18 29
T18 30
T18 31
T18 32
T18 33
T18 34
T18 35
T18 36
T18 37
T18 38
T18 39
T18 40
T18 41
T18 42
T18 43
T18 44
T18 45
T18 46
T18 47
T18 48
T18 49
T18 50
T1

In [13]:
pd.DataFrame(mean_perm, index=ids_perm)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,925,926,927,928,929,930,931,932,933,934
A11_1,1.98,0.40,12.64,19.48,8.76,0.00,0.10,0.60,0.04,0.10,...,0.00,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.0
A11_2,1.98,0.52,19.00,12.94,2.12,0.00,0.36,1.10,0.18,0.04,...,0.00,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.0
A11_3,0.78,0.12,12.00,3.64,0.92,0.00,0.00,0.50,0.16,0.18,...,0.00,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.0
A11_4,7.26,0.22,69.64,4.60,1.46,0.00,2.84,1.16,0.14,0.00,...,0.00,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.0
A11_5,0.00,0.00,1.66,2.34,0.32,0.00,0.06,0.00,0.00,0.00,...,0.00,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
T8_63,0.00,0.14,0.08,0.02,0.08,0.02,0.12,0.36,0.00,0.02,...,1.80,0.0,0.04,0.02,0.12,0.0,0.06,0.0,0.00,0.0
T8_64,0.12,0.60,1.28,1.02,0.52,0.00,0.50,0.54,0.00,0.10,...,0.00,0.0,0.00,0.00,0.00,0.0,0.00,0.0,0.00,0.0
T8_65,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,1.84,0.0,0.12,0.18,0.54,0.0,0.00,0.0,0.12,0.0
T8_66,0.00,0.06,0.12,0.04,0.32,0.02,0.28,0.28,0.00,0.02,...,0.20,0.0,0.00,0.02,0.00,0.0,0.00,0.0,0.00,0.0


In [15]:
df_count_perm = pd.DataFrame(mean_perm, index=ids_perm)
df_count_perm = df_count_perm.loc[:, (df_count_perm != 0).any(axis=0)]
df_labels = pd.DataFrame(comb, columns=['CT1', 'CT2', 'CT3', 'CT4'])

In [16]:
save_path = p_dir / 'data' / 'metadata' /  'motifs_perm.csv'
df_count_perm.to_csv(save_path, index=False)

save_path = p_dir / 'data' / 'metadata' /  'motifs_labels_perm.csv'
df_labels.to_csv(save_path)

# Statistical plot

In [43]:
# Get count of cell per follicles
save_path = (Path().cwd().parents[0]).absolute() / 'data' / 'adata' / 'foll.h5ad'
adata = ad.read_h5ad(save_path)

  utils.warn_names_duplicates("obs")


In [44]:
adata.obs

Unnamed: 0,ROI,Cell,Cell_ROI,Dataset,Data,Foll,GC,leiden,merged,name
4868,2,4869,1812,A11,07_A11,5,0,6,6,CD68+CD20+
4883,2,4884,1827,A11,07_A11,5,0,18,18,CD21+
4892,2,4893,1836,A11,07_A11,5,0,1,1,CD20+
4906,2,4907,1850,A11,07_A11,5,0,7,7,CD20+CD21+
4910,2,4911,1854,A11,07_A11,5,0,7,7,CD20+CD21+
...,...,...,...,...,...,...,...,...,...,...
498628,53,498629,10281,T8,07_T8,17,0,21,1,CD20+
498634,53,498635,10287,T8,07_T8,17,0,21,1,CD20+
498664,53,498665,10317,T8,07_T8,17,0,21,1,CD20+
498673,53,498674,10326,T8,07_T8,17,0,21,1,CD20+


In [39]:
df_foll_info = adata.obs.copy()
df_foll_info = df_foll_info.groupby(['Dataset', ''])

Unnamed: 0,ROI,Cell,Cell_ROI,Dataset
0,1,1,1,A11
1,1,2,2,A11
2,1,3,3,A11
3,1,4,4,A11
4,1,5,5,A11
...,...,...,...,...
501726,53,501727,13379,T8
501727,53,501728,13380,T8
501728,53,501729,13381,T8
501729,53,501730,13382,T8


In [35]:
# Melt count 
df_count = df_count_motifs.loc[:, df_count_motifs.mean(axis=0)>100]
df_count.index = df_count.index.set_names(['Follicle'])
df_count = df_count.reset_index()
df_melt = df_count.melt(id_vars=['Follicle'], var_name='PPI motif', value_name='Count')
