# DeepCOLOR analysis with Resolve A1-1 data

## Import libraries

In [2]:
import torch
import scanpy as sc
import numpy as np
import importlib
from matplotlib import pyplot as plt
import deepcolor
np.random.seed(1)
torch.manual_seed(1)
import pandas as pd
import plotly.graph_objects as go

## Load data

Load in the scRNA-seq data and the Resolve spatial data. To skip the preprocessing and training of the data, move straight to [here](#trained_data)

In [2]:
sc_adata = sc.read_h5ad('data/liver_mouseStSt_guilliams2022_withZonation.h5ad')
sp_adata = sc.read_h5ad('data/A1-1.h5ad')

### Preprocessing

Change the layer names for DeepCOLR and convert the type to float64.

In [None]:
sp_adata.layers['count'] = sp_adata.layers.pop('raw_counts')
sp_adata.layers['count'] = sp_adata.layers['count'].astype('float64')
sc_adata.layers['count'] = sc_adata.layers.pop('counts')

Calculate the log1p for the scRNA-seq data.

In [None]:
sc.pp.log1p(sc_adata)

#### Cell type annotations

Intersect the cell type annotations.
Begin by changing the annotation names of the spatial data to match the single cell.

In [None]:
sub_mapper = {'Kupffer cells':"KCs", "central_vein_EC45":"Central Vein Endothelial cells","portal_vein_EC45":"Portain Vein Endothelial cells","capsular_fibroblasts45":"Capsule fibroblasts","HepatocytesCentral":"Hepatocytes_central","HepatocytesPortal":"Hepatocytes_portal","LSEC Portal":"LSECs_portal","LSEC Central":'LSECs_central','VSMC':'VSMCs','stellateAll':"Stellate cells","stellate PeriCentral":"Stellate cells_central","stellate PeriPortal":"Stellate cells_portal","FibroblastsCentral": 'Fibroblast','fibroblastPortal':'Fibroblast',"Portal LAM macrophages":"MoMac1","Capsule and Central Vein Mac":"MoMac2", 'LECs':'Lymphatic Endothelial cells'}
mapper = {}
for col in sp_adata.obs['annotationSave'].cat.categories:
    if col not in sub_mapper:
        mapper[col] = col
    else:
        mapper[col] = sub_mapper[col]
sp_adata.obs['annotationSave'] = sp_adata.obs['annotationSave'].map(mapper).astype('category')

Intersect the two cell type annotations categories and only take the cells that are annotated with these intersecting cell types.

In [None]:
cells_keep = list(set(sc_adata.obs['annot_fine_zonated'].cat.categories) & set(sp_adata.obs['annotationSave'].cat.categories))

In [None]:
sc_adata = sc_adata[sc_adata.obs['annot_fine_zonated'].isin(cells_keep)]
sp_adata = sp_adata[sp_adata.obs['annotationSave'].isin(cells_keep)]

Remove any noise and only take genes that are expressed in both scRNA-seq and spatial data.

In [160]:
sc_adata = sc_adata[:, sc_adata.layers['count'].toarray().sum(axis=0) > 10]
sp_adata = sp_adata[:, sp_adata.layers['count'].sum(axis=0) > 10]
common_genes = np.intersect1d(sc_adata.var_names, sp_adata.var_names)
sc_adata = sc_adata[:, common_genes]
sp_adata = sp_adata[:, common_genes]

## Estimate the spatial distribution

Train the autoencoder with the data. The resultant scRNA-seq have `map2sp`, the probability of a cell in a spot, and `p_mat`, the colocalization matrix in `obsm`.

In [None]:
importlib.reload(deepcolor)
sc_adata, sp_adata = deepcolor.estimate_spatial_distribution(sc_adata, sp_adata, param_save_path='data/opt_params.pt', first_epoch=500, second_epoch=500, layer_name='count')

Loss: 82.7280044555664
Start first opt
loss at epoch 0 is 51.67353057861328
loss at epoch 10 is 36.75598907470703
loss at epoch 20 is 35.0432243347168
loss at epoch 30 is 34.37825393676758
loss at epoch 40 is 33.98917770385742
loss at epoch 50 is 33.78602981567383
loss at epoch 60 is 33.601844787597656
loss at epoch 70 is 33.49480056762695
loss at epoch 80 is 33.610107421875
loss at epoch 90 is 33.515098571777344
loss at epoch 100 is 33.39932632446289
loss at epoch 110 is 33.35929870605469
loss at epoch 120 is 33.369285583496094
loss at epoch 130 is 33.288700103759766
loss at epoch 140 is 33.251243591308594
loss at epoch 150 is 33.25636291503906
loss at epoch 160 is 33.246131896972656
loss at epoch 170 is 33.286502838134766
loss at epoch 180 is 33.2514533996582
loss at epoch 190 is 33.22034454345703
loss at epoch 200 is 33.16120147705078
loss at epoch 210 is 33.223854064941406
loss at epoch 220 is 33.24793243408203
loss at epoch 230 is 33.216094970703125
loss at epoch 240 is 33.1920700

## Read in the trained data

<a id='trained_data'></a>

Load the preprocessed and raw data instead of the raw data.

In [None]:
# Run the below to save the trained data to a new file
# sc_adata.write('data/deepcolor_mouseStSt.h5ad', compression='gzip')
# sp_adata.write('data/deepcolor_A1-1.h5ad', compression='gzip')

In [None]:
# scRNA-seq trained data
sc_adata = sc.read_h5ad('data/deepcolor_mouseStSt.h5ad')
# Spatial trained data
sp_adata = sc.read_h5ad('data/deepcolor_A1-1.h5ad')

## Calculate proximal cell communications

First load the ligand-target matrix of NicheNet. This matrix is taken from NicheNet v2 instead of DeepCOLOR's matrix.

In [None]:
#! wget -O data/ligand_target_df.csv https://www.dropbox.com/s/2z7ogbks4504iya/ligand_target_df.csv?dl=0
#lt_df = pd.read_csv('data/ligand_target_df.csv', index_col=0)
lt_df = pd.read_csv('data/ligand_target_matrix.csv', index_col=0)

Set KCs, LAM (MoMac1), and central vein and capsule macrophages (MoMac2) as receivers.

The figure below show the full result.

In [None]:
importlib.reload(deepcolor)
# KCs, MoMac1 & 2
fig, coexp_cc_df = deepcolor.calculate_proximal_cell_communications(sc_adata, 'annot_fine_zonated', lt_df, ["KCs", 'MoMac1', 'MoMac2'], celltype_sample_num=500, ntop_genes=4000, each_display_num=3, role="receiver", edge_thresh=1)
fig

  sc_adata = sc_adata[sc_adata.obs.groupby(celltype_label).sample(celltype_sample_num, replace=True).index]
  utils.warn_names_duplicates("obs")
  ligand_adata.layers['activity'] = make_top_values(top_exps @ lt_df)
  coexp_cc_df = coexp_df.groupby(['cell2_type', 'cell1_type']).sum()
  sub_coexp_cc_df = coexp_cc_df.sort_values('coactivity', ascending=False).groupby('cell2_type', as_index=False).head(n=each_display_num)


### Save coexpression result

Save the resultant dataframe to a csv file.

In [None]:
coexp_cc_df.to_csv('result/final_coexp_500epochs.csv')

Laod in the dataframe to only look at the final result.

In [7]:
coexp_cc_df = pd.read_csv('result/final_coexp_500epochs.csv', index_col=0)

### DeepCOLOR colocalization result

The full table (top 20 on coactivity score) is reported here.

In [18]:
coexp_cc_df.sort_values('coactivity', ascending=False).head(20)

Unnamed: 0,cell1_type,cell2_type,ligand,coactivity
134,MoMac2,cDC2s,Cd209a,36407
6456,MoMac2,Monocytes,F13a1,31980
2106,MoMac2,Lymphatic Endothelial cells,Cd9,29695
4281,MoMac2,Capsule fibroblasts,Dcn,27053
9994,MoMac2,Stellate cells_central,Igfbp3,25982
15881,MoMac2,Stellate cells_central,Reln,25939
4948,MoMac2,Stellate cells_central,Dcn,23655
4941,KCs,Stellate cells_central,Dcn,23577
4107,MoMac2,Stellate cells_central,Cxcl12,23162
10023,MoMac2,Stellate cells_portal,Igfbp3,22311


The final result filtered for KCs is shown below. This is used for Table 3.5 in the dissertation.

In [8]:
coexp_cc_df[(coexp_cc_df['cell1_type']=='KCs')&(coexp_cc_df['coactivity']>0)].sort_values('coactivity', ascending=False).head(20)

Unnamed: 0,cell1_type,cell2_type,ligand,coactivity
4941,KCs,Stellate cells_central,Dcn,23577
6449,KCs,Monocytes,F13a1,19151
4970,KCs,Stellate cells_portal,Dcn,18874
10016,KCs,Stellate cells_portal,Igfbp3,17496
9987,KCs,Stellate cells_central,Igfbp3,15446
11669,KCs,Stellate cells_central,Lpl,6801
4100,KCs,Stellate cells_central,Cxcl12,4242
11698,KCs,Stellate cells_portal,Lpl,4147
4129,KCs,Stellate cells_portal,Cxcl12,3905
11495,KCs,Monocytes,Lpl,3310


#### Find the unique ligand-receptor pairs for KCs

In [10]:
# Cell types and ligands for the two macrophages
momac = coexp_cc_df[coexp_cc_df['cell1_type'].isin(['MoMac1','MoMac2'])&(coexp_cc_df['coactivity']>0)]

In [11]:
# Result for KCs
kc = coexp_cc_df[(coexp_cc_df['cell1_type']=='KCs')&(coexp_cc_df['coactivity']>0)]
# Iter over the two macrophages and remove any rows from the kc that exist in the two macrophages
for _, momac_row in momac.iterrows():
    mo_cell = momac_row['cell2_type']
    mo_ligand = momac_row['ligand']
    for index, row in kc.iterrows():
        cell = row['cell2_type']
        ligand = row['ligand']
        if cell == mo_cell and ligand == mo_ligand:
            kc.drop(index, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kc.drop(index, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kc.drop(index, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kc.drop(index, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  kc.drop(index, inplace=True)
A value is trying to be set on a copy of a slice fro

In [15]:
kc.sort_values('coactivity', ascending=False)

Unnamed: 0,cell1_type,cell2_type,ligand,coactivity
11205,KCs,Hepatocytes_portal,Lpl,19
2940,KCs,Lymphatic Endothelial cells,Col1a1,14
3752,KCs,LSECs_portal,Cxcl12,11
8566,KCs,Central Vein Endothelial cells,Hgf,3
11321,KCs,LSECs_portal,Lpl,3
8044,KCs,Mig. cDCs,Gdf15,2
12017,KCs,Hepatocytes_central,Mmp12,2
2679,KCs,Central Vein Endothelial cells,Col1a1,1
8537,KCs,cDC2s,Hgf,1
12162,KCs,LSECs_portal,Mmp12,1


#### visualize results

The visualization result given by DeepCOLOR is difficult to visualize, as it contains the entire result and reports the two macrophages as receivers as well.
Therefore, only the top 20 results with KCs as the receiver are visualized in this section.

In [None]:
kc2 = coexp_cc_df[(coexp_cc_df['cell1_type']=='KCs')&(coexp_cc_df['coactivity']>0)].sort_values('coactivity', ascending=False).head(20)

In [None]:
link1 = kc2.groupby(['cell2_type', 'ligand'])['cell1_type'].count().reset_index()
link1 = link1.rename(columns={'cell2_type':'source','ligand':'target','cell1_type':'value'})
link1 = link1[link1['value']>0]
link2 = kc2.groupby(['ligand', 'cell1_type'])['cell2_type'].count().reset_index()
link2 = link2.rename(columns={'cell2_type':'value','ligand':'source','cell1_type':'target'})
link2 = link2[link2['value']>0]
link = pd.concat([link1, link2], ignore_index=True)
unique_source_target = list(pd.unique(link[['source', 'target']].values.ravel('K')))
mapping_dict = {k: v for v, k in enumerate(unique_source_target)}
link['source'] = link['source'].map(mapping_dict)
link['target'] = link['target'].map(mapping_dict)
links_dict = link.to_dict(orient='list')

In [None]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = unique_source_target,
      
    ),
    link = dict(
      source = links_dict["source"],
      target = links_dict["target"],
      value = links_dict["value"],
  
  ))])

fig.update_layout(font_size=15, width=1000, height=600)
fig.show()