In [1]:
# Standard library
import os

# Numerical / plotting stack
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
# Used below for the spatial K-nearest-neighbor search
from sklearn.neighbors import NearestNeighbors

import scanpy as sc
# Global scanpy settings: number of parallel jobs and default figure appearance
sc.settings.n_jobs = 56
sc.settings.set_figure_params(dpi=180, dpi_save=300, frameon=False, figsize=(4, 4), fontsize=8, facecolor='white')

import sys
# Extend the module search path before the import below; presumably this directory
# is where the local `plot` module lives (machine-specific absolute path).
sys.path.append('/home/xingjiepan/Softwares/ZhuangLab/MERFISH_analysis/basic/util')
from plot import get_color_mapping_df

In [2]:
def calc_spatial_fingerprint_df_2d(adata_query, adata_ref, cell_type_col,
                              slice_col='slice_id', xy_cols=['global_x', 'global_y'],
                              K = 50):
    """Compute a distance-weighted neighborhood cell-type profile for every query cell.

    For each slice, the K spatially nearest reference cells of every query cell
    are looked up in 2D. Each neighbor contributes a Gaussian weight
    ``exp(-(d / R0) ** 2)`` to the column of its cell type, where ``d`` is the
    distance to the neighbor and ``R0`` is a per-query-cell characteristic
    radius: twice the ``max(0.1, 5 / K)`` quantile of that cell's neighbor
    distances.

    Parameters
    ----------
    adata_query : object with ``.obs`` (DataFrame) and ``.shape``
        Cells to compute fingerprints for. ``.obs`` must contain ``slice_col``
        and ``xy_cols``.
    adata_ref : object with ``.obs`` (DataFrame)
        Cells that may act as neighbors. ``.obs`` must contain ``slice_col``,
        ``xy_cols`` and ``cell_type_col``.
    cell_type_col : str
        Column of ``adata_ref.obs`` holding the cell-type label of each
        reference cell.
    slice_col : str
        Column identifying the tissue slice; neighbors are only searched
        within the same slice.
    xy_cols : sequence of str
        The two coordinate columns used for the neighbor search.
    K : int
        Number of nearest reference neighbors per query cell.

    Returns
    -------
    pandas.DataFrame
        float32 table indexed like ``adata_query.obs`` with one column per
        unique reference cell type (sorted).

    Notes
    -----
    * If a slice has fewer than K reference cells, all of them are used.
    * If a slice has no reference cells, the fingerprints of its query cells
      stay all-zero and a message is printed.
    * A neighbor at distance 0 always gets weight 1, also when ``R0`` is 0
      (coincident cells); this avoids the NaN that ``0 / 0`` would produce.
    * A query cell that is also a reference cell is its own zero-distance
      neighbor and therefore contributes weight 1 to its own type.
    """
    xy_cols = list(xy_cols)

    # Fingerprint columns are the sorted unique reference cell types;
    # ref_ct_codes[i] is the column index of the i-th reference cell.
    unique_cts, ref_ct_codes = np.unique(np.asarray(adata_ref.obs[cell_type_col]),
                                         return_inverse=True)
    ref_ct_codes = np.asarray(ref_ct_codes).ravel()
    L_neighbor_vec = len(unique_cts)

    # Output matrix of neighborhood cell-type weights (one row per query cell)
    all_knn_neighbor_vecs = np.zeros((adata_query.shape[0], L_neighbor_vec), dtype=np.float32)

    # Iterate through the slices
    slice_ids = np.unique(adata_query.obs[slice_col])
    for slice_id in slice_ids:
        print(f'Generate neighbor vectors for slice {slice_id}')

        slice_mask_q = np.asarray(adata_query.obs[slice_col] == slice_id)
        slice_mask_r = np.asarray(adata_ref.obs[slice_col] == slice_id)

        X_spatial_query = np.array(adata_query.obs.loc[slice_mask_q, xy_cols])
        X_spatial_ref = np.array(adata_ref.obs.loc[slice_mask_r, xy_cols])

        n_ref = X_spatial_ref.shape[0]
        if n_ref == 0:
            # No possible neighbors: keep the zero rows instead of crashing in fit()
            print('No reference cells in this slice; fingerprints left as zeros')
            continue

        # kneighbors() requires n_neighbors <= number of fitted samples
        k_eff = min(K, n_ref)

        # Calculate spatial K nearest neighbors
        nbrs = NearestNeighbors(n_neighbors=k_eff, algorithm='ball_tree').fit(X_spatial_ref)
        distances, indices = nbrs.kneighbors(X_spatial_query)
        print('Mean KNN distance =', np.mean(distances))

        # Estimate the characteristic neighbor distance of every query cell.
        # The quantile is clamped to 1 so that fewer than 5 neighbors is valid.
        quantile = min(1.0, max(0.1, 5 / k_eff))
        neighbor_R0 = 2 * np.quantile(distances, quantile, axis=1)

        # Gaussian weight of every (query cell, neighbor) pair
        with np.errstate(divide='ignore', invalid='ignore'):
            weights = np.exp(-(distances / neighbor_R0[:, None]) ** 2)
        # exp(0) == 1 for any radius; also repairs the 0 / 0 -> NaN case when R0 == 0
        weights[distances == 0] = 1.0

        # Sum the weights per (query cell, neighbor cell type). bincount adds
        # the entries in input order, i.e. in the same order as an explicit
        # loop over neighbors would.
        n_query = distances.shape[0]
        neighbor_ct_codes = ref_ct_codes[slice_mask_r][indices]
        row_offsets = np.arange(n_query, dtype=np.intp)[:, None] * L_neighbor_vec
        flat_bins = (row_offsets + neighbor_ct_codes).ravel()
        knn_neighbor_vecs = np.bincount(
            flat_bins, weights=weights.ravel(), minlength=n_query * L_neighbor_vec
        ).reshape(n_query, L_neighbor_vec)

        all_knn_neighbor_vecs[slice_mask_q] = knn_neighbor_vecs

    return pd.DataFrame(all_knn_neighbor_vecs, adata_query.obs.index, list(unique_cts))

In [3]:
# Read the MERFISH AnnData with transferred labels from disk and display its summary
adata = sc.read_h5ad(
    '/home/xingjiepan/data/whole_brain/analysis/20230723_final_integration/integration_workspace/adata_merfish_label_transfer.h5ad'
)
adata

AnnData object with n_obs × n_vars = 9343481 × 1122
    obs: 'integration_partition_transfer', 'integration_partition_confidence', 'subclass_label_transfer', 'subclass_label_confidence', 'cl_transfer', 'cl_confidence', 'cluster_cosine_similarity', 'adjusted_subclass_label_confidence', 'adjusted_cl_confidence'

In [4]:
# Keep only cells that pass all three quality criteria at once:
# confident subclass mapping, confident cluster mapping, and not labelled 'LQ'
adata = adata[
    (adata.obs['adjusted_subclass_label_confidence'] > 0.8)
    & (adata.obs['adjusted_cl_confidence'] > 0.5)
    & (adata.obs['subclass_label_transfer'] != 'LQ')
]
adata

View of AnnData object with n_obs × n_vars = 6461549 × 1122
    obs: 'integration_partition_transfer', 'integration_partition_confidence', 'subclass_label_transfer', 'subclass_label_confidence', 'cl_transfer', 'cl_confidence', 'cluster_cosine_similarity', 'adjusted_subclass_label_confidence', 'adjusted_cl_confidence'

In [5]:
# Get the spatial coordinates from the metadata file
cell_metadata_df = pd.read_csv(
    '/home/xingjiepan/data/whole_brain/MERFISH/20230602_cellpose2/cell_metadata_cp2.csv',
    index_col=0,
)

# Subsetting an AnnData returns a view, and writing to `.obs` of a view makes
# anndata copy it implicitly and emit a warning. Materialize the object
# explicitly first so the mutation below is intentional and warning-free.
if adata.is_view:
    adata = adata.copy()

# Column assignment aligns on the cell index; cells that are absent from the
# metadata table receive NaN.
adata.obs['slice_id'] = cell_metadata_df['slice_id']
adata.obs['center_x'] = cell_metadata_df['center_x']
adata.obs['center_y'] = cell_metadata_df['center_y']

# Drop the cells for which no slice assignment was found in the metadata
adata = adata[~adata.obs['slice_id'].isna()]
adata

  adata.obs['slice_id'] = cell_metadata_df['slice_id']


View of AnnData object with n_obs × n_vars = 6461532 × 1122
    obs: 'integration_partition_transfer', 'integration_partition_confidence', 'subclass_label_transfer', 'subclass_label_confidence', 'cl_transfer', 'cl_confidence', 'cluster_cosine_similarity', 'adjusted_subclass_label_confidence', 'adjusted_cl_confidence', 'slice_id', 'center_x', 'center_y'

In [6]:
# Subclasses excluded from the reference set because they are not informative
# for defining spatial modules
subclasses_to_remove = [
    'LQ',
    'Endo NN',
    'SMC NN',
    'Peri NN',
    'VLMC NN',
    'BAM NN',
    'DC NN',
    'Lymphoid NN',
    'Microglia NN',
    'Monocytes NN',
]
adata_ref = adata[
    ~adata.obs['subclass_label_transfer'].isin(subclasses_to_remove)
]
adata_ref

View of AnnData object with n_obs × n_vars = 5198857 × 1122
    obs: 'integration_partition_transfer', 'integration_partition_confidence', 'subclass_label_transfer', 'subclass_label_confidence', 'cl_transfer', 'cl_confidence', 'cluster_cosine_similarity', 'adjusted_subclass_label_confidence', 'adjusted_cl_confidence', 'slice_id', 'center_x', 'center_y'

In [7]:
%%time
# Generate data frames of spatial fingerprint vectors
spatial_fingerprint_df_subclass = calc_spatial_fingerprint_df_2d(adata, adata_ref, 'subclass_label_transfer',
                                                                xy_cols=['center_x', 'center_y'])

Generate neighbor vectors for slice co1_slice1
Mean KNN distance = 54.77528023572422
Generate neighbor vectors for slice co1_slice10
Mean KNN distance = 61.3117372504746
Generate neighbor vectors for slice co1_slice11
Mean KNN distance = 60.17967477710854
Generate neighbor vectors for slice co1_slice12
Mean KNN distance = 70.86916604299168
Generate neighbor vectors for slice co1_slice13
Mean KNN distance = 90.81309210578625
Generate neighbor vectors for slice co1_slice14
Mean KNN distance = 86.35911360053306
Generate neighbor vectors for slice co1_slice15
Mean KNN distance = 89.51864100330845
Generate neighbor vectors for slice co1_slice16
Mean KNN distance = 87.09553513523981
Generate neighbor vectors for slice co1_slice17
Mean KNN distance = 101.34208421093629
Generate neighbor vectors for slice co1_slice18
Mean KNN distance = 116.656991857002
Generate neighbor vectors for slice co1_slice19
Mean KNN distance = 94.51453399616733
Generate neighbor vectors for slice co1_slice2
Mean KNN 

Mean KNN distance = 82.21492717476796
Generate neighbor vectors for slice co2_slice124
Mean KNN distance = 84.46280557981443
Generate neighbor vectors for slice co2_slice125
Mean KNN distance = 81.4079114690481
Generate neighbor vectors for slice co2_slice126
Mean KNN distance = 81.13990467421196
Generate neighbor vectors for slice co2_slice127
Mean KNN distance = 84.93794821085501
Generate neighbor vectors for slice co2_slice128
Mean KNN distance = 85.5336165485221
Generate neighbor vectors for slice co2_slice129
Mean KNN distance = 79.31510204877885
Generate neighbor vectors for slice co2_slice13
Mean KNN distance = 57.67032914030486
Generate neighbor vectors for slice co2_slice130
Mean KNN distance = 78.06763939608865
Generate neighbor vectors for slice co2_slice131
Mean KNN distance = 84.12422469557396
Generate neighbor vectors for slice co2_slice132
Mean KNN distance = 82.93547460769177
Generate neighbor vectors for slice co2_slice133
Mean KNN distance = 78.40859188185601
Generate

Mean KNN distance = 85.22065173348885
Generate neighbor vectors for slice co2_slice75
Mean KNN distance = 78.83236218561412
Generate neighbor vectors for slice co2_slice76
Mean KNN distance = 81.65992971407955
Generate neighbor vectors for slice co2_slice77
Mean KNN distance = 83.1663811356352
Generate neighbor vectors for slice co2_slice78
Mean KNN distance = 77.02205797315139
Generate neighbor vectors for slice co2_slice79
Mean KNN distance = 80.83403925168932
Generate neighbor vectors for slice co2_slice8
Mean KNN distance = 52.98449967384243
Generate neighbor vectors for slice co2_slice80
Mean KNN distance = 74.98566468266492
Generate neighbor vectors for slice co2_slice81
Mean KNN distance = 81.54603431482138
Generate neighbor vectors for slice co2_slice82
Mean KNN distance = 85.84025046862631
Generate neighbor vectors for slice co2_slice83
Mean KNN distance = 82.7533078900878
Generate neighbor vectors for slice co2_slice84
Mean KNN distance = 86.48412726607539
Generate neighbor v

In [8]:
%%time
# Build a Anndata for the KNN cell types
adata_spatial_fingerprint_subclass = sc.AnnData(spatial_fingerprint_df_subclass)
adata_spatial_fingerprint_subclass.obs = adata.obs.copy()
adata_spatial_fingerprint_subclass.write('adata_spatial_fingerprint_subclass_raw.h5ad')

CPU times: user 8.26 s, sys: 3.97 s, total: 12.2 s
Wall time: 12.2 s
