In [1]:
import os
import scanpy as sc
import pandas as pd
import numpy as np
import anndata as ad
import seaborn as sns
import scipy.sparse as sp
import matplotlib.pyplot as plt

import warnings
# NOTE(review): this suppresses every warning for the rest of the session, including
# deprecation and dtype/alignment warnings from pandas/anndata. Consider narrowing the filter.
warnings.filterwarnings("ignore")

# scanpy settings: verbosity level 0, and figure defaults of 50 dpi on screen / 500 dpi when saving.
sc.settings.verbosity = 0
sc.settings.set_figure_params(dpi=50, dpi_save=500)
from matplotlib import rcParams
# Write the same two DPI values directly into matplotlib's rcParams. This runs after
# set_figure_params, so these assignments are the ones that finally take effect.
rcParams["figure.dpi"] = 50
rcParams["savefig.dpi"] = 500

In [2]:
# Root folder of the animal-3 sagittal MERFISH data; per-section outputs go in 'processed/'.
data_dir = '../../../Data/Spatial/Transcriptomics/MERFISH_WMB_Zhang2023/animal3_sagittal/'
# exist_ok=True makes the cell idempotent and avoids the check-then-create race of
# testing os.path.exists() before calling os.makedirs().
os.makedirs(data_dir + 'processed/', exist_ok=True)

In [3]:
# Read the animal-3 sagittal MERFISH AnnData from disk and show its summary.
h5ad_path = data_dir + 'WB_MERFISH_animal3_sagittal.h5ad'
adata_concat = ad.read_h5ad(h5ad_path)
adata_concat

AnnData object with n_obs × n_vars = 2081549 × 1122
    obs: 'organism_ontology_term_id', 'donor_id', 'development_stage_ontology_term_id', 'sex_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'disease_ontology_term_id', 'tissue_ontology_term_id', 'cell_type_ontology_term_id', 'assay_ontology_term_id', 'suspension_type', 'cluster_id_transfer', 'subclass_transfer', 'cluster_confidence_score', 'subclass_confidence_score', 'high_quality_transfer', 'major_brain_region', 'ccf_region_name', 'brain_section_label', 'is_primary_data', 'tissue_type', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid'
    var: 'gene_name', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length', 'feature_type'
    uns: 'citation', 'schema_reference', 'schema_version', 'title'
    obsm: 'X_CCF', 'X_spatial_coords', 'X_umap'

In [None]:
# Build a slimmed-down AnnData from the matrix stored in `.raw`, keeping only the
# gene/cell metadata and coordinates that are used downstream.
if adata_concat.raw is None:
    # Fail with a clear message instead of an AttributeError on `None.X`.
    raise ValueError("adata_concat has no .raw attribute; cannot extract the raw expression matrix")

adata_concat_new = ad.AnnData(X=adata_concat.raw.X.copy())
adata_concat_new.obs_names = adata_concat.obs_names.copy()
# Label genes by their 'gene_name' entry (row order of `var` is unchanged).
adata_concat_new.var_names = adata_concat.var['gene_name'].tolist()

# The var index was just replaced, so it no longer matches adata_concat.var's index.
# Assigning a pandas Series would align on index labels and silently fill every
# non-matching row with NaN; copy the underlying values positionally instead.
for var_key in ['feature_name', 'feature_type']:
    adata_concat_new.var[var_key] = adata_concat.var[var_key].values.copy()

obs_list = ['donor_id', 'cell_type_ontology_term_id', 'tissue_ontology_term_id', 
            'subclass_transfer', 'major_brain_region', 'ccf_region_name', 'brain_section_label',
            'cell_type', 'tissue', 'development_stage']
# obs_names are identical in both objects, so index-aligned assignment is safe here.
for key in obs_list:
    adata_concat_new.obs[key] = adata_concat.obs[key].copy()
    
# Store the coordinate arrays under shorter obsm keys.
adata_concat_new.obsm['spatial'] = adata_concat.obsm['X_spatial_coords']
adata_concat_new.obsm['spatial_ccf'] = adata_concat.obsm['X_CCF']
adata_concat_new

AnnData object with n_obs × n_vars = 2081549 × 1122
    obs: 'donor_id', 'cell_type_ontology_term_id', 'tissue_ontology_term_id', 'subclass_transfer', 'major_brain_region', 'ccf_region_name', 'brain_section_label', 'cell_type', 'tissue', 'development_stage'
    var: 'feature_name', 'feature_type'
    obsm: 'spatial', 'spatial_ccf'

In [5]:
# Distinct brain-section labels in ascending order; one entry per tissue section.
slice_name_list = sorted(adata_concat_new.obs['brain_section_label'].unique())
slice_name_list

['C57BL6J-3.001',
 'C57BL6J-3.002',
 'C57BL6J-3.003',
 'C57BL6J-3.004',
 'C57BL6J-3.005',
 'C57BL6J-3.006',
 'C57BL6J-3.007',
 'C57BL6J-3.008',
 'C57BL6J-3.009',
 'C57BL6J-3.010',
 'C57BL6J-3.011',
 'C57BL6J-3.012',
 'C57BL6J-3.013',
 'C57BL6J-3.015',
 'C57BL6J-3.016',
 'C57BL6J-3.017',
 'C57BL6J-3.019',
 'C57BL6J-3.020',
 'C57BL6J-3.021',
 'C57BL6J-3.022',
 'C57BL6J-3.023',
 'C57BL6J-3.024',
 'C57BL6J-3.025']

In [None]:
# Split the combined object into one AnnData per brain section and report each shape.
# Writing every section to disk is slow, so the export is opt-in: set SAVE_SLICES = True
# to (re)generate the per-section .h5ad files and the section-name index file.
SAVE_SLICES = False

for slice_name in slice_name_list:
    # .copy() materialises an independent AnnData rather than a view of the parent.
    adata = adata_concat_new[adata_concat_new.obs['brain_section_label'] == slice_name, :].copy()
    print(f"{slice_name}: {adata.shape}")

    if SAVE_SLICES:
        adata.write_h5ad(data_dir + f'processed/{slice_name}.h5ad')

if SAVE_SLICES:
    # Plain-text index of the section names, one per line.
    with open(data_dir + "processed/slice_name_list.txt", "w") as f:
        for file_name in slice_name_list:
            f.write(file_name + '\n')

C57BL6J-3.001: (107775, 1122)
C57BL6J-3.002: (128295, 1122)
C57BL6J-3.003: (146832, 1122)
C57BL6J-3.004: (155538, 1122)
C57BL6J-3.005: (156668, 1122)
C57BL6J-3.006: (127327, 1122)
C57BL6J-3.007: (137117, 1122)
C57BL6J-3.008: (126309, 1122)
C57BL6J-3.009: (113720, 1122)
C57BL6J-3.010: (110927, 1122)
C57BL6J-3.011: (105954, 1122)
C57BL6J-3.012: (93532, 1122)
C57BL6J-3.013: (91250, 1122)
C57BL6J-3.015: (92271, 1122)
C57BL6J-3.016: (70494, 1122)
C57BL6J-3.017: (65952, 1122)
C57BL6J-3.019: (57649, 1122)
C57BL6J-3.020: (52960, 1122)
C57BL6J-3.021: (46707, 1122)
C57BL6J-3.022: (34694, 1122)
C57BL6J-3.023: (24540, 1122)
C57BL6J-3.024: (20838, 1122)
C57BL6J-3.025: (14200, 1122)


In [7]:
# Flag cells whose CCF coordinates contain at least one NaN component.
ccf_coords = adata_concat_new.obsm['spatial_ccf']
nan_rows = np.isnan(ccf_coords).any(axis=1)

# Report the positions of the affected cells and how many there are.
print("indices:", np.flatnonzero(nan_rows))
print("number:", nan_rows.sum())

indices: [  45378   45379   45380 ... 2081546 2081547 2081548]
number: 25179
