In [1]:
import cellxgene_census
import tiledbsoma as soma
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix

## QC results

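Light QC on the test build shows one issue: duplicated organism rows in `census_info/organisms` (see the ❌ check below). All other checks pass.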

Checks

✅ High-level data is accessible and looks in order for `census["census_info"]` and `census["census_data"]`

✅ Expected increase in cells from latest LTS 154M to 211M

✅ Similar expected increase in other categories (datasets, donors, unique cells)

❌ Duplicated rows for `homo_sapiens` and `mus_musculus` in `census_new["census_info"]["organisms"]`

✅ All assays increased in N cells, no decreases 

✅ Normalized layer was properly created

✅ Normalized layer was properly created for full-gene sequencing assays

✅ Spot checking obs and var contents across all organisms shows compliance with Discover schema 7.0.0

✅ Spot checking expression matrices across all organisms shows expected values

✅ Anndata exporting works for all organisms
 


In [2]:
# TileDB/SOMA settings used to build the context that opens the candidate build from S3.
DEFAULT_TILEDB_CONFIGURATION = {
    # https://docs.tiledb.com/main/how-to/configuration#configuration-parameters
    "py.init_buffer_bytes": 1 * 1024**3,
    "soma.init_buffer_bytes": 1 * 1024**3,
    # NOTE(review): with "false", S3 requests ARE signed (no anonymous access). The previous comment
    # here said requests should not be signed, which contradicts this value. Presumably the dev
    # bucket needs credentials — confirm which of the two is intended.
    "vfs.s3.no_sign_request": "false",
    "vfs.s3.region": "us-west-2",
}
ctx = soma.options.SOMATileDBContext().replace(tiledb_config=DEFAULT_TILEDB_CONFIGURATION)
# Baseline census, opened by version tag.
census_old = cellxgene_census.open_soma(census_version="2025-01-30")
# Candidate build under QC, opened by explicit S3 URI with the context above.
census_new = cellxgene_census.open_soma(uri="s3://cellxgene-census-dev/cell-census/2025-10-27/soma", context=ctx)

## Checking census["census_info"]

In [3]:
# Summary label/value table of the baseline census.
summary_old = census_old["census_info"]["summary"].read().concat().to_pandas()
summary_old

Unnamed: 0,soma_joinid,label,value
0,0,census_schema_version,2.1.0
1,1,census_build_date,spatial-build-rc3-io2-2025-01-30T21:20:53Z
2,2,dataset_schema_version,5.2.0
3,3,total_cell_count,154437194
4,4,unique_cell_count,85809959
5,5,number_donors_homo_sapiens,390
6,6,number_donors_mus_musculus,172


In [4]:
# Summary label/value table of the candidate build.
summary_new = census_new["census_info"]["summary"].read().concat().to_pandas()
summary_new

Unnamed: 0,soma_joinid,label,value
0,0,census_schema_version,2.4.0
1,1,census_build_date,2025-10-27
2,2,dataset_schema_version,7.0.0
3,3,total_cell_count,211913318
4,4,unique_cell_count,124710875


In [5]:
# Organisms table of the candidate build (one row per organism is expected).
organisms_new = census_new["census_info"]["organisms"].read().concat().to_pandas()
organisms_new

Unnamed: 0,soma_joinid,organism_ontology_term_id,organism_label,organism
0,0,NCBITaxon:9606,Homo sapiens,homo_sapiens
1,1,NCBITaxon:10090,Mus musculus,mus_musculus
2,2,NCBITaxon:9606,Homo sapiens,homo_sapiens
3,3,NCBITaxon:10090,Mus musculus,mus_musculus
4,4,NCBITaxon:9483,Callithrix jacchus,callithrix_jacchus
5,5,NCBITaxon:9544,Macaca mulatta,macaca_mulatta
6,6,NCBITaxon:9598,Pan troglodytes,pan_troglodytes


In [6]:
# Datasets table of the baseline census.
datasets_old = census_old["census_info"]["datasets"].read().concat().to_pandas()
datasets_old

Unnamed: 0,soma_joinid,citation,collection_id,collection_name,collection_doi,collection_doi_label,dataset_id,dataset_version_id,dataset_title,dataset_h5ad_path,dataset_total_cell_count
0,0,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,4eb29386-de81-452f-b3c0-e00844e8c7fd,f76861bb-becb-4eb7-82fc-782dc96ccc7f,Spatial transcriptomics in mouse: Puck_191112_05,4eb29386-de81-452f-b3c0-e00844e8c7fd.h5ad,10888
1,1,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,78d59e4a-82eb-4a61-a1dc-da974d7ea54b,7d7ec1b6-6e3f-4aaa-9442-4b22f3424396,Spatial transcriptomics in mouse: Puck_191112_08,78d59e4a-82eb-4a61-a1dc-da974d7ea54b.h5ad,10250
2,2,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,add5eb84-5fc9-4f01-982e-a346dd42ee82,de54aed8-4f73-48f6-9229-418a840e2d82,Spatial transcriptomics in mouse: Puck_191109_20,add5eb84-5fc9-4f01-982e-a346dd42ee82.h5ad,12906
3,3,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,b020294c-ab82-4547-b5a7-63d8ffa575ed,abe4fce1-0859-4a56-ad1e-734d79f0e6c8,Spatial transcriptomics in mouse: Puck_191112_13,b020294c-ab82-4547-b5a7-63d8ffa575ed.h5ad,15161
4,4,Publication: https://doi.org/10.1038/s41591-02...,a96133de-e951-4e2d-ace6-59db8b3bfb1d,HTAN/HTAPP Broad - Spatio-molecular dissection...,10.1038/s41591-024-03215-z,Klughammer et al. (2024) Nat Med,d7476ae2-e320-4703-8304-da5c42627e71,863fc5e4-bd4a-4681-9c3d-0ee7ef54e327,HTAPP-330-SMP-1082 scRNA-seq,d7476ae2-e320-4703-8304-da5c42627e71.h5ad,565
...,...,...,...,...,...,...,...,...,...,...,...
1568,1568,Publication: https://doi.org/10.1038/s41586-02...,45d5d2c3-bc28-4814-aed6-0bb6f0e11c82,A single-cell transcriptional timelapse of mou...,10.1038/s41586-024-07069-w,Qiu et al. (2024) Nature,dcfa2614-7ca7-4d82-814c-350626eccb26,3002a659-a1a9-4406-9976-99e658e1fbb5,Major cell cluster: Mesoderm,dcfa2614-7ca7-4d82-814c-350626eccb26.h5ad,3267338
1569,1569,Publication: https://doi.org/10.1126/science.a...,e5f58829-1a66-40b5-a624-9046778e74f5,Tabula Sapiens,10.1126/science.abl4896,The Tabula Sapiens Consortium* et al. (2022) S...,53d208b0-2cfd-4366-9866-c3c6114081bc,10df7690-6d10-4029-a47e-0f071bb2df83,Tabula Sapiens - All Cells,53d208b0-2cfd-4366-9866-c3c6114081bc.h5ad,1136218
1570,1570,Publication: https://doi.org/10.1038/s41586-02...,45d5d2c3-bc28-4814-aed6-0bb6f0e11c82,A single-cell transcriptional timelapse of mou...,10.1038/s41586-024-07069-w,Qiu et al. (2024) Nature,dcfd4feb-18a3-4b30-81d7-1b0c544a8ab3,3817734b-0f82-433b-8c38-55b214200fff,Whole dataset: Raw counts only,dcfd4feb-18a3-4b30-81d7-1b0c544a8ab3.h5ad,11441407
1571,1571,Publication: https://doi.org/10.1038/s41593-02...,1ca90a2d-2943-483d-b678-b809bf464c30,SEA-AD: Seattle Alzheimer’s Disease Brain Cell...,10.1038/s41593-024-01774-5,Gabitto et al. (2024) Nat Neurosci,c2876b1b-06d8-4d96-a56b-5304f815b99a,c32964d2-3339-441f-8e56-7177234c7876,Whole Taxonomy - MTG: Seattle Alzheimer's Dise...,c2876b1b-06d8-4d96-a56b-5304f815b99a.h5ad,1226855


In [7]:
# Datasets table of the candidate build.
datasets_new = census_new["census_info"]["datasets"].read().concat().to_pandas()
datasets_new

Unnamed: 0,soma_joinid,citation,collection_id,collection_name,collection_doi,collection_doi_label,dataset_id,dataset_version_id,dataset_title,dataset_h5ad_path,dataset_total_cell_count
0,0,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,4eb29386-de81-452f-b3c0-e00844e8c7fd,66699060-0389-4fbd-b3a5-196b3b4e32d6,Spatial transcriptomics in mouse: Puck_191112_05,4eb29386-de81-452f-b3c0-e00844e8c7fd.h5ad,10888
1,1,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,78d59e4a-82eb-4a61-a1dc-da974d7ea54b,f64950a2-a3c8-490a-8431-7121eeb4f5f4,Spatial transcriptomics in mouse: Puck_191112_08,78d59e4a-82eb-4a61-a1dc-da974d7ea54b.h5ad,10250
2,2,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,add5eb84-5fc9-4f01-982e-a346dd42ee82,781a724a-b0f5-46c4-9a13-e6293ef4364f,Spatial transcriptomics in mouse: Puck_191109_20,add5eb84-5fc9-4f01-982e-a346dd42ee82.h5ad,12906
3,3,Publication: https://doi.org/10.1016/j.isci.20...,8e880741-bf9a-4c8e-9227-934204631d2a,High Resolution Slide-seqV2 Spatial Transcript...,10.1016/j.isci.2022.104097,Marshall et al. (2022) iScience,b020294c-ab82-4547-b5a7-63d8ffa575ed,96a79598-297b-4ade-a6c1-a431ab243548,Spatial transcriptomics in mouse: Puck_191112_13,b020294c-ab82-4547-b5a7-63d8ffa575ed.h5ad,15161
4,4,Publication: https://doi.org/10.1038/s41591-02...,a96133de-e951-4e2d-ace6-59db8b3bfb1d,HTAN/HTAPP Broad - Spatio-molecular dissection...,10.1038/s41591-024-03215-z,Klughammer et al. (2024) Nat Med,d7476ae2-e320-4703-8304-da5c42627e71,ac9fe945-6784-48fa-a2d5-a8646196d37e,HTAPP-330-SMP-1082 scRNA-seq,d7476ae2-e320-4703-8304-da5c42627e71.h5ad,565
...,...,...,...,...,...,...,...,...,...,...,...
1810,1810,Publication: https://doi.org/10.1038/s41586-02...,45d5d2c3-bc28-4814-aed6-0bb6f0e11c82,A single-cell transcriptional timelapse of mou...,10.1038/s41586-024-07069-w,Qiu et al. (2024) Nature,dcfa2614-7ca7-4d82-814c-350626eccb26,ca20ef35-13c0-4850-bec4-ba00e6bbd6f9,Major cell cluster: Mesoderm,dcfa2614-7ca7-4d82-814c-350626eccb26.h5ad,3267338
1811,1811,Publication: https://doi.org/10.1126/science.a...,e5f58829-1a66-40b5-a624-9046778e74f5,Tabula Sapiens,10.1126/science.abl4896,The Tabula Sapiens Consortium* et al. (2022) S...,53d208b0-2cfd-4366-9866-c3c6114081bc,5a495302-b7cd-4bf9-853e-95627b00bb03,Tabula Sapiens - All Cells,53d208b0-2cfd-4366-9866-c3c6114081bc.h5ad,1136218
1812,1812,Publication: https://doi.org/10.1038/s41586-02...,45d5d2c3-bc28-4814-aed6-0bb6f0e11c82,A single-cell transcriptional timelapse of mou...,10.1038/s41586-024-07069-w,Qiu et al. (2024) Nature,dcfd4feb-18a3-4b30-81d7-1b0c544a8ab3,a5a85963-8004-41a1-8eb5-ca65266d89c3,Whole dataset: Raw counts only,dcfd4feb-18a3-4b30-81d7-1b0c544a8ab3.h5ad,11441407
1813,1813,Publication: https://doi.org/10.1038/s41593-02...,1ca90a2d-2943-483d-b678-b809bf464c30,SEA-AD: Seattle Alzheimer’s Disease Brain Cell...,10.1038/s41593-024-01774-5,Gabitto et al. (2024) Nat Neurosci,c2876b1b-06d8-4d96-a56b-5304f815b99a,92b37feb-aa2c-40d7-bd90-0a9b5ddb3b27,Whole Taxonomy - MTG: Seattle Alzheimer's Dise...,c2876b1b-06d8-4d96-a56b-5304f815b99a.h5ad,1378211


## Checking list of all assays and verifying new ones

In [8]:
# Number of human cells per assay label in the baseline census.
assays_old = (
    census_old["census_data"]["homo_sapiens"]
    .obs.read(column_names=["assay"])
    .concat()
    .to_pandas()
    .value_counts()
    .reset_index()
)

In [9]:
# Number of human cells per assay label in the candidate build.
assays_new = (
    census_new["census_data"]["homo_sapiens"]
    .obs.read(column_names=["assay"])
    .concat()
    .to_pandas()
    .value_counts()
    .reset_index()
)

In [10]:
# Join old and new per-assay counts (count_x = old, count_y = new), add the difference,
# and order by the new count. The outer join keeps assays present on only one side (NaN on the other).
assays_merged = (
    assays_old.merge(assays_new, on="assay", how="outer")
    .assign(delta=lambda merged: merged["count_y"] - merged["count_x"])
    .sort_values(by="count_y", ascending=False)
)
assays_merged

Unnamed: 0,assay,count_x,count_y,delta
3,10x 3' v3,59668147.0,96655665,36987518.0
2,10x 3' v2,22750589.0,24507159,1756570.0
6,10x 5' v2,4363694.0,8748986,4385292.0
5,10x 5' v1,7448617.0,7683010,234393.0
36,sci-RNA-seq3,5064268.0,5230353,166085.0
8,10x multiome,,3033426,
4,10x 5' transcription profiling,1968545.0,2125643,157098.0
14,Drop-seq,1048377.0,1048377,0.0
23,ScaleBio single cell RNA sequencing,700524.0,700524,0.0
0,10x 3' transcription profiling,665642.0,665642,0.0


## Normalized layer for SMART-like data

In [11]:
# Assay ontology term id -> assay label for the full-gene assays, copied from the census builder:
# https://github.com/chanzuckerberg/cellxgene-census/blob/7d3ab1bbaa30dec8b09a96ec38c21374b0073333/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/globals.py#L336
FULL_GENE_ASSAY = dict(
    (
        ("EFO:0003755", "FL-cDNA"),
        ("EFO:0008747", "FRISCR"),
        ("EFO:0008763", "Hi-SCL"),
        ("EFO:0008797", "MATQ-seq"),
        ("EFO:0008877", "Quartz-seq"),
        ("EFO:0008930", "Smart-seq"),
        ("EFO:0008931", "Smart-seq2"),
        ("EFO:0008956", "SUPeR-seq"),
        ("EFO:0009999", "G&T-Seq"),
        ("EFO:0010004", "SCRB-seq"),
        ("EFO:0010006", "scM&T-seq"),
        ("EFO:0010022", "Smart-3Seq"),
        ("EFO:0010058", "Fluidigm C1-based SMARTer library preparation"),
        ("EFO:0010184", "Smart-like"),
        ("EFO:0022396", "TruSeq"),
        ("EFO:0022488", "Smart-seq3"),
        ("EFO:0022839", "STORM-seq"),
        ("EFO:0030031", "SCOPE-chip"),
        ("EFO:0030060", "mCT-seq"),
        ("EFO:0030061", "mcSCRB-seq"),
        ("EFO:0700016", "Smart-seq v4"),
    )
)

In [12]:
# Keep only the rows whose assay label is one of the FULL_GENE_ASSAY labels.
is_full_gene_assay = assays_merged["assay"].isin(FULL_GENE_ASSAY.values())
assays_new_full_gene = assays_merged.loc[is_full_gene_assay]
assays_new_full_gene

Unnamed: 0,assay,count_x,count_y,delta
28,Smart-seq2,260350.0,267948,7598.0
27,Smart-seq v4,63920.0,63920,0.0
32,mCT-seq,,37121,
29,Smart-seq3,5005.0,5005,0.0
19,Quartz-seq,2882.0,2882,0.0
26,Smart-seq,172.0,172,0.0


In [13]:
# Spot-check sample: up to 1000 cells from each of two full-gene assays in the candidate build,
# Quartz-seq and Smart-seq v4. Note that `smartseq1_ids` holds the Quartz-seq cells.
human_obs_new = census_new["census_data"]["homo_sapiens"].obs
smartseq1_ids = (
    human_obs_new.read(column_names=["soma_joinid"], value_filter="assay == 'Quartz-seq'")
    .concat()
    .to_pandas()
)
smartseq4_ids = (
    human_obs_new.read(column_names=["soma_joinid"], value_filter="assay == 'Smart-seq v4'")
    .concat()
    .to_pandas()
)

# Join ids of both samples; these are the obs coordinates requested below.
smartseq = [
    *smartseq1_ids["soma_joinid"].tolist()[:1000],
    *smartseq4_ids["soma_joinid"].tolist()[:1000],
]

# Fetch the sampled cells together with the pre-computed "normalized" layer.
adata = cellxgene_census.get_anndata(
    census_new,
    organism="homo_sapiens",
    obs_coords=smartseq,
    X_layers=["normalized"],
)


  return dispatch(args[0].__class__)(*args, **kw)
  return dispatch(args[0].__class__)(*args, **kw)


In [14]:
# Display the normalized layer of the sampled cells (its repr reports dtype, stored-element count and shape).
adata.layers["normalized"]

<Compressed Sparse Row sparse matrix of dtype 'float32'
	with 13036267 stored elements and shape (2000, 61479)>

In [15]:
# Distinct assay labels in the sample; drop_duplicates keeps the first row of each label.
adata.obs["assay"].drop_duplicates()

0       Smart-seq v4
1000      Quartz-seq
Name: assay, dtype: category
Categories (37, object): ['10x 3' transcription profiling', '10x 3' v1', '10x 3' v2', '10x 3' v3', ..., 'microwell-seq', 'modified STRT-seq', 'particle-templated instant partition sequencing', 'sci-RNA-seq3']

In [16]:
# Smallest explicitly stored value of the normalized layer.
# ndarray.min() is vectorized; the builtin min() walks the ~13M stored values one by one in Python.
adata.layers["normalized"].data.min()

np.float32(5.2735913e-09)

In [17]:
# Largest explicitly stored value of the normalized layer.
# ndarray.max() is vectorized; the builtin max() walks the ~13M stored values one by one in Python.
adata.layers["normalized"].data.max()

np.float32(0.063144684)

In [18]:
# Smallest per-cell (row) total of the normalized layer; expected to be close to 1 if every
# cell is scaled to unit sum.
normalized_row_sums = adata.layers["normalized"].sum(axis=1)
normalized_row_sums.min()

np.float32(0.99999875)

In [19]:
# Largest per-cell (row) total of the normalized layer; expected to be close to 1 if every
# cell is scaled to unit sum.
normalized_row_sums = adata.layers["normalized"].sum(axis=1)
normalized_row_sums.max()

np.float32(1.0000011)

In [20]:
# Recompute the normalization locally from adata.X:
#   1. divide every column (gene) by its feature_length,
#   2. divide every row (cell) by its row total.
# The result is stored as the "normalized_local" layer for comparison with the pre-computed layer.
inverse_feature_length = 1 / adata.var["feature_length"].values[None, :]
length_scaled = csr_matrix(adata.X.multiply(inverse_feature_length))

inverse_row_total = 1 / length_scaled.sum(1).A
adata.layers["normalized_local"] = csr_matrix(length_scaled.multiply(inverse_row_total))

In [21]:
# Stored values of the locally recomputed layer, for visual comparison with the pre-computed layer.
adata.layers["normalized_local"].data

array([2.45865267e-05, 2.83067595e-06, 2.67409696e-05, ...,
       8.28286526e-05, 7.54188263e-05, 1.28428137e-04], shape=(13036267,))

In [22]:
# Stored values of the pre-computed normalized layer, for visual comparison with the local recomputation.
adata.layers["normalized"].data

array([2.4586450e-05, 2.8306968e-06, 2.6741065e-05, ..., 8.2828104e-05,
       7.5418502e-05, 1.2842938e-04], shape=(13036267,), dtype=float32)

In [23]:
# Compare the pre-computed normalized values with the locally recomputed ones at decreasing
# precision: for each number of decimals (10 down to 0), count the stored entries that still
# differ after rounding both sides and casting to float32.
# NOTE(review): this is an element-by-element comparison of the two `.data` arrays, so it assumes
# both layers store their entries in the same order — confirm.
pre_calc = adata.layers["normalized"].data.copy()
fly_calc = adata.layers["normalized_local"].data.copy()
for r in reversed(range(11)):
    # np.round returns a new array, so no defensive copy of the inputs is needed per iteration.
    pre_rounded = np.round(pre_calc, r).astype(np.float32)
    fly_rounded = np.round(fly_calc, r).astype(np.float32)
    n_differing = np.count_nonzero(pre_rounded != fly_rounded)
    # This is a fraction in [0, 1], not a percentage.
    fraction_differing = n_differing / fly_calc.shape[0]
    print("Decimal points: ", r, " values differing: n = ", n_differing, ", fraction = ", fraction_differing)

Decimal points:  10  values differing: n =  9389378 , fracion =  0.7202505134330249
Decimal points:  9  values differing: n =  4750525 , fracion =  0.364408384700927
Decimal points:  8  values differing: n =  924218 , fracion =  0.07089590908194808
Decimal points:  7  values differing: n =  108095 , fracion =  0.008291867602895828
Decimal points:  6  values differing: n =  11142 , fracion =  0.0008546925281600937
Decimal points:  5  values differing: n =  1096 , fracion =  8.407314762730772e-05
Decimal points:  4  values differing: n =  136 , fracion =  1.0432434377111177e-05
Decimal points:  3  values differing: n =  2 , fracion =  1.5341815260457615e-07
Decimal points:  2  values differing: n =  0 , fracion =  0.0
Decimal points:  1  values differing: n =  0 , fracion =  0.0
Decimal points:  0  values differing: n =  0 , fracion =  0.0


## Checking discover schema 7 changes

###  Field diff in obs/var

In [24]:
# Per organism, report which obs columns are shared, added or removed between the two builds.
# Only organisms present in the baseline census are visited.
for organism in census_old["census_data"].keys():
    print(f"\n## {organism}")
    old_columns = list(census_old["census_data"][organism].obs.keys())
    new_columns = list(census_new["census_data"][organism].obs.keys())

    old_set = set(old_columns)
    new_set = set(new_columns)
    shared_columns = sorted(old_set & new_set)
    added_columns = sorted(new_set - old_set)
    removed_columns = sorted(old_set - new_set)

    print(f"Shared obs columns: {shared_columns}")
    print(f"New obs columns: {added_columns}")
    print(f"Removed obs columns: {removed_columns}")


## homo_sapiens
Shared obs columns: ['assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'dataset_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'n_measured_vars', 'nnz', 'observation_joinid', 'raw_mean_nnz', 'raw_sum', 'raw_variance_nnz', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'soma_joinid', 'suspension_type', 'tissue', 'tissue_general', 'tissue_general_ontology_term_id', 'tissue_ontology_term_id', 'tissue_type']
New obs columns: []
Removed obs columns: []

## mus_musculus


Shared obs columns: ['assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'dataset_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'n_measured_vars', 'nnz', 'observation_joinid', 'raw_mean_nnz', 'raw_sum', 'raw_variance_nnz', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'soma_joinid', 'suspension_type', 'tissue', 'tissue_general', 'tissue_general_ontology_term_id', 'tissue_ontology_term_id', 'tissue_type']
New obs columns: []
Removed obs columns: []


In [25]:
# Per organism, report which var columns of the RNA measurement are shared, added or removed
# between the two builds. Only organisms present in the baseline census are visited.
for organism in census_old["census_data"].keys():
    print(f"\n## {organism}")
    old_columns = list(census_old["census_data"][organism].ms["RNA"].var.keys())
    new_columns = list(census_new["census_data"][organism].ms["RNA"].var.keys())

    # The labels say "var" because this cell inspects ms["RNA"].var, not obs.
    print(f"Shared var columns: {sorted(set(old_columns) & set(new_columns))}")
    print(f"New var columns: {sorted(set(new_columns) - set(old_columns))}")
    print(f"Removed var columns: {sorted(set(old_columns) - set(new_columns))}")


## homo_sapiens
Shared obs columns: ['feature_id', 'feature_length', 'feature_name', 'feature_type', 'n_measured_obs', 'nnz', 'soma_joinid']
New obs columns: []
Removed obs columns: []

## mus_musculus
Shared obs columns: ['feature_id', 'feature_length', 'feature_name', 'feature_type', 'n_measured_obs', 'nnz', 'soma_joinid']
New obs columns: []
Removed obs columns: []


### New organisms

In [26]:
# Shape of the raw X matrix for one organism of the candidate build.
# The organism is pinned explicitly: the original cell read `organism` left over from the previous
# cell's loop, so its result depended on hidden kernel state.
# NOTE(review): "mus_musculus" appears to be the value that leaked variable held in the recorded
# run (it is the last organism printed by the preceding loop) — confirm this is the organism intended here.
# Despite its name, `n_cells` holds the full shape tuple at this point.
n_cells = census_new["census_data"]["mus_musculus"].ms["RNA"].X["raw"].shape
n_cells

(43653561, 53384)

In [30]:
# Spot check every organism of the candidate build:
#   - compare the obs/var domain upper bound (+1) with the raw X matrix shape,
#   - export the first 1000 obs coordinates to AnnData with the raw and normalized layers,
#   - print value ranges plus the first five entries of every obs and var column.
for organism in census_new["census_data"].keys():
    print(f"\n## {organism}")
    experiment = census_new["census_data"][organism]
    rna = experiment.ms["RNA"]

    # Cell axis: obs domain versus rows of X["raw"].
    n_cells = experiment.obs.domain[0][1] + 1
    n_cell_x = rna.X["raw"].shape[0]
    print(f"Number of obs cells: {n_cells}")
    print(f"Number of X cells: {n_cell_x}")

    # Gene axis: var domain versus columns of X["raw"].
    n_genes = rna.var.domain[0][1] + 1
    n_gene_x = rna.X["raw"].shape[1]
    print(f"Number of var genes: {n_genes}")
    print(f"Number of X genes: {n_gene_x}")

    adata = cellxgene_census.get_anndata(
        census_new,
        organism=organism,
        measurement_name="RNA",
        X_name="raw",
        X_layers=["normalized"],
        obs_coords=list(range(1000)),
    )

    raw_values = adata.X.data
    normalized_values = adata.layers["normalized"].data
    print("Min raw X value: ", raw_values.min())
    print("Max raw X value: ", raw_values.max())
    print("Min normalized X value: ", normalized_values.min())
    print("Max normalized X value: ", normalized_values.max())
    print("N obs cols: ", adata.obs.shape[1])
    print("N var cols: ", adata.var.shape[1])
    print("Obs columns: ", ", ".join(adata.obs.columns.astype(str).tolist()))

    for obs_column in adata.obs.columns:
        first_elements = ",".join(adata.obs[obs_column].astype(str).tolist()[:5])
        print(f"  First 5 elements of obs column {obs_column}: {first_elements}")

    for var_column in adata.var.columns:
        first_elements = ",".join(adata.var[var_column].astype(str).tolist()[:5])
        print(f"  First 5 elements of var column {var_column}: {first_elements}")


## callithrix_jacchus
Number of obs cells: 2275451
Number of X cells: 2275451
Number of var genes: 28348
Number of X genes: 28348


  return dispatch(args[0].__class__)(*args, **kw)
  return dispatch(args[0].__class__)(*args, **kw)


Min raw X value:  1.0
Max raw X value:  301.0
Min normalized X value:  1.9187108e-05
Max normalized X value:  0.043024063
N obs cols:  28
N var cols:  7
Obs columns:  soma_joinid, dataset_id, assay, assay_ontology_term_id, cell_type, cell_type_ontology_term_id, development_stage, development_stage_ontology_term_id, disease, disease_ontology_term_id, donor_id, is_primary_data, observation_joinid, self_reported_ethnicity, self_reported_ethnicity_ontology_term_id, sex, sex_ontology_term_id, suspension_type, tissue, tissue_ontology_term_id, tissue_type, tissue_general, tissue_general_ontology_term_id, raw_sum, nnz, raw_mean_nnz, raw_variance_nnz, n_measured_vars
  First 5 elements of obs column soma_joinid: 0,1,2,3,4
  First 5 elements of obs column dataset_id: 787138a6-a421-444e-8ae5-537c1842e70c,787138a6-a421-444e-8ae5-537c1842e70c,787138a6-a421-444e-8ae5-537c1842e70c,787138a6-a421-444e-8ae5-537c1842e70c,787138a6-a421-444e-8ae5-537c1842e70c
  First 5 elements of obs column assay: 10x 3' 

  return dispatch(args[0].__class__)(*args, **kw)
  return dispatch(args[0].__class__)(*args, **kw)


Min raw X value:  1.0
Max raw X value:  11551.0
Min normalized X value:  1.8775929e-05
Max normalized X value:  0.91412354
N obs cols:  28
N var cols:  7
Obs columns:  soma_joinid, dataset_id, assay, assay_ontology_term_id, cell_type, cell_type_ontology_term_id, development_stage, development_stage_ontology_term_id, disease, disease_ontology_term_id, donor_id, is_primary_data, observation_joinid, self_reported_ethnicity, self_reported_ethnicity_ontology_term_id, sex, sex_ontology_term_id, suspension_type, tissue, tissue_ontology_term_id, tissue_type, tissue_general, tissue_general_ontology_term_id, raw_sum, nnz, raw_mean_nnz, raw_variance_nnz, n_measured_vars
  First 5 elements of obs column soma_joinid: 0,1,2,3,4
  First 5 elements of obs column dataset_id: d7476ae2-e320-4703-8304-da5c42627e71,d7476ae2-e320-4703-8304-da5c42627e71,d7476ae2-e320-4703-8304-da5c42627e71,d7476ae2-e320-4703-8304-da5c42627e71,d7476ae2-e320-4703-8304-da5c42627e71
  First 5 elements of obs column assay: 10x 3'

  return dispatch(args[0].__class__)(*args, **kw)
  return dispatch(args[0].__class__)(*args, **kw)


Min raw X value:  1.0
Max raw X value:  231.0
Min normalized X value:  0.00018556416
Max normalized X value:  0.5714264
N obs cols:  28
N var cols:  7
Obs columns:  soma_joinid, dataset_id, assay, assay_ontology_term_id, cell_type, cell_type_ontology_term_id, development_stage, development_stage_ontology_term_id, disease, disease_ontology_term_id, donor_id, is_primary_data, observation_joinid, self_reported_ethnicity, self_reported_ethnicity_ontology_term_id, sex, sex_ontology_term_id, suspension_type, tissue, tissue_ontology_term_id, tissue_type, tissue_general, tissue_general_ontology_term_id, raw_sum, nnz, raw_mean_nnz, raw_variance_nnz, n_measured_vars
  First 5 elements of obs column soma_joinid: 0,1,2,3,4
  First 5 elements of obs column dataset_id: 885d325f-b478-408b-b184-510b4db4f85f,885d325f-b478-408b-b184-510b4db4f85f,885d325f-b478-408b-b184-510b4db4f85f,885d325f-b478-408b-b184-510b4db4f85f,885d325f-b478-408b-b184-510b4db4f85f
  First 5 elements of obs column assay: sci-RNA-s

  return dispatch(args[0].__class__)(*args, **kw)
  return dispatch(args[0].__class__)(*args, **kw)


Min raw X value:  1.0
Max raw X value:  1499.0
Min normalized X value:  8.5236505e-05
Max normalized X value:  0.31488037
N obs cols:  28
N var cols:  7
Obs columns:  soma_joinid, dataset_id, assay, assay_ontology_term_id, cell_type, cell_type_ontology_term_id, development_stage, development_stage_ontology_term_id, disease, disease_ontology_term_id, donor_id, is_primary_data, observation_joinid, self_reported_ethnicity, self_reported_ethnicity_ontology_term_id, sex, sex_ontology_term_id, suspension_type, tissue, tissue_ontology_term_id, tissue_type, tissue_general, tissue_general_ontology_term_id, raw_sum, nnz, raw_mean_nnz, raw_variance_nnz, n_measured_vars
  First 5 elements of obs column soma_joinid: 0,1,2,3,4
  First 5 elements of obs column dataset_id: a810e511-c18b-4b2a-8fdf-98a6a0d433a7,a810e511-c18b-4b2a-8fdf-98a6a0d433a7,a810e511-c18b-4b2a-8fdf-98a6a0d433a7,a810e511-c18b-4b2a-8fdf-98a6a0d433a7,a810e511-c18b-4b2a-8fdf-98a6a0d433a7
  First 5 elements of obs column assay: 10x 3' 

  return dispatch(args[0].__class__)(*args, **kw)
  return dispatch(args[0].__class__)(*args, **kw)
