## Code to query the CELLxGENE (CxG) Census (currently only works on macOS)

## Install and import libraries

In [3]:
%pip install cellxgene-census

import cellxgene_census

Note: you may need to restart the kernel to use updated packages.


## Get counts for CxG

In [20]:
# List the column names available in the human `obs` (per-cell metadata) dataframe.
# The Census release is pinned explicitly (instead of relying on the moving
# "stable" alias) so re-running this notebook later reads the same data.

with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    obs = census["census_data"]["homo_sapiens"].obs

    # Get pyarrow schema
    schema = obs.schema

    # Get all column names (includes dimensions like "soma_joinid")
    column_names = schema.names

    print("Column names in obs:", column_names)

The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.


Column names in obs: ['soma_joinid', 'dataset_id', 'assay', 'assay_ontology_term_id', 'cell_type', 'cell_type_ontology_term_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'observation_joinid', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'tissue', 'tissue_ontology_term_id', 'tissue_type', 'tissue_general', 'tissue_general_ontology_term_id', 'raw_sum', 'nnz', 'raw_mean_nnz', 'raw_variance_nnz', 'n_measured_vars']


In [22]:
# Fetch per-cell metadata for all human cells with disease == 'normal'.
# Adapted from the example code in the documentation at
# https://chanzuckerberg.github.io/cellxgene-census/cellxgene_census_docsite_quick_start.html
# The Census release is pinned explicitly (instead of relying on the moving
# "stable" alias) so the counts derived from this frame stay reproducible.
with cellxgene_census.open_soma(census_version="2025-01-30") as census:

    # One chained expression so `cell_metadata` is only ever bound to the
    # final pandas.DataFrame, never to the intermediate reader or Arrow table.
    cell_metadata = (
        census["census_data"]["homo_sapiens"]
        .obs.read(  # reads the SOMADataFrame as a slice
            value_filter="disease=='normal'",
            column_names=[
                "assay",
                "cell_type",
                "tissue",
                "tissue_general",
                "suspension_type",
                "disease",
                "donor_id",
                "dataset_id",
            ],
        )
        .concat()  # concatenates results to a pyarrow.Table
        .to_pandas()  # converts to a pandas.DataFrame
    )

    print(cell_metadata)

The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.


              assay                                          cell_type  \
0         10x 3' v2                                        plasma cell   
1         10x 3' v2                                      mature B cell   
2         10x 3' v2                                        plasma cell   
3         10x 3' v2                                      mature B cell   
4         10x 3' v2                                      mature B cell   
...             ...                                                ...   
80776633  10x 3' v3  L2/3-6 intratelencephalic projecting glutamate...   
80776634  10x 3' v3               pvalb GABAergic cortical interneuron   
80776635  10x 3' v3  L2/3-6 intratelencephalic projecting glutamate...   
80776636  10x 3' v3  L2/3-6 intratelencephalic projecting glutamate...   
80776637  10x 3' v3                   astrocyte of the cerebral cortex   

                                  tissue tissue_general suspension_type  \
0                  caudate lobe of l

In [23]:
# Show which columns the query result contains
cell_metadata.columns

Index(['assay', 'cell_type', 'tissue', 'tissue_general', 'suspension_type',
       'disease', 'donor_id', 'dataset_id'],
      dtype='object')

In [18]:
# Number of distinct `tissue` values in the query result (a count, not the values themselves)
cell_metadata['tissue'].nunique()

325

In [19]:
# Number of distinct `donor_id` values in the query result
# NOTE(review): presumably donor_id is only unique within a dataset — confirm
# whether distinct (dataset_id, donor_id) pairs should be counted instead.
cell_metadata['donor_id'].nunique()

5549

In [24]:
# Number of distinct `dataset_id` values in the query result
cell_metadata['dataset_id'].nunique()

791

In [None]:
# Display the query result as the cell output
cell_metadata

Unnamed: 0,assay,cell_type,tissue,tissue_general,suspension_type,disease
0,10x 3' v2,plasma cell,caudate lobe of liver,liver,cell,normal
1,10x 3' v2,mature B cell,caudate lobe of liver,liver,cell,normal
2,10x 3' v2,plasma cell,caudate lobe of liver,liver,cell,normal
3,10x 3' v2,mature B cell,caudate lobe of liver,liver,cell,normal
4,10x 3' v2,mature B cell,caudate lobe of liver,liver,cell,normal
...,...,...,...,...,...,...
80776633,10x 3' v3,L2/3-6 intratelencephalic projecting glutamate...,dorsolateral prefrontal cortex,brain,nucleus,normal
80776634,10x 3' v3,pvalb GABAergic cortical interneuron,dorsolateral prefrontal cortex,brain,nucleus,normal
80776635,10x 3' v3,L2/3-6 intratelencephalic projecting glutamate...,dorsolateral prefrontal cortex,brain,nucleus,normal
80776636,10x 3' v3,L2/3-6 intratelencephalic projecting glutamate...,dorsolateral prefrontal cortex,brain,nucleus,normal
