# Querying data using the gget cellxgene module

gget is a free, open-source command-line tool and Python package that enables efficient querying of genomic databases. gget consists of a collection of separate but interoperable modules, each designed to facilitate one type of database querying in a single line of code.

In [2]:
import gget
gget.setup("cellxgene")

# Display all options of the cellxgene gget module
help(gget.cellxgene)

23:10:32 - INFO - Installing cellxgene-census package (requires pip).


23:10:33 - INFO - cellxgene_census installed succesfully.


Help on function cellxgene in module gget.gget_cellxgene:

cellxgene(species='homo_sapiens', gene=None, ensembl=False, column_names=['dataset_id', 'assay', 'suspension_type', 'sex', 'tissue_general', 'tissue', 'cell_type'], meta_only=False, tissue=None, cell_type=None, development_stage=None, disease=None, sex=None, is_primary_data=True, dataset_id=None, tissue_general_ontology_term_id=None, tissue_general=None, assay_ontology_term_id=None, assay=None, cell_type_ontology_term_id=None, development_stage_ontology_term_id=None, disease_ontology_term_id=None, donor_id=None, self_reported_ethnicity_ontology_term_id=None, self_reported_ethnicity=None, sex_ontology_term_id=None, suspension_type=None, tissue_ontology_term_id=None, census_version='stable', verbose=True, out=None)
    Query data from CZ CELLxGENE Discover (https://cellxgene.cziscience.com/) using the
    CZ CELLxGENE Discover Census (https://github.com/chanzuckerberg/cellxgene-census).

    NOTE: Querying large datasets requires

# Fetch an AnnData object by selecting gene(s), tissue(s) and cell type(s)

In [25]:
# Filter to only include the Microglia Supercluster by the dataset id == 700aed19-c16e-4ba8-9191-07da098a8626
adata = gget.cellxgene(
    dataset_id = "700aed19-c16e-4ba8-9191-07da098a8626",
    assay= "10X 3' v2",
    gene=["ENSG00000197405","ENSG00000171860","ENSG00000000971"],
    ensembl=True,  # Setting 'ensembl=True' here since the gene is passed as an Ensembl ID
)

adata

# Polls all the 'var' attributes in the dataset
# No 'obs' to report as we filtered by dataset_id

23:46:13 - INFO - Fetching AnnData object from CZ CELLxGENE Discover. This might take a few minutes...


The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.
  adata = cellxgene_census.get_anndata(


AnnData object with n_obs × n_vars = 0 × 3
    obs: 'dataset_id', 'assay', 'suspension_type', 'sex', 'tissue_general', 'tissue', 'cell_type', 'is_primary_data'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_type', 'feature_length', 'nnz', 'n_measured_obs'

In [None]:
df = gget.cellxgene(
    meta_only=False,
    census_version="2023-05-15",  # Specify Census version for reproducibility over time
    gene=["ENSG00000197405","ENSG00000171860","ENSG00000000971"],
    ensembl=True,  # Setting 'ensembl=True' here since the gene is passed as an Ensembl ID
    tissue="brain",
    species="mus_musculus",  # Let's switch up the species
    cell_type="microglial cell",
)

df

23:42:40 - INFO - Fetching metadata from CZ CELLxGENE Discover...


Unnamed: 0,dataset_id,assay,suspension_type,sex,tissue_general,tissue,cell_type,is_primary_data
0,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True
1,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True
2,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True
3,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True
4,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True
...,...,...,...,...,...,...,...,...
16604,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True
16605,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True
16606,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True
16607,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True


In [27]:
# Fetch AnnData object based on specified genes, tissue and cell types
adata = gget.cellxgene(
    gene=["ACE2", "ABCA1", "SLC5A1"], tissue="lung", cell_type=["mucus secreting cell", "neuroendocrine cell"]
)

adata

import scanpy as sc
sc.pl.dotplot(adata, adata.var["feature_name"].values, groupby="cell_type", gene_symbols="feature_name")

23:52:15 - INFO - Fetching AnnData object from CZ CELLxGENE Discover. This might take a few minutes...
The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.
  adata = cellxgene_census.get_anndata(


KeyboardInterrupt: 