# Querying data using the gget cellxgene module

gget is a free, open-source command-line tool and Python package that enables efficient querying of genomic databases. gget consists of a collection of separate but interoperable modules, each designed to facilitate one type of database querying in a single line of code.

In [2]:
import gget
# Install the cellxgene-census dependency (uses pip) required by gget.cellxgene.
gget.setup("cellxgene")

# Display all options of the cellxgene gget module
help(gget.cellxgene)

19:27:35 - INFO - Installing cellxgene-census package (requires pip).
19:27:37 - INFO - cellxgene_census installed succesfully.


Help on function cellxgene in module gget.gget_cellxgene:

cellxgene(species='homo_sapiens', gene=None, ensembl=False, column_names=['dataset_id', 'assay', 'suspension_type', 'sex', 'tissue_general', 'tissue', 'cell_type'], meta_only=False, tissue=None, cell_type=None, development_stage=None, disease=None, sex=None, is_primary_data=True, dataset_id=None, tissue_general_ontology_term_id=None, tissue_general=None, assay_ontology_term_id=None, assay=None, cell_type_ontology_term_id=None, development_stage_ontology_term_id=None, disease_ontology_term_id=None, donor_id=None, self_reported_ethnicity_ontology_term_id=None, self_reported_ethnicity=None, sex_ontology_term_id=None, suspension_type=None, tissue_ontology_term_id=None, census_version='stable', verbose=True, out=None)
    Query data from CZ CELLxGENE Discover (https://cellxgene.cziscience.com/) using the
    CZ CELLxGENE Discover Census (https://github.com/chanzuckerberg/cellxgene-census).

    NOTE: Querying large datasets requires

# Fetch an AnnData object by selecting gene(s), tissue(s) and cell type(s)

In [20]:
# Human query (species defaults to "homo_sapiens"): fetch expression of three
# genes in normal brain microglia profiled with the 10x 3' v3 assay.
adata_homo = gget.cellxgene(
    meta_only=False,  # fetch the AnnData object rather than metadata only
    ensembl=False,  # genes are passed as gene symbols; use ensembl=True for Ensembl IDs
    gene=["CFH", "C5AR1", "C3AR1"],
    # To query by Ensembl ID instead, set ensembl=True and pass e.g.:
    # gene=["ENSG00000197405", "ENSG00000171860", "ENSG00000000971"],
    assay=["10x 3' v3"],
    tissue="brain",
    cell_type="microglial cell",
    disease="normal",
    # Pin the Census release so re-running the notebook returns the same data.
    # "stable" is a moving alias; it resolved to 2025-01-30 when this was run.
    census_version="2025-01-30",
)

adata_homo

20:45:09 - INFO - Fetching AnnData object from CZ CELLxGENE Discover. This might take a few minutes...
The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.
  adata = cellxgene_census.get_anndata(


AnnData object with n_obs × n_vars = 12 × 3
    obs: 'dataset_id', 'assay', 'suspension_type', 'sex', 'tissue_general', 'tissue', 'cell_type', 'is_primary_data', 'disease'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_type', 'feature_length', 'nnz', 'n_measured_obs'

In [30]:
# Mouse query: same tissue / cell type / disease filters as the human query,
# but without gene or assay filters, so every gene and every assay is returned.
adata_mus = gget.cellxgene(
    species="mus_musculus",
    meta_only=False,  # fetch the AnnData object rather than metadata only
    ensembl=False,  # no `gene` filter is passed here; symbols would be expected if one is added
    tissue="brain",
    cell_type="microglial cell",
    disease="normal",
    # Pin the Census release so re-running the notebook returns the same data.
    # "stable" is a moving alias; it resolved to 2025-01-30 when this was run.
    census_version="2025-01-30",
)

adata_mus

21:07:28 - INFO - Fetching AnnData object from CZ CELLxGENE Discover. This might take a few minutes...
The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.
  adata = cellxgene_census.get_anndata(


AnnData object with n_obs × n_vars = 16609 × 52483
    obs: 'dataset_id', 'assay', 'suspension_type', 'sex', 'tissue_general', 'tissue', 'cell_type', 'is_primary_data', 'disease'
    var: 'soma_joinid', 'feature_id', 'feature_name', 'feature_type', 'feature_length', 'nnz', 'n_measured_obs'

In [31]:
# Per-cell metadata table: one row per observation (cell) in the mouse query result.
adata_mus.obs

Unnamed: 0,dataset_id,assay,suspension_type,sex,tissue_general,tissue,cell_type,is_primary_data,disease
0,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True,normal
1,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True,normal
2,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True,normal
3,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True,normal
4,3bbb6cf9-72b9-41be-b568-656de6eb18b5,10x 3' v3,nucleus,female,brain,brain,microglial cell,True,normal
...,...,...,...,...,...,...,...,...,...
16604,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True,normal
16605,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True,normal
16606,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True,normal
16607,98e5ea9f-16d6-47ec-a529-686e76515e39,Smart-seq2,cell,male,brain,brain,microglial cell,True,normal


In [32]:
# Per-gene annotation table: one row per variable (feature) in the mouse query result.
adata_mus.var

Unnamed: 0,soma_joinid,feature_id,feature_name,feature_type,feature_length,nnz,n_measured_obs
0,0,ENSMUSG00000021124,Vti1b,protein_coding,841,7020258,42776863
1,1,ENSMUSG00000039377,Hlx,protein_coding,1495,481426,42641734
2,2,ENSMUSG00000085604,Dhx58os,lncRNA,1594,117050,40553677
3,3,ENSMUSG00000085125,Gm16070,lncRNA,2146,2280673,41179548
4,4,ENSMUSG00000029439,Sfswap,protein_coding,692,17553003,42776863
...,...,...,...,...,...,...,...
52478,52478,ENSMUSG00002076971,Snord100,snoRNA,74,3653,73347
52479,52479,ENSMUSG00002076818,Snord14e,snoRNA,90,3579,73347
52480,52480,ENSMUSG00002076766,Snord33,snoRNA,84,612,73347
52481,52481,ENSMUSG00000118645,Gm55062,lncRNA,480,0,301796
