In [19]:
# Prepare the environment
import gget
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
from numpy.lib.npyio import savez

gget.setup("cellxgene")

# Uncomment the following line to see the documentation
help(gget.cellxgene)

'''
Query the cellxgene database for the genes of interest
Filters:
disease: normal
tissue_general: brain

> Query Time: 3m 30s
'''

adata = gget.cellxgene(
    ensembl=True,
    verbose=True,
    meta_only=True,
    species='mus_musculus',
    sex=['male', 'female'],
    disease='normal',
    tissue_general='brain',

)

14:50:56 - INFO - Installing cellxgene-census package (requires pip).
14:50:58 - INFO - cellxgene_census installed succesfully.
14:50:58 - INFO - Fetching metadata from CZ CELLxGENE Discover...


Help on function cellxgene in module gget.gget_cellxgene:

cellxgene(species='homo_sapiens', gene=None, ensembl=False, column_names=['dataset_id', 'assay', 'suspension_type', 'sex', 'tissue_general', 'tissue', 'cell_type'], meta_only=False, tissue=None, cell_type=None, development_stage=None, disease=None, sex=None, is_primary_data=True, dataset_id=None, tissue_general_ontology_term_id=None, tissue_general=None, assay_ontology_term_id=None, assay=None, cell_type_ontology_term_id=None, development_stage_ontology_term_id=None, disease_ontology_term_id=None, donor_id=None, self_reported_ethnicity_ontology_term_id=None, self_reported_ethnicity=None, sex_ontology_term_id=None, suspension_type=None, tissue_ontology_term_id=None, census_version='stable', verbose=True, out=None)
    Query data from CZ CELLxGENE Discover (https://cellxgene.cziscience.com/) using the
    CZ CELLxGENE Discover Census (https://github.com/chanzuckerberg/cellxgene-census).

    NOTE: Querying large datasets requires

The "stable" release is currently 2025-01-30. Specify 'census_version="2025-01-30"' in future calls to open_soma() to ensure data consistency.


In [20]:
adata

In [None]:
# Print all unique values in the column 'assay' with non-zero counts
counts = adata['assay'].value_counts()
print(counts[counts > 0])

In [13]:
# Print all unique values in the column 'tissue' with non-zero counts
tissue_counts = adata['tissue'].value_counts()
print(tissue_counts[tissue_counts > 0])

tissue
cerebellum                              723345
primary motor cortex                    530134
hypothalamus                            384925
cerebral cortex                         352271
diencephalon                            249071
visual cortex                           236082
brain                                   113008
hippocampal formation                    89099
primary somatosensory cortex             86715
auditory cortex                          72549
medial orbital frontal cortex            72454
retrosplenial granular cortex            65710
anterior cingulate cortex                63318
entorhinal cortex                        62216
temporal cortex                          61332
subicular complex                        54666
agranular insular cortex                 53956
primary visual cortex                    47108
parietal cortex                          47094
frontal lobe                             34331
posterior parietal association areas     28873
latera

In [16]:
# Ensure all rows are displayed without truncation
pd.set_option('display.max_rows', None)

# Print all unique values in the column 'cell_type' with non-zero counts
counts = adata['cell_type'].value_counts()
print(counts[counts > 0])

cell_type
cerebellar granule cell                                                                501037
glutamatergic neuron                                                                   397512
L4/5 intratelencephalic projecting glutamatergic neuron of the primary motor cortex    347109
neuron                                                                                 254734
L2/3-6 intratelencephalic projecting glutamatergic neuron                              198449
L6 corticothalamic-projecting glutamatergic cortical neuron                            176719
oligodendrocyte                                                                        142462
GABAergic neuron                                                                       136771
astrocyte                                                                              117001
L6 intratelencephalic projecting glutamatergic neuron of the primary motor cortex       83902
oligodendrocyte precursor cell                    