In [2]:
# Prepare the environment
import gget
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
# Import `savez` from NumPy's public top-level namespace. The previous
# `numpy.lib.npyio` path is an implementation submodule that NumPy 2.x trims
# down, which makes this import cell fail on newer NumPy versions.
from numpy import savez

In [None]:

# Set up gget's cellxgene module before the first query.
gget.setup("cellxgene")

# Uncomment the following line to see the documentation
# help(gget.cellxgene)

# Query the CZ CELLxGENE Census through gget.
# Filters:
#   species:        mus_musculus
#   sex:            male, female
#   dataset_id:     the nine datasets listed in the call below
#   disease:        normal
#   tissue_general: brain
# No `gene` argument is passed, so the result is not restricted to specific
# genes.
#
# > Query Time: 3m 30s
#
# NOTE(review): `meta_only` is left at its default, which per the gget docs
# returns an AnnData (expression matrix + metadata). If only the per-cell
# metadata is needed, `meta_only=True` returns just that table - confirm intent.
# NOTE(review): this cell has no execution count and the saved outputs in this
# notebook list dataset_ids that are not in this filter (e.g. d7291f04-...), so
# those outputs predate this query; re-run the notebook to refresh them.
adata = gget.cellxgene(
    ensembl=True,
    species='mus_musculus',
    sex=['male', 'female'],
    dataset_id=[
        'e0ed3c55-aff6-4bb7-b6ff-98a2d90b890c',
        '35081d47-99bf-4507-9541-735428df9a9f',
        'dbb4e1ed-d820-4e83-981f-88ef7eb55a35',
        '79a2344d-eddd-45b1-b376-39eddfab1899',
        '1229ecc2-b067-4664-91da-0251aec31574',
        '72eb2332-b308-4014-8d25-95233a9aff1e',
        '3bbb6cf9-72b9-41be-b568-656de6eb18b5',
        '98e5ea9f-16d6-47ec-a529-686e76515e39',
        '58b01044-c5e5-4b0f-8a2d-6ebf951e01ff',
    ],
    disease='normal',
    tissue_general='brain',
)

In [4]:
# Display the query result as the cell's output (bare last expression).
adata

Unnamed: 0,dataset_id,assay,suspension_type,sex,tissue_general,tissue,cell_type,is_primary_data,disease
0,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
1,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
2,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
3,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
4,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
...,...,...,...,...,...,...,...,...,...
3391856,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,primary motor cortex,sst chodl GABAergic cortical interneuron,True,normal
3391857,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,primary motor cortex,sst chodl GABAergic cortical interneuron,True,normal
3391858,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,primary motor cortex,L2/3-6 intratelencephalic projecting glutamate...,True,normal
3391859,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,medial orbital frontal cortex,sst GABAergic cortical interneuron,True,normal


In [5]:
# Print every 'dataset_id' value that occurs at least once, with its cell count.
# The > 0 filter drops zero-count entries, which value_counts() reports for
# unused categories (presumably the column is categorical - confirm).
#
# The per-cell metadata is `adata.obs` when `adata` is an AnnData (what
# gget.cellxgene returns by default) and `adata` itself when it is already the
# metadata DataFrame (meta_only=True), so both shapes are accepted here.
counts = getattr(adata, 'obs', adata)['dataset_id'].value_counts()
print(counts[counts > 0])

dataset_id
d7291f04-fbbb-4d65-990a-f01fa44e915b    1169213
e0ed3c55-aff6-4bb7-b6ff-98a2d90b890c     611034
35081d47-99bf-4507-9541-735428df9a9f     406187
dbb4e1ed-d820-4e83-981f-88ef7eb55a35     279538
79a2344d-eddd-45b1-b376-39eddfab1899     249071
1229ecc2-b067-4664-91da-0251aec31574     205722
72eb2332-b308-4014-8d25-95233a9aff1e     109477
12990215-ad1a-4106-8536-7388327a616f     107742
3bbb6cf9-72b9-41be-b568-656de6eb18b5      79667
812fa7bd-db15-4357-b2c9-efc8e1eb0450      66979
0e1c7067-8833-4f54-8847-ad7d33d09c10      38807
98e5ea9f-16d6-47ec-a529-686e76515e39      20666
a539c7af-fb65-44b1-8812-000b097eac99      15601
58b01044-c5e5-4b0f-8a2d-6ebf951e01ff      15019
5dec4249-8459-4df0-8998-37193135754c       9313
6347cc90-f284-41d8-a131-db4a37bd796f       6051
3a15ab1c-c36c-4842-9a3e-47e6ffd0ba6f       1679
28c696bb-9549-434b-9340-dc745a846f9a         95
Name: count, dtype: int64


In [13]:
# Print every 'tissue' value that occurs at least once, with its cell count.
#
# The per-cell metadata is `adata.obs` when `adata` is an AnnData (what
# gget.cellxgene returns by default) and `adata` itself when it is already the
# metadata DataFrame (meta_only=True), so both shapes are accepted here.
tissue_counts = getattr(adata, 'obs', adata)['tissue'].value_counts()
print(tissue_counts[tissue_counts > 0])

tissue
cerebellum                              723345
primary motor cortex                    530134
hypothalamus                            384925
cerebral cortex                         352271
diencephalon                            249071
visual cortex                           236082
brain                                   113008
hippocampal formation                    89099
primary somatosensory cortex             86715
auditory cortex                          72549
medial orbital frontal cortex            72454
retrosplenial granular cortex            65710
anterior cingulate cortex                63318
entorhinal cortex                        62216
temporal cortex                          61332
subicular complex                        54666
agranular insular cortex                 53956
primary visual cortex                    47108
parietal cortex                          47094
frontal lobe                             34331
posterior parietal association areas     28873
latera

In [16]:
# Print every 'cell_type' value that occurs at least once, with its cell count.
#
# The per-cell metadata is `adata.obs` when `adata` is an AnnData (what
# gget.cellxgene returns by default) and `adata` itself when it is already the
# metadata DataFrame (meta_only=True), so both shapes are accepted here.
counts = getattr(adata, 'obs', adata)['cell_type'].value_counts()

# Lift the row limit for this printout only. Setting display.max_rows to None
# globally would also apply to later cells that display multi-million-row
# tables, which would then be rendered in full.
with pd.option_context('display.max_rows', None):
    print(counts[counts > 0])

cell_type
cerebellar granule cell                                                                501037
glutamatergic neuron                                                                   397512
L4/5 intratelencephalic projecting glutamatergic neuron of the primary motor cortex    347109
neuron                                                                                 254734
L2/3-6 intratelencephalic projecting glutamatergic neuron                              198449
L6 corticothalamic-projecting glutamatergic cortical neuron                            176719
oligodendrocyte                                                                        142462
GABAergic neuron                                                                       136771
astrocyte                                                                              117001
L6 intratelencephalic projecting glutamatergic neuron of the primary motor cortex       83902
oligodendrocyte precursor cell                    

In [3]:
# Load the saved .h5ad file from disk with scanpy.
# NOTE(review): this is an absolute, machine-specific path; consider deriving
# it from a configurable data directory so the notebook runs elsewhere.
h5ad_path = "/Mm Census gget Query/Data Files/Complement Only_Mouse_Census_Brain.h5ad"
pdata = sc.read_h5ad(h5ad_path)

In [5]:
# Display the `obs` table of the loaded object as the cell's output.
pdata.obs

Unnamed: 0,dataset_id,assay,suspension_type,sex,tissue_general,tissue,cell_type,is_primary_data,disease
0,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
1,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
2,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
3,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
4,6347cc90-f284-41d8-a131-db4a37bd796f,10x 3' v3,cell,male,brain,brain gray matter,oligodendrocyte,True,normal
...,...,...,...,...,...,...,...,...,...
3506065,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,primary motor cortex,sst chodl GABAergic cortical interneuron,True,normal
3506066,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,primary motor cortex,sst chodl GABAergic cortical interneuron,True,normal
3506067,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,primary motor cortex,L2/3-6 intratelencephalic projecting glutamate...,True,normal
3506068,d7291f04-fbbb-4d65-990a-f01fa44e915b,10x 3' v2,cell,male,brain,medial orbital frontal cortex,sst GABAergic cortical interneuron,True,normal
