In [1]:
import cellxgene_census
import pandas as pd

In [47]:
with cellxgene_census.open_soma() as census:
    
    # Reads SOMADataFrame as a slice
    cell_metadata = census["census_data"]["homo_sapiens"].obs.read(
        value_filter = "tissue_general in ['eye'] "
        "and disease == 'normal' "
        "and is_primary_data == True"

    )
    
    # Concatenates results to pyarrow.Table
    cell_metadata = cell_metadata.concat()
    
    # Converts to pandas.DataFrame
    cell_metadata = cell_metadata.to_pandas()
    
    print(cell_metadata)

The "stable" Census version is not yet available. Using "latest" Census version instead.
The "latest" release is currently 2023-05-08. Specify 'census_version="2023-05-08"' in future calls to open_soma() to ensure data consistency.


        soma_joinid                            dataset_id          assay   
0           6870420  f8c77961-67a7-4161-b8c2-61c3f917b54f      10x 3' v3  \
1           6870421  f8c77961-67a7-4161-b8c2-61c3f917b54f      10x 3' v3   
2           6870422  f8c77961-67a7-4161-b8c2-61c3f917b54f      10x 3' v3   
3           6870423  f8c77961-67a7-4161-b8c2-61c3f917b54f      10x 3' v3   
4           6870424  f8c77961-67a7-4161-b8c2-61c3f917b54f      10x 3' v3   
...             ...                                   ...            ...   
755159     50130808  2adb1f8a-a6b1-4909-8ee8-484814e2d4bf  microwell-seq   
755160     50130809  2adb1f8a-a6b1-4909-8ee8-484814e2d4bf  microwell-seq   
755161     50130810  2adb1f8a-a6b1-4909-8ee8-484814e2d4bf  microwell-seq   
755162     50130811  2adb1f8a-a6b1-4909-8ee8-484814e2d4bf  microwell-seq   
755163     50130812  2adb1f8a-a6b1-4909-8ee8-484814e2d4bf  microwell-seq   

       assay_ontology_term_id      cell_type cell_type_ontology_term_id   
0           

In [81]:
for i in list(set(cell_metadata['dataset_id'])):
    print(i)
    print(cell_metadata[cell_metadata['dataset_id'] == i]['cell_type'].value_counts())
    print('\n\n')

53d208b0-2cfd-4366-9866-c3c6114081bc
cell_type
conjunctival epithelial cell             4587
corneal epithelial cell                  1488
eye photoreceptor cell                    891
keratocyte                                595
retinal blood vessel endothelial cell     466
Mueller cell                              360
stromal cell                              325
T cell                                    237
microglial cell                           209
radial glial cell                         195
dendritic cell                            182
melanocyte                                144
stem cell                                 144
macrophage                                108
endothelial cell                          105
B cell                                    102
fibroblast                                 95
surface ectodermal cell                    54
epithelial cell of lacrimal sac            52
retinal pigment epithelial cell            49
plasma cell                      

cell_type
myeloid cell    395
Name: count, dtype: int64



4e38f019-f8e8-44ae-ad32-ba500de6f64c
cell_type
ON-bipolar cell     10621
OFF-bipolar cell     5919
Name: count, dtype: int64



389bfbb9-8ef1-4582-8c41-410131c3d0eb
cell_type
retinal rod cell    62509
Name: count, dtype: int64



2f6a20f1-173d-4b8d-860b-c47ffea120fa
cell_type
retina horizontal cell    2868
Name: count, dtype: int64



c3d381b2-3104-444e-8ad5-d3524407bbb6
cell_type
retina horizontal cell    1834
native cell                 41
Name: count, dtype: int64



8623d55f-d91c-41c2-ae68-ed2072fd268d
cell_type
ON-bipolar cell     14803
OFF-bipolar cell    11105
Name: count, dtype: int64



ab5b2256-b209-48b5-a801-c5d9a8c0de56
cell_type
Mueller cell                       5024
retinal pigment epithelial cell    1643
astrocyte                           508
myeloid cell                        395
pericyte                            188
melanocyte                          186
T cell                               87
endothelial

In [48]:
# pull doi
doi_uniq = []
for i in cell_metadata['dataset_id']:
    doi_uniq.append(i)
doi_uniq = list(set(doi_uniq))

In [49]:
census_datasets = census["census_info"]["datasets"].read().concat().to_pandas()
census_datasets = census_datasets.set_index("soma_joinid")
eye_sets = census_datasets[census_datasets['dataset_id'].isin(doi_uniq)]

In [50]:
set(eye_sets['collection_doi'].tolist())

{'10.1016/j.cell.2020.08.013',
 '10.1016/j.celrep.2019.12.082',
 '10.1038/s41467-019-12780-8',
 '10.1038/s41586-020-2157-4',
 '10.1038/s41598-020-66092-9',
 '10.1073/pnas.2200914119',
 '10.1126/science.aba7721',
 '10.1126/science.abl4896',
 '10.15252/embj.2018100811',
 '10.2139/ssrn.3991078'}

In [56]:
from habanero import Crossref
cr = Crossref()

In [84]:
for doi in set(eye_sets['collection_doi'].tolist()):
    ref = cr.works(ids = doi)
    print(doi)
    print(eye_sets[eye_sets['collection_doi'] == doi]['dataset_id'])
    print(ref['message']['title'])
    print(ref['message']['author'][0])
    print('\n')

10.1038/s41586-020-2157-4
soma_joinid
546    2adb1f8a-a6b1-4909-8ee8-484814e2d4bf
Name: dataset_id, dtype: object
['Construction of a human cell landscape at single-cell level']
{'given': 'Xiaoping', 'family': 'Han', 'sequence': 'first', 'affiliation': []}


10.1073/pnas.2200914119
soma_joinid
62    9ff99bf8-2524-4ab5-ab6e-4bc218e4a449
64    489318a0-24c3-4f5c-b105-f084ed0ea026
Name: dataset_id, dtype: object
['Cell atlas of the human ocular anterior segment: Tissue-specific and shared cell types']
{'given': 'Tavé', 'family': 'van Zyl', 'sequence': 'first', 'affiliation': [{'name': 'Department of Ophthalmology, Harvard Medical School, Boston, MA 02115'}, {'name': 'Center for Brain Science, Harvard University, Cambridge, MA 02138'}, {'name': 'Department of Molecular and Cellular Biology, Harvard University, Cambridge, MA 02138'}]}


10.15252/embj.2018100811
soma_joinid
368    d5c67a4e-a8d9-456d-a273-fa01adb1b308
Name: dataset_id, dtype: object
['A single‐cell transcriptome atlas of the 

In [83]:
eye_sets

Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_id,dataset_title,dataset_h5ad_path,dataset_total_cell_count
soma_joinid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
35,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,f8c77961-67a7-4161-b8c2-61c3f917b54f,Amacrine cells of human eye,f8c77961-67a7-4161-b8c2-61c3f917b54f.h5ad,6101
36,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,e6dad530-418b-47f9-af6e-472e56a7b314,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055
37,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,de17ac25-550a-4018-be75-bbb485a0636e,Myeloid cells of human eye,de17ac25-550a-4018-be75-bbb485a0636e.h5ad,395
38,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,d95ab381-2b7c-4885-b168-0097ed4e397f,Cone cells of human eye,d95ab381-2b7c-4885-b168-0097ed4e397f.h5ad,1378
39,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,cec9f9a5-8832-437d-99af-fb8237cde54b,Retinal ganglion cells of human eye,cec9f9a5-8832-437d-99af-fb8237cde54b.h5ad,1777
40,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,c3d381b2-3104-444e-8ad5-d3524407bbb6,Horizontal cells of human eye,c3d381b2-3104-444e-8ad5-d3524407bbb6.h5ad,1875
41,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,ab5b2256-b209-48b5-a801-c5d9a8c0de56,Non-neuronal cells of human eye,ab5b2256-b209-48b5-a801-c5d9a8c0de56.h5ad,8193
42,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,9cfee1e6-b24f-433d-a269-f01841655d6a,Retinal pigment epithelial cells of human eye,9cfee1e6-b24f-433d-a269-f01841655d6a.h5ad,1635
43,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,4e38f019-f8e8-44ae-ad32-ba500de6f64c,Bipolar cells of human eye,4e38f019-f8e8-44ae-ad32-ba500de6f64c.h5ad,16540
44,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,389bfbb9-8ef1-4582-8c41-410131c3d0eb,Rod cells of human eye,389bfbb9-8ef1-4582-8c41-410131c3d0eb.h5ad,62509


In [51]:
with cellxgene_census.open_soma() as census:
	adata = cellxgene_census.get_anndata(
    	census, 
		organism="Homo sapiens", 
		obs_value_filter=  "tissue_general in ['eye'] "
        "and disease == 'normal' "
        "and is_primary_data == True"
	)

The "stable" Census version is not yet available. Using "latest" Census version instead.
The "latest" release is currently 2023-05-08. Specify 'census_version="2023-05-08"' in future calls to open_soma() to ensure data consistency.


In [54]:
adata.write_h5ad('2023_05_08_census_eye_normal_primary.homo.h5ad')