# find duplicate cells

This notebook uses the X counts to search for potentially duplicate cells in the Census.
Based upon that potential equality, it provides a variety of reports - currently it reports:
* duplicate primary: cells with more than one copy marked `is_primary_data==True`
* missing primary: report cells with zero copies marked `is_primary_data==True`

The cell equality method is simplisitic - a hash of the cell counts (expression) vector across all genes.
This will only detect exact copies.  Future enhancements may include a fuzzier definition of equality.

Caveat: CELLxGENE Census internal QC tool

In [1]:
import math
import xxhash  # https://github.com/ifduyue/python-xxhash
from typing import Literal

import ipywidgets

import cellxgene_census
import numpy as np
import pandas as pd

from cellxgene_census.experimental.util import X_sparse_iter


"""
Configuration - pick the Census version and experiment to utilize
"""
census_version: str = (
    "latest"  # which Census version? Good options: latest, stable, or YYYY-MM-DD
)
experiment: Literal[
    "mus_musculus", "homo_sapiens"
] = "homo_sapiens"  # Which experiment? Pick one.

In [2]:
"""
Open Census, grab various data used for reports, and then calculate hashes for all cells.
"""

row_stride = 100_000  # row partition

with cellxgene_census.open_soma(census_version=census_version) as census:
    # Used for reporting
    datasets_df = (
        census["census_info"]["datasets"]
        .read()
        .concat()
        .to_pandas()
        .drop(columns=["soma_joinid"])
    )

    # Calculate all per-cell hashes
    exp = census["census_data"][experiment]
    with exp.axis_query(measurement_name="RNA") as query:
        # Notebook progress bar
        display(
            prog := ipywidgets.IntProgress(
                value=0,
                min=0,
                max=math.ceil(query.n_obs / row_stride),
                description="Hashing:",
            )
        )

        obs_df = (
            query.obs(column_names=["dataset_id", "soma_joinid", "is_primary_data"])
            .concat()
            .to_pandas()
            .set_index("soma_joinid")
        )
        hashes = pd.Series(data=np.full((len(obs_df),), ""), index=obs_df.index)

        for (obs_soma_joinids_chunk, _), X_chunk in X_sparse_iter(
            query, X_name="raw", stride=row_stride
        ):
            for r, row_soma_joinid in enumerate(obs_soma_joinids_chunk):
                X_row = X_chunk.getrow(r)
                hash = xxhash.xxh3_128(X_row.data.tobytes())
                hash.update(X_row.indptr)
                hashes.at[row_soma_joinid] = hash.hexdigest()

            prog.value += 1

        obs_df.insert(0, "hash", hashes)

The "latest" release is currently 2023-06-20. Specify 'census_version="2023-06-20"' in future calls to open_soma() to ensure data consistency.


IntProgress(value=0, description='Hashing:', max=573)

In [3]:
"""Compute a summary pivot on the hash and is_primary_data"""
hash_primary_pivot = (
    obs_df.value_counts(subset=["hash", "is_primary_data"])
    .to_frame()
    .reset_index()
    .pivot_table(index="hash", columns="is_primary_data", values="count", fill_value=0)
)
hash_primary_pivot

is_primary_data,False,True
hash,Unnamed: 1_level_1,Unnamed: 2_level_1
0000002d686d29b69502381275383e84,2,1
000000e8c73a5f138a296e7665d83d74,0,1
000001f315f757d91b36a3889b5f8313,0,1
0000024c59f0cfc3f81d49ab01846aad,1,0
00000282e87c16e700210ec29b696872,2,1
...,...,...
fffffd03071d0ece493eac48e405438b,0,1
fffffd05a6a990a812819581161de333,1,1
fffffd0dd830ce617a60bbfa7315b9da,0,1
fffffd3a536dccd6b0fae2ac48bd2ebc,2,1


In [4]:
"""
Case 1 - hashes lacking a cell marked primary. A hash/vector exists, but has no corresponding obs DataFrame record with is_primary_data == True.
"""
obs_missing_primary = (
    obs_df.reset_index()
    .set_index("hash")
    .loc[hash_primary_pivot[hash_primary_pivot.loc[:, True] == 0].index]
    .reset_index()
    .set_index("soma_joinid")
)
obs_missing_primary

Unnamed: 0_level_0,hash,dataset_id,is_primary_data
soma_joinid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7140178,0000024c59f0cfc3f81d49ab01846aad,9f222629-9e39-47d0-b83f-e08d610c7479,False
54711262,0000069e9322c5f54da7a90d10be4b00,572f3f3e-d3e4-4d13-8e2b-88215e508481,False
40024756,0000075334599b48fc57456ee324173a,066943a2-fdac-4b29-b348-40cede398e4e,False
47467096,0000075334599b48fc57456ee324173a,f72958f5-7f42-4ebb-98da-445b0c6de516,False
33031351,000008d404e59342106d535498dee929,1252c5fb-945f-42d6-b1a8-8a3bd864384b,False
...,...,...,...
32109753,fffff6bf5b0d4625e23b01301b239317,83b5e943-a1d5-4164-b3f2-f7a37f01b524,False
29819042,fffff9e56b8512de26c244c085cd0307,c888b684-6c51-431f-972a-6c963044cef0,False
45637022,fffffac078fa7c74d31146b63d1fe0d0,88c483bf-477d-4be5-90d3-4fb101dd601f,False
45663617,fffffac078fa7c74d31146b63d1fe0d0,8b2e5453-faf7-46ea-9073-aea69b283cb7,False


In [5]:
"""
Datasets containing a hash (cell) that lacks any copies with is_primary_data==True
I.e., all copies are marked "not primary"
"""
datasets_with_missing_primary = (
    obs_missing_primary.value_counts(subset=["dataset_id"])
    .to_frame()
    .rename(columns={"count": "dup_cell_count"})
    .join(datasets_df.set_index("dataset_id"), on="dataset_id")
    .reset_index()
)
datasets_with_missing_primary

Unnamed: 0,dataset_id,dup_cell_count,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
0,9f222629-9e39-47d0-b83f-e08d610c7479,1096207,6f6d381a-7701-4781-935c-db10d30de293,The integrated Human Lung Cell Atlas,10.1038/s41591-023-02327-2,An integrated cell atlas of the human lung in ...,9f222629-9e39-47d0-b83f-e08d610c7479.h5ad,2282447
1,066943a2-fdac-4b29-b348-40cede398e4e,584944,6f6d381a-7701-4781-935c-db10d30de293,The integrated Human Lung Cell Atlas,10.1038/s41591-023-02327-2,An integrated cell atlas of the human lung in ...,066943a2-fdac-4b29-b348-40cede398e4e.h5ad,584944
2,f72958f5-7f42-4ebb-98da-445b0c6de516,584884,2f75d249-1bec-459b-bf2b-b86221097ced,Azimuth meta-analysis of human scRNA-seq datasets,,Human - Lung v2 (HLCA),f72958f5-7f42-4ebb-98da-445b0c6de516.h5ad,584884
3,fd072bc3-2dfb-46f8-b4e3-467cb3223182,562562,b1a879f6-5638-48d3-8f64-f6592c1b1561,Mapping the developing human immune system acr...,10.1126/science.abo0510,Full dataset of single-cell RNA-seq profiles f...,fd072bc3-2dfb-46f8-b4e3-467cb3223182.h5ad,908046
4,48101fa2-1a63-4514-b892-53ea1d3a8657,397255,b1a879f6-5638-48d3-8f64-f6592c1b1561,Mapping the developing human immune system acr...,10.1126/science.abo0510,HSC/immune cells (all hematopoietic-derived ce...,48101fa2-1a63-4514-b892-53ea1d3a8657.h5ad,589390
...,...,...,...,...,...,...,...,...
147,98113e7e-f586-4065-a26a-60aa702f8d1c,2,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Supercluster: Deep-layer intratelencephalic,98113e7e-f586-4065-a26a-60aa702f8d1c.h5ad,228467
148,e6b2ce27-681b-4409-a053-2681875936e5,1,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Supercluster: Eccentric medium spiny neuron,e6b2ce27-681b-4409-a053-2681875936e5.h5ad,40144
149,2190bd4d-3be0-4bf7-8ca8-8d6f71228936,1,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Supercluster: Midbrain-derived inhibitory,2190bd4d-3be0-4bf7-8ca8-8d6f71228936.h5ad,126782
150,04a23820-ffa8-4be5-9f65-64db15631d1e,1,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Supercluster: Upper rhombic lip,04a23820-ffa8-4be5-9f65-64db15631d1e.h5ad,137162


In [6]:
"""
Case 2 - hashes with more than one cell marked is_primary_data == True
"""
obs_duplicate_primary = (
    obs_df.reset_index()
    .set_index("hash")
    .loc[hash_primary_pivot[hash_primary_pivot.loc[:, True] > 1].index]
    .reset_index()
    .set_index("soma_joinid")
)
obs_duplicate_primary = obs_duplicate_primary[
    obs_duplicate_primary.is_primary_data == True
]
obs_duplicate_primary

Unnamed: 0_level_0,hash,dataset_id,is_primary_data
soma_joinid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
13983736,0000bf62f69ca9af0ddb06c35ac8a5e4,e6dad530-418b-47f9-af6e-472e56a7b314,True
14082223,0000bf62f69ca9af0ddb06c35ac8a5e4,389bfbb9-8ef1-4582-8c41-410131c3d0eb,True
13991172,0000d427423f870a79c05aaf8e4af4b9,e6dad530-418b-47f9-af6e-472e56a7b314,True
14085698,0000d427423f870a79c05aaf8e4af4b9,389bfbb9-8ef1-4582-8c41-410131c3d0eb,True
20489088,0000de56bf406faf7c29caa8dad3df61,715327a6-7978-4896-ba91-69d6b04dbbfb,True
...,...,...,...
14057182,ffff659a02a704e201e7a22654f75a5a,389bfbb9-8ef1-4582-8c41-410131c3d0eb,True
24049304,ffff844c31a8c0cb2e1c0f4703ba66b5,a43aa46b-bd16-47fe-bc3e-19a052624e79,True
42060013,ffff844c31a8c0cb2e1c0f4703ba66b5,ddb22b3d-a75c-4dd1-9730-dff7fc8ca530,True
13966367,ffffc9c6d73e4ad4efdc30eb864b7b41,e6dad530-418b-47f9-af6e-472e56a7b314,True


In [7]:
"""
Datasets with duplicate cells marked "primary"
"""
datasets_with_dup_primary = (
    obs_duplicate_primary.value_counts(subset=["dataset_id"])
    .to_frame()
    .rename(columns={"count": "dup_cell_count"})
    .join(datasets_df.set_index("dataset_id"), on="dataset_id")
    .reset_index()
)
datasets_with_dup_primary

Unnamed: 0,dataset_id,dup_cell_count,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
0,e6dad530-418b-47f9-af6e-472e56a7b314,98326,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055
1,389bfbb9-8ef1-4582-8c41-410131c3d0eb,62509,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Rod cells of human eye,389bfbb9-8ef1-4582-8c41-410131c3d0eb.h5ad,62509
2,4e38f019-f8e8-44ae-ad32-ba500de6f64c,16540,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Bipolar cells of human eye,4e38f019-f8e8-44ae-ad32-ba500de6f64c.h5ad,16540
3,2ef8f3ce-bbff-447b-9e51-567e5d6c47bd,10411,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Dissection: Body of hippocampus (HiB) - Rostra...,2ef8f3ce-bbff-447b-9e51-567e5d6c47bd.h5ad,10411
4,715327a6-7978-4896-ba91-69d6b04dbbfb,10411,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Dissection: Body of hippocampus (HiB) - Rostra...,715327a6-7978-4896-ba91-69d6b04dbbfb.h5ad,10411
5,ab5b2256-b209-48b5-a801-c5d9a8c0de56,8193,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Non-neuronal cells of human eye,ab5b2256-b209-48b5-a801-c5d9a8c0de56.h5ad,8193
6,f8c77961-67a7-4161-b8c2-61c3f917b54f,6101,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Amacrine cells of human eye,f8c77961-67a7-4161-b8c2-61c3f917b54f.h5ad,6101
7,b252b015-b488-4d5c-b16e-968c13e48a2c,4886,4796c91c-9d8f-4692-be43-347b1727f9d8,MSK SPECTRUM – Ovarian cancer mutational proce...,10.1038/s41586-022-05496-1,MSK SPECTRUM – All cells,b252b015-b488-4d5c-b16e-968c13e48a2c.h5ad,929690
8,ddb22b3d-a75c-4dd1-9730-dff7fc8ca530,3123,44531dd9-1388-4416-a117-af0a99de2294,"Single-Cell, Single-Nucleus, and Spatial RNA S...",10.1002/hep4.1854,Healthy human liver: integrated,ddb22b3d-a75c-4dd1-9730-dff7fc8ca530.h5ad,73295
9,a43aa46b-bd16-47fe-bc3e-19a052624e79,3123,bd5230f4-cd76-4d35-9ee5-89b3e7475659,Single cell RNA sequencing of human liver reve...,10.1038/s41467-018-06318-7,Liver,a43aa46b-bd16-47fe-bc3e-19a052624e79.h5ad,8444


In [8]:
"""
For each duplicated hash (cell), find the datasets that overlap/contain the duplicate.

In other words, these are all of the datasets which have intersecting duplicate/primary hashes. 
"""
overlapping_dup_datasets = np.unique(
    obs_duplicate_primary.sort_values("dataset_id")[["hash", "dataset_id"]]
    .groupby(by="hash")
    .agg(list)
)

display(overlapping_dup_datasets)

for d in overlapping_dup_datasets:
    display(datasets_df.set_index('dataset_id').loc[d])

array([list(['214bf9eb-93db-48c8-8e3c-9bb22fa3bc63', '4b6af54a-4a21-46e0-bc8d-673c0561a836']),
       list(['2ef8f3ce-bbff-447b-9e51-567e5d6c47bd', '715327a6-7978-4896-ba91-69d6b04dbbfb']),
       list(['389bfbb9-8ef1-4582-8c41-410131c3d0eb', 'd95ab381-2b7c-4885-b168-0097ed4e397f', 'e6dad530-418b-47f9-af6e-472e56a7b314']),
       list(['389bfbb9-8ef1-4582-8c41-410131c3d0eb', 'e6dad530-418b-47f9-af6e-472e56a7b314']),
       list(['44882825-0da1-4547-b721-2c6105d4a9d1', '7970bd6b-f752-47a9-8643-2af16855ec49']),
       list(['4e38f019-f8e8-44ae-ad32-ba500de6f64c', 'e6dad530-418b-47f9-af6e-472e56a7b314']),
       list(['6a270451-b4d9-43e0-aa89-e33aac1ac74b', '6a270451-b4d9-43e0-aa89-e33aac1ac74b']),
       list(['9cfee1e6-b24f-433d-a269-f01841655d6a', 'ab5b2256-b209-48b5-a801-c5d9a8c0de56', 'e6dad530-418b-47f9-af6e-472e56a7b314']),
       list(['a43aa46b-bd16-47fe-bc3e-19a052624e79', 'ddb22b3d-a75c-4dd1-9730-dff7fc8ca530']),
       list(['ab5b2256-b209-48b5-a801-c5d9a8c0de56', 'de17ac25-55

Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
214bf9eb-93db-48c8-8e3c-9bb22fa3bc63,03cdc7f4-bd08-49d0-a395-4487c0e5a168,Emphysema Cell Atlas,10.1016/j.immuni.2023.01.032,AT2 cells,214bf9eb-93db-48c8-8e3c-9bb22fa3bc63.h5ad,3662
4b6af54a-4a21-46e0-bc8d-673c0561a836,03cdc7f4-bd08-49d0-a395-4487c0e5a168,Emphysema Cell Atlas,10.1016/j.immuni.2023.01.032,non-immune cells,4b6af54a-4a21-46e0-bc8d-673c0561a836.h5ad,18386


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2ef8f3ce-bbff-447b-9e51-567e5d6c47bd,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Dissection: Body of hippocampus (HiB) - Rostra...,2ef8f3ce-bbff-447b-9e51-567e5d6c47bd.h5ad,10411
715327a6-7978-4896-ba91-69d6b04dbbfb,283d65eb-dd53-496d-adb7-7570c7caa443,Transcriptomic diversity of cell types across ...,10.1101/2022.10.12.511898,Dissection: Body of hippocampus (HiB) - Rostra...,715327a6-7978-4896-ba91-69d6b04dbbfb.h5ad,10411


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
389bfbb9-8ef1-4582-8c41-410131c3d0eb,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Rod cells of human eye,389bfbb9-8ef1-4582-8c41-410131c3d0eb.h5ad,62509
d95ab381-2b7c-4885-b168-0097ed4e397f,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Cone cells of human eye,d95ab381-2b7c-4885-b168-0097ed4e397f.h5ad,1378
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
389bfbb9-8ef1-4582-8c41-410131c3d0eb,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Rod cells of human eye,389bfbb9-8ef1-4582-8c41-410131c3d0eb.h5ad,62509
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
44882825-0da1-4547-b721-2c6105d4a9d1,3a2af25b-2338-4266-aad3-aa8d07473f50,Single-cell analysis of human B cell maturatio...,10.1126/sciimmunol.abe6291,Human tonsil complete integrated cell set scRNA,44882825-0da1-4547-b721-2c6105d4a9d1.h5ad,32607
7970bd6b-f752-47a9-8643-2af16855ec49,3a2af25b-2338-4266-aad3-aa8d07473f50,Single-cell analysis of human B cell maturatio...,10.1126/sciimmunol.abe6291,Human tonsil memory B cells scRNA,7970bd6b-f752-47a9-8643-2af16855ec49.h5ad,21595


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4e38f019-f8e8-44ae-ad32-ba500de6f64c,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Bipolar cells of human eye,4e38f019-f8e8-44ae-ad32-ba500de6f64c.h5ad,16540
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6a270451-b4d9-43e0-aa89-e33aac1ac74b,a48f5033-3438-4550-8574-cdff3263fdfd,HTAN VUMC - Differential pre-malignant program...,10.1016/j.cell.2021.11.031,VAL and DIS datasets: Non-Epithelial,6a270451-b4d9-43e0-aa89-e33aac1ac74b.h5ad,10700
6a270451-b4d9-43e0-aa89-e33aac1ac74b,a48f5033-3438-4550-8574-cdff3263fdfd,HTAN VUMC - Differential pre-malignant program...,10.1016/j.cell.2021.11.031,VAL and DIS datasets: Non-Epithelial,6a270451-b4d9-43e0-aa89-e33aac1ac74b.h5ad,10700


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9cfee1e6-b24f-433d-a269-f01841655d6a,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Retinal pigment epithelial cells of human eye,9cfee1e6-b24f-433d-a269-f01841655d6a.h5ad,1635
ab5b2256-b209-48b5-a801-c5d9a8c0de56,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Non-neuronal cells of human eye,ab5b2256-b209-48b5-a801-c5d9a8c0de56.h5ad,8193
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
a43aa46b-bd16-47fe-bc3e-19a052624e79,bd5230f4-cd76-4d35-9ee5-89b3e7475659,Single cell RNA sequencing of human liver reve...,10.1038/s41467-018-06318-7,Liver,a43aa46b-bd16-47fe-bc3e-19a052624e79.h5ad,8444
ddb22b3d-a75c-4dd1-9730-dff7fc8ca530,44531dd9-1388-4416-a117-af0a99de2294,"Single-Cell, Single-Nucleus, and Spatial RNA S...",10.1002/hep4.1854,Healthy human liver: integrated,ddb22b3d-a75c-4dd1-9730-dff7fc8ca530.h5ad,73295


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ab5b2256-b209-48b5-a801-c5d9a8c0de56,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Non-neuronal cells of human eye,ab5b2256-b209-48b5-a801-c5d9a8c0de56.h5ad,8193
de17ac25-550a-4018-be75-bbb485a0636e,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Myeloid cells of human eye,de17ac25-550a-4018-be75-bbb485a0636e.h5ad,395
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ab5b2256-b209-48b5-a801-c5d9a8c0de56,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Non-neuronal cells of human eye,ab5b2256-b209-48b5-a801-c5d9a8c0de56.h5ad,8193
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
b252b015-b488-4d5c-b16e-968c13e48a2c,4796c91c-9d8f-4692-be43-347b1727f9d8,MSK SPECTRUM – Ovarian cancer mutational proce...,10.1038/s41586-022-05496-1,MSK SPECTRUM – All cells,b252b015-b488-4d5c-b16e-968c13e48a2c.h5ad,929690
b252b015-b488-4d5c-b16e-968c13e48a2c,4796c91c-9d8f-4692-be43-347b1727f9d8,MSK SPECTRUM – Ovarian cancer mutational proce...,10.1038/s41586-022-05496-1,MSK SPECTRUM – All cells,b252b015-b488-4d5c-b16e-968c13e48a2c.h5ad,929690


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
c3d381b2-3104-444e-8ad5-d3524407bbb6,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Horizontal cells of human eye,c3d381b2-3104-444e-8ad5-d3524407bbb6.h5ad,1875
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
cec9f9a5-8832-437d-99af-fb8237cde54b,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Retinal ganglion cells of human eye,cec9f9a5-8832-437d-99af-fb8237cde54b.h5ad,1777
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
d7dcfd8f-2ee7-4385-b9ac-e074c23ed190,120e86b4-1195-48c5-845b-b98054105eec,Spatiotemporal immune zonation of the human ki...,10.1126/science.aat5031,Fetal kidney dataset: full,d7dcfd8f-2ee7-4385-b9ac-e074c23ed190.h5ad,27203
d7dcfd8f-2ee7-4385-b9ac-e074c23ed190,120e86b4-1195-48c5-845b-b98054105eec,Spatiotemporal immune zonation of the human ki...,10.1126/science.aat5031,Fetal kidney dataset: full,d7dcfd8f-2ee7-4385-b9ac-e074c23ed190.h5ad,27203


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
d95ab381-2b7c-4885-b168-0097ed4e397f,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Cone cells of human eye,d95ab381-2b7c-4885-b168-0097ed4e397f.h5ad,1378
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055


Unnamed: 0_level_0,collection_id,collection_name,collection_doi,dataset_title,dataset_h5ad_path,dataset_total_cell_count
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
e6dad530-418b-47f9-af6e-472e56a7b314,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,All cell types of human eye,e6dad530-418b-47f9-af6e-472e56a7b314.h5ad,100055
f8c77961-67a7-4161-b8c2-61c3f917b54f,939769a8-d8d2-4d01-abfc-55699893fd49,Integration of eQTL and a Single-Cell Atlas in...,10.1016/j.celrep.2019.12.082,Amacrine cells of human eye,f8c77961-67a7-4161-b8c2-61c3f917b54f.h5ad,6101
