In [15]:
#| default_exp catalogs

In [33]:
#| export
import intake
from fastcore.basics import basic_repr


In [17]:
#| export
def load_readable_name_to_catalog_name(intake_catalog):
    """Build a lookup from each entry's `description` to its catalog key.

    Iterates over `intake_catalog`, indexes it with every yielded key and
    reads the `.description` attribute of the result. If two entries share
    the same description, the one iterated later overwrites the earlier one.
    """
    readable_to_key = {}
    for entry_key in intake_catalog:
        readable_label = intake_catalog[entry_key].description
        readable_to_key[readable_label] = entry_key
    return readable_to_key

In [20]:
#| export
# Folder that is globbed for `*.yaml` catalog files when the module-level
# catalog wrapper is built. The value already ends with "/".
# NOTE(review): hardcoded absolute path -- this only resolves on machines
# where that location exists; consider making it configurable.
intake_catalog_folder = "/lab/corradin_biobank/FOR_AN/lab-central-webapp/data/00_intake_catalogs/"


In [35]:
#| export

class IntakeCatalogWrapper():
    """Convenience wrapper around a catalog opened with `intake.open_catalog`.

    Entries can be loaded by their catalog key (`load_data`) or by their
    `description` text (`load_data_by_readable_name`,
    `load_data_dict_by_readable_names`).
    """
    # `num_datasets` is the single field handed to fastcore's `basic_repr`.
    __repr__ = basic_repr("num_datasets")

    def __init__(self, cat_uri):
        """Open the catalog at `cat_uri` (passed unchanged to `intake.open_catalog`)."""
        self.catalog = intake.open_catalog(cat_uri)

    @property
    def num_datasets(self):
        """Summary string containing the number of keys in the catalog."""
        return f"Intake catalog. Number of datasets: {len(list(self.catalog.keys()))}"

    @property
    def readable_name_to_data_name_dict(self):
        """Dict of entry description -> catalog key, rebuilt on every access."""
        return load_readable_name_to_catalog_name(self.catalog)

    def load_data(self, data_name):
        """Return the result of `.read()` on the entry stored under `data_name`."""
        return self.catalog[data_name].read()

    def load_data_by_readable_name(self, readable_name):
        """Load the entry whose description equals `readable_name`.

        Raises:
            KeyError: if no catalog entry has that description.
        """
        data_name = self.readable_name_to_data_name_dict[readable_name]
        return self.load_data(data_name)

    def load_data_dict_by_readable_names(self, readable_names):
        """Load several entries and return them keyed by readable name.

        Args:
            readable_names: iterable of entry descriptions to load.

        Returns:
            Dict mapping each readable name to the data loaded for it.

        Raises:
            KeyError: if any name matches no catalog entry description.
        """
        # The original definition lacked `self` and called the sibling method
        # as a bare name, so it could never run on an instance.
        # Build the description lookup once rather than once per requested name.
        name_to_key = self.readable_name_to_data_name_dict
        return {
            readable_name: self.load_data(name_to_key[readable_name])
            for readable_name in readable_names
        }
        

In [30]:
#| export
# Module-level wrapper instance; the glob pattern below is handed to
# `intake.open_catalog` by `IntakeCatalogWrapper.__init__`.
# NOTE(review): `intake_catalog_folder` already ends with "/", so the pattern
# contains "//*.yaml" -- it works in the recorded run, but confirm it is intended.
intake_catalog = IntakeCatalogWrapper(f"{intake_catalog_folder}/*.yaml")
# Bare expression so the notebook displays the wrapper's repr as cell output.
intake_catalog

<__main__.IntakeCatalogWrapper at 0x1523d0742460>

In [28]:
intake_catalog.readable_name_to_data_name_dict

{'scATAC-seq peaks from Yang et al 2022, biorxiv': 'yang_2022_brain_scATAC'}

In [29]:
intake_catalog.load_data_by_readable_name('scATAC-seq peaks from Yang et al 2022, biorxiv')

Unnamed: 0,chrom,start,stop,cCRE_ID,cell_type,peak,cell_type_name
0,chr1,9848,10347,cCRE_1,"AMY,ASCT_1,ASCT_2,ASCT_3,COP,ERC_1,ITL23_1,ITL...",chr1_9848_10347,"Glutamatergic neurons from amygdala,Telencepha..."
1,chr1,180570,181069,cCRE_2,"ACBGM,AMY,ASCNT_1,ASCNT_3,ASCT_1,ASCT_2,ASCT_3...",chr1_180570_181069,"Bergmann glia,Glutamatergic neurons from amygd..."
2,chr1,191227,191726,cCRE_3,"ASCNT_1,ASCNT_2,ASCNT_3,ASCT_1,ASCT_2,ASCT_3,C...",chr1_191227_191726,"Non-telencephalon astrocytes - type 1,Non-tele..."
3,chr1,267759,268258,cCRE_4,"ASCT_2,ASCT_3,OGC_1,OGC_2,OGC_3,OPC",chr1_267759_268258,"Telencephalon astrocytes - type 2,Telencephalo..."
4,chr1,585948,586447,cCRE_5,"ASCT_2,OGC_2,OGC_3,OPC",chr1_585948_586447,"Telencephalon astrocytes - type 2,Oligodendroc..."
...,...,...,...,...,...,...,...
92827,chrY,56727893,56728392,cCRE_544731,"ACBGM,ASCNT_1,ASCT_1,ASCT_2,ASCT_3,CBGRC,ITL23...",chrY_56727893_56728392,"Bergmann glia,Non-telencephalon astrocytes - t..."
92828,chrY,56734541,56735040,cCRE_544732,"ASCNT_1,ASCNT_2,ASCNT_3,ASCT_1,ASCT_2,ASCT_3,C...",chrY_56734541_56735040,"Non-telencephalon astrocytes - type 1,Non-tele..."
92829,chrY,56742465,56742964,cCRE_544733,"ASCT_1,OGC_1",chrY_56742465_56742964,"Telencephalon astrocytes - type 1,Oligodendroc..."
92830,chrY,56763271,56763770,cCRE_544734,"AMY,ASCNT_1,ASCT_1,ASCT_2,ASCT_3,BFEXA,CBGRC,C...",chrY_56763271_56763770,"Glutamatergic neurons from amygdala,Non-telenc..."
