In [1]:
#| eval: false
! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on colab

In [2]:
#| default_exp data.external

In [3]:
from __future__ import annotations
from fastdownload import FastDownload
from functools import lru_cache
from xcube.imports import *
import xcube.data

In [4]:
from nbdev.showdoc import *

In [5]:
%load_ext autoreload
%autoreload 2

In [7]:
@lru_cache(maxsize=None)
def xcube_cfg() -> Config: # Config that contains default download paths for `data`, `model`, `storage` and `archive`
    "Cached `Config` backed by xcube's `config.ini`"
    # Config directory: `XCUBE_HOME` if set in the environment, otherwise `~/.xcube`.
    home = Path(os.getenv('XCUBE_HOME', '~/.xcube'))
    # Values used to create `config.ini`.
    defaults = {'data': 'data', 'archive': 'archive', 'storage': 'tmp', 'model': 'models'}
    return Config(home, 'config.ini', create=defaults)

In [8]:
# Attribute access shows the raw stored value; `path()` gives the resolved location (compare the two outputs).
cfg = xcube_cfg()
cfg.data, cfg.path('archive')

('data', Path('/home/deb/.xcube/archive'))

In [9]:
def xcube_path(folder:str) -> Path: 
    "Resolve the `Config` key `folder` to a local path"
    cfg = xcube_cfg()
    return cfg.path(folder)

In [10]:
xcube_path('archive')

Path('/home/deb/.xcube/archive')

In [11]:
class XURLs():
    "Global constants for datasets and model URLs."
    LOCAL_PATH = Path.cwd() # Working directory at the time the class body is executed
    S3 = 'https://xcubebucket.s3.us-east-2.amazonaws.com/'
    
    #main datasets
    MIMIC3 = '' # NOTE(review): empty string, presumably a placeholder with no hosted archive; confirm
    MIMIC3_L2R = f'{S3}mimic/mimic3.tgz'
    
    # `path` takes no `self`, so it is declared static: `XURLs.path(url)` keeps working
    # and `XURLs().path(url)` no longer binds the instance to `url`.
    @staticmethod
    def path(
        url:str='.', # File to download
        c_key:str='archive' # Key in `Config` where to save URL
    ) -> Path:
        "Local path where to download based on `c_key`"
        # The file name is the last `/`-separated component of the URL.
        fname = url.split('/')[-1]
        # An existing copy under `LOCAL_PATH/models` (for `c_key=='model'`) or
        # `LOCAL_PATH/data` (any other key) takes precedence over the config location.
        local_path = XURLs.LOCAL_PATH/('models' if c_key=='model' else 'data')/fname
        if local_path.exists(): return local_path
        return xcube_path(c_key)/fname

The default local path is `~/.xcube/archive/`, but this can be changed by passing a different `c_key`. Note: `c_key` should be one of `'archive'`, `'data'`, `'model'`, or `'storage'`.

In [12]:
# With the default `c_key`, the file is expected under the `archive` location
# (the check below holds when no copy exists under the current directory's `data/`).
url = XURLs.MIMIC3_L2R
local_path = XURLs.path(url)
test_eq(local_path.parent, xcube_path('archive'))
local_path

Path('/home/deb/.xcube/archive/mimic3.tgz')

In [13]:
# Passing `c_key='model'` resolves the same file name under the `model` location instead.
local_path = XURLs.path(url, c_key='model')
test_eq(local_path.parent, xcube_path('model'))
local_path

Path('/home/deb/.xcube/models/mimic3.tgz')

## untar_xxx -

In [14]:
def untar_xxx(
    url:str, # File to download
    archive:Path=None, # Optional override for `Config`'s `archive` key
    data:Path=None, # Optional override for `Config`'s `data` key
    c_key:str='data', # Key in `Config` where to extract file
    force_download:bool=False, # Setting to `True` will overwrite any existing copy of data
    base:str='~/.xcube' # Directory containing config file and base of relative paths
) -> Path: # Path to extracted file(s)
    "Fetch `url` through `FastDownload.get` and return its result"
    # Build the downloader on top of xcube's own config.
    downloader = FastDownload(
        xcube_cfg(),
        module=xcube.data,
        archive=archive,
        data=data,
        base=base,
    )
    # `c_key` is forwarded as the extraction key.
    extracted = downloader.get(url, force=force_download, extract_key=c_key)
    return extracted

`untar_xxx` is a thin wrapper for `FastDownload.get`. It downloads and extracts `url`, by default to subdirectories of `~/.xcube`, and returns the path to the extracted data. Setting the `force_download` flag to `True` will overwrite any existing copy of the data. For an explanation of the `c_key` parameter, see `XURLs`.

In [15]:
XURLs.MIMIC3_L2R

'https://xcubebucket.s3.us-east-2.amazonaws.com/mimic/mimic3.tgz'

In [16]:
# Download and extract the archive with the default `c_key='data'`; `p` is the returned path.
p = untar_xxx(XURLs.MIMIC3_L2R)
p

Path('/home/deb/.xcube/data/mimic3')

In [17]:
# Recursively list every `.pkl` file under the extracted path.
list(p.glob('**/*.pkl'))

[Path('/home/deb/.xcube/data/mimic3/l2r/info.pkl'),
 Path('/home/deb/.xcube/data/mimic3/l2r/p_TL.pkl')]

## Export -

In [18]:
from nbdev import nbdev_export
nbdev_export()