In [1]:
# using Revise
using Pkg
Pkg.activate(".")
using DataManifest

[32m[1m  Activating[22m[39m project at `~/Projects/DataManifest.jl`


In [2]:
# Build a Database from the manifest file "example.toml". The recorded output
# shows no datasets yet and a datasets_folder under the user's cache directory.
db = Database("example.toml")

Database(
  datasets=Dict(
  ),
  datasets_folder="/home/perrette/.cache/Datasets"
  datasets_toml="example.toml"
)

In [3]:
# Rebind db to a database whose files live under "datasets-test". With
# persist=false the recorded output reports datasets_toml="" (in-memory database).
db = Database(datasets_folder="datasets-test", persist=false)
# Start from a clean slate: delete any "datasets-test" directory left by an
# earlier run (force=true makes a missing directory a no-op instead of an error).
rm("datasets-test"; force=true, recursive=true)
# Display the (still empty) database.
db

Database(
  datasets=Dict(
  ),
  datasets_folder="datasets-test"
  datasets_toml="" (in-memory database)
)

In [4]:
# Register two PANGAEA datasets by download URL, giving each an explicit
# name and DOI.
register_dataset(db, "https://doi.pangaea.de/10.1594/PANGAEA.930512?format=zip";
    name="herzschuh2023",
    doi="10.1594/PANGAEA.930512",
)

register_dataset(db, "https://download.pangaea.de/dataset/962852/files/LGM_foraminifera_assemblages_20240110.csv";
    name="jonkers2024",
    doi="10.1594/PANGAEA.962852",
)

# No name is passed here; the listing printed below shows this entry under
# the key "jesstierney/lgmDA".
register_dataset(db, "git@github.com:jesstierney/lgmDA.git")

# Remote path addressed by an ssh:// URI, registered under an explicit name.
register_dataset(db, "ssh://albedo1.dmawi.de:/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc/lgm/tos_CLIM"; name="CMIP6_lgm_tos")

# Print the compact listing: one "name => shortened location" line per
# dataset, followed by the datasets_folder and datasets_toml settings.
println(db)

Database:
- CMIP6_lgm_tos => albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data...
- herzschuh2023 => doi.pangaea.de/10.1594/PANGAEA.930512
- jonkers2024 => download.pangaea.de/dataset/962852/files/LGM_foraminifera_assem...
- jesstierney/lgmDA => github.com/jesstierney/lgmDA.git
datasets_folder: datasets-test
datasets_toml: "" (in-memory database)


In [5]:
# Display db as the cell result. Unlike println(db), the recorded output uses
# the constructor-like form with abbreviated DatasetEntry(uri=...) values.
db

Database(
  datasets=Dict(
    CMIP6_lgm_tos => DatasetEntry(uri="ssh:/albedo1.dmawi.de:/albedo/work/projects...),
    herzschuh2023 => DatasetEntry(uri="https:/doi.pangaea.de/10.1594...),
    jonkers2024 => DatasetEntry(uri="https:/download.pangaea.de/dataset/962852/files...),
    jesstierney/lgmDA => DatasetEntry(uri="git@github.com:jesstierney/lgmDA.git"...),
  ),
  datasets_folder="datasets-test"
  datasets_toml="" (in-memory database)
)

In [6]:
# Look up a single entry by its key in the datasets dictionary.
db.datasets["jesstierney/lgmDA"]

DatasetEntry(uri="git@github.com:jesstierney...)

In [7]:
# Show, as one tuple, the local path for the dataset, the base folder, and the
# entry itself. In the recorded output the path is the base folder followed by
# the URI's host and path, without the "?format=zip" query string.
get_dataset_path(db, "herzschuh2023"), db.datasets_folder, db.datasets["herzschuh2023"]

("datasets-test/doi.pangaea.de/10.1594/PANGAEA.930512", "datasets-test", DatasetEntry:
- uri=https://doi.pangaea.de/10.1594/PANGAEA.930512?format=zip
- doi=10.1594/PANGAEA.930512)

In [8]:
# Compare the four text forms of one entry (see the recorded output below):
# string       -> multi-line "DatasetEntry:" listing of uri and doi
println(string(db.datasets["herzschuh2023"]))
# string_short -> location only (host and path)
println(string_short(db.datasets["herzschuh2023"]))
# repr         -> one-line constructor-like form with abbreviated uri and the doi
println(repr(db.datasets["herzschuh2023"]))
# repr_short   -> same one-line form, uri only
println(repr_short(db.datasets["herzschuh2023"]))
# Cell result: the notebook display of the entry matches the repr line above.
db.datasets["herzschuh2023"]

DatasetEntry:
- uri=https://doi.pangaea.de/10.1594/PANGAEA.930512?format=zip
- doi=10.1594/PANGAEA.930512
doi.pangaea.de/10.1594/PANGAEA.930512
DatasetEntry(uri="https:/doi.pangaea.de/10.1594..., doi="10.1594/PANGAEA.930512")
DatasetEntry(uri="https:/doi.pangaea.de/10.1594...)


DatasetEntry(uri="https:/doi.pangaea.de/10.1594..., doi="10.1594/PANGAEA.930512")

In [9]:
# Same comparison for the whole database: string(db) yields the compact
# "name => location" listing ...
println(string(db))
# ... while displaying db as the cell result yields the constructor-like form.
db

Database:
- CMIP6_lgm_tos => albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data...
- herzschuh2023 => doi.pangaea.de/10.1594/PANGAEA.930512
- jonkers2024 => download.pangaea.de/dataset/962852/files/LGM_foraminifera_assem...
- jesstierney/lgmDA => github.com/jesstierney/lgmDA.git
datasets_folder: datasets-test
datasets_toml: "" (in-memory database)


Database(
  datasets=Dict(
    CMIP6_lgm_tos => DatasetEntry(uri="ssh:/albedo1.dmawi.de:/albedo/work/projects...),
    herzschuh2023 => DatasetEntry(uri="https:/doi.pangaea.de/10.1594...),
    jonkers2024 => DatasetEntry(uri="https:/download.pangaea.de/dataset/962852/files...),
    jesstierney/lgmDA => DatasetEntry(uri="git@github.com:jesstierney/lgmDA.git"...),
  ),
  datasets_folder="datasets-test"
  datasets_toml="" (in-memory database)
)

In [10]:
import TOML
# Print the database as TOML. The recorded output has one table per dataset
# name, holding its uri and, where one was registered, its doi.
TOML.print(db)

[CMIP6_lgm_tos]
uri = "ssh://albedo1.dmawi.de:/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc/lgm/tos_CLIM"

[herzschuh2023]
uri = "https://doi.pangaea.de/10.1594/PANGAEA.930512?format=zip"
doi = "10.1594/PANGAEA.930512"

[jonkers2024]
uri = "https://download.pangaea.de/dataset/962852/files/LGM_foraminifera_assemblages_20240110.csv"
doi = "10.1594/PANGAEA.962852"

["jesstierney/lgmDA"]
uri = "git@github.com:jesstierney/lgmDA.git"


In [11]:
# Save the database to "test.toml"; the next cell prints the resulting file.
write(db, "test.toml")

In [12]:
# Dump the written file using the external `cat` program (must be on PATH).
# The trailing semicolon suppresses display of the returned process object.
run(`cat test.toml`);

[CMIP6_lgm_tos]
uri = "ssh://albedo1.dmawi.de:/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc/lgm/tos_CLIM"

[herzschuh2023]
uri = "https://doi.pangaea.de/10.1594/PANGAEA.930512?format=zip"
doi = "10.1594/PANGAEA.930512"

[jonkers2024]
uri = "https://download.pangaea.de/dataset/962852/files/LGM_foraminifera_assemblages_20240110.csv"
doi = "10.1594/PANGAEA.962852"

["jesstierney/lgmDA"]
uri = "git@github.com:jesstierney/lgmDA.git"


In [13]:
# Load the file written above back into a second database.
# The second argument is presumably the datasets folder — TODO confirm.
other = read("test.toml", "datasets-test")
# NOTE(review): the recorded result is `false`. This may be because the two
# databases differ in datasets_toml ("test.toml" vs ""), or because `==` is not
# defined field-wise for Database — confirm which, and whether it is intended.
other == db

false

In [14]:
# Fetch a single dataset by name and keep the returned value.
# In the recorded run the server answered with HTTP 500, so the call raised
# Downloads.RequestError and local_path was not assigned.
local_path = download_dataset(db, "jonkers2024") # will download only if not present

Downloads.RequestError: RequestError: HTTP/2 500 while requesting https://download.pangaea.de/dataset/962852/files/LGM_foraminifera_assemblages_20240110.csv

In [15]:
# Fetch the registered datasets in one call. In the recorded run this stopped
# with ProcessFailedException on the ssh:// dataset: the rsync command shown in
# the output failed because the host name could not be resolved.
download_datasets(db)

ssh: Could not resolve hostname albedo1.dmawi.de: Name or service not known
rsync: connection unexpectedly closed (0 bytes received so far) [Receiver]
rsync error: unexplained error (code 255) at io.c(232) [Receiver=3.2.7]


ProcessFailedException: failed process: Process(`rsync -arvzL albedo1.dmawi.de:/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc/lgm/tos_CLIM datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc/lgm/`, ProcessExited(255)) [255]


In [16]:
# List what the failed downloads left under "datasets-test", using the external
# `find` program. With no trailing semicolon the returned Process object is
# displayed as well.
run(`find datasets-test`)

datasets-test
datasets-test/albedo1.dmawi.de
datasets-test/albedo1.dmawi.de/albedo
datasets-test/albedo1.dmawi.de/albedo/work
datasets-test/albedo1.dmawi.de/albedo/work/projects
datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima
datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data_esmvaltool
datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM
datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009
datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc
datasets-test/albedo1.dmawi.de/albedo/work/projects/p_forclima/preproc_data_esmvaltool/LGM/recipe_cmip6_lgm_tos_20241114_151009/preproc/lgm
datasets-test/download.pangaea.de
datasets-test/download.pangaea.de/dataset
datasets-test/download.pangaea.de/dataset/962852
datasets-test/download.pangaea.de/dataset/962852/fil

Process(`find datasets-test`, ProcessExited(0))