/
restDataverse.jl
92 lines (76 loc) · 2.59 KB
/
restDataverse.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
module restDataverse
using HTTP, JSON, DataFrames
"""
    file_list(doi="doi:10.7910/DVN/EE3C40")

List the files in the latest version of a dataset on `dataverse.harvard.edu`.

Send a GET request to the `api/datasets/:persistentId/` endpoint with `doi` as the
`persistentId` query parameter, parse the JSON response, and pass
`["data"]["latestVersion"]["files"]` to `files_to_DataFrame`, whose result is returned.

# Arguments
- `doi`: persistent identifier of the dataset; it is converted with `string` and
  percent-encoded before being sent.

# Examples
```
file_list("doi:10.7910/DVN/ODM2IQ")
```
"""
function file_list(doi="doi:10.7910/DVN/EE3C40")
    url = "https://dataverse.harvard.edu/api/datasets/:persistentId/"
    # Hand the identifier to `query` rather than splicing it into the URL, so that
    # reserved characters in `doi` are percent-encoded.
    r = HTTP.get(url; query=Dict("persistentId" => string(doi)))
    tmp = JSON.parse(String(r.body))
    files = tmp["data"]["latestVersion"]["files"]
    return files_to_DataFrame(files)
end
"""
    file_list(nam::Symbol)

Look up the DOI of the demo data set `nam` and return `file_list` of that DOI.

Known names are `:OCCA_clim` and `:ECCO_clim`. This method has no default for `nam`.

# Throws
- `ArgumentError` if `nam` is not one of the known names.
"""
function file_list(nam::Symbol)
    DOI = (OCCA_clim="doi:10.7910/DVN/RNXA2A", ECCO_clim="doi:10.7910/DVN/3HPRZI")
    # Fail with a message that lists the valid names instead of the bare
    # NamedTuple field error that `DOI[nam]` would raise.
    if !haskey(DOI, nam)
        known = join(repr.(keys(DOI)), ", ")
        throw(ArgumentError("unknown data set name $(repr(nam)); expected one of: $(known)"))
    end
    return file_list(DOI[nam])
end
"""
    dataverse_scan(nam::Symbol=:ECCOv4r2)

List the contents of the dataverse `nam` on `dataverse.harvard.edu`.

Two GET requests are made: one to `api/dataverses/<nam>` and one to
`api/dataverses/<nam>/contents`. The `"data"` field of each JSON response is used.

# Returns
A tuple `(header, dataverses, datasets)`:
- `header`: the `"data"` field of the first response.
- `dataverses`: `DataFrame` with columns `id`, `type`, `title`, one row per content
  entry whose `"type"` is `"dataverse"`.
- `datasets`: `DataFrame` with columns `id`, `type`, `persistentUrl`, one row per
  content entry whose `"type"` is `"dataset"`.

Either table has zero rows when there is no entry of that kind.

# Examples
```
(header, dataverses, datasets) = dataverse_scan()
file_list(datasets.persistentUrl[1])
```
"""
function dataverse_scan(nam::Symbol=:ECCOv4r2)
    url = "https://dataverse.harvard.edu/api/dataverses/$(nam)"

    # header
    r = HTTP.get(url)
    header = JSON.parse(String(r.body))["data"]

    # contents
    r = HTTP.get(url * "/contents")
    contents = JSON.parse(String(r.body))["data"]

    # 1. dataverses
    # Filtering first lets one code path build the table whether or not any entry
    # matches; an empty selection simply yields zero-length columns.
    subs = filter(item -> item["type"] == "dataverse", contents)
    dataverses = DataFrame(;
        id=[item["id"] for item in subs],
        type=[item["type"] for item in subs],
        title=[item["title"] for item in subs],
    )

    # 2. datasets
    sets = filter(item -> item["type"] == "dataset", contents)
    datasets = DataFrame(;
        id=[item["id"] for item in sets],
        type=[item["type"] for item in sets],
        persistentUrl=[item["persistentUrl"] for item in sets],
    )

    return header, dataverses, datasets
end
"""
    files_to_DataFrame(files)

Build a `DataFrame` with one row per entry of `files`.

Each entry is read as `files[k]["dataFile"]`, from which the `"filename"`, `"filesize"`,
and `"id"` values are taken. The returned table has the columns `filename`, `filesize`,
`id`, and `url`, where `url` is the `api/access/datafile/` address on
`dataverse.harvard.edu` followed by the file `id`.
"""
function files_to_DataFrame(files)
    # Collect one `dataFile` field across all entries, in entry order.
    column(key) = [files[k]["dataFile"][key] for k in 1:length(files)]

    names = column("filename")
    sizes = column("filesize")
    ids = column("id")

    prefix = "https://dataverse.harvard.edu/api/access/datafile/"
    links = [prefix * string(i) for i in ids]

    return DataFrame(; filename=names, filesize=sizes, id=ids, url=links)
end
end