-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
datasets.py
197 lines (144 loc) · 5.37 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
"""Module for accessing the Earth Engine Data Catalog with dot notation."""
# *******************************************************************************#
# This module contains extra features of the geemap package. #
# The geemap community will maintain the extra features. #
# *******************************************************************************#
import json
import os
import shutil
import urllib.request
from pathlib import Path
import ipywidgets as widgets
import pkg_resources
from box import Box
from IPython.display import display
from .common import download_from_url, ee_data_html, search_ee_data
def get_data_csv():
    """Gets the file path to the CSV file containing the information about the Earth Engine Data Catalog.

    Returns:
        str: File path to the CSV file.
    """
    # Locate the installed geemap package directory via one of its modules.
    geemap_module = pkg_resources.resource_filename("geemap", "geemap.py")
    package_root = os.path.dirname(geemap_module)
    return os.path.join(package_root, "data/template", "ee_data_catalog.csv")
def update_data_list(out_dir="."):
    """Updates the Earth Engine Data Catalog dataset list.

    Downloads the latest dataset list from the
    samapriya/Earth-Engine-Datasets-List GitHub repository, copies its CSV
    into the geemap package template directory, then removes the temporary
    zip file and the extracted directory.

    Args:
        out_dir (str, optional): The output directory used for the temporary
            download of the GitHub repository archive. Defaults to ".".

    Raises:
        Exception: If the download fails, no CSV is found in the archive,
            or the CSV file fails to save.
    """
    try:
        url = (
            "https://github.com/samapriya/Earth-Engine-Datasets-List/archive/master.zip"
        )
        filename = "Earth-Engine-Datasets-List-master.zip"
        dir_name = filename.replace(".zip", "")

        out_dir = os.path.abspath(out_dir)
        # exist_ok avoids the check-then-create race of the explicit test.
        os.makedirs(out_dir, exist_ok=True)

        download_from_url(
            url, out_file_name=filename, out_dir=out_dir, unzip=True, verbose=False
        )

        work_dir = os.path.join(out_dir, dir_name)
        csv_files = list(Path(work_dir).rglob("*.csv"))
        if not csv_files:
            # Fail with a clear message instead of an opaque IndexError.
            raise Exception(f"No CSV file found in downloaded archive: {work_dir}")
        in_csv = csv_files[0]
        out_csv = get_data_csv()

        shutil.copyfile(in_csv, out_csv)
        # Clean up the temporary download artifacts.
        os.remove(os.path.join(out_dir, filename))
        shutil.rmtree(work_dir)
    except Exception as e:
        # Chain the original error so the full traceback is preserved.
        raise Exception(e) from e
def get_data_list():
    """Gets a list of Earth Engine datasets.

    Returns:
        list: The list of dataset ids.
    """
    # Concatenate the official STAC catalog, the geemap public assets,
    # and the community-maintained dataset ids, in that order.
    combined = []
    for source in (get_ee_stac_list, get_geemap_data_list, get_community_data_list):
        combined.extend(source())
    return combined
def get_geemap_data_list():
    """Gets the list of the public datasets from GEE users.

    Returns:
        list: The list of public datasets from GEE users.
    """
    prefix = "users/giswqs/public"
    dataset_names = (
        "countries",
        "us_states",
        "us_cities",
        "chn_admin_line",
        "chn_admin_level0",
        "chn_admin_level1",
        "chn_admin_level2",
    )
    return [f"{prefix}/{name}" for name in dataset_names]
def get_community_data_list():
    """Gets the list community datasets
    from https://github.com/samapriya/awesome-gee-community-datasets/blob/master/community_datasets.json

    Returns:
        list: The list of Earth Engine asset IDs.
    """
    # ".*" with regex=True matches every community collection.
    results = search_ee_data(".*", regex=True, source="community")
    ids = []
    for entry in results:
        ids.append(entry.get("id", None))
    return ids
def get_ee_stac_list():
    """Gets the STAC list of the Earth Engine Data Catalog.

    Raises:
        Exception: If the JSON file fails to download or parse.

    Returns:
        list: The list of Earth Engine asset IDs.
    """
    stac_url = "https://raw.githubusercontent.com/samapriya/Earth-Engine-Datasets-List/master/gee_catalog.json"
    try:
        # A timeout makes a stalled connection fail instead of hanging forever.
        with urllib.request.urlopen(stac_url, timeout=60) as response:
            data = json.loads(response.read().decode())
        return [item["id"] for item in data]
    except Exception as e:
        # Chain the original error so the full traceback is preserved.
        raise Exception(e) from e
def merge_dict(dict1, dict2):
    """Merges two nested dictionaries.

    Keys present in both inputs whose values are both dictionaries are merged
    recursively; for any other conflicting key, the value from ``dict2`` wins.
    Neither input dictionary is modified.

    Args:
        dict1 (dict): The first dictionary to merge.
        dict2 (dict): The second dictionary to merge.

    Returns:
        dict: The merged dictionary.
    """
    merged = dict(dict1)
    for key, value in dict2.items():
        if key in merged and isinstance(merged[key], dict) and isinstance(value, dict):
            # Recurse so sibling branches from both trees are preserved;
            # a shallow {**d1, **d2} would let d2's subtree silently
            # replace d1's, dropping catalog entries in get_data_dict.
            merged[key] = merge_dict(merged[key], value)
        else:
            merged[key] = value
    return merged
def get_data_dict():
    """Gets the Earth Engine Data Catalog as a nested dictionary.

    Returns:
        dict: The nested dictionary containing the information about the Earth Engine Data Catalog.
    """
    catalog = {}
    for asset_id in get_data_list():
        # Build a single-branch tree from the innermost segment outward,
        # e.g. "A/B/C" -> {"A": {"B": {"C": "A/B/C"}}}.
        parts = asset_id.split("/")
        branch = {parts[-1]: asset_id}
        for segment in reversed(parts[:-1]):
            branch = {segment: branch}
        catalog = merge_dict(catalog, branch)
        # Also expose a flat underscore-joined alias for dot access.
        catalog[asset_id.replace("/", "_")] = asset_id
    return catalog
def get_metadata(asset_id, source="ee"):
    """Gets metadata about an Earth Engine asset and displays it as an HTML widget.

    Args:
        asset_id (str): The Earth Engine asset id.
        source (str, optional): Catalog to search: 'ee', 'community' or 'all'.
            Defaults to 'ee'.

    Raises:
        Exception: If the search fails or no matching asset is found.
    """
    try:
        ee_assets = search_ee_data(asset_id, source=source)
        if not ee_assets:
            # Fail with a clear message instead of an opaque IndexError.
            raise Exception(f"No dataset found for asset id: {asset_id}")
        # Render the first (best) match as HTML.
        html_widget = widgets.HTML(value=ee_data_html(ee_assets[0]))
        display(html_widget)
    except Exception as e:
        # Chain the original error so the full traceback is preserved.
        raise Exception(e) from e
DATA = Box(get_data_dict(), frozen_box=True)