-
Notifications
You must be signed in to change notification settings - Fork 159
/
io.py
133 lines (113 loc) · 5.01 KB
/
io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""File Input/Output utilities."""
from download import download
import os.path as op
import os
#import matplotlib.pyplot as plt
#plt.style.use('ggplot')
# Registry of downloadable datasets, keyed by dataset name.
# Each value is either a single (URL, FILENAME, FILETYPE) tuple or a list of
# such tuples. FILETYPE must be one of ALLOWED_FILE_TYPES. For archive types
# (zip / tar / ...) the archive is unpacked to a folder under the dataset's
# name; for 'file' a plain file called FILENAME is created there.
DATA_URLS = {
    'week_02': [
        ('https://ndownloader.figshare.com/files/7010681',
         'boulder-precip.csv', 'file'),
        # NOTE(review): same URL as boulder-precip.csv above -- confirm this
        # is the intended source for the temperature example.
        ('https://ndownloader.figshare.com/files/7010681',
         'temperature_example.csv', 'file'),
        ('https://ndownloader.figshare.com/files/7426738', '.', 'zip'),
    ],
    'week_03': (
        'https://ndownloader.figshare.com/files/7446715', '.', 'zip'),
    'spatial-vector-lidar': (
        'https://ndownloader.figshare.com/files/12080402', '.', 'zip'),
    'cold-springs-modis-h5': (
        'https://ndownloader.figshare.com/files/10960112', '.', 'zip'),
    # Disabled dataset, kept for reference:
    # 'week_05': ('https://ndownloader.figshare.com/files/7525363', '.', 'zip'),
    'cold-springs-fire': (
        'https://ndownloader.figshare.com/files/10960109', '.', 'zip'),
    'cold-springs-naip-hw': (
        'https://ndownloader.figshare.com/files/10960211?private_link=18f892d9f3645344b2fe',
        '.', 'zip'),
    'cold-springs-landsat-hw': (
        'https://ndownloader.figshare.com/files/10960214?private_link=fbba903d00e1848b423e',
        '.', 'zip'),
    # NOTE(review): both week_08 entries point at the same URL but declare
    # different archive kinds ('zip' vs 'tar') -- confirm against figshare.
    'week_08': [
        ('https://ndownloader.figshare.com/files/9666637?private_link=480fba92b3e882c4d35d',
         'week_08', 'zip'),
        ('https://ndownloader.figshare.com/files/9666637?private_link=480fba92b3e882c4d35d',
         'week_08-hw', 'tar'),
    ],
}

# FILETYPE values accepted for entries in DATA_URLS.
ALLOWED_FILE_TYPES = ['zip', 'tar', 'tar.gz', 'file']

# Default storage location: <user home>/earth-analytics/data
HOME = op.expanduser('~')
DATA_NAME = op.join('earth-analytics', 'data')
class EarthlabData(object):
    """
    Data storage and retrieval functionality for Earthlab.

    Parameters
    ----------
    path : string | None
        The path where data is stored. When ``None``, the location built
        from the module-level ``HOME`` and ``DATA_NAME`` constants is used.

    Attributes
    ----------
    path : string
        Root folder under which every dataset is stored.
    data_keys : list
        Names of the datasets registered in ``DATA_URLS``.
    """

    def __init__(self, path=None):
        self.path = op.join(HOME, DATA_NAME) if path is None else path
        self.data_keys = list(DATA_URLS)

    def __repr__(self):
        return 'Available Datasets: {}'.format(self.data_keys)

    def get_data(self, key=None, name=None, replace=False):
        """
        Retrieve the data for a given dataset key and return its path.

        Every entry registered for ``key`` is handed to ``download``,
        targeting ``<self.path>/<key>/<entry filename>``.

        Parameters
        ----------
        key : str | None
            The dataset to retrieve. Possible options can be found in
            ``self.data_keys``. When ``None``, the available dataset names
            are printed and nothing is downloaded.
        name : object
            Accepted for backward compatibility; this method never reads it.
        replace : bool
            Whether to replace the data for this key if it is
            already downloaded. Forwarded to ``download``.

        Returns
        -------
        path_data : str | list | None
            The value returned by ``download`` (documented upstream as the
            path to the downloaded data) when the dataset has exactly one
            entry, otherwise a list with one such value per entry.
            ``None`` when ``key`` is ``None``.

        Raises
        ------
        ValueError
            If ``key`` is not a registered dataset, or if one of its
            entries declares a kind outside ``ALLOWED_FILE_TYPES``.
        """
        # No dataset requested: only advertise what is available.
        if key is None:
            print('Available datasets: {}'.format(list(DATA_URLS)))
            return None

        if key not in DATA_URLS:
            message = "Don't understand key {}\nChoose one of {}".format(
                key, DATA_URLS.keys())
            raise ValueError(message)

        dataset_dir = op.join(self.path, key)

        # A dataset may be registered as one tuple or as a list of tuples;
        # normalise to a list so both shapes share the loop below.
        entries = DATA_URLS[key]
        if not isinstance(entries, list):
            entries = [entries]

        downloaded = []
        for url, filename, kind in entries:
            if kind not in ALLOWED_FILE_TYPES:
                raise ValueError('kind must be one of {}, got {}'.format(
                    ALLOWED_FILE_TYPES, kind))
            # If kind is not 'file' the archive is un-archived to a folder
            # called `filename`, otherwise a file called `filename` is made.
            target = os.path.join(dataset_dir, filename)
            downloaded.append(
                download(url, target, replace=replace, kind=kind,
                         verbose=False))

        # Single-entry datasets return the bare value rather than a list.
        return downloaded[0] if len(downloaded) == 1 else downloaded
def list_files(path, depth=3):
    """
    List files in a directory up to a specified depth.

    Folder and file names are printed to standard output, indented by one
    ``depth_str_base`` unit per level. The top folder and its immediate
    sub-folders both count as depth 1; each further nesting level adds one.

    Parameters
    ----------
    path : str
        A path to a folder whose contents you want to list recursively.
    depth : int
        The depth of files / folders you want to list inside of ``path``.

    Raises
    ------
    ValueError
        If ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise ValueError('path: {} is not a directory'.format(path))
    depth_str_base = ' '
    if not path.endswith(os.sep):
        path = path + os.sep
    for i_path, folders, files in os.walk(path):
        # Every path yielded by os.walk is built by extending ``path``, so
        # slicing off the prefix is exact. str.replace would also remove
        # later occurrences of the same text and under-count the depth.
        path_wo_base = i_path[len(path):]
        # Split on the platform separator (not a hard-coded '/') so the
        # depth is also right on Windows.
        this_depth = len(path_wo_base.split(os.sep))
        if this_depth > depth:
            # Everything below is deeper still: prune it from the walk
            # instead of visiting folders that would only be skipped.
            folders[:] = []
            continue
        # Define the string for this level
        depth_str = depth_str_base * this_depth
        print(depth_str + op.basename(i_path))
        # Files sit one level below their folder; skip them when that
        # level exceeds the requested depth.
        if this_depth + 1 > depth:
            continue
        for ifile in files:
            print(depth_str + depth_str_base + ifile)