Skip to content

Commit

Permalink
include nc utils
Browse files Browse the repository at this point in the history
  • Loading branch information
nilshempelmann committed Dec 2, 2018
1 parent 9bae615 commit c7df6a0
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 0 deletions.
77 changes: 77 additions & 0 deletions eggshell/nc/nc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,83 @@ def __exit__(self, exc_type, exc_val, exc_tb):
os.remove(self.auth_cookie_fn)


def opendap_or_download(resource, auth_tkt_cookie=None, output_path=None,
                        max_nbytes=10000000000):
    """Check for OPEnDAP support, if not download the resource.

    :param resource: url of a NetCDF resource
    :param auth_tkt_cookie: cookies sent with the download request
        (default: no cookies)
    :param output_path: where to save the non-OPEnDAP resource
        (default: current working directory)
    :param max_nbytes: maximum file size for download, default: 10 GB
    :return str: the original url if OPEnDAP is supported or path of saved file
    :raises Exception: if the server answers the download with HTTP 401
    :raises IOError: if the advertised Content-Length exceeds ``max_nbytes``
        or the downloaded file cannot be opened as a NetCDF dataset
    """
    try:
        nc = Dataset(resource, 'r')
        nc.close()
    except Exception:
        # Any failure to open the resource directly is treated as
        # "no OPEnDAP support": fall back to a plain HTTP download.
        if auth_tkt_cookie is None:
            # Fresh dict per call instead of a shared mutable default.
            auth_tkt_cookie = {}
        response = requests.get(resource, cookies=auth_tkt_cookie, stream=True)
        try:
            if response.status_code == 401:
                raise Exception("Not Authorized")

            # The size can only be checked up front when the server
            # advertises it; otherwise the download proceeds unchecked.
            content_length = response.headers.get('Content-Length')
            if content_length is not None and int(content_length) > max_nbytes:
                raise IOError("File too large to download.")

            chunk_size = 16 * 1024
            if not output_path:
                output_path = os.getcwd()
            output_file = os.path.join(output_path, os.path.basename(resource))
            with open(output_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        finally:
            # A streamed response keeps its connection open until it is
            # fully consumed or explicitly closed.
            response.close()

        # Validate the download by opening it as a NetCDF dataset.
        try:
            nc = Dataset(output_file, 'r')
            nc.close()
        except Exception:
            raise IOError("This does not appear to be a valid NetCDF file.")
        return output_file
    return resource


def guess_main_variables(ncdataset):
    """Guess main variables in a NetCDF file.

    :param ncdataset: netCDF4.Dataset
    :return list: names of main variables, in the order they appear in
        ``ncdataset.variables``

    Notes
    -----
    The main variables are the one with highest dimensionality and size. The
    time, lon, lat variables and variables that are defined as bounds are
    automatically ignored.
    """
    coordinate_names = ('time', 'lon', 'lat')

    var_candidates = []
    bnds_variables = set()
    for var_name in ncdataset.variables:
        ncvar = ncdataset.variables[var_name]
        # Record the bounds reference *before* skipping coordinate variables,
        # otherwise time_bnds / lat_bnds / lon_bnds would stay candidates.
        if hasattr(ncvar, 'bounds'):
            bnds_variables.add(ncvar.bounds)
        if var_name not in coordinate_names:
            var_candidates.append(var_name)

    # Filter with a list (not a set difference) so the result order is
    # deterministic and follows the dataset's own variable order.
    var_candidates = [name for name in var_candidates
                      if name not in bnds_variables]

    # Find main variables among the candidates: rank first, then size;
    # variables tying on both are all reported.
    nd = -1
    size = -1
    main_variables = []
    for var_name in var_candidates:
        ncvar = ncdataset.variables[var_name]
        rank = len(ncvar.shape)
        if (rank, ncvar.size) > (nd, size):
            main_variables = [var_name]
            nd, size = rank, ncvar.size
        elif (rank, ncvar.size) == (nd, size):
            main_variables.append(var_name)
    return main_variables


# def get_calendar(resource, variable=None):
# """
# returns the calendar and units in which the timestamps are stored
Expand Down
6 changes: 6 additions & 0 deletions eggshell/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-

"""Utils subpackage of Eggshell.
Utils contains general utilities.
"""

0 comments on commit c7df6a0

Please sign in to comment.