Skip to content

Commit

Permalink
optimise nc.ocg_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
nilshempelmann committed Dec 7, 2018
1 parent e2457f7 commit 72c3702
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 49 deletions.
38 changes: 35 additions & 3 deletions eggshell/nc/ocg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,15 +169,47 @@ def call(resource=[], variable=None, dimension_map=None, agg_selection=True, cal
LOGGER.exception('failed to setup OcgOperations: {}'.format(ex))
return None

# TODO: include comparison of dataload to available memory

dataload = 1
available_memory = 2

try:
LOGGER.info('ocgis module call as ops.execute()')
geom_file = ops.execute()
if dataload < available_memory: # compare dataload to free_memory
LOGGER.info('ocgis module call as ops.execute()')
geom_file = ops.execute()
else:
LOGGER.info('ocgis module call as compute(ops)')
# TODO: estimate the right tile_dimension
tile_dimension = 5 # default
LOGGER.info('ocgis module call compute with chunks')
if calc is None: # TODO remove this section if issue 35 is fixed
calc = '%s=%s*1' % (variable, variable)
LOGGER.info('calc set to = %s ' % calc)
ops = OcgOperations(dataset=rd,
output_format_options=output_format_options,
dir_output=dir_output,
spatial_wrapping=spatial_wrapping,
spatial_reorder=spatial_reorder,
# regrid_destination=rd_regrid,
# options=options,
calc=calc,
calc_grouping=calc_grouping,
geom=geom,
output_format=output_format,
prefix=prefix,
search_radius_mult=search_radius_mult,
select_nearest=select_nearest,
select_ugid=select_ugid,
add_auxiliary_files=False)
geom = compute(ops, tile_dimension=tile_dimension, verbose=True)

except Exception as ex:
LOGGER.exception('failed to execute ocgis operation : {}'.format(ex))
return None
return geom_file

#TODO: memory check

# try:
# from numpy import sqrt
# from flyingpigeon.utils import FreeMemory
Expand Down
94 changes: 48 additions & 46 deletions standalones/ocgis_freememory.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,82 @@
from eggshell.nc import nc_fetch
from eggshell import utils
# from eggshell.nc import nc_fetch
# from eggshell import utils

from ocgis import RequestDataset, OcgOperations, env
from ocgis.util.large_array import compute
from os import listdir
from os.path import join

env.OVERWRITE=True
from datetime import datetime as dt
import uuid

years = range(2015,2017)

ncs = []
for year in years:
url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/slp.%s.nc' % (year)
ncs.extend([utils.download_file(url)])
# years = range(2015,2017)
#
# ncs = []
# for year in years:
# url = 'https://www.esrl.noaa.gov/psd/thredds/fileServer/Datasets/ncep.reanalysis.dailyavgs/pressure/slp.%s.nc' % (year)
# ncs.extend([utils.download_file(url)])
# # print ncs

# print ncs
level_range = [700, 700]
time_range = [dt.strptime('20100315', '%Y%m%d'), dt.strptime('20111210', '%Y%m%d')]
# time_range = [dt.strptime('20100315', '%Y%m%d'), dt.strptime('20111210', '%Y%m%d')]
bbox = [-80, 20, 20, 70]


# from os import listdir
# from os.path import join
#
# p = '/home/nils/data/CMIP5/'
# ncs = [join(p , nc ) for nc in listdir(p) if ".nc" in nc ]
# ncs.sort()
p = '/home/nils/data/CORDEX/'
ncs = [join(p, nc) for nc in listdir(p) if ".nc" in nc]
ncs.sort()

#ncs = datafetch.reanalyses(start=2000, end=2003)

# TODO: BUG: ocg compute is not running if calc == None
# calc = '%s=%s*1' % (variable, variable)
#

rd = RequestDataset(ncs)

ops = OcgOperations(rd,
time_range=time_range,
# calc = '%s=%s*1' % ('slp', 'slp'),
# time_range=time_range,
calc = '%s=%s*1' % ('tas', 'tas'),
# level_range=level_range,
geom=bbox,
output_format='nc',
prefix='ocgis_module_optimisation',
dir_output='/home/nils/data/',
add_auxiliary_files=False)

# ###################################
# check free memory available somehow
from eggshell import util_functions as ufs
free_memory = ufs.FreeMemory(unit='MB')

# ###########################
# check required memory space

data_kb = ops.get_base_request_size()['total']
data_mb = data_kb / 1024.
shnip = dt.now()
geom = ops.execute()
shnap = dt.now()
duration = (shnap - shnip).total_seconds()
print("operation performed with execute in {} sec.".format(duration))
print(geom)

# ###########################
# check if half of the available memory can take the required data load
tile_dimension=5 # default

if data_mb < fm.user_free/2:
print "enough memory. data can be processed directly"
shnip = dt.now()
shnip = dt.now()
geom = compute(ops, tile_dimension=tile_dimension, verbose=True)
shnap = dt.now()
duration = (shnap - shnip).total_seconds()

geom = ops.execute()
print("operation performed with compute in {} sec.".format(duration))
print(geom)

shnap = dt.now()

print 'operation performed with execute in %s sec' % (shnap - shnip).total_seconds()
print geom
# ###################################
# check free memory available somehow
# from eggshell import util_functions as ufs
# free_memory = ufs.FreeMemory(unit='MB')
#
# # ###########################
# # check required memory space
#
# data_kb = ops.get_base_request_size()['total']
# data_mb = data_kb / 1024.
#
# # ###########################
# # check if half of the available memory can take the required data load
#
# if data_mb < fm.user_free/2:
# print "enough memory. data can be processed directly"

########################
# simulation if memory is not enough for the dataload. Then calculate in chunks
Expand All @@ -78,10 +87,3 @@
print "NOT enough memory. data will be processed in chunks"
# calculate tile dimension:
tile_dimension= 10 # TODO: needs to be calculated based on dataload and available memory

shnip = dt.now()
geom = compute(ops, tile_dimension=tile_dimension, verbose=True)
shnap = dt.now()

print 'operation performed with compute in %s sec' % (shnap - shnip ).total_seconds()
print geom

0 comments on commit 72c3702

Please sign in to comment.