In [1]:
import iris
import numpy as np
from jade_utils.iris_tools.padded_orthogonal_merge import padded_orthogonal_merge
from jade_utils.data_tools import Loader

# Build the project loader and keep only the first 20 "mogreps-g" files as a
# small working sample; print the sample size and the first path.
myloader = Loader()
fs = myloader.list_files("mogreps-g")[:20]
print('%s files for example : %s' % (len(fs), fs[0]))

20 files for example : /usr/local/share/notebooks/data/mogreps-g/201612/prods_op_mogreps-g_20161203_00_00_003.pp


In [23]:
def human_bytes(num, suffix='B'):
    """Format a byte count using binary (1024-based) unit prefixes.

    Args:
        num: The quantity to format (may be negative).
        suffix: Text appended after the unit prefix; defaults to 'B'.

    Returns:
        A string such as '1.5KiB', with one decimal place.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        # Too large for this prefix: scale down and try the next one.
        value = value / 1024.0
    # Anything still >= 1024 Zi is reported in Yi, the largest prefix used here.
    return "%.1f%s%s" % (value, 'Yi', suffix)

In [2]:
import dask
from dask import bag as db
import os

class Error(object):
    """Value object describing a failure, used in place of a raised exception.

    Attributes are set directly from the constructor arguments:
    ``msg`` (a description) and ``err`` (the underlying error object).
    """

    def __init__(self, msg, err):
        self.msg, self.err = msg, err

    def __str__(self):
        fields = (self.msg, self.err)
        return 'Error(%s, %s)' % fields
    
def load_cubes(filepath, constraint, data_root='/usr/local/share/notebooks/data'):
    """Load raw cubes from one file, returning failures as values.

    If ``filepath`` does not exist, the part of the path after its first
    '/data' component is re-rooted under ``data_root`` before loading.

    Args:
        filepath: Path of the file to load.
        constraint: Constraint passed through to ``iris.load_raw``.
        data_root: Directory that replaces everything up to and including the
            first '/data' in a path that does not exist as given.

    Returns:
        Whatever ``iris.load_raw`` returns on success; on any exception, a
        one-element list containing an ``Error`` describing the failure.
    """
    try:
        if not os.path.exists(filepath):
            # Split on the FIRST '/data' only, so later '/data' segments in the
            # path are preserved; paths without '/data' are left untouched and
            # the resulting load failure is reported via Error below.
            _, marker, remainder = filepath.partition('/data')
            if marker:
                filepath = data_root + remainder
        return iris.load_raw(filepath, constraint, callback=add_realisation)
    except Exception as e:
        return [Error(str(e), e)]
    
def realization_from_filename(filename):
    """Return the integer in the second-to-last '_'-separated field of the file's base name."""
    _, leaf = os.path.split(filename)
    # Only the last two separators matter for picking the penultimate field.
    fields = leaf.rsplit('_', 2)
    return int(fields[-2])

def add_realisation(cube, field, filename):
    """Load callback that ensures ``cube`` has a 'realization' coordinate.

    If the cube has no 'realization' coordinate, a scalar auxiliary coordinate
    is added whose value is parsed from ``filename``. ``field`` is accepted
    but not used.
    """
    try:
        # Lookup only; the coordinate itself is not needed.
        cube.coord('realization')
    except iris.exceptions.CoordinateNotFoundError:
        member = realization_from_filename(filename)
        member_coord = iris.coords.AuxCoord(member, standard_name='realization', units='1')
        cube.add_aux_coord(member_coord)
        
def not_an_error(thing):
    """Return True unless ``thing`` is an ``Error`` instance."""
    if isinstance(thing, Error):
        return False
    return True

        
@dask.delayed
def delayed_load(filename, constraint):
    """Deferred (dask.delayed) form of ``load_cubes`` with the same arguments."""
    loaded = load_cubes(filename, constraint)
    return loaded


# Keep only cubes whose STASH attribute has item number 256.
stash_constraint = iris.Constraint(cube_func=lambda c: c.attributes['STASH'].item == 256)
load_bag = db.from_delayed([delayed_load(f, stash_constraint) for f in fs])
loaded = load_bag.compute()

# load_cubes reports a failed file as an Error placeholder rather than raising,
# so those placeholders must be reported and removed before building the
# CubeList; otherwise non-cube objects would be passed on to the merge.
load_failures = [item for item in loaded if not not_an_error(item)]
for failure in load_failures:
    print(failure)
cubes = iris.cube.CubeList([item for item in loaded if not_an_error(item)])



In [3]:
# Show the current value of the iris.FUTURE.netcdf_no_unlimited flag.
iris.FUTURE.netcdf_no_unlimited

False

In [4]:
# Combine the loaded cubes into one cube via the jade_utils helper, passing the
# coordinate names 'time', 'pressure' and 'realization'.
# NOTE(review): presumably these are the coordinates to merge over, with gaps
# padded — confirm against padded_orthogonal_merge's documentation.
mega_cube = padded_orthogonal_merge(cubes, ['time', 'pressure','realization'])

In [5]:
# Display the merged cube's summary (name, units and dimensions).
mega_cube

<iris 'Cube' of relative_humidity / (%) (time: 21; pressure: 16; latitude: 600; longitude: 800)>

In [7]:
# Write the merged cube to /tmp/cube.pp with the PP saver, for size comparison.
iris.fileformats.pp.save(mega_cube, '/tmp/cube.pp')

/opt/conda/lib/python3.5/site-packages/iris/fileformats/pp.py:2292: IrisDeprecation: as_pairs is deprecated in v1.10; please use save_pairs_from_cube instead.
  warn_deprecated('as_pairs is deprecated in v1.10; please use'


In [19]:
import os

In [26]:
# Combined on-disk size of the original input files.
human_bytes(sum(os.stat(path).st_size for path in fs))

'1.7GiB'

In [24]:
# Size of the merged cube saved as PP.
human_bytes(os.path.getsize('/tmp/cube.pp'))

'615.3MiB'

In [12]:
# Save as NetCDF with zlib compression and the shuffle option both enabled.
iris.fileformats.netcdf.save(mega_cube, '/tmp/cube_zlib_shuffle.nc', zlib=True, shuffle=True)

/opt/conda/lib/python3.5/site-packages/iris/fileformats/netcdf.py:2026: IrisDeprecation: NetCDF default saving behaviour currently assigns the outermost dimensions to unlimited. This behaviour is to be deprecated, in favour of no automatic assignment. To switch to the new behaviour, set iris.FUTURE.netcdf_no_unlimited to True.
  warn_deprecated(msg)


In [27]:
# Size of the NetCDF file written with zlib=True, shuffle=True.
human_bytes(os.path.getsize('/tmp/cube_zlib_shuffle.nc'))

'136.2MiB'

In [15]:
# Save as NetCDF with zlib compression enabled but the shuffle option disabled.
iris.fileformats.netcdf.save(mega_cube, '/tmp/cube_zlib_noshuffle.nc', zlib=True, shuffle=False)

/opt/conda/lib/python3.5/site-packages/iris/fileformats/netcdf.py:2026: IrisDeprecation: NetCDF default saving behaviour currently assigns the outermost dimensions to unlimited. This behaviour is to be deprecated, in favour of no automatic assignment. To switch to the new behaviour, set iris.FUTURE.netcdf_no_unlimited to True.
  warn_deprecated(msg)


In [28]:
# Size of the NetCDF file written with zlib=True, shuffle=False.
human_bytes(os.path.getsize('/tmp/cube_zlib_noshuffle.nc'))

'190.9MiB'

In [17]:
# Save as NetCDF with both zlib compression and the shuffle option disabled
# (uncompressed baseline for the comparison).
iris.fileformats.netcdf.save(mega_cube, '/tmp/cube_nozlib_noshuffle.nc', zlib=False, shuffle=False)

/opt/conda/lib/python3.5/site-packages/iris/fileformats/netcdf.py:2026: IrisDeprecation: NetCDF default saving behaviour currently assigns the outermost dimensions to unlimited. This behaviour is to be deprecated, in favour of no automatic assignment. To switch to the new behaviour, set iris.FUTURE.netcdf_no_unlimited to True.
  warn_deprecated(msg)


In [29]:
# Size of the NetCDF file written with zlib=False, shuffle=False.
human_bytes(os.path.getsize('/tmp/cube_nozlib_noshuffle.nc'))

'692.2MiB'