In [1]:
cd ..

/raid/localscratch/qfebvre/oceanbench


In [2]:
from omegaconf import OmegaConf
import hydra
import xarray as xr

import oceanbench._src.geoprocessing.validation as geoval

In [3]:
# Development aid: reload the validation module so that edits made to its source
# file are picked up without restarting the kernel.
import importlib
importlib.reload(geoval)

<module 'oceanbench._src.geoprocessing.validation' from '/raid/localscratch/qfebvre/oceanbench/oceanbench/_src/geoprocessing/validation.py'>

In [4]:
# Open the NATL60 SSH file from the data registry (path is relative to the
# current working directory).
raw_natl = xr.open_dataset('../sla-data-registry/NATL60/NATL/ref_new/NATL60-CJM165_NATL_ssh_y2013.1y.nc')

In [5]:
# Display the dataset exactly as loaded, before any preprocessing.
raw_natl

## Preprocessing steps
- [ ] Set the time units to "seconds since 2012-10-01"
- [ ] Decode the times to datetime
- [ ] Add longitude and latitude units
- [ ] Add the sea surface height (SSH) unit
- [ ] Select the spatial and temporal domain


**Decode time**

In [6]:
# Show the time coordinate before and after decoding its numeric values as
# offsets relative to 2012-10-01.
print('Before: ', raw_natl.time, end='\n\n')
natl = geoval.decode_cf_time(raw_natl, units="seconds since 2012-10-01")
print('After: ', natl.time)
                      

Before:  <xarray.DataArray 'time' (time: 365)>
array([   43200.,   129600.,   216000., ..., 31320000., 31406400., 31492800.])
Coordinates:
  * time     (time) float64 4.32e+04 1.296e+05 2.16e+05 ... 3.141e+07 3.149e+07

After:  <xarray.DataArray 'time' (time: 365)>
array(['2012-10-01T12:00:00.000000000', '2012-10-02T12:00:00.000000000',
       '2012-10-03T12:00:00.000000000', ..., '2013-09-28T12:00:00.000000000',
       '2013-09-29T12:00:00.000000000', '2013-09-30T12:00:00.000000000'],
      dtype='datetime64[ns]')
Coordinates:
  * time     (time) datetime64[ns] 2012-10-01T12:00:00 ... 2013-09-30T12:00:00


**Validate lat lon coordinates**

In [7]:
# Compare the lon/lat coordinate attributes before and after validation.
print('Before: ', natl.lon.attrs, natl.lat.attrs, end='\n\n')
natl = geoval.validate_latlon(natl)
print('After: ', natl.lon.attrs, natl.lat.attrs)


Before:  {} {}

After:  {'units': 'degrees_east', 'standard_name': 'longitude', 'long_name': 'Longitude'} {'units': 'degrees_north', 'standard_name': 'latitude', 'long_name': 'Latitude'}


**Validate ssh variable**

In [8]:
# Compare the attributes of the `ssh` variable before and after validation.
print('Before: ', natl.ssh.attrs, end='\n\n')
natl = geoval.validate_ssh(natl)
print('After: ', natl.ssh.attrs)



Before:  {}

After:  {'units': 'm', 'standard_name': 'sea_surface_height', 'long_name': 'Sea Surface Height'}


In [9]:
# Select domain: crop the dataset to a lat/lon box and a time window, printing
# the dimension sizes before and after the selection.
print('Before: ', natl.dims, end='\n\n')
final_natl = natl.sel(
    lat=slice(32, 44),
    lon=slice(-66, -54),
    time=slice('2013-01-10', '2013-03-10'),
)
print('After: ', final_natl.dims)


Before:  Frozen({'time': 365, 'lat': 781, 'lon': 1721})

After:  Frozen({'time': 60, 'lat': 240, 'lon': 240})


In [10]:
# Display the preprocessed dataset after the domain selection.
final_natl

## Using configuration for processing

In [11]:
import yaml
from IPython.display import Markdown, display

def disp_config(cfg):
    """Render an OmegaConf config as a fenced YAML block in the cell output.

    The config is converted to plain Python containers with
    ``OmegaConf.to_container`` (default arguments), dumped to YAML with a
    2-space indent, and shown through IPython's ``Markdown`` display.

    Args:
        cfg: The OmegaConf config (or sub-config) to display.

    Returns:
        None. The rendering is emitted via ``display``.
    """
    plain_cfg = OmegaConf.to_container(cfg)
    yaml_text = yaml.dump(plain_cfg, default_flow_style=None, indent=2)
    display(Markdown(f"```yaml\n{yaml_text}\n```"))

In [12]:
# Load the data-pipeline configuration from YAML (path relative to the working directory).
data_cfg = OmegaConf.load('config/data/gridded.yaml')
# Let Hydra build the objects described by the config. Entries marked
# `_partial_: true` come back as callables that later cells invoke as `data[key]()`.
data = hydra.utils.call(data_cfg)
# Show the configuration as loaded, with `${...}` interpolations still unresolved.
disp_config(data_cfg)


```yaml
domain:
  lat:
    _args_: [32.0, 44.0]
    _target_: builtins.slice
  lon:
    _args_: [-66.0, -54.0]
    _target_: builtins.slice
  time:
    _args_: ['2013-01-10', '2013-03-10']
    _target_: builtins.slice
geoval: oceanbench._src.geoprocessing.validation
natl:
  _partial_: true
  _target_: oceanbench._src.data.pipe
  fns:
  - {_partial_: true, _target_: xarray.open_dataset, decode_times: false}
  - {_partial_: true, _target_: '${geoval}.decode_cf_time', units: seconds since 2012-10-01}
  - {_partial_: true, _target_: '${geoval}.validate_latlon'}
  - {_partial_: true, _target_: '${geoval}.validate_time'}
  - {_partial_: true, _target_: '${geoval}.validate_ssh'}
  - {_partial_: true, _target_: xarray.Dataset.sel, indexers: '${domain}'}
  inp: ${registry}/NATL60/NATL/ref_new/NATL60-CJM165_NATL_ssh_y2013.1y.nc
obs:
  _partial_: true
  _target_: oceanbench._src.data.pipe
  fns:
  - {_partial_: true, _target_: xarray.open_dataset, decode_times: false}
  - {_partial_: true, _target_: '${geoval}.decode_cf_time', units: 'days since 2012-10-01
      12:00:00'}
  - {_partial_: true, _target_: '${geoval}.validate_latlon'}
  - {_partial_: true, _target_: '${geoval}.validate_time'}
  - {_partial_: true, _target_: xarray.Dataset.sel, indexers: '${domain}'}
  inp: ${registry}/NATL60/NATL/data_new/dataset_nadir_0d.nc
oi:
  _partial_: true
  _target_: oceanbench._src.data.pipe
  fns:
  - {_partial_: true, _target_: xarray.open_dataset, decode_times: false}
  - {_partial_: true, _target_: '${geoval}.decode_cf_time', units: 'days since 2012-10-01
      12:00:00'}
  - {_partial_: true, _target_: '${geoval}.validate_latlon'}
  - {_partial_: true, _target_: '${geoval}.validate_time'}
  - {_partial_: true, _target_: xarray.Dataset.sel, indexers: '${domain}'}
  inp: ${registry}/NATL60/NATL/oi/ssh_NATL60_4nadir.nc
registry: ../sla-data-registry

```

In [13]:
# Inspect and run the pipeline configured under the `natl` entry.
key = 'natl'
# Resolve the `${...}` interpolations so the displayed config shows concrete values.
# NOTE(review): this appears to modify `data_cfg[key]` in place; confirm.
OmegaConf.resolve(data_cfg[key])
disp_config(data_cfg[key])
# Run the pipeline callable built when `data` was created; its result is the cell output.
data[key]()

```yaml
_partial_: true
_target_: oceanbench._src.data.pipe
fns:
- {_partial_: true, _target_: xarray.open_dataset, decode_times: false}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.decode_cf_time,
  units: seconds since 2012-10-01}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_latlon}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_time}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_ssh}
- _partial_: true
  _target_: xarray.Dataset.sel
  indexers:
    lat:
      _args_: [32.0, 44.0]
      _target_: builtins.slice
    lon:
      _args_: [-66.0, -54.0]
      _target_: builtins.slice
    time:
      _args_: ['2013-01-10', '2013-03-10']
      _target_: builtins.slice
inp: ../sla-data-registry/NATL60/NATL/ref_new/NATL60-CJM165_NATL_ssh_y2013.1y.nc

```

In [14]:
# Inspect and run the pipeline configured under the `oi` entry.
key = 'oi'
# Resolve the `${...}` interpolations so the displayed config shows concrete values.
# NOTE(review): this appears to modify `data_cfg[key]` in place; confirm.
OmegaConf.resolve(data_cfg[key])
disp_config(data_cfg[key])
# Run the pipeline callable built when `data` was created; its result is the cell output.
data[key]()

```yaml
_partial_: true
_target_: oceanbench._src.data.pipe
fns:
- {_partial_: true, _target_: xarray.open_dataset, decode_times: false}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.decode_cf_time,
  units: 'days since 2012-10-01 12:00:00'}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_latlon}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_time}
- _partial_: true
  _target_: xarray.Dataset.sel
  indexers:
    lat:
      _args_: [32.0, 44.0]
      _target_: builtins.slice
    lon:
      _args_: [-66.0, -54.0]
      _target_: builtins.slice
    time:
      _args_: ['2013-01-10', '2013-03-10']
      _target_: builtins.slice
inp: ../sla-data-registry/NATL60/NATL/oi/ssh_NATL60_4nadir.nc

```

In [15]:
# Inspect and run the pipeline configured under the `obs` entry.
key = 'obs'
# Resolve the `${...}` interpolations so the displayed config shows concrete values.
# NOTE(review): this appears to modify `data_cfg[key]` in place; confirm.
OmegaConf.resolve(data_cfg[key])
disp_config(data_cfg[key])
# Run the pipeline callable built when `data` was created; its result is the cell output.
data[key]()

```yaml
_partial_: true
_target_: oceanbench._src.data.pipe
fns:
- {_partial_: true, _target_: xarray.open_dataset, decode_times: false}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.decode_cf_time,
  units: 'days since 2012-10-01 12:00:00'}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_latlon}
- {_partial_: true, _target_: oceanbench._src.geoprocessing.validation.validate_time}
- _partial_: true
  _target_: xarray.Dataset.sel
  indexers:
    lat:
      _args_: [32.0, 44.0]
      _target_: builtins.slice
    lon:
      _args_: [-66.0, -54.0]
      _target_: builtins.slice
    time:
      _args_: ['2013-01-10', '2013-03-10']
      _target_: builtins.slice
inp: ../sla-data-registry/NATL60/NATL/data_new/dataset_nadir_0d.nc

```

In [16]:
# Show the (not yet executed) `natl` pipeline: a functools.partial wrapping `pipe`.
# NOTE(review): `data` is already the result of `hydra.utils.call(data_cfg)`, so this
# second `call` looks redundant; `data.natl` presumably gives the same object. Confirm.
hydra.utils.call(data).natl

functools.partial(<function pipe at 0x7f8d5ffc1480>, inp='../sla-data-registry/NATL60/NATL/ref_new/NATL60-CJM165_NATL_ssh_y2013.1y.nc', fns=[functools.partial(<function open_dataset at 0x7f8d6a8fedd0>, decode_times=False), functools.partial(<function decode_cf_time at 0x7f8d6a921c60>, units='seconds since 2012-10-01'), functools.partial(<function validate_latlon at 0x7f8d6a922200>), functools.partial(<function validate_time at 0x7f8d6a921d80>), functools.partial(<function validate_ssh at 0x7f8d6a921ea0>), functools.partial(<function Dataset.sel at 0x7f8d6ae05480>, indexers={'lat': slice(32.0, 44.0, None), 'lon': slice(-66.0, -54.0, None), 'time': slice('2013-01-10', '2013-03-10', None)})])

In [17]:
# Build the pipelines from the config again and immediately run the `oi` one.
hydra.utils.call(data_cfg).oi()