### create zarr from FV3 restart files

##### post-processes the execution of an FV3 timestep(s) into a zarr data store

Brian Henn, VCM, October 2019

In [104]:
import xarray as xr
import zarr
from os.path import join
import pandas as pd

In [24]:
run_dir = '/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir'
restart_dir = 'RESTART'
input_dir = 'INPUT'

In [72]:
restart_only_prefixes = ['fv_core.res', 'fv_sfc_wind.res', 'fv_tracer.res', 'sfc_data']
restart_path_prefixes = [join(restart_dir, restart_prefix) for restart_prefix in restart_only_prefixes]
output_only_prefixes = ['phy_data']
input_only_prefixes = ['grid_spec', 'oro_data']
input_path_prefixes = [join(input_dir, input_prefix) for input_prefix in input_only_prefixes]
TILES = range(1,7)
tile_suffixes = [f".tile{tile}.nc" for tile in TILES]
# tile = pd.Index(TILES, name='tile')

In [61]:
tile_suffixes

['.tile1.nc', '.tile2.nc', '.tile3.nc', '.tile4.nc', '.tile5.nc', '.tile6.nc']

In [62]:
grid = xr.open_mfdataset(paths=[join(run_dir, input_path_prefixes[0] + tile_suffix) for tile_suffix in tile_suffixes], concat_dim='tile', combine='nested')

In [95]:
grid

<xarray.Dataset>
Dimensions:   (nx: 96, nxp: 97, ny: 96, nyp: 97, tile: 6)
Dimensions without coordinates: nx, nxp, ny, nyp, tile
Data variables:
    x         (tile, nyp, nxp) float32 dask.array<shape=(6, 97, 97), chunksize=(1, 97, 97)>
    y         (tile, nyp, nxp) float32 dask.array<shape=(6, 97, 97), chunksize=(1, 97, 97)>
    dx        (tile, nyp, nx) float32 dask.array<shape=(6, 97, 96), chunksize=(1, 97, 96)>
    dy        (tile, ny, nxp) float32 dask.array<shape=(6, 96, 97), chunksize=(1, 96, 97)>
    area      (tile, ny, nx) float32 dask.array<shape=(6, 96, 96), chunksize=(1, 96, 96)>
    angle_dx  (tile, nyp, nxp) float32 dask.array<shape=(6, 97, 97), chunksize=(1, 97, 97)>
    angle_dy  (tile, nyp, nxp) float32 dask.array<shape=(6, 97, 97), chunksize=(1, 97, 97)>

In [108]:
def grid_spec_centers_only(ds: xr.Dataset, dims: list) -> xr.Dataset:
    ds_out = ds.copy()
    for dim in dims:
        ds = ds.isel(dim=[1::2])
    return ds

SyntaxError: invalid syntax (<ipython-input-108-c56019a835c8>, line 4)

In [100]:
grid.x[:, 0:-1:2, 0:-1:2]

<xarray.DataArray 'x' (tile: 6, nyp: 48, nxp: 48)>
dask.array<shape=(6, 48, 48), dtype=float32, chunksize=(1, 48, 48)>
Dimensions without coordinates: tile, nyp, nxp

In [105]:
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PolyCollection, QuadMesh
from matplotlib.colors import Normalize

In [None]:
def construct_polygons(ds):
    coords = ds[['grid_lon', 'grid_lat']]
    
    ll = coords.isel(
        grid_x=slice(None, -1),
        grid_y=slice(None, -1), 
        drop=True
    ).to_array().rename('ll')
    
    ul = coords.isel(
        grid_x=slice(1, None),
        grid_y=slice(None, -1), 
        drop=True
    ).to_array().rename('ul')

    lr = coords.isel(
        grid_x=slice(None, -1),
        grid_y=slice(1, None), 
        drop=True
    ).to_array().rename('lr')
    
    ur = coords.isel(
        grid_x=slice(1, None),
        grid_y=slice(1, None), 
        drop=True
    ).to_array().rename('ur')

    polygons = xr.merge(
        [ll, ul, ur, lr, ll.rename('ll_cyclic')]
    ).to_array(dim='corner')
    polygons = polygons.stack(polygon=('grid_x', 'grid_y', 'tile'))
    return polygons.transpose('polygon', 'corner', 'variable')

In [74]:
core = xr.open_mfdataset(paths=[join(run_dir, restart_path_prefixes[0] + tile_suffix) for tile_suffix in tile_suffixes], concat_dim='tile', combine='nested')

In [77]:
core

<xarray.Dataset>
Dimensions:  (Time: 1, tile: 6, xaxis_1: 48, xaxis_2: 49, yaxis_1: 49, yaxis_2: 48, zaxis_1: 79)
Coordinates:
  * xaxis_1  (xaxis_1) float64 1.0 2.0 3.0 4.0 5.0 ... 44.0 45.0 46.0 47.0 48.0
  * xaxis_2  (xaxis_2) float64 1.0 2.0 3.0 4.0 5.0 ... 45.0 46.0 47.0 48.0 49.0
  * yaxis_1  (yaxis_1) float64 1.0 2.0 3.0 4.0 5.0 ... 45.0 46.0 47.0 48.0 49.0
  * yaxis_2  (yaxis_2) float64 1.0 2.0 3.0 4.0 5.0 ... 44.0 45.0 46.0 47.0 48.0
  * zaxis_1  (zaxis_1) float64 1.0 2.0 3.0 4.0 5.0 ... 75.0 76.0 77.0 78.0 79.0
  * Time     (Time) float64 1.0
Dimensions without coordinates: tile
Data variables:
    u        (tile, Time, zaxis_1, yaxis_1, xaxis_1) float64 dask.array<shape=(6, 1, 79, 49, 48), chunksize=(1, 1, 79, 49, 48)>
    v        (tile, Time, zaxis_1, yaxis_2, xaxis_2) float64 dask.array<shape=(6, 1, 79, 48, 49), chunksize=(1, 1, 79, 48, 49)>
    W        (tile, Time, zaxis_1, yaxis_2, xaxis_1) float64 dask.array<shape=(6, 1, 79, 48, 48), chunksize=(1, 1, 79, 48, 48)>
    

In [84]:
data=core.isel(Time=0)['T'].load().stack(polygon=('xaxis_1', 'yaxis_2', 'tile'))

In [85]:
data

<xarray.DataArray 'T' (zaxis_1: 79, polygon: 13824)>
array([[232.60993 , 244.961451, 255.396593, ..., 233.493777, 232.975132,
        244.943228],
       [220.874213, 234.881386, 233.444895, ..., 228.595111, 221.414198,
        234.94986 ],
       [217.050662, 229.998765, 227.924566, ..., 225.597191, 217.443364,
        229.709051],
       ...,
       [285.999075, 289.986722, 300.547645, ..., 287.81766 , 285.817286,
        289.593157],
       [286.04741 , 290.395704, 300.977414, ..., 288.181539, 285.97295 ,
        289.990237],
       [286.120552, 290.754627, 301.353941, ..., 288.484815, 286.181433,
        290.329979]])
Coordinates:
  * zaxis_1  (zaxis_1) float64 1.0 2.0 3.0 4.0 5.0 ... 75.0 76.0 77.0 78.0 79.0
    Time     float64 1.0
  * polygon  (polygon) MultiIndex
  - xaxis_1  (polygon) float64 1.0 1.0 1.0 1.0 1.0 ... 48.0 48.0 48.0 48.0 48.0
  - yaxis_2  (polygon) float64 1.0 1.0 1.0 1.0 1.0 ... 48.0 48.0 48.0 48.0 48.0
  - tile     (polygon) int64 0 1 2 3 4 5 0 1 2 3 4 5 ... 0

In [64]:
[join(run_dir, input_path_prefixes[1] + tile_suffix) for tile_suffix in tile_suffixes]

['/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/oro_data.tile1.nc',
 '/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/oro_data.tile2.nc',
 '/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/oro_data.tile3.nc',
 '/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/oro_data.tile4.nc',
 '/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/oro_data.tile5.nc',
 '/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/oro_data.tile6.nc']

In [69]:
xr.open_dataset('/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/fv_core.res.tile1.nc')

<xarray.Dataset>
Dimensions:  (Time: 1, xaxis_1: 48, xaxis_2: 49, yaxis_1: 49, yaxis_2: 48, zaxis_1: 79)
Coordinates:
  * xaxis_1  (xaxis_1) float32 1.0 2.0 3.0 4.0 5.0 ... 44.0 45.0 46.0 47.0 48.0
  * Time     (Time) float32 1.0
  * zaxis_1  (zaxis_1) float32 1.0 2.0 3.0 4.0 5.0 ... 75.0 76.0 77.0 78.0 79.0
  * yaxis_2  (yaxis_2) float32 1.0 2.0 3.0 4.0 5.0 ... 44.0 45.0 46.0 47.0 48.0
  * yaxis_1  (yaxis_1) float32 1.0 2.0 3.0 4.0 5.0 ... 45.0 46.0 47.0 48.0 49.0
  * xaxis_2  (xaxis_2) float32 1.0 2.0 3.0 4.0 5.0 ... 45.0 46.0 47.0 48.0 49.0
Data variables:
    phis     (Time, yaxis_2, xaxis_1) float32 ...
    delp     (Time, zaxis_1, yaxis_2, xaxis_1) float32 ...
    DZ       (Time, zaxis_1, yaxis_2, xaxis_1) float32 ...
    W        (Time, zaxis_1, yaxis_2, xaxis_1) float32 ...
    T        (Time, zaxis_1, yaxis_2, xaxis_1) float32 ...
    u        (Time, zaxis_1, yaxis_1, xaxis_1) float32 ...
    v        (Time, zaxis_1, yaxis_2, xaxis_2) float32 ...

In [67]:
xr.open_dataset('/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/grid_spec.tile1.nc')

<xarray.Dataset>
Dimensions:   (nx: 96, nxp: 97, ny: 96, nyp: 97)
Dimensions without coordinates: nx, nxp, ny, nyp
Data variables:
    x         (nyp, nxp) float32 ...
    y         (nyp, nxp) float32 ...
    dx        (nyp, nx) float32 ...
    dy        (ny, nxp) float32 ...
    area      (ny, nx) float32 ...
    angle_dx  (nyp, nxp) float32 ...
    angle_dy  (nyp, nxp) float32 ...

In [68]:
xr.open_dataset('/home/brianh/dev/fv3net/data/restart/C48/20160805.170000/rundir/INPUT/grid_spec.tile2.nc')

<xarray.Dataset>
Dimensions:   (nx: 96, nxp: 97, ny: 96, nyp: 97)
Dimensions without coordinates: nx, nxp, ny, nyp
Data variables:
    x         (nyp, nxp) float32 ...
    y         (nyp, nxp) float32 ...
    dx        (nyp, nx) float32 ...
    dy        (ny, nxp) float32 ...
    area      (ny, nx) float32 ...
    angle_dx  (nyp, nxp) float32 ...
    angle_dy  (nyp, nxp) float32 ...