In [1]:
from dask.distributed import Client

# Attach to an already-running Dask scheduler instead of starting a new
# cluster; the bare name on the last line renders the cluster summary.
client = Client(address="tcp://127.0.0.1:45223")
client

0,1
Connection method: Direct,
Dashboard: http://127.0.0.1:8787/status,

0,1
Comm: tcp://127.0.0.1:45223,Workers: 3
Dashboard: http://127.0.0.1:8787/status,Total threads: 6
Started: 3 hours ago,Total memory: 9.23 GiB

0,1
Comm: tcp://127.0.0.1:41611,Total threads: 2
Dashboard: http://127.0.0.1:38515/status,Memory: 3.08 GiB
Nanny: tcp://127.0.0.1:32953,
Local directory: /tmp/dask-worker-space/worker-0ld0ksnh,Local directory: /tmp/dask-worker-space/worker-0ld0ksnh
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 2.0%,Last seen: Just now
Memory usage: 153.88 MiB,Spilled bytes: 0 B
Read bytes: 39.00 kiB,Write bytes: 39.00 kiB

0,1
Comm: tcp://127.0.0.1:36217,Total threads: 2
Dashboard: http://127.0.0.1:45155/status,Memory: 3.08 GiB
Nanny: tcp://127.0.0.1:45019,
Local directory: /tmp/dask-worker-space/worker-3rbuxsbd,Local directory: /tmp/dask-worker-space/worker-3rbuxsbd
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 4.0%,Last seen: Just now
Memory usage: 474.01 MiB,Spilled bytes: 0 B
Read bytes: 46.81 kiB,Write bytes: 46.81 kiB

0,1
Comm: tcp://127.0.0.1:41713,Total threads: 2
Dashboard: http://127.0.0.1:45579/status,Memory: 3.08 GiB
Nanny: tcp://127.0.0.1:34063,
Local directory: /tmp/dask-worker-space/worker-x74rwy5h,Local directory: /tmp/dask-worker-space/worker-x74rwy5h
Tasks executing: 0,Tasks in memory: 0
Tasks ready: 0,Tasks in flight: 0
CPU usage: 4.0%,Last seen: Just now
Memory usage: 470.59 MiB,Spilled bytes: 0 B
Read bytes: 50.11 kiB,Write bytes: 50.11 kiB


In [2]:
from pathlib import Path

import click
import numpy as np
import pandas as pd
import xarray as xr
import yaml
from wam2layers.analysis.checks import check_input
from wam2layers.preprocessing.shared import (accumulation_to_flux,
                                                    calculate_humidity,
                                                    insert_level, interpolate,
                                                    sortby_ndarray)

In [3]:
from wam2layers.preprocessing.era5 import parse_config, preprocess_precip_and_evap, load_data, get_dp_modellevels, get_edges

In [4]:
# Parse the YAML configuration of the global ERA5 case.
config_file = '/home/peter/WAM2layers/cases/era5_global.yaml'
config = parse_config(config_file)

In [5]:
# Use the first entry of the configured date list as the day to process.
date = config["datelist"][0]
date

Timestamp('2021-07-01 00:00:00', freq='D')

In [19]:
# Preprocess one day of ERA5 data into two-layer moisture storages and fluxes.

# 4d fields
levels = config["levels"]

q = load_data("q", date, config)  # in kg kg-1
u = load_data("u", date, config)  # in m/s
v = load_data("v", date, config)  # in m/s
sp = load_data("sp", date, config)  # in Pa

# Per-level pressure difference derived from surface pressure
# (presumably in Pa, given the kg/m2 result below -- TODO confirm).
dp = get_dp_modellevels(sp, levels)
# Alternatives that were tried for this step:
# dp = xr.map_blocks(lambda x: get_dp_modellevels(x, levels), sp, template=q)
# dp = xr.apply_ufunc(get_dp_modellevels, sp, levels, dask='allowed')

# Calculate column water vapour
g = 9.80665  # gravitational acceleration [m/s2]
cwv = q * dp / g  # (kg/m2)

# Possibly correct with tcw here; for now the uncorrected value is used.
cw = cwv

# Calculate fluxes
fx = u * cw  # eastward atmospheric moisture flux (kg m-1 s-1)
fy = v * cw  # northward atmospheric moisture flux (kg m-1 s-1)

# Vertically integrate over two layers.
# `boundary` is the model-level number at which the column is split; levels up
# to and including it go to the upper layer (side='right'), the rest to the
# lower layer.
boundary = 111
idx = int(dp.level.searchsorted(boundary, side='right'))

# Select by dimension name rather than by axis position, so the split does not
# silently depend on the arrays being ordered (time, level, lat, lon).
upper = {"level": slice(None, idx)}
lower = {"level": slice(idx, None)}

s_lower = cw.isel(lower).sum(dim="level")
s_upper = cw.isel(upper).sum(dim="level")

fx_lower = fx.isel(lower).sum(dim="level")  # kg m-1 s-1
fy_lower = fy.isel(lower).sum(dim="level")  # kg m-1 s-1

fx_upper = fx.isel(upper).sum(dim="level")  # kg m-1 s-1
fy_upper = fy.isel(upper).sum(dim="level")  # kg m-1 s-1

# Load and preprocess precip and evap
precip, evap = preprocess_precip_and_evap(date, config)

# Combine everything into one dataset
ds = xr.Dataset(
    {
        "fx_upper": fx_upper.assign_attrs(units="kg m-1 s-1"),
        "fy_upper": fy_upper.assign_attrs(units="kg m-1 s-1"),
        "fx_lower": fx_lower.assign_attrs(units="kg m-1 s-1"),
        "fy_lower": fy_lower.assign_attrs(units="kg m-1 s-1"),
        "s_upper": s_upper.assign_attrs(units="kg m-2"),
        "s_lower": s_lower.assign_attrs(units="kg m-2"),
        "evap": evap,
        "precip": precip,
    }
)
ds

/home/peter/WAM2layers/era5_global/2021/07/ERA5_2021-07-01_ml_q.nc
/home/peter/WAM2layers/era5_global/2021/07/ERA5_2021-07-01_ml_u.nc
/home/peter/WAM2layers/era5_global/2021/07/ERA5_2021-07-01_ml_v.nc
/home/peter/WAM2layers/era5_global/2021/07/ERA5_2021-07-01_sp.nc
/home/peter/WAM2layers/era5_global/2021/07/ERA5_2021-07-01_e.nc
/home/peter/WAM2layers/era5_global/2021/07/ERA5_2021-07-01_tp.nc


Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 169.01 MiB 7.04 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 18 Graph Layers 24 Chunks Type float64 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 169.01 MiB 7.04 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 18 Graph Layers 24 Chunks Type float64 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 169.01 MiB 7.04 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 18 Graph Layers 24 Chunks Type float64 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 169.01 MiB 7.04 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 18 Graph Layers 24 Chunks Type float64 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,18 Graph Layers,24 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,16 Graph Layers,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 169.01 MiB 7.04 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 16 Graph Layers 24 Chunks Type float64 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,16 Graph Layers,24 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,16 Graph Layers,24 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 169.01 MiB 7.04 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 16 Graph Layers 24 Chunks Type float64 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,169.01 MiB,7.04 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,16 Graph Layers,24 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,84.51 MiB,3.52 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,5 Graph Layers,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 84.51 MiB 3.52 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 5 Graph Layers 24 Chunks Type float32 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,84.51 MiB,3.52 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,5 Graph Layers,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,84.51 MiB,3.52 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,7 Graph Layers,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 84.51 MiB 3.52 MiB Shape (24, 641, 1440) (1, 641, 1440) Count 7 Graph Layers 24 Chunks Type float32 numpy.ndarray",1440  641  24,

Unnamed: 0,Array,Chunk
Bytes,84.51 MiB,3.52 MiB
Shape,"(24, 641, 1440)","(1, 641, 1440)"
Count,7 Graph Layers,24 Chunks
Type,float32,numpy.ndarray


In [20]:
# Write the combined dataset to disk; this triggers the lazy computation.
ds.to_netcdf(path='/home/peter/WAM2layers/test1.nc')

In [21]:
# Render the Dask task graph behind the upper-layer storage.
s_upper_graph = s_upper.data.visualize()
display(s_upper_graph)

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

In [6]:
# Apply get_dp_modellevels chunk-by-chunk with xr.map_blocks, using `q` as the
# template for the result, and inspect the resulting task graph.
# The result is deliberately not called `da`: that name is used as the alias
# for `dask.array` elsewhere in this notebook, and shadowing it would make the
# notebook depend on cell execution order.
dp_mapped = xr.map_blocks(lambda x: get_dp_modellevels(x, levels), sp, template=q)
display(dp_mapped.data.visualize())

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

In [18]:
import dask.array as da

# Minimal example: multiply a chunked (time, lat, lon) array by a 1D array
# along a new 'lev' dimension and look at the task graph that plain `*`
# produces.
field_3d = da.random.random((10, 11, 12), chunks=(1, 11, 12))
profile_1d = da.random.random(5)

a = xr.DataArray(field_3d, dims=['time', 'lat', 'lon'])
b = xr.DataArray(profile_1d, dims=['lev'])
c = a * b

display(c.data.visualize())

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

In [24]:
import dask.array as da

# Same minimal example as the plain multiplication, but routed through
# xr.apply_ufunc with dask='allowed' so the two task graphs can be compared.
field_3d = da.random.random((10, 11, 12), chunks=(1, 11, 12))
profile_1d = da.random.random(5)

a = xr.DataArray(field_3d, dims=['time', 'lat', 'lon'])
b = xr.DataArray(profile_1d, dims=['lev'])
c = xr.apply_ufunc(lambda x, y: x * y, a, b, dask='allowed')

display(c.data.visualize())

CytoscapeWidget(cytoscape_layout={'name': 'dagre', 'rankDir': 'BT', 'nodeSep': 10, 'edgeSep': 10, 'spacingFact…

# Notes

- Make sure all data (4D and 3D) use the same chunks. `chunks="auto"` will lead to different splitting for 3D and 4D fields.
- Using `inline_array=True` in the `xr.open_dataset` call will create individual nodes in the task graph for opening the data of each chunk.
- `xr.map_blocks` can be used to parallelize a function such as `get_dp_modellevels`, which otherwise depends on the same levels array.

In [8]:
%%time
ds.to_netcdf('/home/peter/WAM2layers/test5.nc')


KeyboardInterrupt

