# Write CAFE60 $t_x$ transports to a regional zarr store

**Date:** <br>
17 May 2022 <br>
**Background:** <br>
Issue: https://github.com/csiro-dcfp/NCI-OOD-examples/issues/1<br>
**Author(s):**<br>
Thomas Moore<br>

In [1]:
# Author metadata record for this notebook.
Author1 = {
    "name": "Thomas Moore",
    "affiliation": "CSIRO",
    "email": "thomas.moore@csiro.au",
    "orcid": "0000-0003-3930-1946",
}

# We are using NCI OOD as the platform for data processing
### OOD documentation
https://opus.nci.org.au/display/DAE/Setting+up+a+Dask+Cluster+on+OOD

## Import packages

In [2]:
import xarray as xr
import numpy as np
import xrft
import xesmf as xe
import scipy
import matplotlib.pyplot as plt
import datetime
import pandas as pd
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import os
import re
import cartopy.crs as ccrs
import proplot as pplt
from rechunker import rechunk
%config Completer.use_jedi = False

## OOD cluster

In [3]:
from dask.distributed import Client,Scheduler
from dask_jobqueue import SLURMCluster
# Describe one SLURM job: 2 cores in a single worker process, 47GB of memory,
# and a two-hour walltime limit.
cluster = SLURMCluster(
    cores=2,
    processes=1,
    memory="47GB",
    walltime='02:00:00',
)
# Attach a client to the cluster's scheduler.
client = Client(cluster)
# Ask for enough jobs to reach 48 cores in total.
# NOTE(review): presumably 24 jobs at 2 cores each — confirm against dask-jobqueue.
cluster.scale(cores=48)

  from distributed.utils import tmpfile


In [4]:
# Display the client summary (dashboard link, worker count, threads, memory).
# Workers may still show as 0 immediately after the cluster is scaled.
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.0.128.168:43933,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Load CAFE data

## Define CAFE60 source files

In [5]:
# Absolute paths to the CAFE60v1 zipped zarr stores and the grid files.
# These are site-specific locations under /g/data/xv83.
file_ocean_month = '/g/data/xv83/dcfp/CAFE60v1/ocean_month.zarr.zip'
file_ocean_bgc_month = '/g/data/xv83/dcfp/CAFE60v1/ocean_bgc_month.zarr.zip'
file_ocean_daily = '/g/data/xv83/dcfp/CAFE60v1/ocean_daily.zarr.zip'
# NOTE(review): file_grid_spec is not referenced by any cell visible in this
# notebook; only file_grid_info is opened below.
file_grid_spec = '/g/data/xv83/users/tm4888/data/CAFE/grid_spec.auscom.20110118.nc'
file_grid_info = '/g/data/xv83/users/tm4888/data/CAFE/CAFE60_ocean_grid_info.zarr'

## Load data objects including grid information

In [6]:
%%time
# Open each store with consolidated zarr metadata. Only tx_trans from
# ds_CAFE60_ocean_month is used in the cells visible in this notebook.
ds_CAFE60_ocean_month = xr.open_zarr(file_ocean_month,consolidated=True)
# NOTE(review): the bgc, daily and grid datasets are opened here but not
# referenced by any later cell shown — confirm they are still needed.
ds_CAFE60_ocean_bgc_month = xr.open_zarr(file_ocean_bgc_month,consolidated=True)
ds_CAFE60_ocean_daily = xr.open_zarr(file_ocean_daily,consolidated=True)
ds_CAFE60_grid = xr.open_zarr(file_grid_info,consolidated=True)

CPU times: user 13 s, sys: 1.96 s, total: 15 s
Wall time: 15 s


# Data reduction by region & ensemble & period

## Define region & ensemble & period

In [7]:
# Selection parameters for the reduced tx_trans subset.
ens1 = 23                                # label of the ensemble member to keep
t1, t2 = '2000-01-16', '2018-12-31'      # start and end of the time window
x1, x2 = -250.0, -70.0                   # bounds along xu_ocean
y1, y2 = -25.0, 25.0                     # bounds along yt_ocean

# Label-based indexers: one ensemble member, plus slices in time, x and y.
reduced_Tx_dict = {
    'ensemble': ens1,
    'time': slice(t1, t2),
    'xu_ocean': slice(x1, x2),
    'yt_ocean': slice(y1, y2),
}

## Reduce data and report new size

In [8]:
# Subset tx_trans by label using the region / ensemble / period indexers.
tx_trans_reduced = ds_CAFE60_ocean_month['tx_trans'].sel(**reduced_Tx_dict)

In [9]:
# Report the in-memory size of the reduced array in decimal gigabytes (bytes / 1e9).
print(f'reduced data is {tx_trans_reduced.nbytes/1e9}GB')

reduced data is 0.907896GB


## Rechunk the reduced data into larger chunks, closer to 100 MB each
### From 3876 chunks to 12

In [10]:
# Display the reduced array to inspect its shape and current chunk layout.
tx_trans_reduced

Unnamed: 0,Array,Chunk
Bytes,865.84 MiB,233.32 kiB
Shape,"(228, 50, 110, 181)","(1, 3, 110, 181)"
Count,16304 Tasks,3876 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 865.84 MiB 233.32 kiB Shape (228, 50, 110, 181) (1, 3, 110, 181) Count 16304 Tasks 3876 Chunks Type float32 numpy.ndarray",228  1  181  110  50,

Unnamed: 0,Array,Chunk
Bytes,865.84 MiB,233.32 kiB
Shape,"(228, 50, 110, 181)","(1, 3, 110, 181)"
Count,16304 Tasks,3876 Chunks
Type,float32,numpy.ndarray


### rechunk to chunk in space but include all depth and time in each chunk

In [11]:
# Rechunk so each chunk holds every time step and every depth level for a
# 50 x 50 horizontal tile.
# -1 is the documented "one chunk spanning the whole dimension" value. `None`
# is avoided because newer dask releases appear to treat a `None` entry in a
# chunk mapping as "keep the existing chunks", which would leave the small
# source chunks along time and st_ocean untouched.
tx_trans_reduced_unchunk_time_depth = tx_trans_reduced.chunk(
    {'time': -1, 'st_ocean': -1, 'yt_ocean': 50, 'xu_ocean': 50}
)

In [12]:
# Display the rechunked array to confirm the new chunk shape and chunk count.
tx_trans_reduced_unchunk_time_depth

Unnamed: 0,Array,Chunk
Bytes,865.84 MiB,108.72 MiB
Shape,"(228, 50, 110, 181)","(228, 50, 50, 50)"
Count,16407 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 865.84 MiB 108.72 MiB Shape (228, 50, 110, 181) (228, 50, 50, 50) Count 16407 Tasks 12 Chunks Type float32 numpy.ndarray",228  1  181  110  50,

Unnamed: 0,Array,Chunk
Bytes,865.84 MiB,108.72 MiB
Shape,"(228, 50, 110, 181)","(228, 50, 50, 50)"
Count,16407 Tasks,12 Chunks
Type,float32,numpy.ndarray


## Write to `zarr` 

In [None]:
%%time
tx_trans_reduced_unchunk_time_depth.to_dataset().to_zarr('/g/data/xv83/users/tm4888/data/CAFE/CAFE60_Tx_region.zarr')

# Sanity check

## load back in and make some plots

In [None]:
# Re-open the regional store from the same path the write cell targets,
# so the checks below read what is on disk rather than the in-memory array.
DS = xr.open_zarr('/g/data/xv83/users/tm4888/data/CAFE/CAFE60_Tx_region.zarr')

In [None]:
# Display the re-opened dataset to check its variables, dimensions and chunks.
DS

In [None]:
%%time
DS.tx_trans.isel(time=1).sel({'xu_ocean':-180},method='nearest').plot(size=10)
plt.gca().invert_yaxis()
ax = plt.gca()
ax.set_facecolor('grey')
plt.tight_layout()
plt.draw()

# Trying a different selection that fits the chunking

In [62]:
# Second selection: a short time window over a shallow st_ocean range.
# This rebinds t1 and t2, which the earlier region-definition cell also sets.
t1, t2 = '2000-01-16', '2000-08-16'
depth1, depth2 = 0, 30

reduced_Tx_dict_2 = {
    'st_ocean': slice(depth1, depth2),
    'time': slice(t1, t2),
}

In [63]:
# Subset tx_trans by depth and time only. The indexers hold no 'ensemble',
# 'xu_ocean' or 'yt_ocean' entry, so those dimensions are kept in full.
tx_trans_reduced_2 = ds_CAFE60_ocean_month['tx_trans'].sel(**reduced_Tx_dict_2)

In [64]:
# Report the size of the second selection in decimal gigabytes (bytes / 1e9).
print(f'reduced data is {tx_trans_reduced_2.nbytes/1e9}GB')

reduced data is 0.995328GB


In [65]:
%%time
# Evaluate the selection and rebind the same name to the computed result.
# After this cell, tx_trans_reduced_2 no longer refers to the lazy selection
# created above.
tx_trans_reduced_2 = tx_trans_reduced_2.compute()

CPU times: user 2.16 s, sys: 1.93 s, total: 4.08 s
Wall time: 13.5 s


# $ The\ End$

## Break glass in case of emergency $\Downarrow$

In [None]:
# Emergency only: ask the Dask client to restart.
# NOTE(review): per dask.distributed this restarts the workers and discards
# their in-memory state — confirm before relying on it.
client.restart()

In [None]:
# Emergency / clean-up only: shut down through the client. Cells that need
# `client` afterwards require the cluster cell to be run again.
client.shutdown()