In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import xarray as xr
import gfdl_utils as gu
from dask.diagnostics import ProgressBar
import os
import datetime

## Save zarr stores
Chug certain variables into a zarr store in `/work` for later use.

Notebook is separated into four sections that should be considered quasi-independent:
- **SEARCH**
    - Various functions to find specific files or variables
    - Most are dependent only on the pp directory
- **SPECIFY**
    - Give specifics of file(s) to be loaded and saved
        - `ppname` (subfolder of `pp`)
        - `out` (averaging of output: `av` or `ts`)
        - `time` (timespan to be used, usually `*` for all time)
        - `add` (name of variable (when `out==ts`) or `*` for `out==av`)
            - `add` can be a list of severable variables, so long as they are all present in `ppname`
    - Define filename for zarr store and check if it exists
        - filename : ppname.out.zarr
- **DMGET**
    - Retrieve specified file(s) from tape
- **LOAD AND SAVE**
    - Load files into `ds` (loop through variables if several specified in `add`)
    - Save to zarr store, appending to store if it already exists

In [53]:
config_id = 'ESM4_historical_D1'
pp = '/archive/oar.gfdl.cmip6/ESM4/DECK/'+config_id+'/gfdl.ncrc4-intel16-prod-openmp/pp'
# pp = '/archive/Raphael.Dussin/FMS2019.01.03_devgfdl_20210706/CM4_piControl_c192_OM4p125_v7/gfdl.ncrc4-intel18-prod-openmp/pp'

outdir = '/work/gam/zarr/'+config_id+'/'

### SEARCH

#### List pp names

In [55]:
gu.core.get_ppnames(pp)

['.dec',
 '.checkpoint',
 'ocean_daily_cmip',
 'ocean_daily_gfdl',
 'ocean_daily_1x1deg',
 'ocean_monthly',
 'ocean_monthly_1x1deg',
 'ocean_month_rho2',
 'ocean_month_rho2_1x1deg',
 'ocean_monthly_z',
 'ocean_monthly_z_1x1deg',
 'ocean_annual',
 'ocean_annual_1x1deg',
 'ocean_annual_rho2',
 'ocean_annual_rho2_1x1deg',
 'ocean_annual_z',
 'ocean_annual_z_1x1deg',
 'ocean_scalar_monthly',
 'ocean_scalar_annual',
 'ocean_Barents_opening',
 'ocean_Bering_Strait',
 'ocean_Davis_Strait',
 'ocean_Windward_Passage',
 'ocean_Denmark_Strait',
 'ocean_Drake_Passage',
 'ocean_English_Channel',
 'ocean_Faroe_Scotland',
 'ocean_Florida_Bahamas',
 'ocean_Fram_Strait',
 'ocean_Iceland_Faroe_V',
 'ocean_Iceland_Faroe_U',
 'ocean_Indonesian_Throughflow',
 'ocean_Mozambique_Channel',
 'ocean_Pacific_undercurrent',
 'ocean_Taiwan_Luzon',
 'ocean_Agulhas_section',
 'ocean_Gibraltar_Strait',
 'ocean_Iceland_Norway',
 'ocean_month_refined',
 'ocean_month_z_refined',
 'ocean_month_rho2_refined',
 'atmos_cmip

#### List all available variables

In [54]:
gu.core.get_allvars(pp)

{'ocean_daily_cmip': ['sos', 'tos', 'zos'],
 'ocean_daily_gfdl': ['omldamax', 'sossq', 'ssu', 'ssv', 'tossq'],
 'ocean_daily_1x1deg': ['omldamax', 'sos', 'sossq', 'tos', 'tossq', 'zos'],
 'ocean_monthly': ['S_diffx_2d',
  'S_diffy_2d',
  'sfdsi',
  'sob',
  'sos',
  'sossq',
  'T_adx_2d',
  'T_ady_2d',
  'tauuo',
  'tauvo',
  'T_diffx_2d',
  'T_diffy_2d',
  'temp_layer_ave',
  'tob',
  'tos',
  'tossq',
  'udml_restrat',
  'uml_restrat',
  'umo_2d',
  'ustar',
  'vdml_restrat',
  'vml_restrat',
  'vmo_2d',
  'wfo',
  'zos',
  'zossq',
  'friver',
  'heat_content_cond',
  'heat_content_fprec',
  'heat_content_massin',
  'heat_content_massout',
  'evs',
  'ficeberg',
  'heat_content_surfwater',
  'heat_content_vprec',
  'Heat_PmE',
  'hfevapds_old',
  'hfevapds',
  'mlotstmax',
  'mlotstmin',
  'mlotst',
  'mlotstsq',
  'MSTAR',
  'net_heat_coupler',
  'net_massin',
  'net_massout',
  'nonpenSW',
  'pbo',
  'prlq',
  'prsn',
  'pso',
  'rlntds',
  'rsdo',
  'rsntds',
  'S_adx_2d',
  'S_a

#### Find ppname for specific variable

In [18]:
gu.core.find_variable(pp,'thetao')

['ocean_monthly_z',
 'ocean_monthly_z_1x1deg',
 'ocean_annual',
 'ocean_annual_z',
 'ocean_annual_z_1x1deg',
 'ocean_Barents_opening',
 'ocean_Bering_Strait',
 'ocean_Davis_Strait',
 'ocean_Windward_Passage',
 'ocean_Denmark_Strait',
 'ocean_Drake_Passage',
 'ocean_English_Channel',
 'ocean_Faroe_Scotland',
 'ocean_Florida_Bahamas',
 'ocean_Fram_Strait',
 'ocean_Iceland_Faroe_V',
 'ocean_Iceland_Faroe_U',
 'ocean_Indonesian_Throughflow',
 'ocean_Mozambique_Channel',
 'ocean_Pacific_undercurrent',
 'ocean_Taiwan_Luzon',
 'ocean_Agulhas_section',
 'ocean_Gibraltar_Strait',
 'ocean_Iceland_Norway']

#### Display variables in specific ppname

In [56]:
gu.core.get_varnames(pp,'ocean_cobalt_fluxes_int')

['jaggloss_nlgp_100',
 'jaggloss_nsmp_100',
 'jexuloss_ndi_100',
 'jexuloss_nlgp_100',
 'jexuloss_nsmp_100',
 'jhploss_nlgz_100',
 'jhploss_nmdz_100',
 'jingest_n_hp_100',
 'jingest_n_nlgz_100',
 'jingest_n_nmdz_100',
 'jingest_n_nsmz_100',
 'jprod_cadet_arag_100',
 'jprod_cadet_calc_100',
 'jprod_don_nmdz_100',
 'jprod_don_nsmz_100',
 'jprod_lithdet_100',
 'jprod_mesozoo_200',
 'jprod_nbact_100',
 'jprod_ndet_hp_100',
 'jprod_ndet_nlgz_100',
 'jprod_ndet_nmdz_100',
 'jprod_ndi_100',
 'jprod_ndi_n2_100',
 'jprod_ndi_new_100',
 'jprod_nlgp_100',
 'jprod_nlgp_new_100',
 'jprod_nlgz_100',
 'jprod_nmdz_100',
 'jprod_nsmp_100',
 'jprod_nsmp_new_100',
 'jprod_nsmz_100',
 'jprod_sidet_100',
 'jremin_ndet_100',
 'jremin_n_hp_100',
 'jremin_n_nbact_100',
 'jremin_n_nlgz_100',
 'jremin_n_nmdz_100',
 'jremin_n_nsmz_100',
 'juptake_ldon_nbact_100',
 'jvirloss_nbact_100',
 'jvirloss_nsmp_100',
 'jzloss_nbact_100',
 'jzloss_ndi_100',
 'jzloss_nlgp_100',
 'jzloss_nmdz_100',
 'jzloss_nsmp_100',
 'jzlo

### SPECIFY

In [63]:
ppname = 'ocean_cobalt_fluxes_int'
out = 'ts'
add = ['wc_vert_int_jo2resp'] # type : list; can specify multiple variables if same ppname
time = '*'

In [64]:
filename = '.'.join([ppname,out,'zarr'])

In [65]:
# Check that requested variables are in ppname
available = gu.core.get_varnames(pp,ppname)
for a in add:
    if a not in available:
        raise Error(a + " not in " + ppname)

In [66]:
if os.path.exists(outdir):
    print("Directory exists : "+outdir)
else:
    print("Creating directory : "+outdir)
    os.mkdir(outdir)

outpath = outdir+filename
if os.path.exists(outpath):
    print("zarr store exists : "+outpath)
    print("Variables will be appended.")
else:
    print("zarr store will be created : "+outpath)

Directory exists : /work/gam/zarr/ESM4_historical_D1/
zarr store will be created : /work/gam/zarr/ESM4_historical_D1/ocean_cobalt_fluxes_int.ts.zarr


### DMGET

In [67]:
local = gu.core.get_local(pp,ppname,out)

In [68]:
if len(add)==1:
    path = gu.core.get_pathspp(pp,ppname,out,local,time,add[0])
    gu.core.issue_dmget(path)
else:
    for a in add:
        path = gu.core.get_pathspp(pp,ppname,out,local,time,a)
        gu.core.issue_dmget(path)

In [70]:
gu.core.query_dmget()

1

In [71]:
# Snippet of code that will only complete when out of queue
count = 0
while gu.core.query_dmget()==1:
    count+=1
    if count%100==0:
        print("Still in queue at :")
        print(datetime.datetime.now())

Still in queue at :
2022-11-09 18:21:20.942945
Still in queue at :
2022-11-09 18:21:29.664488
Still in queue at :
2022-11-09 18:21:41.331172
Still in queue at :
2022-11-09 18:21:50.251116
Still in queue at :
2022-11-09 18:21:56.437761
Still in queue at :
2022-11-09 18:22:02.204122
Still in queue at :
2022-11-09 18:22:10.382872
Still in queue at :
2022-11-09 18:22:17.900335
Still in queue at :
2022-11-09 18:22:24.051863
Still in queue at :
2022-11-09 18:22:30.290704
Still in queue at :
2022-11-09 18:22:36.417242
Still in queue at :
2022-11-09 18:22:42.447971
Still in queue at :
2022-11-09 18:22:48.149388
Still in queue at :
2022-11-09 18:22:53.786368
Still in queue at :
2022-11-09 18:23:00.093355
Still in queue at :
2022-11-09 18:23:06.883456
Still in queue at :
2022-11-09 18:23:13.359307
Still in queue at :
2022-11-09 18:23:19.490534
Still in queue at :
2022-11-09 18:23:28.384595
Still in queue at :
2022-11-09 18:23:36.583415
Still in queue at :
2022-11-09 18:23:45.916116
Still in queu

### LOAD AND SAVE

In [72]:
ds = xr.Dataset()
if len(add)==1:
    ds = gu.core.open_frompp(pp,ppname,out,local,time,add[0])
else:
    for a in add:
        ds[a]=gu.core.open_frompp(pp,ppname,out,local,time,a)[a]

In [73]:
ds

Unnamed: 0,Array,Chunk
Bytes,15.47 kiB,480 B
Shape,"(1980,)","(60,)"
Count,99 Tasks,33 Chunks
Type,timedelta64[ns],numpy.ndarray
"Array Chunk Bytes 15.47 kiB 480 B Shape (1980,) (60,) Count 99 Tasks 33 Chunks Type timedelta64[ns] numpy.ndarray",1980  1,

Unnamed: 0,Array,Chunk
Bytes,15.47 kiB,480 B
Shape,"(1980,)","(60,)"
Count,99 Tasks,33 Chunks
Type,timedelta64[ns],numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.47 kiB,480 B
Shape,"(1980,)","(60,)"
Count,99 Tasks,33 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 15.47 kiB 480 B Shape (1980,) (60,) Count 99 Tasks 33 Chunks Type object numpy.ndarray",1980  1,

Unnamed: 0,Array,Chunk
Bytes,15.47 kiB,480 B
Shape,"(1980,)","(60,)"
Count,99 Tasks,33 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,15.47 kiB,480 B
Shape,"(1980,)","(60,)"
Count,99 Tasks,33 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 15.47 kiB 480 B Shape (1980,) (60,) Count 99 Tasks 33 Chunks Type object numpy.ndarray",1980  1,

Unnamed: 0,Array,Chunk
Bytes,15.47 kiB,480 B
Shape,"(1980,)","(60,)"
Count,99 Tasks,33 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,0.94 kiB
Shape,"(1980, 2)","(60, 2)"
Count,99 Tasks,33 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 30.94 kiB 0.94 kiB Shape (1980, 2) (60, 2) Count 99 Tasks 33 Chunks Type object numpy.ndarray",2  1980,

Unnamed: 0,Array,Chunk
Bytes,30.94 kiB,0.94 kiB
Shape,"(1980, 2)","(60, 2)"
Count,99 Tasks,33 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.06 GiB,94.92 MiB
Shape,"(1980, 576, 720)","(60, 576, 720)"
Count,99 Tasks,33 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 3.06 GiB 94.92 MiB Shape (1980, 576, 720) (60, 576, 720) Count 99 Tasks 33 Chunks Type float32 numpy.ndarray",720  576  1980,

Unnamed: 0,Array,Chunk
Bytes,3.06 GiB,94.92 MiB
Shape,"(1980, 576, 720)","(60, 576, 720)"
Count,99 Tasks,33 Chunks
Type,float32,numpy.ndarray


In [74]:
with ProgressBar():
    ds.to_zarr(outpath,mode='a')

[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed | 10.0s
