In [1]:
%load_ext autoreload
%autoreload 2

In [88]:
import xarray as xr
import gfdl_utils as gu
from dask.diagnostics import ProgressBar
import os
import datetime

## Save zarr stores
Chug certain variables into a zarr store in `/work` for later use.

Notebook is separated into four sections:

**SEARCH**
  - Various functions to find specific files or variables
  - Most are dependent only on the `pp` directory
**SPECIFY**
  - Give specifics of file(s) to be loaded and saved
      - `ppname` (subfolder of `pp`)
      - `out` (averaging of output: `av` or `ts`)
      - `time` (timespan to be used, usually `*` for all time)
      - `add` (name of variable (when `out==ts`) or `*` for `out==av`)
          - `add` can be a list of severable variables, so long as they are all present in `ppname`
  - Define filename for zarr store and check if it exists
      - filename : ppname.out.zarr
**DMGET**
  - Retrieve specified file(s) from tape
**LOAD AND SAVE**
  - Load files into `ds` (loop through variables if several specified in `add`)
  - Save to zarr store, appending to store if it already exists

In [98]:
config_id = 'ESM4_historical_D1'
pp = '/archive/oar.gfdl.cmip6/ESM4/DECK/'+config_id+'/gfdl.ncrc4-intel16-prod-openmp/pp'
# pp = '/archive/Raphael.Dussin/FMS2019.01.03_devgfdl_20210706/CM4_piControl_c192_OM4p125_v7/gfdl.ncrc4-intel18-prod-openmp/pp'

outdir = '/work/gam/zarr/'+config_id+'/'

### SEARCH

#### List pp names

In [8]:
gu.core.get_ppnames(pp)

['.dec',
 '.checkpoint',
 'ocean_daily_cmip',
 'ocean_daily_gfdl',
 'ocean_daily_1x1deg',
 'ocean_monthly',
 'ocean_monthly_1x1deg',
 'ocean_month_rho2',
 'ocean_month_rho2_1x1deg',
 'ocean_monthly_z',
 'ocean_monthly_z_1x1deg',
 'ocean_annual',
 'ocean_annual_1x1deg',
 'ocean_annual_rho2',
 'ocean_annual_rho2_1x1deg',
 'ocean_annual_z',
 'ocean_annual_z_1x1deg',
 'ocean_scalar_monthly',
 'ocean_scalar_annual',
 'ocean_Barents_opening',
 'ocean_Bering_Strait',
 'ocean_Davis_Strait',
 'ocean_Windward_Passage',
 'ocean_Denmark_Strait',
 'ocean_Drake_Passage',
 'ocean_English_Channel',
 'ocean_Faroe_Scotland',
 'ocean_Florida_Bahamas',
 'ocean_Fram_Strait',
 'ocean_Iceland_Faroe_V',
 'ocean_Iceland_Faroe_U',
 'ocean_Indonesian_Throughflow',
 'ocean_Mozambique_Channel',
 'ocean_Pacific_undercurrent',
 'ocean_Taiwan_Luzon',
 'ocean_Agulhas_section',
 'ocean_Gibraltar_Strait',
 'ocean_Iceland_Norway',
 'ocean_month_refined',
 'ocean_month_z_refined',
 'ocean_month_rho2_refined',
 'atmos_cmip

#### List all available variables

In [97]:
gu.core.get_allvars(pp)

{'ocean_daily': ['tos_max',
  'omldamax',
  'sos',
  'tos_min',
  'ssu',
  'tos',
  'ssv',
  'ustar',
  'zos_max',
  'zos_min',
  'zos',
  'zossq'],
 'ocean_daily_d2': ['zos_min',
  'mlotst',
  'zos_max',
  'omldamax',
  'tos_max',
  'sos',
  'tos_min',
  'ssu',
  'tos',
  'ssv',
  'zos',
  'zossq'],
 'ocean_daily_1x1deg': ['tos_min',
  'omldamax',
  'sos',
  'tos',
  'tos_max',
  'ustar',
  'zos_max',
  'zos_min',
  'zos',
  'zossq'],
 'ocean_monthly': ['hfsnthermds',
  'col_height',
  'mass_wt',
  'col_mass',
  'MLD_003_max',
  'ePBL_h_ML',
  'MLD_003_min',
  'evs',
  'MLD_003',
  'ficeberg',
  'MLD_EN1_max',
  'friver',
  'MLD_EN1_min',
  'fsitherm',
  'hfsso',
  'heat_content_cond',
  'hfrunoffds',
  'heat_content_fprec',
  'heat_content_massin',
  'hf_rvxu_2d',
  'heat_content_massout',
  'hf_rvxv_2d',
  'heat_content_surfwater',
  'hfsifrazil',
  'heat_content_vprec',
  'MLD_EN1',
  'Heat_PmE',
  'MLD_EN2',
  'hf_CAu_2d',
  'MLD_EN3',
  'hf_CAv_2d',
  'hf_u_BT_accel_2d',
  'hf_di

#### Find ppname for specific variable

In [114]:
gu.core.find_variable(pp,'fgo2')

['ocean_cobalt_omip_2d', 'ocean_cobalt_omip_2d_1x1deg']

#### Display variables in specific ppname

In [115]:
gu.core.get_varnames(pp,'ocean_cobalt_omip_2d_1x1deg')

['dpco2',
 'dpo2',
 'eparag100',
 'epc100',
 'epcalc100',
 'epfe100',
 'epn100',
 'fric',
 'frn',
 'froc',
 'fsfe',
 'fsn',
 'icfriver',
 'intdic',
 'intdoc',
 'intparag',
 'intpbfe',
 'intpbn',
 'intpbp',
 'intpbsi',
 'intpcalcite',
 'intpn2',
 'intpoc',
 'intppdiat',
 'intppdiaz',
 'intppmisc',
 'intpp',
 'intppnitrate',
 'intpppico',
 'limfediat',
 'limfediaz',
 'limfemisc',
 'limfepico',
 'limirrdiat',
 'limirrdiaz',
 'limirrmisc',
 'limirrpico',
 'limndiat',
 'limnmisc',
 'limnpico',
 'limpdiat',
 'limpdiaz',
 'limpmisc',
 'limppico',
 'ocfriver',
 'spco2',
 'epp100',
 'epsi100',
 'fgco2',
 'fgo2',
 'frfe',
 'intpbp_orig',
 'intdoc_orig']

### SPECIFY

In [116]:
ppname = 'ocean_cobalt_omip_2d_1x1deg'
out = 'ts'
add = ['fgo2','fgco2'] # type : list; can specify multiple variables if same ppname
time = '*'

In [117]:
filename = '.'.join([ppname,out,'zarr'])

In [118]:
if os.path.exists(outdir):
    print("Directory exists : "+outdir)
else:
    print("Creating directory : "+outdir)
    os.mkdir(outdir)

outpath = outdir+filename
if os.path.exists(outpath):
    print("zarr store exists : "+outpath)
    print("Variables will be appended.")
else:
    print("zarr store will be created : "+outpath)

Directory exists : /work/gam/zarr/ESM4_historical_D1/
zarr store will be created : /work/gam/zarr/ESM4_historical_D1/ocean_cobalt_omip_2d_1x1deg.ts.zarr


### DMGET

In [119]:
local = gu.core.get_local(pp,ppname,out)

In [120]:
if len(add)==1:
    path = gu.core.get_pathspp(pp,ppname,out,local,time,add[0])
    gu.core.issue_dmget(path)
else:
    for a in add:
        path = gu.core.get_pathspp(pp,ppname,out,local,time,a)
        gu.core.issue_dmget(path)

In [122]:
gu.core.query_dmget()

1

In [123]:
# Snippet of code that will only complete when out of queue
count = 0
while gu.core.query_dmget()==1:
    count+=1
    if count%100==0:
        print("Still in queue at :")
        print(datetime.datetime.now())

Still in queue at :
2022-11-09 14:35:59.417755
Still in queue at :
2022-11-09 14:36:04.370845
Still in queue at :
2022-11-09 14:36:09.550816
Still in queue at :
2022-11-09 14:36:14.833547
Still in queue at :
2022-11-09 14:36:20.399274
Still in queue at :
2022-11-09 14:36:25.917058
Still in queue at :
2022-11-09 14:36:30.960717
Still in queue at :
2022-11-09 14:36:36.056330
Still in queue at :
2022-11-09 14:36:41.178367
Still in queue at :
2022-11-09 14:36:46.834446
Still in queue at :
2022-11-09 14:36:52.624244
Still in queue at :
2022-11-09 14:36:57.834569
Still in queue at :
2022-11-09 14:37:02.705563
Still in queue at :
2022-11-09 14:37:07.700346
Still in queue at :
2022-11-09 14:37:12.831956
Still in queue at :
2022-11-09 14:37:18.109894
Still in queue at :
2022-11-09 14:37:23.442243
Still in queue at :
2022-11-09 14:37:28.675996
Still in queue at :
2022-11-09 14:37:33.568991
Still in queue at :
2022-11-09 14:37:38.712073
Still in queue at :
2022-11-09 14:37:43.712050
Still in queu

### LOAD AND SAVE

In [124]:
ds = xr.Dataset()
if len(add)==1:
    ds = gu.core.open_frompp(pp,ppname,out,local,time,add[0])
else:
    for a in add:
        ds[a]=gu.core.open_frompp(pp,ppname,out,local,time,a)[a]

In [125]:
ds

Unnamed: 0,Array,Chunk
Bytes,489.44 MiB,14.83 MiB
Shape,"(1980, 180, 360)","(60, 180, 360)"
Count,99 Tasks,33 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 489.44 MiB 14.83 MiB Shape (1980, 180, 360) (60, 180, 360) Count 99 Tasks 33 Chunks Type float32 numpy.ndarray",360  180  1980,

Unnamed: 0,Array,Chunk
Bytes,489.44 MiB,14.83 MiB
Shape,"(1980, 180, 360)","(60, 180, 360)"
Count,99 Tasks,33 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,489.44 MiB,14.83 MiB
Shape,"(1980, 180, 360)","(60, 180, 360)"
Count,99 Tasks,33 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 489.44 MiB 14.83 MiB Shape (1980, 180, 360) (60, 180, 360) Count 99 Tasks 33 Chunks Type float32 numpy.ndarray",360  180  1980,

Unnamed: 0,Array,Chunk
Bytes,489.44 MiB,14.83 MiB
Shape,"(1980, 180, 360)","(60, 180, 360)"
Count,99 Tasks,33 Chunks
Type,float32,numpy.ndarray


In [126]:
with ProgressBar():
    ds.to_zarr(outpath,mode='a')

[########################################] | 100% Completed |  2.9s
