In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import xarray as xr
import gfdl_utils as gu
from dask.diagnostics import ProgressBar
import os
import datetime

## Save zarr stores
Chug certain variables into a zarr store in `/work` for later use.

Notebook is separated into four sections that should be considered quasi-independent:
**SEARCH**
  - Various functions to find specific files or variables
  - Most are dependent only on the pp directory
**SPECIFY**
  - Give specifics of file(s) to be loaded and saved
      - `ppname` (subfolder of `pp`)
      - `out` (averaging of output: `av` or `ts`)
      - `time` (timespan to be used, usually `*` for all time)
      - `add` (name of variable (when `out==ts`) or `*` for `out==av`)
          - `add` can be a list of severable variables, so long as they are all present in `ppname`
  - Define filename for zarr store and check if it exists
      - filename : ppname.out.zarr
**DMGET**
  - Retrieve specified file(s) from tape
**LOAD AND SAVE**
  - Load files into `ds` (loop through variables if several specified in `add`)
  - Save to zarr store, appending to store if it already exists

In [5]:
config_id = 'ESM4_1pctCO2_D1'
pp = '/archive/oar.gfdl.cmip6/ESM4/DECK/'+config_id+'/gfdl.ncrc4-intel16-prod-openmp/pp/'
# pp = '/archive/oar.gfdl.cmip6/CM4/warsaw_201710_om4_v1.0.1/'+config_id+'/gfdl.ncrc4-intel16-prod-openmp/pp/'
# pp = '/archive/oar.gfdl.cmip6/ESM4/DECK/'+config_id+'/gfdl.ncrc4-intel16-prod-openmp/pp'
# pp = '/archive/Raphael.Dussin/FMS2019.01.03_devgfdl_20210706/CM4_piControl_c192_OM4p125_v7/gfdl.ncrc4-intel18-prod-openmp/pp'

outdir = '/work/gam/zarr/'+config_id+'/'

### SEARCH

#### List pp names

In [6]:
gu.core.get_ppnames(pp)

['.dec',
 '.checkpoint',
 'ocean_daily_cmip',
 'ocean_daily_gfdl',
 'ocean_daily_1x1deg',
 'ocean_monthly',
 'ocean_monthly_1x1deg',
 'ocean_month_rho2',
 'ocean_month_rho2_1x1deg',
 'ocean_monthly_z',
 'ocean_monthly_z_1x1deg',
 'ocean_annual',
 'ocean_annual_1x1deg',
 'ocean_annual_rho2',
 'ocean_annual_rho2_1x1deg',
 'ocean_annual_z',
 'ocean_annual_z_1x1deg',
 'ocean_scalar_monthly',
 'ocean_scalar_annual',
 'ocean_Barents_opening',
 'ocean_Bering_Strait',
 'ocean_Davis_Strait',
 'ocean_Windward_Passage',
 'ocean_Denmark_Strait',
 'ocean_Drake_Passage',
 'ocean_English_Channel',
 'ocean_Faroe_Scotland',
 'ocean_Florida_Bahamas',
 'ocean_Fram_Strait',
 'ocean_Iceland_Faroe_V',
 'ocean_Iceland_Faroe_U',
 'ocean_Indonesian_Throughflow',
 'ocean_Mozambique_Channel',
 'ocean_Pacific_undercurrent',
 'ocean_Taiwan_Luzon',
 'ocean_Agulhas_section',
 'ocean_Gibraltar_Strait',
 'ocean_Iceland_Norway',
 'ocean_month_refined',
 'ocean_month_z_refined',
 'ocean_month_rho2_refined',
 'atmos_cmip

#### List all available variables

In [54]:
gu.core.get_allvars(pp)

{'ocean_daily_cmip': ['sos', 'tos', 'zos'],
 'ocean_daily_gfdl': ['omldamax', 'sossq', 'ssu', 'ssv', 'tossq'],
 'ocean_daily_1x1deg': ['omldamax', 'sos', 'sossq', 'tos', 'tossq', 'zos'],
 'ocean_monthly': ['S_diffx_2d',
  'S_diffy_2d',
  'sfdsi',
  'sob',
  'sos',
  'sossq',
  'T_adx_2d',
  'T_ady_2d',
  'tauuo',
  'tauvo',
  'T_diffx_2d',
  'T_diffy_2d',
  'temp_layer_ave',
  'tob',
  'tos',
  'tossq',
  'udml_restrat',
  'uml_restrat',
  'umo_2d',
  'ustar',
  'vdml_restrat',
  'vml_restrat',
  'vmo_2d',
  'wfo',
  'zos',
  'zossq',
  'friver',
  'heat_content_cond',
  'heat_content_fprec',
  'heat_content_massin',
  'heat_content_massout',
  'evs',
  'ficeberg',
  'heat_content_surfwater',
  'heat_content_vprec',
  'Heat_PmE',
  'hfevapds_old',
  'hfevapds',
  'mlotstmax',
  'mlotstmin',
  'mlotst',
  'mlotstsq',
  'MSTAR',
  'net_heat_coupler',
  'net_massin',
  'net_massout',
  'nonpenSW',
  'pbo',
  'prlq',
  'prsn',
  'pso',
  'rlntds',
  'rsdo',
  'rsntds',
  'S_adx_2d',
  'S_a

#### Find ppname for specific variable

In [18]:
gu.core.find_variable(pp,'thetao')

['ocean_monthly_z',
 'ocean_monthly_z_1x1deg',
 'ocean_annual',
 'ocean_annual_z',
 'ocean_annual_z_1x1deg',
 'ocean_Barents_opening',
 'ocean_Bering_Strait',
 'ocean_Davis_Strait',
 'ocean_Windward_Passage',
 'ocean_Denmark_Strait',
 'ocean_Drake_Passage',
 'ocean_English_Channel',
 'ocean_Faroe_Scotland',
 'ocean_Florida_Bahamas',
 'ocean_Fram_Strait',
 'ocean_Iceland_Faroe_V',
 'ocean_Iceland_Faroe_U',
 'ocean_Indonesian_Throughflow',
 'ocean_Mozambique_Channel',
 'ocean_Pacific_undercurrent',
 'ocean_Taiwan_Luzon',
 'ocean_Agulhas_section',
 'ocean_Gibraltar_Strait',
 'ocean_Iceland_Norway']

#### Display variables in specific ppname

In [7]:
gu.core.get_varnames(pp,'ocean_cobalt_omip_tracers_year_z_1x1deg')

['arag',
 'bacc',
 'bfe',
 'bsi',
 'calc',
 'chldiat',
 'chldiaz',
 'chlmisc',
 'chl',
 'chlpico',
 'co3',
 'co3satarag',
 'co3satcalc',
 'detoc',
 'dfe',
 'dissic',
 'nh4',
 'no3',
 'o2',
 'o2sat',
 'ph',
 'phyc',
 'phydiat',
 'phydiaz',
 'phyfe',
 'phymisc',
 'phyn',
 'phypico',
 'phyp',
 'physi',
 'po4',
 'pon',
 'pop',
 'si',
 'talk',
 'volcello',
 'zmeso',
 'zmicro',
 'zooc',
 'dissoc_orig',
 'dissoc']

### SPECIFY

In [8]:
ppname = 'ocean_cobalt_omip_tracers_year_z_1x1deg'
out = 'ts'
add = ['o2','o2sat'] # type : list; can specify multiple variables if same ppname
time = '*'

In [9]:
filename = '.'.join([ppname,out,'zarr'])

In [10]:
# Check that requested variables are in ppname
available = gu.core.get_varnames(pp,ppname)
for a in add:
    if a not in available:
        raise Error(a + " not in " + ppname)

In [11]:
if os.path.exists(outdir):
    print("Directory exists : "+outdir)
else:
    print("Creating directory : "+outdir)
    os.mkdir(outdir)

outpath = outdir+filename
if os.path.exists(outpath):
    print("zarr store exists : "+outpath)
    print("Variables will be appended.")
else:
    print("zarr store will be created : "+outpath)

Creating directory : /work/gam/zarr/ESM4_1pctCO2_D1/
zarr store will be created : /work/gam/zarr/ESM4_1pctCO2_D1/ocean_cobalt_omip_tracers_year_z_1x1deg.ts.zarr


### DMGET

In [12]:
local = gu.core.get_local(pp,ppname,out)

In [13]:
if len(add)==1:
    path = gu.core.get_pathspp(pp,ppname,out,local,time,add[0])
    gu.core.issue_dmget(path)
else:
    for a in add:
        path = gu.core.get_pathspp(pp,ppname,out,local,time,a)
        gu.core.issue_dmget(path)

In [15]:
gu.core.query_dmget()

1

In [None]:
# Snippet of code that will only complete when out of queue
count = 0
while gu.core.query_dmget()==1:
    count+=1
    if count%100==0:
        print("Still in queue at :")
        print(datetime.datetime.now())

Still in queue at :
2022-12-09 16:15:17.990821
Still in queue at :
2022-12-09 16:15:21.451076
Still in queue at :
2022-12-09 16:15:25.027338
Still in queue at :
2022-12-09 16:15:28.547143
Still in queue at :
2022-12-09 16:15:32.023771
Still in queue at :
2022-12-09 16:15:35.519981
Still in queue at :
2022-12-09 16:15:39.104946
Still in queue at :
2022-12-09 16:15:42.585137
Still in queue at :
2022-12-09 16:15:46.103619
Still in queue at :
2022-12-09 16:15:49.601061
Still in queue at :
2022-12-09 16:15:53.091659
Still in queue at :
2022-12-09 16:15:56.582695
Still in queue at :
2022-12-09 16:16:00.085743
Still in queue at :
2022-12-09 16:16:03.621650
Still in queue at :
2022-12-09 16:16:07.153404
Still in queue at :
2022-12-09 16:16:10.687564
Still in queue at :
2022-12-09 16:16:14.257226
Still in queue at :
2022-12-09 16:16:17.752217
Still in queue at :
2022-12-09 16:16:21.272432
Still in queue at :
2022-12-09 16:16:24.731712
Still in queue at :
2022-12-09 16:16:28.268997
Still in queu

### LOAD AND SAVE

In [None]:
ds = xr.Dataset()
if len(add)==1:
    ds = gu.core.open_frompp(pp,ppname,out,local,time,add[0])
else:
    for a in add:
        ds[a]=gu.core.open_frompp(pp,ppname,out,local,time,a)[a]

In [None]:
ds

In [None]:
with ProgressBar():
    ds.to_zarr(outpath,mode='a')