# Prep CESM-DPLE experiment data for use in DPLE processing notebook
### • Eliminates the depth dimension by extracting each field at the model's bottom layer

In [1]:
import os
from glob import glob

import matplotlib.colors as colors

import numpy as np
import xarray as xr
import pop_tools
import utils

import warnings
warnings.filterwarnings('ignore')

## Spin up a dask cluster

In [2]:
def get_ClusterClient():
    """Create a PBS-backed dask cluster and a client connected to it.

    Returns
    -------
    tuple
        ``(cluster, client)`` -- the ``PBSCluster`` object and the
        ``Client`` attached to it. This function does not call
        ``cluster.scale``; choosing a worker count is left to the caller.
    """
    import dask
    from dask_jobqueue import PBSCluster
    from dask.distributed import Client

    # Settings passed to PBSCluster for each worker job.
    job_settings = dict(
        cores=1,
        memory='50GB',
        processes=1,
        queue='casper',
        resource_spec='select=1:ncpus=1:mem=50GB',
        project='NCGD0011',
        walltime='02:00:00',
        interface='ib0',
    )
    pbs_cluster = PBSCluster(**job_settings)

    # Dashboard link template pointing at the JupyterHub proxy URL.
    dashboard_url = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
    dask.config.set({'distributed.dashboard.link': dashboard_url})

    dask_client = Client(pbs_cluster)
    return pbs_cluster, dask_client

In [3]:
# Create the PBS cluster and its client, then request workers.
cluster, client = get_ClusterClient()
cluster.scale(12)  # ask the cluster for 12 workers

In [4]:
# Display the client summary (scheduler address, dashboard link, worker count).
client

0,1
Client  Scheduler: tcp://10.12.206.51:34154  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kristenk/proxy/46631/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [5]:
# Load the POP_gx1v6 grid via pop_tools and keep its KMT field at module level.
# field_at_bottom reads KMT as the 1-based index of the bottom level in each
# (nlat, nlon) column; columns where KMT <= 0 are left as NaN.
grid = pop_tools.get_grid('POP_gx1v6')
KMT = grid.KMT

In [6]:
def field_at_bottom(da):
    """Return a field indexed at the model's bottom layer.

    Parameters
    ----------
    da : xarray.DataArray
        4-D field whose axes are ordered (time, depth, nlat, nlon) and which
        carries a ``time`` coordinate.

    Returns
    -------
    xarray.DataArray
        float64 array with the depth axis removed (dims 0, 2 and 3 of ``da``)
        and only the ``time`` coordinate attached. Each column holds the
        value at level ``KMT - 1``; columns where ``KMT <= 0`` are NaN.

    Notes
    -----
    Reads the module-level ``KMT`` array, whose shape must equal the last two
    dimensions of ``da``. The gather is vectorised with
    ``np.take_along_axis`` instead of looping over every (j, i) column.
    """
    kmt = np.asarray(KMT)
    assert kmt.shape == da.shape[-2:]

    # Columns with at least one active level.
    has_bottom = kmt > 0

    # 0-based index of the bottom level. Columns without one get a dummy
    # index of 0 here and are overwritten with NaN below.
    k_bottom = np.where(has_bottom, kmt - 1, 0).astype(int)

    # Gather along the depth axis (axis 1); the index array broadcasts over time.
    values = np.asarray(da)
    picked = np.take_along_axis(
        values, k_bottom[np.newaxis, np.newaxis, :, :], axis=1
    )[:, 0, :, :]

    # float64 with NaN where there is no bottom level.
    bottom = np.where(has_bottom, picked, np.nan).astype(np.float64)

    return xr.DataArray(
        bottom,
        dims=tuple(da.dims[i] for i in [0, 2, 3]),
        coords={c: da.coords[c] for c in ['time']},
    )

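## Cycle through start years (1954–2017; n=64) and ensemble members (n=40) and preprocess each file

Note: the `styear` loop in the cell below is set by hand and currently covers start years 1957–1960 only.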

In [None]:
# user set / cycle vars
var = 'TEMP'

# Loop-invariant setup: the variables to keep from each input file ...
coords = {'x':'TLONG','y':'TLAT'}
keepthese = ['z_t','z_t_150m','time_bound','TAREA','REGION_MASK','time','UAREA'] + [var]
keep_vars = keepthese + list(coords.values()) + ['dz','KMT']

# ... and the output directory, created once.
USER = os.environ['USER']
dout = f'/glade/scratch/{USER}/DPLE-tmpfiles/'
os.makedirs(dout, exist_ok=True)

for styear in np.arange(1957,1961,1):

    for member in np.arange(1,41,1):

        print('start year=', styear, 'member=', member)

        # Open this start year / member file. The context manager closes the
        # file handle once the output has been written.
        with xr.open_dataset(
            utils.file_in(var,styear,member),
            decode_coords=False,
            decode_times=True,
            chunks={'time': 12},
        ) as ds_raw:

            # Tidy up the dataset first, then persist, so that only the
            # variables that are kept get loaded onto the workers.
            ds = ds_raw.drop_vars(
                [v for v in ds_raw.variables if v not in keep_vars]
            ).persist()

            # Prepare the output dataset -- it needs certain variables added
            # to work with the DPLE code.
            file_out = utils.file_out(dout,var,styear,member)
            dso = ds[['TLONG', 'TLAT', 'KMT', 'TAREA','time','time_bound','dz']]

            # Extract the bottom-layer field block by block. The template is
            # the expected result: the field with the depth axis removed.
            template = ds[var][:, 0, :, :].drop_vars('z_t')
            dso[f'{var}_bottom'] = xr.map_blocks(
                field_at_bottom, ds[var],
                template=template).compute()

            # Fix attributes. Use the same f-string key as above, so this
            # also works when `var` is not 'TEMP'.
            dso[f'{var}_bottom'].attrs['coordinates'] = "TLONG TLAT time"

            # Write out the file.
            dso.to_netcdf(file_out, mode='w')

start year= 1957 member= 1
start year= 1957 member= 2
start year= 1957 member= 3
start year= 1957 member= 4


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-32a0e81c4e0664a1bc98856cd9c4bf9fDYT-636c1d34a85b2f74cba47b1a47880248', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 1957 member= 5
start year= 1957 member= 6
start year= 1957 member= 7
start year= 1957 member= 8
start year= 1957 member= 9


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-36ffc5fd8a2a0b171179665eb2282c44DYU-58c0bc4a5f8b2a62f34886c6a7986b17', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 1957 member= 10
start year= 1957 member= 11
start year= 1957 member= 12


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-81a4dc30b45143944f146eb736623270HUW-d90dfdef0bf1243e1f7046540c7bc03f', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 1957 member= 13
start year= 1957 member= 14
start year= 1957 member= 15
start year= 1957 member= 16
