# Prep CESM-DPLE experiment data for use in DPLE processing notebook
### • eliminates the depth dimension by extracting each field at the model's bottom layer

In [1]:
import os
from glob import glob

import matplotlib.colors as colors

import numpy as np
import xarray as xr
import pop_tools
import utils

import warnings
warnings.filterwarnings('ignore')

## Spin up a dask cluster

In [2]:
def get_ClusterClient():
    """Start a PBS-backed dask cluster and connect a client to it.

    Each worker job is configured with 1 core, 1 process and 50GB of memory,
    submitted to the 'casper' queue under project 'NCGD0011' with an 8 hour
    walltime, communicating over the 'ib0' interface.

    The dask dashboard link template is pointed at the JupyterHub proxy URL;
    the `{USER}` and `{port}` placeholders are left for dask to fill in (the
    rendered client summary shows them substituted).

    Returns
    -------
    tuple
        `(cluster, client)`: the `PBSCluster` and the `Client` attached to it.
        No workers are requested here; the caller is expected to scale.
    """
    import dask
    from dask_jobqueue import PBSCluster
    from dask.distributed import Client

    # Per-job resource request for the PBS scheduler.
    pbs_options = dict(
        cores=1,
        memory='50GB',
        processes=1,
        queue='casper',
        resource_spec='select=1:ncpus=1:mem=50GB',
        project='NCGD0011',
        walltime='08:00:00',
        interface='ib0',
    )
    pbs_cluster = PBSCluster(**pbs_options)

    # Route the dashboard link through the JupyterHub proxy.
    dashboard_url = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
    dask.config.set({'distributed.dashboard.link': dashboard_url})

    pbs_client = Client(pbs_cluster)
    return pbs_cluster, pbs_client

In [3]:
# Launch the PBS-backed dask cluster and connect a client to it.
cluster, client = get_ClusterClient()
# Ask the cluster to scale to 12 workers.
cluster.scale(12) 

In [4]:
# Display the client summary (scheduler address, dashboard link, worker totals).
# NOTE(review): the saved output shows 0 workers -- presumably the PBS jobs had
# not started yet when this cell ran; confirm workers are up before the loop.
client

0,1
Client  Scheduler: tcp://10.12.206.63:45134  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kristenk/proxy/41751/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [5]:
# Load the POP_gx1v6 grid definition from pop_tools.
grid = pop_tools.get_grid('POP_gx1v6')
# field_at_bottom reads this module-level KMT: columns with KMT > 0 are
# sampled at vertical index KMT - 1; all other columns are left as NaN.
KMT = grid.KMT

In [6]:
def field_at_bottom(da):
    """Return a field indexed at the model's bottom layer.

    For every horizontal point with ``KMT > 0`` the value at vertical index
    ``KMT - 1`` is selected; every other point is filled with NaN.

    Parameters
    ----------
    da : xarray.DataArray
        Four-dimensional field indexed positionally as
        (leading dim, vertical level, row, column). It must carry a ``time``
        coordinate, which is copied to the result (so the leading dimension
        is presumably time).

    Returns
    -------
    xarray.DataArray
        float64 array with the vertical dimension removed, i.e. dims
        ``(da.dims[0], da.dims[2], da.dims[3])``, carrying only the ``time``
        coordinate.

    Raises
    ------
    AssertionError
        If the horizontal shape of ``da`` differs from that of ``KMT``.
    IndexError
        If ``KMT - 1`` exceeds the number of vertical levels in ``da``.

    Notes
    -----
    Relies on the module-level ``KMT`` array. The selection is done with a
    single vectorised gather instead of a Python loop over every grid point.
    """
    assert KMT.shape == da.shape[-2:], (
        f'KMT shape {KMT.shape} does not match the horizontal shape '
        f'{da.shape[-2:]} of the input field'
    )

    kmt = np.asarray(KMT)
    has_bottom = kmt > 0
    # Zero-based index of the deepest level per column. Masked columns get a
    # placeholder index of 0; their values are replaced with NaN below.
    k_bottom = np.where(has_bottom, kmt - 1, 0).astype(np.intp)

    values = da.values
    # Gather along the vertical axis (axis 1); the index array broadcasts
    # over the leading dimension.
    gathered = np.take_along_axis(
        values, k_bottom[np.newaxis, np.newaxis, :, :], axis=1
    )[:, 0, :, :]
    bottom = np.where(has_bottom, gathered.astype(np.float64), np.nan)

    return xr.DataArray(
        bottom,
        dims=tuple(da.dims[i] for i in [0, 2, 3]),
        coords={c: da.coords[c] for c in ['time']},
    )

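## Cycle through start years (1954–2017; n=64) and ensemble members (n=40) and preprocess each file

Note: the cell below, as written, covers start years 2014–2017 only; adjust the `styear` range to process other start years.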

In [7]:
# User settings: variable to process (start years / members are set in the loops below)
var = 'POC_FLUX_IN'

# Variables retained from each input file; everything else is dropped.
coords = {'x':'TLONG','y':'TLAT'}
keepthese=['z_t','z_t_150m','time_bound','TAREA','REGION_MASK','time','UAREA'] + [var]
keep_vars = keepthese +list(coords.values())+['dz','KMT']

# Output directory: the same for every file, so create it once up front.
USER = os.environ['USER']
dout = f'/glade/scratch/{USER}/DPLE-tmpfiles/'
os.makedirs(dout, exist_ok=True)

for styear in np.arange(2014,2018,1):

    for member in np.arange(1,41,1):

        print('start year=', styear, 'member=', member)

        # Open this start year / member file lazily, 12 time steps per chunk.
        # The context manager closes the file handle at the end of every
        # iteration instead of leaving one open per file.
        # NOTE(review): the dataset is no longer persisted. Persisting the
        # whole file before dropping unused variables scheduled work that was
        # released straight away, and the data is only read once anyway.
        # This is presumably what produced the 'open_dataset-...' heartbeat
        # KeyErrors in the saved output -- confirm they disappear.
        with xr.open_dataset(
            utils.file_in(var,styear,member),
            decode_coords=False,
            decode_times=True,
            chunks={'time': 12},
        ) as ds_in:

            # Tidy up the dataset: keep only the variables listed above.
            ds = ds_in.drop_vars([v for v in ds_in.variables if v not in keep_vars])

            # Prepare the output dataset -- it needs certain variables added
            # to work with the DPLE code.
            file_out = utils.file_out(dout,var,styear,member)
            dso = ds[['TLONG', 'TLAT', 'KMT', 'TAREA','time','time_bound','dz']]

            # Extract the bottom-layer field block by block. The template is
            # the first vertical level without its scalar z_t coordinate,
            # which describes the dims of what field_at_bottom returns.
            template = ds[var][:, 0, :, :].drop_vars('z_t')
            dso[f'{var}_bottom'] = xr.map_blocks(
                field_at_bottom, ds[var],
                template=template).compute()

            # Fix attributes
            dso[f'{var}_bottom'].attrs['coordinates'] = "TLONG TLAT time"

            # Write out the file while the input is still open: the grid
            # variables in dso are read lazily from it.
            dso.to_netcdf(file_out, mode='w')

start year= 2014 member= 1
start year= 2014 member= 2
start year= 2014 member= 3
start year= 2014 member= 4
start year= 2014 member= 5
start year= 2014 member= 6
start year= 2014 member= 7
start year= 2014 member= 8
start year= 2014 member= 9
start year= 2014 member= 10
start year= 2014 member= 11
start year= 2014 member= 12
start year= 2014 member= 13
start year= 2014 member= 14
start year= 2014 member= 15
start year= 2014 member= 16
start year= 2014 member= 17
start year= 2014 member= 18
start year= 2014 member= 19
start year= 2014 member= 20
start year= 2014 member= 21
start year= 2014 member= 22
start year= 2014 member= 23
start year= 2014 member= 24
start year= 2014 member= 25
start year= 2014 member= 26
start year= 2014 member= 27
start year= 2014 member= 28
start year= 2014 member= 29
start year= 2014 member= 30
start year= 2014 member= 31
start year= 2014 member= 32
start year= 2014 member= 33
start year= 2014 member= 34
start year= 2014 member= 35
start year= 2014 member= 36
s

distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-5c98b2bc21c6cbeeef1a1a37d314def8DYT-ea2f854605a3ca946e4bf8609cb84b4a', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 2015 member= 10
start year= 2015 member= 11
start year= 2015 member= 12
start year= 2015 member= 13
start year= 2015 member= 14
start year= 2015 member= 15
start year= 2015 member= 16
start year= 2015 member= 17
start year= 2015 member= 18
start year= 2015 member= 19
start year= 2015 member= 20
start year= 2015 member= 21
start year= 2015 member= 22
start year= 2015 member= 23
start year= 2015 member= 24
start year= 2015 member= 25
start year= 2015 member= 26
start year= 2015 member= 27
start year= 2015 member= 28
start year= 2015 member= 29
start year= 2015 member= 30
start year= 2015 member= 31
start year= 2015 member= 32
start year= 2015 member= 33
start year= 2015 member= 34
start year= 2015 member= 35
start year= 2015 member= 36
start year= 2015 member= 37
start year= 2015 member= 38
start year= 2015 member= 39
start year= 2015 member= 40
start year= 2016 member= 1
start year= 2016 member= 2


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-f545cf06f7c06ec7e25c76d9171884beDXU-f47ddf16416a246363230454f36bb16e', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 2016 member= 3
start year= 2016 member= 4
start year= 2016 member= 5
start year= 2016 member= 6
start year= 2016 member= 7
start year= 2016 member= 8
start year= 2016 member= 9
start year= 2016 member= 10
start year= 2016 member= 11
start year= 2016 member= 12
start year= 2016 member= 13
start year= 2016 member= 14
start year= 2016 member= 15
start year= 2016 member= 16
start year= 2016 member= 17


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-cdbd57b89373b7c63b1c75f5bb38327bHTE-84c9f65e77555e263bcf546fd17431cf', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 2016 member= 18
start year= 2016 member= 19
start year= 2016 member= 20
start year= 2016 member= 21
start year= 2016 member= 22
start year= 2016 member= 23
start year= 2016 member= 24
start year= 2016 member= 25
start year= 2016 member= 26
start year= 2016 member= 27
start year= 2016 member= 28
start year= 2016 member= 29
start year= 2016 member= 30
start year= 2016 member= 31
start year= 2016 member= 32


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-d6fbb603ab9b51126471109eb05a9791HT-d40a1867887c5806c3b9292c76e7e81e', 0, 0)"


start year= 2016 member= 33
start year= 2016 member= 34
start year= 2016 member= 35
start year= 2016 member= 36
start year= 2016 member= 37
start year= 2016 member= 38
start year= 2016 member= 39
start year= 2016 member= 40
start year= 2017 member= 1
start year= 2017 member= 2
start year= 2017 member= 3
start year= 2017 member= 4
start year= 2017 member= 5


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-15b3fcfbf8b116e874e6e4a039d04854HTN-649ff242d9370084d721fdba69b41802', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 2017 member= 6


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-168dca023be955755db6108fbae9c78cHUW-1f8de11e448b1c2bd7d362879f4aa312', 0, 0)"


start year= 2017 member= 7
start year= 2017 member= 8
start year= 2017 member= 9
start year= 2017 member= 10
start year= 2017 member= 11
start year= 2017 member= 12
start year= 2017 member= 13
start year= 2017 member= 14
start year= 2017 member= 15
start year= 2017 member= 16
start year= 2017 member= 17
start year= 2017 member= 18
start year= 2017 member= 19
start year= 2017 member= 20
start year= 2017 member= 21


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-26abae0efb26a70c400b3e8bdd94aba6HUW-846dd74dbcf05b20cfbb5c00aabd402c', 0, 0)"


start year= 2017 member= 22
start year= 2017 member= 23
start year= 2017 member= 24
start year= 2017 member= 25
start year= 2017 member= 26
start year= 2017 member= 27
start year= 2017 member= 28
start year= 2017 member= 29
start year= 2017 member= 30
start year= 2017 member= 31


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-390b68544112b02eca14d7200f16a817HT-7f2851557b663d216e30791ee414cb17', 0, 0)"


start year= 2017 member= 32


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-b36742e406ba5f1e7356cf5a09c2fa86DYU-a731ad936dc6b558d92d177886cfcf0b', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 2017 member= 33
start year= 2017 member= 34
start year= 2017 member= 35
start year= 2017 member= 36
start year= 2017 member= 37
start year= 2017 member= 38


distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", line 493, in handle_comm
    result = handler(comm, **msg)
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in heartbeat_worker
    self.tasks[key]: duration for key, duration in executing.items()
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/scheduler.py", line 2197, in <dictcomp>
    self.tasks[key]: duration for key, duration in executing.items()
KeyError: "('open_dataset-fee58147e8a4175c594f0fec4203b35fHTN-c97f299b8bcf99848e42bffaa3a96f8a', 0, 0)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/glade/work/kristenk/miniconda/envs/analysis4/lib/python3.7/site-packages/distributed/core.py", lin

start year= 2017 member= 39
start year= 2017 member= 40
