# check output files with dask

In [1]:
import numpy as np
import numpy.matlib
import numpy.ma as ma

import xarray as xr
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) #create time coder with cftime

import time
import cftime
import netCDF4 as nc
from datetime import timedelta

import pandas as pd

import glob



In [2]:
# load custom functions for analyzing flat10

from loading_function_flat10 import load_flat10, load_grid

In [3]:
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

################
##### Dask #####
################

def get_ClusterClient(
        ncores=1,
        nmem='4GB',
        walltime='01:00:00',
        account='UWAS0155'):
    """
    Start a PBS-backed Dask cluster on the 'casper' queue and attach a client to it.

    Code from Daniel Kennedy
    More info about Dask on HPC - https://ncar.github.io/dask-tutorial/notebooks/05-dask-hpc.html

    Parameters
    ----------
    ncores : int
        Cores requested per PBS job; the same value is used as the number
        of worker processes per job.
    nmem : str
        Memory requested per PBS job (e.g. '4GB'). Must be a string because
        it is concatenated into the PBS resource specification.
    walltime : str
        Wall time requested per PBS job.
    account : str
        Project ID the jobs are charged to.

    Returns
    -------
    tuple
        (cluster, client): the PBSCluster and a Client connected to it.
    """
    # PBS "select" statement: one chunk with the requested cores and memory
    pbs_select = 'select=1:ncpus=' + str(ncores) + ':mem=' + nmem

    pbs_options = dict(
        cores=ncores,              # The number of cores you want
        memory=nmem,               # Amount of memory
        processes=ncores,          # How many processes
        queue='casper',            # Queue name
        resource_spec=pbs_select,  # Specify resources
        account=account,           # Input your project ID here
        walltime=walltime,         # Amount of wall time
        interface='ext',           # Interface to use
    )
    cluster = PBSCluster(**pbs_options)

    return cluster, Client(cluster)

In [4]:
# Empty container; later cells store their loaded results in it
data_dict = dict()

# Zonal correction for NorESM

In [5]:
# Root directory of the flat10 model output
outputdir = '/glade/campaign/cgd/tss/people/aswann/flat10/'

# Full set of models, kept for reference (only NorESM2-LM is checked here):
#   'ACCESS-ESM1-5', 'CESM2', 'GFDL-ESM4', 'GISS_E2.1', 'NorESM2-LM',
#   'MPI-ESM1-2-LR', 'CNRM-ESM2-1', 'HadCM3LC-Bris'
modellist = [
    'NorESM2-LM',
]

# Experiment names, and the wildcard patterns that capture the different ways
# the folders and runs are named across models.
# The two lists are parallel: entry i of one belongs to entry i of the other.
runlist = [
    'flat10',
    'flat10_zec',
    'flat10_cdr',
]
runlist_wc = [
    '*lat10',
    '*zec',
    '*cdr',
]

# Variables to load. Other candidates not loaded here: 'fgco2', 'ra', 'npp'.
# Earlier note from the author: "not working beyond nbp for norESM".
varlist_load = [
    'cVeg',
    'cSoil',
    'cLitter',
    'nbp',
    'gpp',
    'rh',
]
# Variables to analyze: the loaded set plus the derived total 'cTot'
varlist_analyze = [
    'cVeg',
    'cSoil',
    'cTot',
    'cLitter',
    'nbp',
    'gpp',
    'rh',
]
# The loading code reads `varlist`; it is the same list object as varlist_load
varlist = varlist_load

In [6]:
## Create Dask cluster and client, then scale up and wait until all workers are available
n_workers = 20
cluster, client = get_ClusterClient(walltime='02:00:00')
cluster.scale(n_workers)
client.wait_for_workers(n_workers)

## Lists active workers and their status
cluster.workers

{'PBSCluster-7': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-5': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-4': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-10': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-3': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-9': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-14': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-17': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-0': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-16': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-6': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-8': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-2': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-19': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-15': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-1': <dask_jobqueue.pbs.PBSJob: status=running>,
 'PBSCluster-18': 

In [7]:
# Display the client summary (dashboard link, number of workers, threads, memory)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/8787/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/8787/status,Workers: 20
Total threads: 20,Total memory: 74.60 GiB

0,1
Comm: tcp://128.117.208.93:37661,Workers: 20
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/8787/status,Total threads: 20
Started: Just now,Total memory: 74.60 GiB

0,1
Comm: tcp://128.117.208.175:45065,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/38121/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.175:33987,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-6csl3xt0,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-6csl3xt0
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 78.11 MiB,Spilled bytes: 0 B
Read bytes: 194.88 MiB,Write bytes: 1.14 MiB

0,1
Comm: tcp://128.117.208.182:42207,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/44443/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.182:40801,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-8jbnt0d6,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-8jbnt0d6
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.14 MiB,Spilled bytes: 0 B
Read bytes: 47.50 MiB,Write bytes: 1.74 MiB

0,1
Comm: tcp://128.117.208.173:37599,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/34281/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:42451,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-5ottp8n3,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-5ottp8n3
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.89 MiB,Spilled bytes: 0 B
Read bytes: 4.21 GiB,Write bytes: 17.28 MiB

0,1
Comm: tcp://128.117.208.183:35475,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/43503/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.183:41869,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-88wmbi_y,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-88wmbi_y
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 78.19 MiB,Spilled bytes: 0 B
Read bytes: 65.55 MiB,Write bytes: 158.50 kiB

0,1
Comm: tcp://128.117.208.184:42673,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/44397/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.184:46553,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-rp6ihh9p,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-rp6ihh9p
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 79.29 MiB,Spilled bytes: 0 B
Read bytes: 22.34 MiB,Write bytes: 106.38 kiB

0,1
Comm: tcp://128.117.208.182:43659,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/41577/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.182:41605,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-u97k2oqo,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-u97k2oqo
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.18 MiB,Spilled bytes: 0 B
Read bytes: 18.88 MiB,Write bytes: 16.13 MiB

0,1
Comm: tcp://128.117.208.173:34553,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/34457/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:40539,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-kemkc4cg,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-kemkc4cg
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.89 MiB,Spilled bytes: 0 B
Read bytes: 8.37 MiB,Write bytes: 6.02 MiB

0,1
Comm: tcp://128.117.208.182:43681,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/36465/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.182:44563,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-1hqujno8,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-1hqujno8
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.14 MiB,Spilled bytes: 0 B
Read bytes: 25.23 MiB,Write bytes: 1.74 MiB

0,1
Comm: tcp://128.117.208.176:35553,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/37491/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.176:44665,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-5dwgec0f,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-5dwgec0f
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 4.0%,Last seen: Just now
Memory usage: 78.12 MiB,Spilled bytes: 0 B
Read bytes: 122.20 MiB,Write bytes: 70.22 kiB

0,1
Comm: tcp://128.117.208.174:44435,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/45945/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.174:44267,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-c36k4a_3,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-c36k4a_3
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.13 MiB,Spilled bytes: 0 B
Read bytes: 1.23 MiB,Write bytes: 65.34 kiB

0,1
Comm: tcp://128.117.208.182:35273,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/40295/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.182:39053,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-lcarhd1p,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-lcarhd1p
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.16 MiB,Spilled bytes: 0 B
Read bytes: 48.19 MiB,Write bytes: 1.75 MiB

0,1
Comm: tcp://128.117.208.181:40919,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/46125/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.181:45011,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-ejxq5_3d,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-ejxq5_3d
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.10 MiB,Spilled bytes: 0 B
Read bytes: 220.60 MiB,Write bytes: 80.74 MiB

0,1
Comm: tcp://128.117.208.181:35929,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/38037/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.181:43861,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-5xjvedem,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-5xjvedem
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.14 MiB,Spilled bytes: 0 B
Read bytes: 220.74 MiB,Write bytes: 80.79 MiB

0,1
Comm: tcp://128.117.208.173:44167,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/38459/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:39323,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-rlry2_of,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-rlry2_of
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.98 MiB,Spilled bytes: 0 B
Read bytes: 7.99 GiB,Write bytes: 16.16 MiB

0,1
Comm: tcp://128.117.208.173:33331,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/36391/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:36055,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-w9xgz7dz,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-w9xgz7dz
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.99 MiB,Spilled bytes: 0 B
Read bytes: 4.67 GiB,Write bytes: 9.56 MiB

0,1
Comm: tcp://128.117.208.173:34431,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/45943/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:40683,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-lvf08jhs,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-lvf08jhs
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.89 MiB,Spilled bytes: 0 B
Read bytes: 19.67 MiB,Write bytes: 15.77 MiB

0,1
Comm: tcp://128.117.208.179:40529,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/42157/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.179:35067,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-3yfepeqr,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-3yfepeqr
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.14 MiB,Spilled bytes: 0 B
Read bytes: 432.16 MiB,Write bytes: 535.02 kiB

0,1
Comm: tcp://128.117.208.173:38121,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/42659/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:46591,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-8ydurcns,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-8ydurcns
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.99 MiB,Spilled bytes: 0 B
Read bytes: 16.79 MiB,Write bytes: 16.08 MiB

0,1
Comm: tcp://128.117.208.181:43943,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/39433/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.181:36219,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-24nzm4zu,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-24nzm4zu
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 2.0%,Last seen: Just now
Memory usage: 78.08 MiB,Spilled bytes: 0 B
Read bytes: 221.39 MiB,Write bytes: 81.03 MiB

0,1
Comm: tcp://128.117.208.173:39853,Total threads: 1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/aswann/proxy/36361/status,Memory: 3.73 GiB
Nanny: tcp://128.117.208.173:35817,
Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-1s4aq4i4,Local directory: /glade/derecho/scratch/aswann/tmp/dask-scratch-space/worker-1s4aq4i4
Tasks executing:,Tasks in memory:
Tasks ready:,Tasks in flight:
CPU usage: 0.0%,Last seen: Just now
Memory usage: 57.93 MiB,Spilled bytes: 0 B
Read bytes: 26.50 MiB,Write bytes: 4.22 MiB


In [9]:
# load grid
# load_grid comes from loading_function_flat10; its return value replaces data_dict.
# NOTE(review): presumably it adds grid information for each model in modellist — confirm in the module.
data_dict = load_grid(data_dict,modellist)

NorESM2-LM getting grid info


In [9]:
# load all data
##data_dict=load_flat10(data_dict, modellist, runlist, runlist_wc, varlist)

In [None]:
import numpy as np
import numpy.matlib
import numpy.ma as ma

import xarray as xr
#xr.set_options(enable_cftimeindex=True)
#from xarray.coding.times import CFTimedeltaCoder
time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) #create time coder with cftime

import time
import cftime
import netCDF4 as nc
from datetime import timedelta

import pandas as pd

import glob


## notes on packages to add to this kernel
import nc_time_axis





# data location
outputdir= '/glade/campaign/cgd/tss/people/aswann/flat10/'


def _open_and_concat(filenames):
    """Open every file in `filenames` and join them along 'time' in one concat call.

    Some variables are stored in multiple files. This should be possible with
    xr.open_mfdataset, but that was not loading all of the time points.
    """
    datasets = [xr.open_dataset(fname, decode_times=time_coder) for fname in filenames]
    if len(datasets) == 1:
        return datasets[0]
    return xr.concat(datasets, dim='time')


def _nan_placeholder(template):
    """Return an all-NaN DataArray on the lat/lon (and, if present, time) grid of `template`."""
    dims = (['time'] if 'time' in template else []) + ['lat', 'lon']
    shape = tuple(len(template[d]) for d in dims)
    return xr.DataArray(np.full(shape, np.nan),
                        coords={d: template[d] for d in dims},
                        dims=dims)


# runlist (names used as dictionary keys) and runlist_wc (folder wildcards) are paired by position
if len(runlist) != len(runlist_wc):
    raise ValueError('runlist and runlist_wc must have the same length')

#----loop over models----#
for model in modellist:
    print('loading model: ' +model)
    #----loop over experiments----#
    for runname, run in zip(runlist, runlist_wc):
        print('loading run: ' +run)

        # Reset per run so nothing leaks in from the previous run or an earlier execution
        dsmerge_v = None     # merged dataset holding all variables of this run
        no_file_vars = []    # variables without any file; filled with NaN after the loop

        #----loop over variables----#
        for var in varlist:
            print('loading variable: ' +var)

            searchpath= outputdir +model +'/' +run +'/*' +var +'_*.nc'

            # sort in time order, xarray was having trouble arranging some of them in time dim
            filenamelist = sorted(glob.glob(searchpath))

            if not filenamelist:
                # Without this guard the dataset of the previous variable would be reused silently
                print('WARNING: no files match ' +searchpath +' -- ' +var +' will be filled with NaN')
                no_file_vars.append(var)
                continue

            dsmerge_f = _open_and_concat(filenamelist)

            if model == 'NorESM2-LM' and 'PRECC' in dsmerge_f: #NorESM
                dsmerge_f['pr'] = dsmerge_f['PRECC']
                if dsmerge_f['pr'].attrs.get('units') == 'm/s':
                    dsmerge_f['pr'] = dsmerge_f['pr']*(1e3)
                    dsmerge_f['pr'].attrs['units'] = 'kg m-2 s-1' #equivalent is mm/s

            #----check units and convert if necessary----#
            if var in dsmerge_f:
                if model == 'CESM2' and dsmerge_f[var].attrs.get('units') == 'gC/m^2/s':
                    dsmerge_f[var] = dsmerge_f[var]*(1/1000) # convert from gC to kgC
                    dsmerge_f[var].attrs['units'] = 'kg m-2 s-1'
            else: #var does not exist
                # add a blank variable so that loops work
                dsmerge_f[var] = _nan_placeholder(dsmerge_f)

            #----merge all variables into one dataset----#
            # if it's the first variable found, start a new dataset, otherwise merge with existing
            if dsmerge_v is None:
                dsmerge_v = dsmerge_f.copy()
            else:
                dsmerge_v = xr.merge([dsmerge_v, dsmerge_f], compat='override')

        if dsmerge_v is None:
            print('WARNING: no files found for ' +model +' ' +runname +' -- skipping')
            continue

        # Variables without files get a NaN placeholder on the grid of the merged dataset
        for missing_var in no_file_vars:
            dsmerge_v[missing_var] = _nan_placeholder(dsmerge_v)

        # add a new variable that is the sum of all carbon pools (computed once, after all variables are merged)
        if all(var_name in dsmerge_v for var_name in ['cVeg', 'cSoil', 'cLitter']):
            # litter is sometimes missing, in which case it is an all-NaN placeholder.
            # NOTE(review): the previous test was notnull().all(), which is False as soon as a
            # single cell is NaN (presumably true for masked ocean points), so litter was left
            # out of cTot even when it was present. notnull().any() only excludes litter when
            # it has no data at all -- please confirm this is the intended behaviour.
            if bool(dsmerge_v['cLitter'].notnull().any()):
                dsmerge_v['cTot'] = dsmerge_v['cVeg']+dsmerge_v['cSoil']+dsmerge_v['cLitter']
            else:
                dsmerge_v['cTot'] = dsmerge_v['cVeg']+dsmerge_v['cSoil']

        #----save output to a dictionary----#
        print('adding ' +model +' ' +runname +' to dict')
        data_dict[model +'_' +runname] = dsmerge_v





loading model: NorESM2-LM
loading run: *lat10
loading variable: cVeg
loading variable: cSoil
loading variable: cLitter
loading variable: nbp
loading variable: gpp
loading variable: rh
adding NorESM2-LM flat10 to dict
loading run: *zec
loading variable: cVeg


In [None]:
## Once done, shut down the Dask cluster
# Run this only after all work that needs the client has finished.
client.shutdown()

In [1]:
!rm ./dask-worker.e*
!rm ./dask-worker.o*