In [120]:
import glob
import math
import os
import requests
import cftime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import dask
from dask.distributed import Client
from dask_jobqueue import PBSCluster

In [2]:
# Setup PBSCluster
cluster = PBSCluster(
    cores=1,                                                   # The number of cores you want
    memory='25GB',                                             # Amount of memory
    processes=1,                                               # How many processes
    queue='casper',                                            # The type of queue to utilize
    local_directory='/glade/work/afoster',                     # Use your local directory
    resource_spec='select=1:ncpus=1:mem=25GB',                 # Specify resources
    log_directory='/glade/derecho/scratch/afoster/dask_logs',  # log directory
    account='P93300041',                                       # Input your project ID here
    walltime='02:00:00',                                       # Amount of wall time
    interface='ext')                                           # Interface to use

cluster.scale(30)
dask.config.set({
    'distributed.dashboard.link': 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
})
client = Client(cluster)
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 45201 instead


0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/45201/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/45201/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.89:33567,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/45201/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [134]:
def get_raw_neon_obs(obs_dir, site):
    
    obs_files = sorted(glob.glob(obs_dir + "/" + f"*{site}*.nc"))
    obs = xr.open_mfdataset(obs_files, combine='nested',
                            concat_dim='time', parallel=True, autoclose=True)
    
    sec_per_day = 24*60*60
    gpp_gc_m2_s = obs.GPP*1E-6*12.011
    gpp_dt_conv = obs.GPP_DT*1E-6*12.011
    
    gpp = (gpp_gc_m2_s.resample(time='D').mean())*sec_per_day    # gC/m2/ms to gC/m2/d
    gpp_dt = (gpp_dt_conv.resample(time='D').mean())*sec_per_day # gC/m2/ms to gC/m2/d
    lh = obs.EFLX_LH_TOT.resample(time='D').mean()               # W/m2
    sh = obs.FSH.resample(time='D').mean()                       # W/m2
    rnet = obs.Rnet.resample(time='D').mean()                    # W/m2
    
    neon_dat = xr.merge([gpp, gpp_dt, lh, sh, rnet])
    neon_dat = neon_dat.convert_calendar("standard", use_cftime=True)
    neon_dat = neon_dat.isel(lat=0).isel(lon=0)

    return neon_dat

In [135]:
def get_climatology(dat):
    
    daily_gpp = dat.GPP.groupby("time.dayofyear").mean().to_dataset(name='GPP')
    daily_gpp_var = dat.GPP.groupby('time.dayofyear').var().to_dataset(name='GPPvar')
    daily_lh = dat.EFLX_LH_TOT.groupby('time.dayofyear').mean().to_dataset(name='EFLX_LH_TOT')
    daily_lh_var = dat.EFLX_LH_TOT.groupby('time.dayofyear').var().to_dataset(name='EFLX_LH_TOTvar')
    daily_sh = dat.FSH.groupby('time.dayofyear').mean().to_dataset(name='FSH')
    daily_sh_var = dat.FSH.groupby('time.dayofyear').var().to_dataset(name='FSHvar')
    daily_rnet = dat.Rnet.groupby('time.dayofyear').mean().to_dataset(name='Rnet')
    daily_rnet_var = dat.Rnet.groupby('time.dayofyear').var().to_dataset(name='Rnetvar')

    out = xr.merge([daily_gpp, daily_gpp_var, daily_lh, daily_lh_var, daily_sh, daily_sh_var,
                   daily_rnet, daily_rnet_var])

   # out = out.sel(

    return out

In [136]:
def get_site_evals(data, site):
    
    eval_data = []
    for dat in data:
        if site in dat and 'eval' in dat:
            eval_data.append(dat)

    max_version = 0
    for dat in eval_data:
        max_version = max(max_version, int(dat.split('/')[6].replace('v', '')))

    latest_version = []
    for dat in eval_data:
        if dat.split('/')[6] == f"v{max_version}":
            latest_version.append(dat)

    return latest_version

def download_eval_data(listing_dat, site, dir):

    # get list of latest evaluation data
    eval_data = get_site_evals(listing_dat, site)

    # download
    for data in eval_data:
        response = requests.get(data, stream=True)
        with open(os.path.join(dir, os.path.basename(data)), "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

In [142]:
# observations
obs_dir = "/glade/work/afoster/FATES_calibration/observations/"

In [130]:
listing = pd.read_csv('/glade/work/afoster/FATES_calibration/observations/listing.csv')
listing_data = listing.object

In [119]:
dir = '/glade/work/afoster/FATES_calibration/observations/NEON_eval_data'

In [89]:
neon_sites = ['ABBY', 'BLAN', 'CPER', 'DEJU', 'HARV', 'JORN', 'LAJA', 'MOAB',
              'OAES', 'OSBS', 'SCBI', 'SOAP', 'STER', 'TOOL', 'UNDE', 'YELL',
              'BARR', 'BONA',  'DCFS', 'DELA', 'GRSM', 'HEAL', 'KONA', 'LENO',
              'NIWO', 'ONAQ', 'PUUM', 'SERC', 'SRER', 'TALL', 'TREE', 'WOOD',
              'BART', 'CLBJ', 'DSNY', 'GUAN', 'JERC', 'KONZ', 'MLBS', 'NOGP',
              'ORNL', 'RMNP', 'SJER', 'STEI', 'TEAK', 'UKFS', 'WREF']

In [131]:
for site in neon_sites:
    if site != 'HARV':
        download_eval_data(listing_data, site, dir)

In [None]:
for site in neon_sites:
    file = os.path.join(obs_dir, 'NEON_climatology', f"{site}_climatology.nc")
    if not os.path.exists(file):
       obs = get_raw_neon_obs(dir, site)
       clim = get_climatology(obs)
       clim.to_netcdf(file)

In [6]:
clim_dir = os.path.join(obs_dir, 'climatology')
files = [os.path.join(clim_dir, f) for f in sorted(os.listdir(clim_dir))]
obs_dat = []
for file in files:
    obs_dat.append(xr.open_dataset(file))
obs = xr.concat(obs_dat, dim='site')
obs = obs.assign_coords(site=("site", neon_sites))

In [87]:
dat = obs.sel(site='NIWOT')
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(dat.dayofyear, dat.GPP, color='blue')
ax.fill_between(dat.dayofyear, dat.GPP - np.sqrt(dat.GPPvar), 
                dat.GPP + np.sqrt(dat.GPPvar), color='blue', alpha=0.2)

KeyError: "not all values found in index 'site'. Try setting the `method` keyword argument (example: method='nearest')."