In [1]:
import os
import xarray as xr
import glob
from datetime import date
import functools
from dask_jobqueue import PBSCluster
from dask.distributed import Client
import pandas as pd

KeyboardInterrupt: 

In [None]:
# Describe the PBS job that backs each dask worker.
cluster = PBSCluster(
    # --- scheduler / accounting ---
    queue='casper',                               # queue the jobs are submitted to
    project='P93300041',                          # project ID charged for the jobs
    walltime='02:00:00',                          # wall-clock limit per job
    # --- per-job resources (kept consistent with resource_spec) ---
    resource_spec='select=1:ncpus=1:mem=10GB',    # resource request string for the job
    cores=1,                                      # cores per job
    memory='10GB',                                # memory per job
    processes=1,                                  # processes per job
    # --- node-local settings ---
    local_directory='/glade/work/afoster',        # local directory passed to the workers
    interface='ext',                              # network interface to use
)

In [None]:
# Ask the cluster to scale to 2 workers.
cluster.scale(2)

In [None]:
# Connect a distributed client to the PBS-backed cluster created above.
client = Client(cluster)

In [None]:
def preprocess(ds, varset):
    """Reduce a freshly opened dataset to the requested variables.

    Used as the ``preprocess`` hook of ``xr.open_mfdataset`` so that each
    file is subset before the files are concatenated.

    Parameters
    ----------
    ds : object supporting ``ds[list_of_names]``
        The dataset opened from a single file.
    varset : list of str
        Names of the variables to keep.

    Returns
    -------
    The result of indexing ``ds`` with ``varset``.
    """
    # Index with the argument, not a notebook-level global, so the function
    # works regardless of which cells have been run.
    return ds[varset]

In [None]:
def postprocess(top_dir, histdir, data_vars):
    """Combine one ensemble member's history files into a single dataset.

    Globs ``<top_dir>/<histdir>/lnd/hist/*clm2.h0*.nc``, concatenates the
    files along ``time`` keeping only ``data_vars``, rewrites the time axis,
    keeps a ten-year window, and adds derived variables plus provenance
    attributes.

    Parameters
    ----------
    top_dir : str
        Directory containing one sub-directory per ensemble member.
    histdir : str
        Name of the ensemble member's sub-directory. The text after its last
        underscore is stored in the ``ensemble`` variable.
    data_vars : list of str
        Variables to keep from each file. The derived variables below read
        ``FSR``, ``FSDS``, ``FATES_GPP``, ``FATES_FRACTION`` and ``TSA``, so
        those must be included.

    Returns
    -------
    xarray.Dataset or None
        ``None`` when fewer than 12 matching history files are found.
    """
    pattern = os.path.join(top_dir, histdir, 'lnd', 'hist', '*clm2.h0*.nc')
    files = sorted(glob.glob(pattern))

    # Not enough files to process: signal the caller to skip it.
    if len(files) < 12:
        return None

    ds = xr.open_mfdataset(files, combine='nested', concat_dim='time',
                           preprocess=functools.partial(preprocess,
                                                        varset=data_vars),
                           parallel=True, autoclose=True)

    # fix time bug: overwrite the time coordinate with month-start stamps
    # beginning in 2005, keep 2055-2064, then relabel those 120 months so
    # they again start in 2005.
    # NOTE(review): the first assignment presumably requires the concatenated
    # record to hold exactly 12*60 time steps - confirm for every ensemble.
    ds['time'] = xr.cftime_range(str(2005), periods=12*60, freq='MS')
    ds = ds.sel(time=slice("2055-01-01", "2064-12-31"))
    ds['time'] = xr.cftime_range(str(2005), periods=12*10, freq='MS')

    # calculate some variables
    # Albedo is only defined where there is incoming shortwave radiation.
    ds['ASA'] = ds.FSR/ds.FSDS.where(ds.FSDS > 0)
    ds['ASA'].attrs['units'] = 'unitless'
    ds['ASA'].attrs['long_name'] = 'All sky albedo'

    ds['GPP'] = ds['FATES_GPP']*ds['FATES_FRACTION']  # kg m-2 s-1
    ds['GPP'].attrs['units'] = ds['FATES_GPP'].attrs['units']
    ds['GPP'].attrs['long_name'] = ds['FATES_GPP'].attrs['long_name']

    ds['Temp'] = ds.TSA - 273.15
    ds['Temp'].attrs['units'] = 'degrees C'
    ds['Temp'].attrs['long_name'] = ds['TSA'].attrs['long_name']

    # Copy the grid variables from the first file. They are loaded into memory
    # inside the context manager so the file handle is released here instead
    # of leaking one open file per ensemble member.
    with xr.open_dataset(files[0]) as first_file:
        for extra in ('grid1d_lat', 'grid1d_lon'):
            ds[extra] = first_file[extra].load()

    ds['ensemble'] = histdir.split('_')[-1]

    # Provenance metadata.
    ds.attrs['Date'] = str(date.today())
    ds.attrs['Author'] = 'afoster@ucar.edu'
    ds.attrs['Original'] = files[0]

    return ds

In [None]:
# Variables kept from every history file (order preserved).
data_vars = [
    'FATES_GPP',
    'EFLX_LH_TOT',
    'FSR',
    'FSDS',
    'QRUNOFF',
    'FATES_FRACTION',
    'SNOWDP',
    'SOILWATER_10CM',
    'TV',
    'FATES_LAI',
    'TWS',
    'FSH',
    'QVEGE',
    'TG',
    'TSA',
    'RAIN',
    'SNOW',
    'TBOT',
]

In [None]:
# Input archive and the directory that receives the processed files.
top_dir = '/glade/derecho/scratch/afoster/FATES_SP_LH_500/archive'
postp_dir = '/glade/work/afoster/FATES_calibration/FATES_SP_LH/hist_500/'

# Every entry directly under the archive, in sorted order.
dirs = os.listdir(top_dir)
dirs.sort()

In [None]:
# Post-process every entry in `dirs`, writing one netCDF file per ensemble
# member and recording which members succeeded or were skipped.
bad_ensembles = []
good_ensembles = []

# Create the output directory up front so the first write cannot fail on a
# missing path.
os.makedirs(postp_dir, exist_ok=True)

for histdir in dirs:
    ensemble = histdir.split('_')[-1]
    ds = postprocess(top_dir, histdir, data_vars)
    if ds is None:
        # postprocess returns None when it finds fewer than 12 history files.
        bad_ensembles.append(ensemble)
        continue

    out_file = os.path.join(postp_dir, f"{histdir}.nc")
    try:
        ds.to_netcdf(out_file)
    finally:
        # Release the source file handles before moving to the next member;
        # otherwise open files accumulate across the whole loop.
        ds.close()
    good_ensembles.append(ensemble)

In [None]:
# Save the successfully processed ensemble labels, one per line.
with open('ensembles_good.txt', 'w') as f:
    f.writelines(f"{line}\n" for line in good_ensembles)