# Post-Process FATES or CLM Ensemble

In [1]:
import os
import numpy as np
import xarray as xr
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

import fates_calibration_library.analysis_functions as analysis
import fates_calibration_library.utils as utils

In [2]:
# Set up the PBS-backed dask cluster.
# Keyword order is irrelevant to the constructor; the arguments are grouped by
# purpose (per-job resources, scheduler/queue settings, filesystem locations).
cluster = PBSCluster(
    # --- resources requested for each job ---
    cores=1,
    processes=1,
    memory='25GB',
    resource_spec='select=1:ncpus=1:mem=25GB',
    walltime='02:00:00',
    # --- queue and accounting (replace the account with your project ID) ---
    queue='casper',
    account='P08010000',
    interface='ext',
    # --- filesystem locations (point these at your own directories) ---
    local_directory='/glade/work/afoster',
    log_directory='/glade/derecho/scratch/afoster/dask_logs',
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 40445 instead


In [3]:
# Route the dashboard link through the JupyterHub proxy
dashboard_link = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
dask.config.set({'distributed.dashboard.link': dashboard_link})

# Request 30 workers from the cluster, then attach a client to it
cluster.scale(30)
client = Client(cluster)

# Leave the client as the last expression so the notebook displays its summary
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/40445/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/40445/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.173:44005,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/40445/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Set Up

First, specify the history variables to read and the variables to process.

In [4]:
# History variables that appear, in the same order, in both the FATES and CLM lists
_shared_hist_vars = [
    'QVEGE', 'QVEGT', 'EFLX_LH_TOT', 'FSH', 'FSR', 'FSDS', 'FSA',
    'FIRE', 'FLDS', 'FCTR', 'FCEV', 'FGEV', 'BTRANMN', 'FGR',
    'SOILWATER_10CM', 'TWS', 'QRUNOFF', 'SNOWDP', 'TV', 'TG', 'TSA',
    'TBOT',
]

# FATES list: FATES-specific variables followed by the shared ones
fates_vars = ['FATES_FRACTION', 'FATES_GPP', 'FATES_LAI'] + _shared_hist_vars

# CLM list: CLM-specific variables followed by the shared ones
clm_vars = ['FPSN', 'TLAI'] + _shared_hist_vars

# Variables to process
out_vars = [
    'GPP', 'LAI', 'EFLX_LH_TOT', 'FSH', 'EF', 'SOILWATER_10CM', 'ASA',
    'FSR', 'FSA', 'FIRE', 'RLNS', 'RN', 'BTRANMN', 'TV',
]

### Files Needed

In [7]:
# config file with the model conversion information
var_config = '/glade/work/afoster/FATES_calibration/fates_calibration_library/configs/model_conversion.yaml'
var_dict = utils.get_config_file(var_config)

# land area on the sparse grid - needed for unit conversion
land_area_file = '/glade/work/afoster/FATES_calibration/CLM5PPE/postp/sparsegrid_landarea.nc'
land_area = xr.open_dataset(
    land_area_file
).landarea

# Whittaker biomes on the sparse grid
whittaker_ds = xr.open_dataset(
    '/glade/work/afoster/FATES_calibration/observations/whittaker/whitkey.nc'
)

In [25]:
def post_process_ensemble(run_dict, vars, biome):
    """Post-process every ensemble member found under ``run_dict['top_dir']``.

    Each entry of ``top_dir`` is treated as one ensemble member whose history
    files are looked up in ``<top_dir>/<entry>/lnd/hist``. The post-processed
    dataset of a member is written to ``<postp_dir>/<entry>.nc``, but only if
    it holds exactly 12 time steps for every year in the inclusive range
    ``run_dict['years']``.

    Args:
        run_dict (dict): run configuration. Keys read here:
            'top_dir'   - directory holding one sub-directory per member
            'postp_dir' - output directory (created if missing)
            'years'     - [first_year, last_year], inclusive
            'clobber'   - optional; when truthy, existing output files are
                          re-created instead of skipped (default False)
            The key 'ensemble' is set to True on the passed dict (in place)
            before it is forwarded to ``analysis.post_process_ds``.
        vars (list): variables, forwarded unchanged to
            ``analysis.post_process_ds``.
        biome: biome information, forwarded unchanged to
            ``analysis.post_process_ds``.

    Returns:
        list[str]: ensemble keys (the text after the last underscore of each
        entry name) whose output file already existed or was written now.
    """
    # forwarded to post_process_ds; note this updates the caller's dict
    run_dict['ensemble'] = True

    # create output directory if it doesn't exist
    os.makedirs(run_dict['postp_dir'], exist_ok=True)

    # Previously a bare, undefined ``years`` was passed to post_process_ds
    # (NameError on a fresh kernel). The completeness check below has always
    # used run_dict['years'], so the same value is forwarded.
    # NOTE(review): confirm post_process_ds expects the [first, last] pair.
    years = run_dict['years']
    expected_months = (years[1] - years[0] + 1) * 12

    keys_finished = []
    for member_dir in sorted(os.listdir(run_dict['top_dir'])):
        ensemble = member_dir.split('_')[-1]
        out_file = os.path.join(run_dict['postp_dir'], f"{member_dir}.nc")

        # skip if file exists and clobber is False
        if os.path.isfile(out_file) and not run_dict.get("clobber", False):
            print(f"File {out_file} for ensemble {ensemble} exists, skipping")
            keys_finished.append(ensemble)
            continue

        # build the post-processed dataset for this ensemble member
        hist_dir = os.path.join(run_dict['top_dir'], member_dir, 'lnd', 'hist')
        ds_out = analysis.post_process_ds(hist_dir, vars, biome, years,
                                          run_dict=run_dict)

        # report members that are not written instead of dropping them silently
        if ds_out is None:
            print(f"No output produced for ensemble {ensemble}, not writing {out_file}")
            continue

        n_months = len(ds_out.time)
        if n_months != expected_months:
            print(f"Ensemble {ensemble} has {n_months} time steps, "
                  f"expected {expected_months}; not writing {out_file}")
            continue

        # write to file
        ds_out.to_netcdf(out_file)
        keys_finished.append(ensemble)

    return keys_finished

In [24]:
# Run configurations, keyed by ensemble name
run_dict = {}
run_dict['fates_oaat'] = dict(
    top_dir='/glade/derecho/scratch/afoster/FATES_calibration/fates_oaat/archive',
    postp_dir='/glade/work/afoster/FATES_calibration/history_files/fates_oaat',
    years=[2000, 2014],  # first and last year
    clobber=False,       # keep existing post-processed files
)

In [None]:
# Post-process the FATES one-at-a-time ensemble; returns the ensemble keys that are done
keys_finished = post_process_ensemble(
    run_dict=run_dict['fates_oaat'],
    vars=fates_vars,
    biome=whittaker_ds,
)