# Post-Process FATES or CLM Ensemble

In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

import fates_calibration_library.analysis_functions as analysis
import fates_calibration_library.utils as utils

In [2]:
# Configure the PBS-backed Dask cluster. Every PBS job hosts one worker
# process with a single core and 25 GB of memory.
cluster = PBSCluster(
    # --- PBS job request ---
    account='P08010000',                        # project ID to charge (use your own)
    queue='casper',                             # queue the jobs are submitted to
    walltime='06:00:00',                        # wall time per job
    resource_spec='select=1:ncpus=1:mem=25GB',  # resource request string
    # --- worker layout within each job ---
    cores=1,
    processes=1,
    memory='25GB',
    interface='ext',                            # network interface to use
    # --- directories ---
    local_directory='/glade/work/afoster',                     # use your own directory
    log_directory='/glade/derecho/scratch/afoster/dask_logs',  # job logs
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41641 instead


In [3]:
# Ask the cluster for 30 workers
cluster.scale(30)

# Route the dashboard link through the JupyterHub proxy; the {USER} and {port}
# placeholders are filled in when the link is rendered
dashboard_link = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
dask.config.set({'distributed.dashboard.link': dashboard_link})

# Connect a client to the cluster and display its summary
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/41641/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/41641/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.173:36367,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/41641/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [None]:
# Left commented out so that "Run All" does not stop the client part-way
# through the notebook; uncomment and run once all processing is finished.
# client.shutdown()

## Set Up

First, specify the history variables to read for each model (FATES and CLM) and the output variables to process.

In [4]:
# History variables to read from FATES ensemble output
fates_vars = [
    'FATES_FRACTION', 'FATES_GPP', 'FATES_LAI',
    'QVEGE', 'QSOIL', 'QVEGT', 'EFLX_LH_TOT', 'FSH',
    'FSR', 'FSDS', 'FSA', 'FIRE', 'FLDS',
    'FCTR', 'FCEV', 'FGEV', 'BTRANMN', 'FGR',
    'SOILWATER_10CM', 'TWS', 'QRUNOFF', 'SNOWDP',
    'TV', 'TG', 'TSA', 'RH2M',
    'SNOW', 'RAIN', 'FGR12', 'TREFMXAV', 'TREFMNAV',
]

# History variables to read from CLM (non-FATES) ensemble output. This mirrors
# fates_vars, with FPSN/TLAI in place of the FATES_* variables.
# 'RAIN' was missing here although fates_vars has it and out_vars requests
# 'Precip'; presumably Precip is derived from RAIN and SNOW - TODO confirm
# against model_conversion.yaml.
clm_vars = [
    'FPSN', 'TLAI',
    'QVEGE', 'QVEGT', 'QSOIL', 'EFLX_LH_TOT', 'FSH',
    'FSR', 'FSDS', 'FSA', 'FIRE', 'FLDS',
    'FCTR', 'FCEV', 'FGEV', 'BTRANMN', 'FGR',
    'SOILWATER_10CM', 'TWS', 'QRUNOFF', 'SNOWDP',
    'TV', 'TG', 'TSA', 'RH2M',
    'SNOW', 'RAIN', 'FGR12', 'TREFMXAV', 'TREFMNAV',
]

# Output variables to process from the history variables above
out_vars = [
    'GPP', 'LAI', 'EFLX_LH_TOT', 'FSH', 'EF', 'SOILWATER_10CM', 'ASA',
    'FSDS', 'FSR', 'FSA', 'FIRE', 'RLNS', 'RN', 'BTRANMN', 'TV', 'Temp',
    'RH2M', 'Precip',
]

### Files Needed

In [5]:
# --- Reference datasets ------------------------------------------------------

# Whittaker biomes on the sparse grid
whittaker_file = '/glade/work/afoster/FATES_calibration/observations/whittaker/whitkey.nc'
whittaker_ds = xr.open_dataset(whittaker_file)

# Land area on the sparse grid - needed for unit conversion
land_area_file = '/glade/work/afoster/FATES_calibration/CLM5PPE/postp/sparsegrid_landarea.nc'
land_area = xr.open_dataset(land_area_file)['landarea']

# Clustering file for the sparse grid
grid_dir = '/glade/work/afoster/FATES_calibration/surfdata'
grid_file = 'clusters.clm51_PPEn02ctsm51d021_2deg_GSWP3V1_leafbiomassesai_PPE3_hist.annual+sd.400.nc'
sparse_grid = xr.open_dataset(os.path.join(grid_dir, grid_file))

# Global land fraction and area grid, built from a full-grid history file.
# NOTE: despite the "_ds" suffix, land_frac_ds holds a file path, not a dataset.
land_frac_ds = os.path.join(
    "/glade/derecho/scratch/afoster/archive",
    "ctsm60SP_bigleaf_fullgrid/lnd/hist",
    "ctsm60SP_bigleaf_fullgrid.clm2.h0.0001-02-01-00000.nc",
)
target_grid = analysis.create_target_grid(land_frac_ds, 'FSR')

# --- Conversion configuration ------------------------------------------------

# Config file holding the conversion information
var_config = '/glade/work/afoster/FATES_calibration/fates_calibration_library/configs/model_conversion.yaml'
var_dict = utils.get_config_file(var_config)

# --- Parameter keys ----------------------------------------------------------

# FATES one-at-a-time key: first CSV column is the index, the rest are renamed
fates_oaat_key_file = '/glade/work/afoster/FATES_calibration/parameter_files/fates_oaat/fates_oaat_key.csv'
fates_oaat_key = pd.read_csv(fates_oaat_key_file, index_col=0)
fates_oaat_key.columns = ['key', 'minmax', 'param']

# CLM one-at-a-time key: header-less CSV; the integer key of each member is
# taken from the last four characters of its (stripped) ensemble name
clm_param_key_file = '/glade/work/afoster/FATES_calibration/parameter_files/clm6sp_oaat_key.csv'
clm_param_key = pd.read_csv(clm_param_key_file, header=None)
clm_param_key.columns = ['ensemble_name', 'param', 'minmax']
clm_param_key['key'] = clm_param_key['ensemble_name'].map(
    lambda member_name: int(member_name.strip()[-4:])
)

### Config Dictionaries
Set up the per-ensemble configuration dictionaries used for post-processing.

In [6]:
# Root directories: model archives (scratch) and post-processed history files (work)
archive_dir = '/glade/derecho/scratch/afoster/FATES_calibration/'
hist_dir = '/glade/work/afoster/FATES_calibration/history_files/'
years = [2000, 2014]  # presumably [first, last] year to process - TODO confirm

# One configuration entry per ensemble: input/output directories, years,
# parameter key, case-name tag and the history variables to read.
# NOTE(review): the 'clm_oaat_btran' entry uses ensemble_name 'clm_oaat' and is
# the only entry that sets a 'fates' flag - confirm both are intended.
ensemble_dict = dict(
    fates_oaat=dict(
        top_dir=os.path.join(archive_dir, 'fates_oaat/archive'),
        postp_dir=os.path.join(hist_dir, 'fates_oaat'),
        out_dir=os.path.join(hist_dir, 'compiled_files'),
        years=years,
        ensemble_name='fates_oaat',
        clobber=False,
        default_dir='/glade/derecho/scratch/afoster/archive/ctsm60SP_fates_sparse_vai_default',
        key=fates_oaat_key,
        tag='ctsm60SP_fates_sparse_vai_FATES_OAAT_',
        vars=fates_vars,
    ),
    fates_oaat_clmpars=dict(
        top_dir=os.path.join(archive_dir, 'fates_oaat_clmpars/archive'),
        postp_dir=os.path.join(hist_dir, 'fates_oaat_clmpars'),
        out_dir=os.path.join(hist_dir, 'compiled_files'),
        years=years,
        ensemble_name='fates_oaat_clmpars',
        default_dir='/glade/derecho/scratch/afoster/archive/ctsm60SP_fates_sparse_vai_default',
        clobber=False,
        key=clm_param_key,
        tag='ctsm60SP_fates_sparse_vai_CLM6SPoaat',
        vars=fates_vars,
    ),
    clm_oaat_btran=dict(
        top_dir=os.path.join(archive_dir, 'clm_oaat/archive'),
        postp_dir=os.path.join(hist_dir, 'clm_oaat'),
        out_dir=os.path.join(hist_dir, 'compiled_files'),
        years=years,
        ensemble_name='clm_oaat',
        default_dir='/glade/derecho/scratch/afoster/archive/ctsm60SP_bigleaf_sparse_default',
        clobber=False,
        fates=False,
        key=clm_param_key,
        tag='ctsm60SP_bigleaf_sparse_CLM6SPoaat',
        vars=clm_vars,
    ),
)

In [7]:
# Ensemble to process in the cells below; must be a key of ensemble_dict
ensemble = "fates_oaat"

## Post-Process History Files
Write one post-processed history file per ensemble member to the post-processing directory.

In [None]:
# Write out the history files for the selected ensemble. The returned value is
# kept so that missing ensemble members can be checked afterwards.
ensemble_config = ensemble_dict[ensemble]
keys_finished = analysis.post_process_ensemble(
    ensemble_config,
    ensemble_config['vars'],
    whittaker_ds,
)

In [None]:
# Compare the selected ensemble's parameter key with the members that were
# post-processed, to see whether any are missing
param_key = ensemble_dict[ensemble]['key']
not_run = analysis.check_ensembles_run(param_key, keys_finished)

Now compile and do some averaging (by year, month, gridcell, etc.)

In [None]:
# Compilation step is currently disabled; uncomment to run it for the
# selected ensemble.
# analysis.compile_global_ensemble(ensemble_dict[ensemble], out_vars, var_dict,
#                                   sparse_grid, land_area, target_grid.land_area)