# Post-Process FATES or CLM Ensemble

In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

import fates_calibration_library.analysis_functions as analysis
import fates_calibration_library.utils as utils

In [2]:
# Set up the PBSCluster: collect the per-job options in one place, then build it.
pbs_job_options = {
    'cores': 1,                                                   # number of cores per job
    'memory': '25GB',                                             # amount of memory per job
    'processes': 1,                                               # how many processes per job
    'queue': 'casper',                                            # queue to submit to
    'local_directory': '/glade/work/afoster',                     # use your local directory
    'resource_spec': 'select=1:ncpus=1:mem=25GB',                 # PBS resource specification
    'log_directory': '/glade/derecho/scratch/afoster/dask_logs',  # where job logs are written
    'account': 'P08010000',                                       # input your project ID here
    'walltime': '02:00:00',                                       # amount of wall time
    'interface': 'ext',                                           # network interface to use
}
cluster = PBSCluster(**pbs_job_options)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 33205 instead


In [3]:
# Scale the cluster to 30 workers.
cluster.scale(30)

# Point dashboard links at the JupyterHub proxy. The {USER} and {port}
# placeholders are passed through as-is (this is a plain string, not an f-string).
dashboard_link = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
dask.config.set({'distributed.dashboard.link': dashboard_link})

# Connect a client to the cluster; the bare name on the last line displays it.
client = Client(cluster)
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/33205/status,

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/33205/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://128.117.208.173:39559,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/afoster/proxy/33205/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Set Up

First, specify the history variables to read (for FATES or CLM) and the output variables to process.

In [4]:
# History variables that appear in both the FATES and CLM lists, in the
# order they follow the model-specific variables.
shared_history_vars = [
    'QVEGE', 'QVEGT', 'EFLX_LH_TOT', 'FSH',
    'FSR', 'FSDS', 'FSA', 'FIRE', 'FLDS',
    'FCTR', 'FCEV', 'FGEV', 'BTRANMN', 'FGR',
    'SOILWATER_10CM', 'TWS', 'QRUNOFF', 'SNOWDP',
    'TV', 'TG', 'TSA', 'TBOT',
]

# FATES history variables: FATES-specific names first, then the shared set.
fates_vars = ['FATES_FRACTION', 'FATES_GPP', 'FATES_LAI'] + shared_history_vars

# CLM history variables: CLM-specific names first, then the shared set.
clm_vars = ['FPSN', 'TLAI'] + shared_history_vars

# Variables to process from the post-processed ensemble.
out_vars = [
    'GPP', 'LAI', 'EFLX_LH_TOT', 'FSH', 'EF', 'SOILWATER_10CM', 'ASA',
    'FSR', 'FSA', 'FIRE', 'RLNS', 'RN', 'BTRANMN', 'TV',
]

### Files Needed

In [5]:
# Whittaker biomes on the sparse grid
whittaker_file = '/glade/work/afoster/FATES_calibration/observations/whittaker/whitkey.nc'
whittaker_ds = xr.open_dataset(whittaker_file)

# Land area on the sparse grid - needed for unit conversion
land_area_file = '/glade/work/afoster/FATES_calibration/CLM5PPE/postp/sparsegrid_landarea.nc'
land_area_ds = xr.open_dataset(land_area_file)
land_area = land_area_ds.landarea

# Config file holding the conversion information
var_config = '/glade/work/afoster/FATES_calibration/fates_calibration_library/configs/model_conversion.yaml'
var_dict = utils.get_config_file(var_config)

# Parameter key for the FATES OAAT ensemble: the first CSV column becomes the
# index and the remaining columns are renamed to fixed labels.
fates_oaat_key_file = '/glade/work/afoster/FATES_calibration/parameter_files/fates_oaat/fates_oaat_key.csv'
fates_oaat_key = pd.read_csv(fates_oaat_key_file, index_col=0)
fates_oaat_key.columns = ['key', 'minmax', 'param']

### Config Dictionaries
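Defines, for each ensemble, the settings used by post-processing: the top-level archive directory, the post-processing output directory, the year range, and the `clobber` flag.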

In [6]:
# Post-processing settings for the FATES OAAT ensemble.
fates_oaat_settings = dict(
    top_dir='/glade/derecho/scratch/afoster/FATES_calibration/fates_oaat/archive',
    postp_dir='/glade/work/afoster/FATES_calibration/history_files/fates_oaat',
    years=[2000, 2014],
    clobber=False,
)

# Settings for every ensemble, keyed by ensemble name.
run_dict = {'fates_oaat': fates_oaat_settings}

## Post-Process History Files
Write one post-processed history file per ensemble member to the post-processing directory.

In [7]:
# Post-process the FATES OAAT ensemble from its run settings, the FATES history
# variable list and the Whittaker biome dataset. The return value is kept so
# the following cell can check it against the parameter key.
oaat_run_settings = run_dict['fates_oaat']
keys_finished = analysis.post_process_ensemble(oaat_run_settings, fates_vars, whittaker_ds)

File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_001.nc for ensemble 001 exists, skipping
File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_002.nc for ensemble 002 exists, skipping
File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_003.nc for ensemble 003 exists, skipping
File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_004.nc for ensemble 004 exists, skipping
File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_005.nc for ensemble 005 exists, skipping
File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_006.nc for ensemble 006 exists, skipping
File /glade/work/afoster/FATES_calibration/history_files/fates_oaat/ctsm60SP_fates_sparse_vai_FATES_OAAT_007.nc for ensemble

In [8]:
# Check to see if we are missing any ensemble members: pass the parameter key
# table together with the keys returned by the post-processing step.
analysis.check_ensembles_run(fates_oaat_key, keys_finished)

All ensemble members were run.


In [9]:
# Gather the post-processed history files for the ensemble. Only NetCDF files
# are kept, so stray entries in the directory (hidden files, checkpoint
# folders, logs) are not handed to xarray.
postp_dir = run_dict['fates_oaat']['postp_dir']
files = sorted(
    os.path.join(postp_dir, fname)
    for fname in os.listdir(postp_dir)
    if fname.endswith('.nc')
)
if not files:
    raise FileNotFoundError(f"No post-processed .nc files found in {postp_dir}")

# Concatenate the files along a new 'ensemble' dimension, in sorted file order.
ensemble_ds = xr.open_mfdataset(files, combine='nested', concat_dim=['ensemble'], parallel=True)

In [10]:
# Rechunk to blocks of 20 along each of the gridcell, ensemble and time dimensions.
chunk_sizes = {'gridcell': 20, 'ensemble': 20, 'time': 20}
ensemble_ds = ensemble_ds.chunk(chunk_sizes)

In [11]:
# Per-variable time conversion factors and annual units, looked up in the
# conversion config for every output variable.
annual_conversion_factors = {name: var_dict[name]["time_conversion_factor"] for name in out_vars}
annual_units = {name: var_dict[name]["annual_units"] for name in out_vars}

# Apply the annual-mean calculation to every output variable of the ensemble.
annual_means = analysis.apply_to_vars(
    ensemble_ds,
    out_vars,
    func=analysis.calculate_annual_mean,
    add_sparse=True,
    conversion_factor=annual_conversion_factors,
    new_units=annual_units,
)

This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Consider scattering data ahead of time and using futures.
This may cause some slowdown.
Co

In [13]:
# Per-variable time conversion factors, looked up in the conversion config
# for every output variable.
monthly_conversion_factors = {var: var_dict[var]["time_conversion_factor"] for var in out_vars}

# Apply the monthly-mean calculation to every output variable of the ensemble.
monthly_means = analysis.apply_to_vars(
    ensemble_ds,
    out_vars,
    func=analysis.calculate_monthly_mean,
    add_sparse=True,
    conversion_factor=monthly_conversion_factors,
)

# This result was previously bound to the misspelled name "montly_means";
# keep that spelling as an alias so any cell still using it keeps working.
montly_means = monthly_means