# Calculate CLM Output for Sparse Grid

In [1]:
import os
import copy
import glob

import xarray as xr
import pandas as pd
import numpy as np
import matplotlib
import dask

import matplotlib.pyplot as plt
from dask_jobqueue import PBSCluster
from dask.distributed import Client

## PBS Cluster Setup

In [2]:
# Set up a dask PBSCluster; workers are launched as PBS jobs described by the
# options below.
cluster = PBSCluster(
    cores=1,                                     # Cores per job
    memory='25GB',                               # Memory per job
    processes=1,                                 # Worker processes per job
    queue='casper',                              # PBS queue the jobs are submitted to
    local_directory='/glade/work/afoster',       # Worker-local directory (user-specific path; change for your account)
    resource_spec='select=1:ncpus=1:mem=25GB',   # PBS resource request; mirrors cores/memory above
    project='P93300041',                         # Project ID the jobs are charged to; replace with your own
    walltime='08:00:00',                         # Wall-time limit per job (HH:MM:SS)
    interface='ext',                             # Network interface used by the cluster
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35815 instead


In [3]:
# Ask the cluster to scale to 30 workers.
cluster.scale(30)



In [4]:
# Connect this notebook session to the cluster created above.
client = Client(cluster)

## Helper Functions

In [5]:
def get_ensemble(files):
    """Open a list of files as one dataset stacked along ``ensemble``.

    Args:
        files: paths of the files to open.  Each file becomes one entry
            along a new ``ensemble`` dimension, in the order given.

    Returns:
        The combined dataset, chunked along ``time``, ``ensemble`` and
        ``gridcell``.
    """
    chunk_sizes = {'time': 60, 'ensemble': 250, 'gridcell': 200}
    ensemble = xr.open_mfdataset(
        files,
        combine='nested',
        concat_dim='ensemble',
        parallel=True,
        chunks=chunk_sizes,
    )
    return ensemble

In [6]:
def annual_mean(da, cf):
    """Collapse a time series to one scaled, day-weighted value per year.

    Every time step is multiplied by the number of days in its month, the
    weighted values are summed within each calendar year, and each yearly
    sum is multiplied by ``cf``.  Whether the result is an annual total or
    an annual mean therefore depends on the ``cf`` supplied by the caller
    (e.g. ``1/365`` turns the sum into a mean for a 365-day year).

    Args:
        da: DataArray with a datetime-like ``time`` coordinate
            (presumably monthly means -- TODO confirm against the inputs).
        cf: scalar conversion factor applied to each yearly sum.

    Returns:
        DataArray grouped by ``year``, carrying the same name as ``da``.
    """
    month_lengths = da['time.daysinmonth']
    weighted = month_lengths * da
    yearly_sum = weighted.groupby('time.year').sum()

    scaled = cf * yearly_sum
    # The arithmetic above does not reliably keep the name; restore it so the
    # result can be merged into a dataset under the original variable name.
    scaled.name = da.name
    return scaled

In [7]:
def calculate_vars(ds):
    """Compute multi-year averages of annual GPP, ET, FSH and ASA.

    Each variable is passed through ``annual_mean`` with its own conversion
    factor and then averaged over the resulting ``year`` dimension.

    Args:
        ds: dataset providing ``GPP``, ``EFLX_LH_TOT``, ``FSH`` and ``ASA``
            with a ``time`` coordinate.

    Returns:
        Dataset merging the four results, each stored under its source
        variable name.
    """
    # (variable name, conversion factor handed to annual_mean)
    conversions = [
        # GPP: per-second rate -> annual total (24*60*60 seconds per day).
        # Original note says kg/m2/yr -- TODO confirm against the file units.
        ('GPP', 24*60*60),
        # ET from the latent heat flux: divide by 2.5e6 (presumably the
        # latent heat of vaporization in J/kg), then seconds per day ->
        # annual total in kg/m2/yr.
        ('EFLX_LH_TOT', 1/2.5e6*24*60*60),
        # FSH: annual mean in W/m2 (assumes 365-day years).
        # NOTE(review): the original labelled this "LH"; FSH looks like the
        # sensible heat flux, with latent heat already covered by
        # EFLX_LH_TOT above -- confirm which flux is intended.
        ('FSH', 1/365),
        # ASA: albedo, unitless annual mean (assumes 365-day years).
        ('ASA', 1/365),
    ]

    climatologies = [
        annual_mean(ds[var_name], factor).mean(dim='year')
        for var_name, factor in conversions
    ]

    return xr.merge(climatologies)

In [8]:
def calc_all_pfts(ds, pfts, grid_pfts):
    """Run ``calculate_vars`` separately on the gridcells of each PFT.

    Args:
        ds: dataset with a ``gridcell`` coordinate/dimension.
        pfts: iterable of PFT identifiers to process.
        grid_pfts: table (presumably a pandas DataFrame -- TODO confirm) with
            columns ``pft``, ``num_gridcells`` and ``gridcells``, where
            ``gridcells`` is a comma-separated string of integer gridcell ids.

    Returns:
        The per-PFT results concatenated along a ``pft`` dimension.  PFTs
        whose ``num_gridcells`` is not greater than zero are left out.

    Raises:
        IndexError: if a requested PFT has no row in ``grid_pfts``.

    Note:
        At least one PFT must have gridcells; otherwise an empty list is
        passed to ``xr.concat``.
    """
    per_pft = []
    for pft in pfts:
        rows = grid_pfts[grid_pfts.pft == pft]
        if not (rows.num_gridcells.values[0] > 0):
            # Nothing to compute for a PFT without gridcells.
            continue

        cell_ids = list(map(int, rows.gridcells.values[0].split(',')))
        in_pft = ds.gridcell.isin(cell_ids)
        subset = ds.where(in_pft, drop=True)

        # Tag the result with its PFT so the concat below can label the axis.
        per_pft.append(calculate_vars(subset).assign(pft=pft))

    return xr.concat(per_pft, dim='pft')

## Read in Ensemble

In [9]:
# Directory holding the ensemble history files (user-specific absolute path;
# edit when running under a different account or machine).
topdir = '/glade/work/afoster/FATES_calibration/FATES_SP_LH/hist'

In [10]:
# Collect the history files to open.  Only netCDF files are matched, so stray
# directory entries (subdirectories, logs, hidden files) are never handed to
# open_mfdataset.  Sorting makes the order along the 'ensemble' dimension
# deterministic (lexicographic by file name).
files = sorted(glob.glob(os.path.join(topdir, '*.nc')))
if not files:
    raise FileNotFoundError(f"no netCDF files found in {topdir}")
ds = get_ensemble(files)

In [11]:
# Multi-year averages of the annual values for the full ensemble
# (all gridcells, no PFT split).
annual_vals = calculate_vars(ds)

In [None]:
# Write the results to a netCDF file; the path is relative, so the file is
# created in the current working directory.
annual_vals.to_netcdf('LH300_sparsegrid_output.nc')