In [1]:
import os
import numpy as np
import xarray as xr
import cftime
import pandas as pd
import glob
from datetime import date
import functools
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client

import matplotlib
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point

In [2]:
# Configure a dask-jobqueue PBS cluster; no jobs are requested until the cluster is scaled.
# NOTE(review): local_directory, project and queue are site/user specific — adjust before reuse.
cluster = PBSCluster(
    cores=1,                                      # Cores per PBS job
    memory='10GB',                                # Memory per PBS job
    processes=1,                                  # Worker processes per PBS job
    queue='casper',                               # PBS queue to submit to (see /glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='/glade/work/afoster',        # Worker-local directory (user specific)
    resource_spec='select=1:ncpus=1:mem=10GB',    # PBS resource request; kept in step with cores/memory above
    project='P93300041',                          # Project/account ID charged for the jobs
    walltime='02:00:00',                          # Wall-clock limit per job
    interface='ext',                              # Network interface the workers communicate over
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 35145 instead


In [3]:
# Ask the scheduler for 10 workers, each sized by the PBSCluster settings.
cluster.scale(10)



In [4]:
# Connect a distributed client so later dask-backed xarray work runs on the cluster's workers.
client = Client(cluster)

In [5]:
def preprocess(ds, varset):
    """Subset a dataset to the requested variables.

    Intended as the ``preprocess`` hook of ``xr.open_mfdataset`` (bound with
    ``functools.partial``), so each file is trimmed before concatenation.

    Parameters
    ----------
    ds : mapping-like (e.g. xarray.Dataset)
        Object supporting list-style ``ds[[...]]`` selection.
    varset : list of str
        Names of the variables to keep.

    Returns
    -------
    The result of ``ds[varset]``.
    """
    # Use the argument, not a notebook-level global, so the function works on a
    # fresh kernel and inside dask workers regardless of cell execution order.
    return ds[varset]

In [6]:
def postprocess(top_dir, histdir, data_vars, postp_dir):
    """Condense one case's monthly ``clm2.h0`` history files into a single file.

    Reads ``<top_dir>/<histdir>/lnd/hist/*clm2.h0*.nc``, keeps ``data_vars``,
    selects the 2055-2064 window, derives ASA, GPP and Temp, attaches the
    sparse-grid coordinates plus provenance attributes, and writes the result
    to ``postp_dir``.

    Parameters
    ----------
    top_dir : str
        Archive directory containing one sub-directory per case.
    histdir : str
        Name of the case sub-directory to process.
    data_vars : list of str
        History variables to read. Must include FSR, FSDS, FATES_GPP,
        FATES_FRACTION and TSA, which the derived variables need.
    postp_dir : str
        Directory the post-processed file is written to.

    Raises
    ------
    FileNotFoundError
        If no matching history files exist for the case.
    """
    hist_path = os.path.join(top_dir, histdir, 'lnd', 'hist/')
    files = sorted(glob.glob(hist_path + "*clm2.h0*.nc"))
    if not files:
        # Fail early with the offending path instead of an opaque backend error.
        raise FileNotFoundError(f"no clm2.h0 history files found in {hist_path}")

    ds = xr.open_mfdataset(files, combine='nested', concat_dim='time',
                           preprocess=functools.partial(preprocess, varset=data_vars),
                           parallel=True, autoclose=True)
    try:
        # Re-stamp the time axis with month-start dates.
        # NOTE(review): assumes exactly 60 years (720 months) of output starting
        # in 2005; the assignment fails for any other length — confirm per run.
        ds['time'] = xr.cftime_range(str(2005), periods=12*60, freq='MS')
        ds = ds.sel(time=slice("2055-01-01", "2064-12-31"))
        # The selected decade is relabelled to start in 2005 again.
        ds['time'] = xr.cftime_range(str(2005), periods=12*10, freq='MS')

        # calculate some variables
        # Albedo is undefined where there is no incoming shortwave radiation.
        ds['ASA'] = ds.FSR/ds.FSDS.where(ds.FSDS>0)
        ds['ASA'].attrs['units'] = 'unitless'
        ds['ASA'].attrs['long_name'] = 'All sky albedo'

        ds['GPP'] = ds['FATES_GPP']*ds['FATES_FRACTION'] # kg m-2 s-1
        ds['GPP'].attrs['units'] = ds['FATES_GPP'].attrs['units']
        ds['GPP'].attrs['long_name'] = ds['FATES_GPP'].attrs['long_name']

        ds['Temp'] = ds.TSA-273.15  # K -> degrees C
        ds['Temp'].attrs['units'] = 'degrees C'
        ds['Temp'].attrs['long_name'] = ds['TSA'].attrs['long_name']

        # Copy the gridcell coordinates from the first file. Load them so the
        # values outlive the file handle, which is closed on leaving the block.
        with xr.open_dataset(files[0]) as ds0:
            for extra in ['grid1d_lat', 'grid1d_lon']:
                ds[extra] = ds0[extra].load()

        # Ensemble number: the integer after the last '_' in the first file's name.
        first_name = os.path.basename(files[0])
        key = int(first_name.split('_')[-1].split('.')[0])

        ds['ensemble'] = key
        ds.attrs['Date'] = str(date.today())
        ds.attrs['Author'] = 'afoster@ucar.edu'
        ds.attrs['Original'] = files[0]

        out_file = os.path.join(postp_dir, first_name.split('.')[0]+'.nc')
        ds.to_netcdf(out_file)
    finally:
        # Release the history file handles; this matters when looping over many cases.
        ds.close()

In [7]:
def annual_mean(da):
    """Day-weighted annual aggregate of a monthly DataArray.

    Each month is weighted by its number of days, summed within each calendar
    year, and scaled by the variable's ``cf1`` entry in the notebook-level
    ``cfs`` table (looked up by ``da.name``).

    Parameters
    ----------
    da : xarray.DataArray
        Monthly data with a ``time`` coordinate; ``da.name`` must be a key of ``cfs``.

    Returns
    -------
    xarray.DataArray
        One value per ``year``, with the same name as ``da``.

    Raises
    ------
    KeyError
        If ``da.name`` has no entry in ``cfs``.
    """
    # Look cf1 up by key rather than unpacking .values(), which depended on
    # dict insertion order and bound a cf2 that was never used here.
    cf1 = cfs[da.name]['cf1']

    days_per_month = da['time.daysinmonth']
    # NOTE(review): xarray's sum skips NaN by default, so cells that are NaN for
    # a whole year come back as 0 — confirm that is intended.
    ann_mean = cf1*(days_per_month*da).groupby('time.year').sum()
    ann_mean.name = da.name
    return ann_mean

In [8]:
def get_map(ds, da):
    """Expand sparse-grid (per-gridcell) data onto the 2-D lat/lon cluster map.

    The cluster file assigns every lat/lon cell a class number (``cclass``) and
    gives each class a centroid (``rcent_coords``, unpacked as lon, lat). Each
    class is matched to the gridcell within 0.1 degrees of its centroid, and
    ``da`` is then indexed by that per-cell gridcell number.

    Parameters
    ----------
    ds : xarray.Dataset
        Provides ``grid1d_lat`` and ``grid1d_lon``. If they carry an
        ``ensemble`` dimension, the first member's coordinates are used.
    da : xarray.DataArray
        Data with a ``gridcell`` dimension.

    Returns
    -------
    xarray.DataArray
        ``da`` on (lat, lon), computed into memory; cells with no class are NaN.

    Raises
    ------
    ValueError
        If a class present on the map does not match exactly one gridcell.
    """
    thedir = '/glade/u/home/forrest/ppe_representativeness/output_v4/'
    thefile = 'clusters.clm51_PPEn02ctsm51d021_2deg_GSWP3V1_leafbiomassesai_PPE3_hist.annual+sd.400.nc'

    # Gridcell coordinates. After concatenating ensemble members these pick up
    # an 'ensemble' dimension.
    # NOTE(review): assumes every member shares the same gridcell ordering — confirm.
    cell_lat = ds.grid1d_lat
    cell_lon = ds.grid1d_lon
    if 'ensemble' in cell_lat.dims:
        cell_lat = cell_lat.isel(ensemble=0)
    if 'ensemble' in cell_lon.dims:
        cell_lon = cell_lon.isel(ensemble=0)
    cell_lat = np.asarray(cell_lat)
    cell_lon = np.asarray(cell_lon)
    # Gridcell numbers come from the data instead of a hard-coded 400.
    cell_index = np.arange(cell_lat.size)

    # Read what is needed into memory, then release the file handle.
    with xr.open_dataset(thedir + thefile) as sg:
        class_grid = sg.cclass.values
        centroids = sg.rcent_coords.values
        grid_lat = sg.lat.load()
        grid_lon = sg.lon.load()

    out = np.full(class_grid.shape, np.nan)
    for c, (o, a) in enumerate(centroids):
        in_class = class_grid == c + 1  # class numbers are 1-based
        if not in_class.any():
            continue
        match = cell_index[(np.abs(cell_lat - a) < 0.1) &
                           (np.abs(cell_lon - o) < 0.1)]
        if match.size != 1:
            # Fail clearly instead of a NumPy shape error or a silent mis-assignment.
            raise ValueError(
                f"cluster class {c + 1} (lon={o}, lat={a}) matched "
                f"{match.size} gridcells; expected exactly 1")
        out[in_class] = match[0]

    # Integer lookup table; unmatched cells point at gridcell 0 and are masked below.
    cclass = out.copy()
    cclass[np.isnan(out)] = 0

    sgmap = xr.Dataset()
    sgmap['cclass'] = xr.DataArray(cclass.astype(int), dims=['lat', 'lon'])
    sgmap['notnan'] = xr.DataArray(~np.isnan(out), dims=['lat', 'lon'])
    sgmap['lat'] = grid_lat
    sgmap['lon'] = grid_lon

    # The 2-D indexer along 'gridcell' yields lat/lon dims; unmatched cells are masked.
    damap = da.sel(gridcell=sgmap.cclass).where(sgmap.notnan).compute()

    return damap

In [9]:
def get_ensemble(files, whittaker_ds):
    """Open the post-processed files as one dataset stacked along ``ensemble``.

    Parameters
    ----------
    files : list of str
        Paths of the per-ensemble-member netCDF files.
    whittaker_ds : xarray.Dataset
        Dataset providing the ``biome`` and ``biome_name`` variables to attach.

    Returns
    -------
    xarray.Dataset
        Lazily loaded, chunked dataset with the biome variables added.
    """
    chunking = {'time': 60, 'ensemble': 100, 'gridcell': 200}
    ensemble_ds = xr.open_mfdataset(
        files,
        combine='nested',
        concat_dim='ensemble',
        parallel=True,
        chunks=chunking,
    )

    # attach the biome classification alongside the model output
    for field in ('biome', 'biome_name'):
        ensemble_ds[field] = whittaker_ds[field]

    return ensemble_ds

In [10]:
def get_difference(ds_default, ds_update, update_name):
    """Return ``ds_update - ds_default``.

    The two datasets are first stacked along a labelled ``ensemble_type``
    dimension and then differenced.

    Parameters
    ----------
    ds_default : xarray.Dataset
        Baseline dataset (labelled ``'default'``).
    ds_update : xarray.Dataset
        Dataset to compare against the baseline.
    update_name : str
        Label given to ``ds_update`` on the ``ensemble_type`` dimension.

    Returns
    -------
    xarray.Dataset
        Update minus default, variable by variable.
    """
    labels = ['default', update_name]

    stacked = xr.concat([ds_default, ds_update], 'ensemble_type', data_vars='all')
    stacked = stacked.assign_coords(ensemble_type=("ensemble_type", labels))

    baseline = stacked.sel(ensemble_type='default')
    updated = stacked.sel(ensemble_type=update_name)
    return updated - baseline

In [11]:
def plot_map(da, title, cmap, units, vmin, vmax):
    """Draw a global Robinson-projection map of a lat/lon DataArray.

    Parameters
    ----------
    da : xarray.DataArray
        2-D field with ``lat`` and ``lon`` coordinates.
    title : str
        Left-aligned figure title.
    cmap : str or Colormap
        Colormap passed to ``pcolormesh``.
    units : str
        Colorbar label.
    vmin, vmax : float
        Colour scale limits.
    """
    data_crs = ccrs.PlateCarree()  # the data are on a regular lat/lon grid

    fig, ax = plt.subplots(figsize=(13, 6),
                           subplot_kw={'projection': ccrs.Robinson()})
    ax.set_title(title, loc='left', fontsize='large', fontweight='bold')

    # coastlines, with the ocean painted white
    ax.coastlines()
    ocean_feature = cfeature.NaturalEarthFeature('physical', 'ocean', '110m',
                                                 facecolor='white')
    ax.add_feature(ocean_feature)

    mesh = ax.pcolormesh(da.lon, da.lat, da,
                         transform=data_crs, shading='auto',
                         cmap=cmap, vmin=vmin, vmax=vmax)
    ax.set_extent([-180, 180, -56, 85], crs=data_crs)

    colorbar = fig.colorbar(mesh, ax=ax, pad=0.02, fraction=0.03,
                            orientation='horizontal')
    colorbar.set_label(units, size=12, fontweight='bold')

In [12]:
# Conversion factors, keyed by variable name.
#   cf1: multiplier annual_mean() applies to the day-weighted annual sum.
#        24*60*60 is seconds per day; 1/365 turns the day-weighted sum into an
#        annual mean (assumes 365-day years — presumably a noleap calendar; confirm).
#        NOTE(review): 1/2.5e6 looks like a latent heat of vaporization (J/kg)
#        converting W/m2 to a water flux — confirm.
#   cf2: read but not used by the code in this notebook; 'intrinsic' marks
#        variables with no numeric second factor.
cfs = {'GPP': {'cf1': 24*60*60, 'cf2': 1e-6},
       'EFLX_LH_TOT': {'cf1': 1/2.5e6*24*60*60, 'cf2': 1e-9},
       'ASA': {'cf1': 1/365, 'cf2': 'intrinsic'},
       'SOILWATER_10CM': {'cf1': 1/365, 'cf2': 1e-9},
       'FSH': {'cf1': 1/365, 'cf2': 'intrinsic'},
       'Temp': {'cf1': 1/365, 'cf2': 'intrinsic'}}
# Unit labels for the same variables.
# NOTE(review): presumably these describe values after both cf1 and cf2 are applied — confirm.
units = {'GPP': 'PgC/yr',
         'EFLX_LH_TOT': 'TtH2O/yr',
         'ASA': '0-1',
         'SOILWATER_10CM': 'TtH2O',
         'FSH': 'W/m2',
         'Temp': 'degrees C'}

In [13]:
# History variables kept from each file: passed to postprocess() and looped over
# when exporting maps. FSR, FSDS, FATES_GPP, FATES_FRACTION and TSA are required
# by the derived ASA, GPP and Temp variables.
data_vars = ['FATES_GPP', 'EFLX_LH_TOT', 'FSR', 'FSDS', 'QRUNOFF', 'FATES_FRACTION', 'SNOWDP', 
             'SOILWATER_10CM', 'TV', 'FATES_LAI', 'TWS', 'FSH', 'QVEGE', 'TG', 'TSA', 'RAIN', 'SNOW', 
            'TBOT']

In [14]:
# Archive of model cases (input) and the directory for post-processed output.
# NOTE(review): absolute, user-specific paths — adjust before reuse.
top_dir = '/glade/derecho/scratch/afoster/FATES_SP_configs/archive'
postp_dir = '/glade/work/afoster/FATES_calibration/configuration_testing/hist/'
# Every entry in the archive is treated as a case directory.
dirs = sorted(os.listdir(top_dir))

In [15]:
# Factorial design table, indexed by its 'ensemble' column.
factorial_key = pd.read_csv('factorial_test_names.csv').set_index('ensemble')

# Readable member name: the four model-option columns joined with underscores.
factorial_key['ensemble_name'] = factorial_key[
    [
        'fates_rad_model',
        'fates_maintresp_leaf_model',
        'fates_leaf_stomatal_model',
        'fates_leaf_photo_tempsens_model',
    ]
].agg('_'.join, axis=1)

# xarray view of the same table, for merging with the model output.
fact = xr.Dataset(factorial_key)

In [16]:
# Whittaker biome key; get_ensemble() reads its `biome` and `biome_name` variables.
whit = xr.open_dataset('/glade/work/afoster/FATES_calibration/CLM5PPE/pyth/whit/whitkey.nc')

In [None]:
# Post-processing is expensive and only needs to run once per set of cases;
# flip the flag to regenerate the files in postp_dir.
RUN_POSTPROCESS = False
if RUN_POSTPROCESS:
    # `hist_dir` rather than `dir`, so the builtin dir() is not shadowed.
    for hist_dir in dirs:
        postprocess(top_dir, hist_dir, data_vars, postp_dir)

In [18]:
out_dir = '/glade/work/afoster/FATES_calibration/configuration_testing/ilamb_hist'
# Post-processed files, in a deterministic order and limited to netCDF
# (os.listdir is unordered and returns every entry in the directory).
files = sorted(os.path.join(postp_dir, fname)
               for fname in os.listdir(postp_dir) if fname.endswith('.nc'))

# Flip to False to skip re-exporting the gridded files.
WRITE_ILAMB_MAPS = True
if WRITE_ILAMB_MAPS:
    for path in files:
        # Context manager releases each file handle before the next iteration;
        # get_map() returns computed (in-memory) arrays, so nothing dangles.
        with xr.open_dataset(path) as dat:
            # `mapped`, not `map`, so the builtin map() is not shadowed.
            mapped = [get_map(dat, dat[var]) for var in data_vars]
            all_vars = xr.merge(mapped)

        # Output sub-directory is named after the last two '_'-separated
        # fields of the file name.
        grid = os.path.basename(path).replace('.nc', '').split('_')[-2:]
        subdir = '_'.join(grid)
        target_dir = os.path.join(out_dir, subdir)
        os.makedirs(target_dir, exist_ok=True)  # to_netcdf does not create directories
        all_vars.to_netcdf(os.path.join(target_dir, os.path.basename(path)))

In [None]:
# Stack every post-processed member along 'ensemble', then attach the
# factorial design table.
ds = xr.merge([get_ensemble(files, whit), fact])

In [None]:
# Annual GPP averaged over all years, expanded onto the lat/lon map and wrapped
# in a Dataset so get_difference() can concatenate it.
gpp_annual = annual_mean(ds.GPP).mean(dim='year')
gpp_annual_map = get_map(ds, gpp_annual)
gpp_map = gpp_annual_map.to_dataset(name='GPP')

In [None]:
# Annual all-sky albedo averaged over all years, expanded onto the lat/lon map
# and wrapped in a Dataset so get_difference() can concatenate it.
asa_annual = annual_mean(ds.ASA).mean(dim='year')
asa_annual_map = get_map(ds, asa_annual)
asa_map = asa_annual_map.to_dataset(name='ASA')

In [None]:
# Compare ensemble member `i` against member 1 (the default configuration).
i = 16
default_name = factorial_key.loc[factorial_key.index == 1, 'ensemble_name'].values[0]
name = factorial_key.loc[factorial_key.index == i, 'ensemble_name'].values[0]

# GPP map of member i minus the default member
diff = get_difference(
    gpp_map.sel(ensemble=1),
    gpp_map.sel(ensemble=i),
    name,
)

In [None]:
# Colour limits symmetric about zero; a plain float keeps matplotlib from
# receiving a 0-d DataArray.
vlim = float(abs(diff.GPP).max()) + 0.1
# plot_map takes (da, title, cmap, units, vmin, vmax). The variable name and the
# comparison label are one title; passing them as separate positionals raised
# "multiple values for argument 'vmin'".
# Label unit: the map is per-gridcell kg m-2 s-1 scaled by seconds/day and
# summed over the days of each year, i.e. kg C/m2/yr rather than Pg.
plot_map(diff.GPP, f'GPP: {name} - {default_name}', 'bwr_r',
         'GPP Difference (kg C/m2/yr)', vmin=-1.0*vlim, vmax=vlim)

In [None]:
# Compare ensemble member `i` against member 1 (the default configuration).
i = 16
default_name = factorial_key[factorial_key.index == 1]['ensemble_name'].values[0]
name = factorial_key[factorial_key.index == i]['ensemble_name'].values[0]
diff = get_difference(asa_map.sel(ensemble=1), asa_map.sel(ensemble=i), name)
# Colour limits symmetric about zero; a plain float keeps matplotlib from
# receiving a 0-d DataArray.
vlim = float(abs(diff.ASA).max()) + 0.1
# plot_map takes (da, title, cmap, units, vmin, vmax). The variable name and the
# comparison label are one title; passing them as separate positionals raised
# "multiple values for argument 'vmin'".
plot_map(diff.ASA, f'ASA: {name} - {default_name}', 'bwr_r',
         'Albedo Difference (0-1)', vmin=-1.0*vlim, vmax=vlim)