# CESM Historical Runs 1901-2014

# Setup

## Packages

In [1]:
%load_ext watermark

import numpy as np
import xarray as xr
import pandas as pd
import xcdat as xc
import intake

import matplotlib as mpl
import matplotlib.pyplot as plt

import cartopy
import cartopy.crs as ccrs
from cartopy.util import add_cyclic_point
import regionmask

import cmocean.cm as cmo

%watermark -iv

regionmask: 0.11.0
cmocean   : 3.0.3
xcdat     : 0.6.0
intake    : 0.0.0
xarray    : 2023.10.1
sys       : 3.11.6 | packaged by conda-forge | (main, Oct  3 2023, 10:40:35) [GCC 12.3.0]
pandas    : 2.2.0
numpy     : 1.24.4
matplotlib: 3.8.0
cartopy   : 0.22.0



## Figure settings

In [2]:
# Default resolution for every figure created in this notebook.
mpl.rcParams['figure.dpi'] = 120
# Inline figures are embedded as PNG only.
%config InlineBackend.figure_formats = ["png"]  # raster, transparent background -> best for publication of large, dense figures

In [3]:
def format_ds_coords(ds):
    """
    Add X and Y bounds to a dataset and swap its longitude axis to (-180, 180).

    Parameters
    ----------
    ds
        Dataset exposing the `bounds` accessor used below (presumably an
        xarray.Dataset with xcdat imported -- confirm against callers).

    Returns
    -------
    The object returned by `xc.swap_lon_axis` once bounds have been added
    for both axes.
    """
    # Bounds are added one axis at a time, X first and then Y.
    for axis in ('X', 'Y'):
        ds = ds.bounds.add_bounds(axis)
    return xc.swap_lon_axis(ds, (-180, 180))

def select_sites_from_gridded_data(xr_grid_data, df_site_data):
    """
    Extract the grid box nearest to each site from gridded data.

    Parameters
    ----------
    xr_grid_data
        Gridded xarray object whose `lat` and `lon` coordinates support
        nearest-neighbour selection with `.sel(..., method='nearest')`.
    df_site_data
        pandas DataFrame with one row per site and `lat` / `lon` columns.

    Returns
    -------
    The per-site selections concatenated along a new `site` dimension. The
    `site` coordinate runs from 0 to nsite-1, following the row order of
    `df_site_data`.
    """
    # Walk the coordinate columns positionally. Relying on the DataFrame's
    # index labels (as `iterrows()` yields them) would mis-place or overflow
    # the per-site slots whenever the table has been filtered or re-indexed.
    site_selections = [
        xr_grid_data.sel(lat=lat, lon=lon, method='nearest')
        for lat, lon in zip(df_site_data['lat'], df_site_data['lon'])
    ]

    xr_site_data = xr.concat(site_selections, dim='site')
    xr_site_data = xr_site_data.assign_coords({'site': np.arange(len(site_selections))})

    return xr_site_data

In [4]:
# Load tree ring coordinates
# Load gridded variable
# Select sites from gridded variable -> convert to xarray
# Add relevant metadata
# Save to NetCDF + check file size

# Generate index arrays for selecting ang/gym sites

In [5]:
# Read the tree ring site table (first three columns only), rename the
# classification column to `isAng`, and replace its missing entries with 2.
# Tree type classification: 0=gym, 1=ang, 2=none
tree_ring_coords = (
    pd.read_csv('./latlon_gym_ang.csv', usecols=[0, 1, 2])
    .rename(columns={'gym=0/ang=1': 'isAng'})
    .replace({'isAng': np.nan}, 2)
)

In [20]:
# Global attributes attached to every per-site output dataset. Apart from
# 'description', the values are (per that description) copied from the
# original history file, so they are kept verbatim.
global_metadata = {
    'description': 'Gridded to (time,lat,lon,pft) then only grid boxes corresponding to tree ring sites were selected. All global attributes below are copied from the original history file.',
    'title': 'CLM History file information',
    'comment': 'NOTE: None of the variables are weighted by land fraction!',
    'Conventions': 'CF-1.0',
    'history': 'created on 04/27/21 13:27:32',
    'source': 'Community Terrestrial Systems Model',
    'hostname': 'cheyenne',
    'username': 'oleson',
    'version': 'cesm2_3_alpha02c',
    'revision_id': '$Id: histFileMod.F90 42903 2012-12-21 15:32:10Z muszala $',
    'case_title': 'UNSET',
    'case_id': 'clm50_cesm23a02cPPEn08ctsm51d030_1deg_GSWP3V1_hist',
    'Surface_dataset': 'surfdata_0.9x1.25_hist_78pfts_CMIP6_simyr1850_c190214.nc',
    'Initial_conditions_dataset': 'finidat_interp_dest.nc',
    'PFT_physiological_constants_dataset': 'clm50_params.c210217.nc',
    'time_period_freq': 'month_1',
    'Time_constant_3Dvars_filename': './clm50_cesm23a02cPPEn08ctsm51d030_1deg_GSWP3V1_hist.clm2.h3.1850-01-01-00000.nc',
    'Time_constant_3Dvars': 'ZSOI:DZSOI:WATSAT:SUCSAT:BSW:HKSAT:ZLAKE:DZLAKE:PCT_SAND:PCT_CLAY',
}

# Per-variable attributes, keyed by variable name. The entry matching each
# processed variable is attached to the saved data variable, so every name
# listed in `variables` needs an entry here.
variable_metadata = {
    'GSSUNLN': {
        'long_name': 'sunlit leaf stomatal conductance at local noon',
        'units': 'umol H20/m2/s',
        'cell_methods': 'time: mean',
    },
    'GPP': {
        'long_name': 'gross primary production',
        'units': 'gC/m^2/s',
        'cell_methods': 'time: mean',
    },
    'FCTR': {
        'long_name': 'canopy transpiration',
        'units': 'W/m^2',
        'cell_methods': 'time: mean',
    }
}

# Run configuration: root directory for the gridded inputs and per-site
# outputs, the case name embedded in every file name, and the variables to
# process (only GSSUNLN at present).
directory = '/glade/work/bbuchovecky/WUE_analysis'
casename = 'clm50_cesm23a02cPPEn08ctsm51d030_1deg_GSWP3V1_hist'
variables = ['GSSUNLN']

# For each requested variable: load its gridded file, keep only the grid boxes
# nearest to the tree ring sites, attach metadata, and write a per-site NetCDF.
for this_var in variables:
    gridded_path = f'{directory}/{casename}.clm2.h1.{this_var}.185001-201412_gridded.nc'
    sites_path = f'{directory}/sites_from_gridded/{casename}.clm2.h1.{this_var}.185001-201412_sites.nc'

    # Open the input inside a context manager so its file handle is released
    # after each variable instead of staying open for the rest of the session.
    # Everything that may read from the file, including the write, happens
    # inside the block.
    with xc.open_dataset(gridded_path) as gridded_ds:
        # Format coordinates of the gridded data
        this_data = format_ds_coords(gridded_ds)

        # NOTE(review): only the first 50 time steps are kept, although the
        # output file name advertises 185001-201412. This looks like a
        # leftover from testing the output file size -- confirm before
        # relying on the saved files.
        this_data = this_data[this_var].isel(time=slice(None, 50))

        # Select sites from gridded variable
        this_site_data = select_sites_from_gridded_data(this_data, tree_ring_coords)

        # Convert to dataset
        this_site_data = this_site_data.to_dataset(name=this_var)

        # Format coordinates
        this_site_data = this_site_data.rename({'vegtype': 'pft', 'vegtype_name': 'pft_name'})
        this_site_data['pft'].attrs = {'long_name': 'plant functional type'}
        this_site_data['pft_name'].attrs = {'long_name': 'plant functional type name'}
        this_site_data['site'].attrs = {'long_name': 'tree ring site'}

        # Add metadata
        this_site_data.attrs = global_metadata
        this_site_data[this_var].attrs = variable_metadata[this_var]

        # Save to NetCDF
        this_site_data.to_netcdf(sites_path)

In [23]:
# Display the per-site dataset left over from the last processed variable.
this_site_data

In [3]:
# Re-open the per-site GSSUNLN file from disk (same path as the one written by
# the processing loop) to inspect what was saved.
xr.open_dataset('/glade/work/bbuchovecky/WUE_analysis/sites_from_gridded/clm50_cesm23a02cPPEn08ctsm51d030_1deg_GSWP3V1_hist.clm2.h1.GSSUNLN.185001-201412_sites.nc')

In [6]:
# Open a single forcing file for inspection; judging by its path this is the
# GSWP3 0.5-degree DATM7 TPQWL forcing for 1901-01 -- confirm against the file.
ds = xr.open_dataset('/glade/campaign/cesm/cesmdata/cseg/inputdata/atm/datm7/atm_forcing.datm7.GSWP3.0.5d.v1.c170516/TPHWL/clmforc.GSWP3.c2011.0.5x0.5.TPQWL.1901-01.nc')

In [7]:
# Display the forcing dataset opened above.
ds

In [11]:
import cftime 

In [16]:
# Inspect the CO2 forcing file. Time decoding is switched off, so the time
# coordinate is shown as raw numbers rather than dates.
xr.open_dataset('/glade/campaign/cesm/cesmdata/cseg/inputdata/atm/datm7/CO2/fco2_datm_global_simyr_1750-2014_CMIP6_c180929.nc', decode_times=False)