### Script for calculating CESM-LENS2 climate forcing to force MPAS simulations
### date created: 28 May 2025
### author: Erin Dougherty (doughert@ucar.edu)

In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
# import netCDF4 as nc
# from netCDF4 import Dataset, num2date
#from datetime import datetime, date, timedelta
import glob
import xarray as xr

### set the path to your output directory – change this

In [2]:
# Directory that receives the delta NetCDF files written in the export cell.
# Keep the trailing '/': output file names are appended to this string by plain
# concatenation (outdir+'...'), not joined as path components.
outdir = '/glade/derecho/scratch/doughert/SOARS/'

#### set the bounds over which to apply the PGW forcing – generally the outer edges of the regional domain

In [3]:
# Latitude/longitude box used to subset every CESM field.
# Longitudes follow the -180..180 convention: the loader functions remap the
# files' 0..360 longitudes before slicing with these values.
llat, ulat = -20, 61      # lower / upper latitude bound
llon, rlon = -145, -15    # left (western) / right (eastern) longitude bound

#### define ~30-year time slices (1991–2021 and 2070–2100, both ends inclusive) for the historical and future periods (change the month depending on the event of interest)

In [4]:
def is_hist(year, month):
    """Flag the time steps that belong to the historical baseline.

    Evaluated element-wise with `&`, so `year` and `month` may be scalars or
    equally shaped arrays (e.g. the `time.year` / `time.month` accessors).

    Returns True where 1991 <= year <= 2021 and month == 9, False elsewhere.

    NOTE(review): the window is inclusive at both ends, i.e. 31 calendar years.
    NOTE(review): if the input time axis stamps each monthly mean at the end of
    its averaging interval, month == 9 would select the August mean -- confirm
    against the time coordinate of the files being read.
    """
    within_period = (year >= 1991) & (year <= 2021)
    return within_period & (month == 9)

def is_future(year, month):
    """Flag the time steps that belong to the future period.

    Evaluated element-wise with `&`, so `year` and `month` may be scalars or
    equally shaped arrays (e.g. the `time.year` / `time.month` accessors).

    Returns True where 2070 <= year <= 2100 and month == 9, False elsewhere.

    NOTE(review): the window is inclusive at both ends, i.e. 31 calendar years.
    NOTE(review): if the input time axis stamps each monthly mean at the end of
    its averaging interval, month == 9 would select the August mean -- confirm
    against the time coordinate of the files being read.
    """
    within_period = (year >= 2070) & (year <= 2100)
    return within_period & (month == 9)

#### DON'T CHANGE BELOW HERE 
#### 3D variables to perturb

In [5]:
# Root of the monthly CESM2-LE atmosphere time series; each variable lives in
# its own sub-directory directly beneath it.
cesm_dir = '/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/'

# One directory per perturbed variable (trailing '/' kept because file globs
# are appended to these strings by concatenation).
cesm_dir_t, cesm_dir_u, cesm_dir_v, cesm_dir_rh, cesm_dir_q, cesm_dir_z = (
    cesm_dir + subdir
    for subdir in ('T/', 'U/', 'V/', 'RELHUM/', 'Q/', 'Z3/')
)

In [6]:
# Substrings used to select files by name: cesm_hist / cesm_future open a file
# only if its path contains one of these tokens (the `item in name` check).
# presumably each token is the YYYYMM start of a file's date range -- verify
# against the directory listing before changing the analysis periods.
hist_dates = ['199001', '200001', '201001', '201501']
future_dates = ['206501', '207501', '208501', '209501']

#### functions for opening CESM variables over the defined time/spatial boundaries

In [7]:
def cesm_hist(var_dir, varname):
    """Open one CESM variable for the historical period, subset to the target box.

    Every file under `var_dir` whose name starts with 'b.e21.' and whose path
    contains one of the `hist_dates` tokens is opened. Its longitudes are
    remapped from 0..360 to -180..180 and sorted ascending. The field is then
    restricted to the time steps accepted by `is_hist` and to the
    llat..ulat / llon..rlon box.

    Parameters
    ----------
    var_dir : str
        Directory holding the variable's files. Must end with '/' because the
        glob pattern is appended by string concatenation.
    varname : str
        Name of the variable to extract from each file (e.g. 'T').

    Returns
    -------
    list
        One subsetted DataArray per matching file, in sorted path order.
        Empty if no file matches.
    """
    hist_var = []

    # Sorted so the list order is reproducible across runs and file systems.
    for path in sorted(glob.glob(var_dir+'b.e21.*')):
        # A file is taken at most once, even if several tokens occur in its path.
        if not any(token in path for token in hist_dates):
            continue

        field = xr.open_mfdataset(path)[varname]
        # change longitude from 0-360 to -180 to 180
        field['_longitude_adjusted'] = xr.where(field['lon'] > 180, field['lon']-360, field['lon'])
        field = (
            field.swap_dims({'lon': '_longitude_adjusted'})
            .sortby('_longitude_adjusted')
            .drop_vars('lon')
            .rename({'_longitude_adjusted': 'lon'})
        )
        # sub select the requested months/years and the lat/lon box
        field_sub = field.sel(
            time=is_hist(field['time.year'], field['time.month']),
            lat=slice(llat, ulat),
            lon=slice(llon, rlon),
        )
        hist_var.append(field_sub)

    return hist_var

In [8]:
def cesm_future(var_dir, varname):
    """Open one CESM variable for the future period, subset to the target box.

    Every file under `var_dir` whose name starts with 'b.e21.' and whose path
    contains one of the `future_dates` tokens is opened. Its longitudes are
    remapped from 0..360 to -180..180 and sorted ascending. The field is then
    restricted to the time steps accepted by `is_future` and to the
    llat..ulat / llon..rlon box.

    Parameters
    ----------
    var_dir : str
        Directory holding the variable's files. Must end with '/' because the
        glob pattern is appended by string concatenation.
    varname : str
        Name of the variable to extract from each file (e.g. 'T').

    Returns
    -------
    list
        One subsetted DataArray per matching file, in sorted path order.
        Empty if no file matches.
    """
    future_var = []

    # Sorted so the list order is reproducible across runs and file systems.
    for path in sorted(glob.glob(var_dir+'b.e21.*')):
        # A file is taken at most once, even if several tokens occur in its path.
        if not any(token in path for token in future_dates):
            continue

        field = xr.open_mfdataset(path)[varname]
        # change longitude from 0-360 to -180 to 180
        field['_longitude_adjusted'] = xr.where(field['lon'] > 180, field['lon']-360, field['lon'])
        field = (
            field.swap_dims({'lon': '_longitude_adjusted'})
            .sortby('_longitude_adjusted')
            .drop_vars('lon')
            .rename({'_longitude_adjusted': 'lon'})
        )
        # sub select the requested months/years and the lat/lon box
        field_sub = field.sel(
            time=is_future(field['time.year'], field['time.month']),
            lat=slice(llat, ulat),
            lon=slice(llon, rlon),
        )
        future_var.append(field_sub)

    return future_var


### open historical and future CESM data

In [9]:
# Open every 3D field for the historical period. Each name receives the list
# of per-file DataArrays returned by cesm_hist; calls run in the order listed.
hist_t, hist_u, hist_v, hist_q, hist_rh, hist_z = (
    cesm_hist(var_dir, varname)
    for var_dir, varname in (
        (cesm_dir_t, 'T'),
        (cesm_dir_u, 'U'),
        (cesm_dir_v, 'V'),
        (cesm_dir_q, 'Q'),
        (cesm_dir_rh, 'RELHUM'),
        (cesm_dir_z, 'Z3'),
    )
)

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the

In [10]:
# Open every 3D field for the future period. Each name receives the list of
# per-file DataArrays returned by cesm_future; calls run in the order listed.
future_t, future_u, future_v, future_q, future_rh, future_z = (
    cesm_future(var_dir, varname)
    for var_dir, varname in (
        (cesm_dir_t, 'T'),
        (cesm_dir_u, 'U'),
        (cesm_dir_v, 'V'),
        (cesm_dir_q, 'Q'),
        (cesm_dir_rh, 'RELHUM'),
        (cesm_dir_z, 'Z3'),
    )
)

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the

### set up a dask job cluster to get more memory for the computation
#### change project key and resources depending on what you need

In [11]:
import dask
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Settings applied to each PBS job that backs the dask cluster.
# NOTE(review): memory="100GB" differs from mem=250GB in resource_spec --
# confirm which figure is intended.
pbs_job_options = dict(
    cores=10,
    memory="100GB",
    project='P48500028',
    queue='casper',
    resource_spec='select=1:ncpus=10:mem=250GB',
    walltime='01:00:00',
)
cluster = PBSCluster(**pbs_job_options)

# Request 15 such jobs.
cluster.scale(jobs=15)

# Attach a client to the cluster.
client = Client(cluster)

In [18]:
# Bare expression: renders the client/cluster summary as this cell's output.
client

0,1
Client  Scheduler: tcp://128.117.208.198:32907  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/doughert/proxy/8787/status,Cluster  Workers: 75  Cores: 150  Memory: 1.36 TiB


### concat all members/time and take 30-year mean over all members

In [19]:
%%time
def _pool_hist_slices(slices):
    """Stack per-file slices along a new 'member_time' axis, load the stack
    with .compute(), and average it over 'member_time' and 'time'.

    Returns the loaded stack and its mean, in that order.
    """
    stacked = xr.concat(slices, dim='member_time').compute()
    return stacked, stacked.mean(dim=['member_time', 'time'])


hist_t_all, hist_t_mean = _pool_hist_slices(hist_t)

hist_u_all, hist_u_mean = _pool_hist_slices(hist_u)

hist_v_all, hist_v_mean = _pool_hist_slices(hist_v)

hist_q_all, hist_q_mean = _pool_hist_slices(hist_q)

hist_rh_all, hist_rh_mean = _pool_hist_slices(hist_rh)

hist_z_all, hist_z_mean = _pool_hist_slices(hist_z)

CPU times: user 1min 14s, sys: 58.9 s, total: 2min 13s
Wall time: 4min 9s


In [20]:
%%time
def _pool_future_slices(slices):
    """Stack per-file slices along a new 'member_time' axis, load the stack
    with .compute(), and average it over 'member_time' and 'time'.

    Returns the loaded stack and its mean, in that order.
    """
    stacked = xr.concat(slices, dim='member_time').compute()
    return stacked, stacked.mean(dim=['member_time', 'time'])


future_t_all, future_t_mean = _pool_future_slices(future_t)

future_u_all, future_u_mean = _pool_future_slices(future_u)

future_v_all, future_v_mean = _pool_future_slices(future_v)

future_q_all, future_q_mean = _pool_future_slices(future_q)

future_rh_all, future_rh_mean = _pool_future_slices(future_rh)

future_z_all, future_z_mean = _pool_future_slices(future_z)

CPU times: user 1min 10s, sys: 58 s, total: 2min 8s
Wall time: 3min 44s


### take future - historical ensemble mean difference

In [21]:
%%time
# Climate-change signal per variable: future mean minus historical mean.
# The pairs are listed in the same order as the names being unpacked.
delta_t_mean, delta_u_mean, delta_v_mean, delta_z_mean, delta_q_mean, delta_rh_mean = (
    future_mean - hist_mean
    for future_mean, hist_mean in (
        (future_t_mean, hist_t_mean),
        (future_u_mean, hist_u_mean),
        (future_v_mean, hist_v_mean),
        (future_z_mean, hist_z_mean),
        (future_q_mean, hist_q_mean),
        (future_rh_mean, hist_rh_mean),
    )
)

CPU times: user 8.08 ms, sys: 0 ns, total: 8.08 ms
Wall time: 14.4 ms


### export files to your output directory

In [22]:
# Write one NetCDF file per delta field into outdir.
# The file names hard-code the month and the two periods; update them by hand
# if the month or year ranges in is_hist / is_future are changed.
for delta_field, file_name in (
    (delta_t_mean, 'LENS2-Sept_2070-2100_1991-2021_delta_T.nc'),
    (delta_u_mean, 'LENS2-Sept_2070-2100_1991-2021_delta_U.nc'),
    (delta_v_mean, 'LENS2-Sept_2070-2100_1991-2021_delta_V.nc'),
    (delta_z_mean, 'LENS2-Sept_2070-2100_1991-2021_delta_Z3.nc'),
    (delta_q_mean, 'LENS2-Sept_2070-2100_1991-2021_delta_Q.nc'),
    (delta_rh_mean, 'LENS2-Sept_2070-2100_1991-2021_delta_RELHUM.nc'),
):
    delta_field.to_netcdf(outdir+file_name, compute=True)