### Script for adding CESM-LENS2 perturbation for surface fields to ERA5 data
### date created: 28 May 2025
### author: Erin Dougherty (doughert@ucar.edu) 

In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
# import netCDF4 as nc
# from netCDF4 import Dataset, num2date
#from datetime import datetime, date, timedelta
import glob
import xarray as xr

#### Set the path to the output directory (change this to your own directory)

In [2]:
# Directory that receives the exported NetCDF files. Keep the trailing slash:
# the export cell appends file names to this string directly (outdir + '<name>.nc').
outdir = '/glade/derecho/scratch/doughert/SOARS/'

#### Set the latitude/longitude bounds over which to apply the PGW forcing – generally the outer edges of the regional domain

In [3]:
# Lower and upper latitude limits, used as lat=slice(llat, ulat) when subsetting
llat = -20
ulat = 61
# Left and right longitude limits, used as lon=slice(llon, rlon). Longitudes are
# converted from 0-360 to -180-180 before subsetting, so western values are negative.
llon = -145
rlon = -15

#### Define the ~30-year time slices for the historical (1991–2021) and future (2070–2100) periods, both inclusive (change the month to match the event of interest)

In [4]:
def is_hist(year, month):
    """Flag time steps that belong to the historical slice.

    Works element-wise (via ``&``), so ``year`` and ``month`` may be scalars or
    arrays of matching shape.

    Returns True where the year lies in 1991-2021 (inclusive) and the month is 9.
    """
    in_period = (year >= 1991) & (year <= 2021)
    in_month = (month == 9)
    return in_period & in_month

def is_future(year, month):
    """Flag time steps that belong to the future slice.

    Works element-wise (via ``&``), so ``year`` and ``month`` may be scalars or
    arrays of matching shape.

    Returns True where the year lies in 2070-2100 (inclusive) and the month is 9.
    """
    in_period = (year >= 2070) & (year <= 2100)
    in_month = (month == 9)
    return in_period & in_month

### DON'T CHANGE BELOW HERE
#### surface CESM variables

In [5]:
# Root directories of the CESM2-LE time series (atm and lnd components)
cesm_dir = '/glade/campaign/cgd/cesm/CESM2-LE/timeseries/atm/proc/tseries/month_1/'
cesm_land = '/glade/campaign/cgd/cesm/CESM2-LE/timeseries/lnd/proc/tseries/month_1/'

### 2D variables: one sub-directory per variable, each ending with a slash
cesm_dir_sst = f'{cesm_dir}SST/'
cesm_dir_ts = f'{cesm_dir}TS/'
cesm_dir_tsoil = f'{cesm_land}TSOI/'  # the only variable read from the land root
cesm_dir_ps = f'{cesm_dir}PS/'
cesm_dir_psl = f'{cesm_dir}PSL/'
cesm_dir_phi = f'{cesm_dir}PHIS/'
cesm_dir_ice = f'{cesm_dir}ICEFRAC/'
cesm_dir_snow = f'{cesm_dir}SNOWHLND/'

In [6]:
# Date tags matched as substrings of the CESM file paths to decide which files
# are opened for each period. Presumably each tag is the YYYYMM start date of a
# time-series file overlapping the period — verify against the directory listing.
hist_dates = ['199001', '200001', '201001', '201501']
future_dates = ['206501', '207501', '208501', '209501']

#### Functions for opening CESM variables over the defined time period and spatial bounds

In [7]:
def cesm_hist(var_dir, varname):
    """Open one CESM variable for the historical period over the regional domain.

    Every path matching ``var_dir + 'b.e21.*'`` that contains one of the
    ``hist_dates`` tags is opened lazily. Its longitudes are converted from
    0-360 to -180-180 and sorted, and the field is subset to the time steps
    accepted by ``is_hist`` and to the ``llat``/``ulat``/``llon``/``rlon`` box.

    Parameters
    ----------
    var_dir : str
        Directory holding the files of one variable. It must end with a slash,
        because the glob pattern is appended to it directly.
    varname : str
        Name of the variable to extract from each file.

    Returns
    -------
    list of xarray.DataArray
        One subset per matching file, in sorted path order; empty when no file
        matches.

    NOTE(review): CESM monthly history files conventionally stamp ``time`` at
    the END of the averaging interval, in which case ``month == 9`` would select
    August means. Confirm against ``time_bnds`` that the intended month is used.
    """
    hist_var = []

    # sorted() makes the member/file order reproducible between runs
    for path in sorted(glob.glob(var_dir + 'b.e21.*')):
        for date_tag in hist_dates:
            if date_tag not in path:
                continue
            field = xr.open_mfdataset(path)[varname]
            # change longitude from 0-360 to -180 to 180, then restore ascending order
            lon_180 = xr.where(field['lon'] > 180, field['lon'] - 360, field['lon'])
            field = field.assign_coords(lon=lon_180).sortby('lon')
            # sub select the period/month of interest and the regional box
            in_period = is_hist(field['time.year'], field['time.month'])
            field_sub = field.sel(time=in_period, lat=slice(llat, ulat), lon=slice(llon, rlon))
            hist_var.append(field_sub)

    return hist_var

In [8]:
def cesm_future(var_dir, varname):
    """Open one CESM variable for the future period over the regional domain.

    Every path matching ``var_dir + 'b.e21.*'`` that contains one of the
    ``future_dates`` tags is opened lazily. Its longitudes are converted from
    0-360 to -180-180 and sorted, and the field is subset to the time steps
    accepted by ``is_future`` and to the ``llat``/``ulat``/``llon``/``rlon`` box.

    Parameters
    ----------
    var_dir : str
        Directory holding the files of one variable. It must end with a slash,
        because the glob pattern is appended to it directly.
    varname : str
        Name of the variable to extract from each file.

    Returns
    -------
    list of xarray.DataArray
        One subset per matching file, in sorted path order; empty when no file
        matches.

    NOTE(review): CESM monthly history files conventionally stamp ``time`` at
    the END of the averaging interval, in which case ``month == 9`` would select
    August means. Confirm against ``time_bnds`` that the intended month is used.
    """
    future_var = []

    # sorted() makes the member/file order reproducible between runs
    for path in sorted(glob.glob(var_dir + 'b.e21.*')):
        for date_tag in future_dates:
            if date_tag not in path:
                continue
            field = xr.open_mfdataset(path)[varname]
            # change longitude from 0-360 to -180 to 180, then restore ascending order
            lon_180 = xr.where(field['lon'] > 180, field['lon'] - 360, field['lon'])
            field = field.assign_coords(lon=lon_180).sortby('lon')
            # sub select the period/month of interest and the regional box
            in_period = is_future(field['time.year'], field['time.month'])
            field_sub = field.sel(time=in_period, lat=slice(llat, ulat), lon=slice(llon, rlon))
            future_var.append(field_sub)

    return future_var

### open historical and future CESM data

In [9]:
%%time
# Open every surface variable for the historical period. Each call returns a
# list of DataArrays (one per matching file) already subset in time and space.
hist_sst = cesm_hist(cesm_dir_sst, 'SST')
hist_ts = cesm_hist(cesm_dir_ts, 'TS')
hist_tsoil = cesm_hist(cesm_dir_tsoil, 'TSOI')
hist_ps = cesm_hist(cesm_dir_ps, 'PS')
hist_psl = cesm_hist(cesm_dir_psl, 'PSL')
hist_phi = cesm_hist(cesm_dir_phi, 'PHIS')
hist_ice = cesm_hist(cesm_dir_ice, 'ICEFRAC')
hist_snow = cesm_hist(cesm_dir_snow, 'SNOWHLND')

CPU times: user 4min 56s, sys: 7.59 s, total: 5min 3s
Wall time: 8min 32s


In [10]:
%%time
# Open every surface variable for the future period. Each call returns a
# list of DataArrays (one per matching file) already subset in time and space.
future_sst = cesm_future(cesm_dir_sst, 'SST')
future_ts = cesm_future(cesm_dir_ts, 'TS')
future_tsoil = cesm_future(cesm_dir_tsoil, 'TSOI')
future_ps = cesm_future(cesm_dir_ps, 'PS')
future_psl = cesm_future(cesm_dir_psl, 'PSL')
future_phi = cesm_future(cesm_dir_phi, 'PHIS')
future_ice = cesm_future(cesm_dir_ice, 'ICEFRAC')
future_snow = cesm_future(cesm_dir_snow, 'SNOWHLND')

CPU times: user 5min 1s, sys: 7.27 s, total: 5min 8s
Wall time: 8min 18s


### Set up a Dask job cluster to get more memory
#### Change the project key and the requested resources to suit your needs

In [11]:
import dask
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Settings for each PBS job submitted by the cluster.
# NOTE(review): memory is "100GB" while resource_spec requests mem=250GB —
# confirm which value is intended.
# NOTE(review): this cluster is created after the CESM files have been opened;
# consider starting it before the data-loading cells.
pbs_job_options = dict(
    cores=10,
    memory="100GB",
    project='P48500028',
    queue='casper',
    resource_spec='select=1:ncpus=10:mem=250GB',
    walltime='01:00:00',
)
cluster = PBSCluster(**pbs_job_options)

# request 15 jobs with the settings above
cluster.scale(jobs=15)

# attach a client to the cluster
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 42183 instead


In [12]:
# Display the client summary (scheduler address, dashboard link, workers/cores/memory)
client

0,1
Client  Scheduler: tcp://128.117.208.198:44793  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/doughert/proxy/42183/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


### Concatenate all members/time slices and take the mean over the whole period and all members

In [25]:
%%time
hist_sst_all = xr.concat(hist_sst, dim='member_time').compute()
hist_sst_mean = hist_sst_all.mean(dim=['member_time', 'time'])

hist_ts_all = xr.concat(hist_ts, dim='member_time').compute()
hist_ts_mean = hist_ts_all.mean(dim=['member_time', 'time'])

hist_tsoil_all = xr.concat(hist_tsoil, dim='member_time').compute()
hist_tsoil_mean = hist_tsoil_all.mean(dim=['member_time', 'time'])

hist_ps_all = xr.concat(hist_ps, dim='member_time').compute()
hist_ps_mean = hist_ps_all.mean(dim=['member_time', 'time'])

hist_psl_all = xr.concat(hist_psl, dim='member_time').compute()
hist_psl_mean = hist_psl_all.mean(dim=['member_time', 'time'])

hist_phi_all = xr.concat(hist_phi, dim='member_time').compute()
hist_phi_mean = hist_phi_all.mean(dim=['member_time', 'time'])

hist_ice_all = xr.concat(hist_ice, dim='member_time').compute()
hist_ice_mean = hist_ice_all.mean(dim=['member_time', 'time'])

hist_snow_all = xr.concat(hist_snow, dim='member_time').compute()
hist_snow_mean = hist_snow_all.mean(dim=['member_time', 'time'])

CPU times: user 30.8 s, sys: 12.1 s, total: 42.9 s
Wall time: 1min 9s


In [26]:
%%time
future_sst_all = xr.concat(future_sst, dim='member_time').compute()
future_sst_mean = future_sst_all.mean(dim=['member_time', 'time'])

future_ts_all = xr.concat(future_ts, dim='member_time').compute()
future_ts_mean = future_ts_all.mean(dim=['member_time', 'time'])

future_tsoil_all = xr.concat(future_tsoil, dim='member_time').compute()
future_tsoil_mean = future_tsoil_all.mean(dim=['member_time', 'time'])

future_ps_all = xr.concat(future_ps, dim='member_time').compute()
future_ps_mean = future_ps_all.mean(dim=['member_time', 'time'])

future_psl_all = xr.concat(future_psl, dim='member_time').compute()
future_psl_mean = future_psl_all.mean(dim=['member_time', 'time'])

future_phi_all = xr.concat(future_phi, dim='member_time').compute()
future_phi_mean = future_phi_all.mean(dim=['member_time', 'time'])

future_ice_all = xr.concat(future_ice, dim='member_time').compute()
future_ice_mean = future_ice_all.mean(dim=['member_time', 'time'])

future_snow_all = xr.concat(future_snow, dim='member_time').compute()
future_snow_mean = future_snow_all.mean(dim=['member_time', 'time'])

CPU times: user 31.3 s, sys: 12.1 s, total: 43.4 s
Wall time: 1min 8s


### take future - historical ensemble mean difference

In [27]:
# Change signal for each variable: future mean minus historical mean
delta_sst_mean = future_sst_mean - hist_sst_mean
delta_ts_mean = future_ts_mean - hist_ts_mean
delta_tsoil_mean = future_tsoil_mean - hist_tsoil_mean
delta_ps_mean = future_ps_mean - hist_ps_mean
delta_psl_mean = future_psl_mean - hist_psl_mean
delta_phi_mean = future_phi_mean - hist_phi_mean
delta_ice_mean = future_ice_mean - hist_ice_mean
delta_snow_mean = future_snow_mean - hist_snow_mean

### export files to your output directory

In [29]:
# Write each delta field to its own NetCDF file in the output directory.
# The keys are the variable tags that end each file name.
# NOTE: the month and year ranges in the file name are hard-coded; update them
# by hand if the period/month in is_hist / is_future is changed.
delta_fields = {
    'SST': delta_sst_mean,
    'TS': delta_ts_mean,
    'TSOI': delta_tsoil_mean,
    'PS': delta_ps_mean,
    'PSL': delta_psl_mean,
    'phi': delta_phi_mean,
    'ICEFRAC': delta_ice_mean,
    'SNOWHLND': delta_snow_mean,
}
for tag, delta in delta_fields.items():
    delta.to_netcdf(outdir+'LENS2-Sept_2070-2100_1991-2021_delta_'+tag+'.nc', compute=True)