## Generate subset of CESM daily output

This notebook creates a small, location-bounded subset of CESM daily output for two experiments and writes them to NetCDF:

- Defines variables to keep: PS, TREFHT, QREFHT, T, Z3, Q.
- Selects four locations, each given by a single latitude/longitude pair (russia, spain, australia, canada).
- Uses a preprocess step (lat_lon_coord_slice) with isca_tools.cesm.load_dataset to subset on load.
- Loads full data for each experiment:
  - Pre-industrial: e.e20.E1850TEST.f09_g17.daily_output → saved as pre_industrial.nd2
  - 2×CO₂: e.e20.E1850TEST.f09_g17.co2_2x_daily_output → saved as co2_2x.nd2


In [None]:
import pandas as pd
import xarray as xr
import numpy as np
import sys

sys.path.append('/home/users/jamd1/Isca/')
from isca_tools import cesm
from isca_tools.utils import area_weighting, lat_lon_rolling, print_ds_var_list, set_attrs
from isca_tools.papers.byrne_2021 import get_quant_ind
from isca_tools.utils.moist_physics import moist_static_energy, sphum_sat
from isca_tools.utils.constants import g, c_p, lapse_dry
from isca_tools.convection.base import lcl_metpy
from isca_tools.utils.ds_slicing import lat_lon_coord_slice
import isca_tools
import cftime
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter
import matplotlib.patches as patches
from tqdm.notebook import tqdm
import matplotlib
from matplotlib.dates import ConciseDateConverter
# Register a date converter for cftime.DatetimeNoLeap values so that time can be plotted on the x axis.
matplotlib.units.registry[cftime.DatetimeNoLeap] = ConciseDateConverter()
# plt.style.use('/home/users/jamd1/Isca/jobs/publish.mplstyle')
import os
from numba import jit
import metpy
from geocat.comp.interpolation import interp_hybrid_to_pressure
print(os.getcwd())

In [1]:
# Variables retained from each experiment's output.
var_keep = ['PS', 'TREFHT', 'QREFHT', 'T', 'Z3', 'Q']

# One row per location: (name, latitude, longitude). Keeping each location's
# values on a single row prevents the three lists from drifting out of step.
# NOTE(review): longitudes such as 355 and 290 suggest a 0-360 convention - confirm.
loc_names, lat_use, lon_use = (list(column) for column in zip(
    ('russia', 59.84, 50),
    ('spain', 40.05, 355),
    ('australia', -24.03, 133.75),
    ('canada', 55, 290),
))

In [None]:
def preprocess(ds, lat_list=lat_use, lon_list=lon_use):
    """Subset a dataset to the chosen locations; used as the `preprocess` hook on load.

    This is a thin wrapper that forwards to `lat_lon_coord_slice`.

    Args:
        ds: Dataset handed in by the loader.
        lat_list: Latitudes to keep. Defaults to the notebook-level `lat_use`
            (bound when this function is defined).
        lon_list: Longitudes to keep. Defaults to the notebook-level `lon_use`
            (bound when this function is defined).

    Returns:
        Whatever `lat_lon_coord_slice(ds, lat_list, lon_list)` returns.
        NOTE(review): presumably one entry per (lat, lon) pair along a
        'location' dimension, since that dimension is renamed later - confirm.
    """
    return lat_lon_coord_slice(ds, lat_list, lon_list)

In [None]:
# Pre-industrial experiment: open with the location preprocess hook, keep only
# the wanted variables, then label the location dimension with readable names.
exp_pi = 'e.e20.E1850TEST.f09_g17.daily_output'
ds_pi = (
    cesm.load_dataset(exp_pi, hist_file=1, preprocess=preprocess)[var_keep]
    .rename({'location': 'loc'})
    .assign_coords(loc=loc_names)  # names are matched to locations by position
)

In [None]:
# Pull the pre-industrial subset into memory.
# Author's note: this takes about 3 hours 30 minutes; it is probably just as quick
# to load all latitudes/longitudes and take the subset after loading.
ds_pi = ds_pi.load()

In [None]:
# Write the pre-industrial subset to disk.
# NOTE(review): the file is written with to_netcdf but carries a '.nd2' suffix
# rather than the usual '.nc' - confirm this is intended before changing it.
ds_pi.to_netcdf('/home/users/jamd1/Isca/jobs/cesm/raw_subset/pre_industrial.nd2')

In [None]:
# 2xCO2 experiment: open with the location preprocess hook, keep only the
# wanted variables, then label the location dimension with readable names.
exp_warm = 'e.e20.E1850TEST.f09_g17.co2_2x_daily_output'
ds_warm = (
    cesm.load_dataset(exp_warm, hist_file=1, preprocess=preprocess)[var_keep]
    .rename({'location': 'loc'})
    .assign_coords(loc=loc_names)  # names are matched to locations by position
)

In [None]:
# Pull the 2xCO2 subset into memory.
# Author's note: this takes about 3 hours 30 minutes; it is probably just as quick
# to load all latitudes/longitudes and take the subset after loading.
ds_warm = ds_warm.load()

In [None]:
# Write the 2xCO2 subset to disk.
# NOTE(review): the file is written with to_netcdf but carries a '.nd2' suffix
# rather than the usual '.nc' - confirm this is intended before changing it.
ds_warm.to_netcdf('/home/users/jamd1/Isca/jobs/cesm/raw_subset/co2_2x.nd2')