In [1]:
%load_ext autoreload
%autoreload 2

%aimport xclimate

In [6]:
"""Create plots using xclimate utilities."""

from dataclasses import dataclass
from pathlib import Path
from datetime import datetime

import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import xclimate as xclim

In [7]:
# CLIENT_CLUSTER = xclim.create_dask_cluster(
#     account="UWAS0155",
#     nworkers=10,
#     nmem="5GB",
#     walltime="00:45:00",
# )

In [8]:
@dataclass
class Variable:
    """Variable dataclass."""
    name: str
    long_name: str
    gcomp: str
    units: str
    derived: str | None

# Attribution stamp written onto each figure: notebook file name plus today's date
FNAME = "plots-for-group-meeting.ipynb"
NOW = f"{datetime.now():%Y-%m-%d}"
TAG = " ".join([FNAME, NOW])

# Root directory checked before loading CESM2 large-ensemble output
PATH_LE = Path("/glade/campaign/collections/gdex/data/d651056/CESM2-LE")

# Averaging window for time means, as month stamps and as the matching years
START_TMEAN, END_TMEAN = "1995-01", "2014-12"
TIME_SLICE = slice(START_TMEAN, END_TMEAN)
# The first four characters of each stamp are the year
YEAR_SLICE = slice(*(int(stamp[:4]) for stamp in (START_TMEAN, END_TMEAN)))

# Catalog of variables available for plotting, keyed by each entry's own name.
# Entries currently disabled (kept for reference):
#   Variable("ALBEDO", "surface albedo", "lnd", "1", "(FLDS + FSDS) / (FIRE + FSR)")
#   Variable("IVT", "integrated vapor transport", "atm", "kg/m/s", "sqrt(uIVT**2 + vIVT**2)")
#   Variable("TOTVEGC", "total vegetation carbon, excluding cpool", "lnd", "gC/m2", None)  # pool so sum instead of average
VARIABLES = {
    entry.name: entry
    for entry in (
        Variable("EFLX_LH_TOT", "total latent heat flux", "lnd", "W/m2", None),
        Variable("FSH", "sensible heat flux", "lnd", "W/m2", None),
        Variable("FCEV", "canopy evaporation", "lnd", "W/m2", None),
        Variable("FCTR", "canopy transpiration", "lnd", "W/m2", None),
        Variable("FGEV", "ground evaporation", "lnd", "W/m2", None),
        Variable("TSA", "near surface air temperature", "lnd", "K", None),
        Variable("TLAI", "total leaf area index", "lnd", "m2/m2", None),
        Variable("GPP", "gross primary productivity", "lnd", "gC/m2/s", None),
        Variable("QOVER", "surface runoff", "lnd", "mm/s", None),
        Variable("QRUNOFF", "total runoff", "lnd", "mm/s", None),
        Variable("VPD_CAN", "canopy vapor pressure deficit", "lnd", "Pa", None),
        Variable("WIND", "wind speed", "lnd", "m/s", None),
        Variable("CLDLOW", "low cloud fraction", "atm", "1", None),
        Variable("CLDTOT", "total cloud fraction", "atm", "1", None),
        Variable("PRECT", "total precipitation", "atm", "m/s", None),
        Variable("RAIN_FROM_ATM", "rain from atmosphere", "lnd", "mm/s", None),
        Variable("FLNT", "net longwave flux at top of model", "atm", "W/m2", None),
        Variable("FSNT", "net shortwave flux at top of model", "atm", "W/m2", None),
        Variable("uIVT", "zonal integrated vapor transport", "atm", "kg/m/s", None),
        Variable("vIVT", "meridional integrated vapor transport", "atm", "kg/m/s", None),
    )
}

In [None]:
print("Loading grids and gridcell area fields...")

# --- FHIST grid -------------------------------------------------------------
# Atmosphere gridcell area: first time step of the CAM AREA file, NaNs set to 0,
# divided by 1e6 and then labelled km^2 (input presumably in m^2 -- confirm).
A_FHIST = (
    xr.open_dataset(
        "/glade/campaign/collections/cmip/CMIP6/timeseries-cmip6/"
        "f.e21.FHIST_BGC.f19_f19_mg17.CMIP6-AMIP-2deg.001/atm/proc/tseries/month_1/"
        "f.e21.FHIST_BGC.f19_f19_mg17.CMIP6-AMIP-2deg.001.cam.h0.AREA.200001-201412.nc",
        decode_timedelta=False,
    )["AREA"]
    .isel(time=0)
    .fillna(0)
    / 1e6
)
A_FHIST.attrs["units"] = "km^2"

# Land area and land fraction from the first PPE member, matched onto the
# A_FHIST coordinates by nearest neighbour within a 1e-3 tolerance.
LND_GRID_FHIST = (
    xclim.load_coupled_fhist_ppe("EFLX_LH_TOT", "lnd", "month_1")[["area", "landfrac"]]
    .isel(member=0)
    .fillna(0)
    .reindex_like(A_FHIST, method="nearest", tolerance=1e-3)
)
# Land area per gridcell = gridcell area x land fraction
LA_FHIST = LND_GRID_FHIST["area"] * LND_GRID_FHIST["landfrac"]

# --- CESM2 LE grid ----------------------------------------------------------
# Same recipe as above, using the first member and first time step of the LE output.
A_LE = (
    xclim.load_cesm2le("AREA", "atm", "month_1", "h0", keep_var_only=True)["AREA"]
    .isel(member=0, time=0)
    .fillna(0)
    / 1e6
)
A_LE.attrs["units"] = "km^2"

LND_GRID_LE = (
    xclim.load_cesm2le("EFLX_LH_TOT", "lnd", "month_1", "h0")[["area", "landfrac"]]
    .isel(member=0, time=0)
    .fillna(0)
    .reindex_like(A_LE, method="nearest", tolerance=1e-3)
)
LA_LE = LND_GRID_LE["area"] * LND_GRID_LE["landfrac"]

print("Done loading grids and gridcell area fields.")

Loading grids and gridcell area fields...


In [6]:
# Per-dataset grid fields in a fixed order: FHIST first, CESM2 LE second.
# LA is zipped against the list of loaded datasets in later cells, so the
# order here has to match the order in which those datasets are appended.
AREA, LA = [A_FHIST, A_LE], [LA_FHIST, LA_LE]

In [11]:
# Select the variable and unpack the metadata used for loading and labelling
variable = VARIABLES["EFLX_LH_TOT"]
name, gcomp, units = variable.name, variable.gcomp, variable.units

# Load the FHIST PPE field for this variable and match it onto the A_FHIST
# coordinates (nearest neighbour, tolerance 1e-3)
fhist = xclim.load_coupled_fhist_ppe(
    name, gcomp, "month_1", "h0", keep_var_only=True, chunk=True
)[name].reindex_like(A_FHIST, method="nearest", tolerance=1e-3)

In [7]:
# Select the variable and unpack the metadata used for loading and labelling
variable = VARIABLES["EFLX_LH_TOT"]
name, gcomp, units = variable.name, variable.gcomp, variable.units

print(name)

# FHIST PPE is always loaded; it is matched onto the A_FHIST coordinates
fhist = xclim.load_coupled_fhist_ppe(
    name, gcomp, "month_1", "h0", keep_var_only=True, chunk=True
)[name].reindex_like(A_FHIST, method="nearest", tolerance=1e-3)
das = [fhist]
das_labels = ["FHIST PPE"]

# CESM2 LE is added only when its monthly timeseries directory exists for
# this component; le_tag is later inserted into the output file name
le, le_tag = None, ""
if (PATH_LE / f"{gcomp}/proc/tseries/month_1").exists():
    le_tag = "-CESM2LE"
    le = (
        xclim.load_cesm2le(name, gcomp, "month_1", "h0", keep_var_only=True, chunk={"time": -1})[name]
        .sel(time=slice("1950-01", None))
        .reindex_like(A_LE, method="nearest", tolerance=1e-3)
    )
    das.append(le)
    das_labels.append("CESM2 LE")

# Land-area-weighted mean over lat/lon, then the mean of each calendar year.
# LA is ordered [FHIST, LE], matching the order of das.
das = [
    field.weighted(weights).mean(dim=["lat", "lon"]).groupby("time.year").mean()
    for field, weights in zip(das, LA)
]

# Mean over the averaging-window years, used for the violin plots
das_violin = [
    series.sel(year=slice(YEAR_SLICE.start, YEAR_SLICE.stop)).mean(dim="year")
    for series in das
]

EFLX_LH_TOT


In [8]:
# ILAMB
# Load the global-mean ILAMB reference series for one variable, keyed by dataset.
ILAMB_V = "hfls"
indir = Path("/glade/work/bbuchovecky/CPL_PPE_CO2/select_parameters/ILAMB_data/processed")
ilamb_gm = {}
# Path.glob() yields files in arbitrary, filesystem-dependent order. Sorting
# keeps the dict order -- and therefore the colour and legend order when the
# datasets are plotted -- identical on every run.
for f in sorted(indir.glob("global_mean/*.nc")):
    # The dataset key is the fourth underscore-separated field of the file stem
    key = f.stem.split("_")[3]
    # The data variable is named "<variable>_<key>"; keep years 1950-2014 only
    ilamb_gm[key] = xr.open_dataset(f)[f"{ILAMB_V.lower()}_{key}"].sel(year=slice(1950, 2014))



# CLM6 PPE
# Parameter-settings table; its key/param/minmax columns are attached to the
# ensemble members below (assumes CSV row order matches member order -- confirm)
clm6_key = pd.read_csv("/glade/campaign/cgd/tss/projects/PPE/ctsm6_oaat/ctsm6_oaat_key.csv")

# Postprocessed timeseries: rename the "ens" dimension to "member" and keep 1950-2014
in_file = Path("/glade/campaign/cgd/tss/projects/PPE/ctsm6_oaat/postp/ctsm6_oaat_postp_timeseries_1901-2023.nc")
clm6_ts = xr.open_dataset(in_file).rename({"ens": "member"}).sel(year=slice(1950, 2014))

# Attach the parameter metadata along "member", then append it to the member index
member_coords = {
    column: ("member", clm6_key[column].values)
    for column in ("key", "param", "minmax")
}
clm6_ts = clm6_ts.assign_coords(**member_coords).set_index(
    member=["param", "minmax", "key"], append=True
)

# Keep only the data variables whose name starts with the selected variable name
wanted_vars = [vv for vv in clm6_ts.data_vars if str(vv).startswith(name)]
clm6_ts = clm6_ts[wanted_vars]

In [9]:
# Ensemble timeseries for the loaded datasets, with violins summarising the
# averaging-window means. The legend is added manually further down so that it
# also includes the CLM6 PPE and ILAMB entries.
fig, ax = xclim.plot.plot_ensemble_line(
    das=das,
    das_violin=das_violin,
    das_labels=das_labels,
    ylabel=f"{name} [{units}]",
    plot_dim="year",
    xlabel="Year",
    highlight_member=[0, None],
    violin_xrange=(YEAR_SLICE.start, YEAR_SLICE.stop),
    violin_settings={"x": YEAR_SLICE.stop + 2},
    add_legend=False
)



# Plot the timeseries for CLM6 PPE
# Look the variable up once instead of on every loop iteration.
clm6_gm = clm6_ts[f"{name}_global_mean"]
for i, e in enumerate(clm6_ts.member):
    # Only the first line gets a label so the legend shows a single entry
    label = "CLM6 PPE" if i == 0 else None
    clm6_gm.sel(member=e).plot(ax=ax, color="silver", alpha=0.2, lw=1, label=label, _labels=False, zorder=0)

# Plot the violin plot for CLM6 PPE
vp = xclim.plot._create_violin_plot(
    ax,
    clm6_gm.sel(year=YEAR_SLICE).mean(dim="year"),
    YEAR_SLICE.stop + 2,
    "silver",
    "silver",
)

# Highlight the CLM6 PPE default
clm6_default = clm6_gm.sel(param="default", minmax="max")
clm6_default.plot(ax=ax, color="k", lw=1, ls="-", alpha=0.7, label="CLM6 PPE 0", _labels=False, zorder=1)
ax.scatter(
    YEAR_SLICE.stop + 5,
    clm6_default.sel(year=YEAR_SLICE).mean(dim="year"),
    s=15,
    marker="o",
    color="k",
)



# Plot the ILAMB datasets
cs_ilamb = list(mcolors.TABLEAU_COLORS.keys())[2:]
for i, (key, da) in enumerate(ilamb_gm.items()):
    # Cycle through the palette so more datasets than colours cannot raise IndexError
    color = cs_ilamb[i % len(cs_ilamb)]
    ax.plot(da.year, da, c=color, ls="-", lw=2.5, label=key, zorder=100)
    # Average over the same YEAR_SLICE window as the CLM6 default marker and
    # the violins, so every marker in this column covers the same period.
    ax.scatter(
        YEAR_SLICE.stop + 5,
        da.sel(year=YEAR_SLICE).mean(dim="year"),
        marker="o",
        s=15,
        edgecolor=color,
        facecolor="none",
    )



# Add a legend
ax.legend(loc="lower left", ncols=2, fontsize=8)
# Add 1/15 of the current range as headroom at the top of the axis.
# final_ylim was previously computed but never applied.
ymin, ymax = ax.get_ylim()
final_ylim = (ymin, ymax + (ymax - ymin) / 15)
ax.set_ylim(final_ylim)


# Save figure
# Act on `fig` explicitly rather than on pyplot's implicit current figure.
fig.suptitle(f"Global Land Mean, Annual Mean Timeseries, {name} [{units}]", x=0.065, y=0.96, ha="left")
fig.text(y=1.025, x=0.975, s=TAG, fontsize=6, ha="right", va="center")
fig.tight_layout()
fig.savefig(f"a.global.lnd.{YEAR_SLICE.start}-{YEAR_SLICE.stop}.FHIST{le_tag}-CLM6IHIST-ILAMB.{name}.png", dpi=300, bbox_inches="tight")
plt.close(fig)

This may cause some slowdown.
Consider scattering data ahead of time and using futures.


In [4]:
# Scratch check: start from [0] and append None in place
l1 = [0]
l1.extend([None])

In [5]:
l1

[0, None]

In [17]:
# Select the variable and unpack the metadata used for loading and labelling
variable = VARIABLES["TSA"]
name, gcomp, units = variable.name, variable.gcomp, variable.units

print(name)

# FHIST PPE is always loaded; it is matched onto the A_FHIST coordinates
fhist = xclim.load_coupled_fhist_ppe(
    name, gcomp, "month_1", "h0", keep_var_only=True, chunk=True
)[name].reindex_like(A_FHIST, method="nearest", tolerance=1e-3)
das = [fhist]
das_labels = ["FHIST PPE"]

# CESM2 LE is added only when its monthly timeseries directory exists for
# this component; le_tag is later inserted into the output file name
le, le_tag = None, ""
if (PATH_LE / f"{gcomp}/proc/tseries/month_1").exists():
    le_tag = "-CESM2LE"
    le = (
        xclim.load_cesm2le(name, gcomp, "month_1", "h0", keep_var_only=True, chunk={"time": -1})[name]
        .sel(time=slice("1950-01", None))
        .reindex_like(A_LE, method="nearest", tolerance=1e-3)
    )
    das.append(le)
    das_labels.append("CESM2 LE")

# Land-area-weighted mean over lat/lon, then the mean of each calendar year.
# LA is ordered [FHIST, LE], matching the order of das.
das = [
    field.weighted(weights).mean(dim=["lat", "lon"]).groupby("time.year").mean()
    for field, weights in zip(das, LA)
]

# Mean over the averaging-window years, used for the violin plots
das_violin = [
    series.sel(year=slice(YEAR_SLICE.start, YEAR_SLICE.stop)).mean(dim="year")
    for series in das
]


# # CLM6 PPE
# # Load parameter settings key
# clm6_key = pd.read_csv("/glade/campaign/cgd/tss/projects/PPE/ctsm6_oaat/ctsm6_oaat_key.csv")

# # Load postprocessed timeseries dataset
# in_file = Path("/glade/campaign/cgd/tss/projects/PPE/ctsm6_oaat/postp/ctsm6_oaat_postp_timeseries_1901-2023.nc")
# clm6_ts = xr.open_dataset(in_file).rename({"ens": "member"}).sel(year=slice(1950, 2014))
# # Append key to ds
# clm6_ts = clm6_ts.assign_coords(
#     key=("member", clm6_key["key"].values),
#     param=("member", clm6_key["param"].values),
#     minmax=("member", clm6_key["minmax"].values)
# )
# clm6_ts = clm6_ts.set_index(member=["param", "minmax", "key"], append=True)
# clm6_ts = clm6_ts[[vv for vv in clm6_ts.data_vars if str(vv).startswith(name)]]


# Draw the ensemble timeseries; the helper also adds the legend here
# (add_legend=True) because no extra datasets are overlaid afterwards.
violin_start, violin_stop = YEAR_SLICE.start, YEAR_SLICE.stop
ensemble_line_kwargs = dict(
    das=das,
    das_violin=das_violin,
    das_labels=das_labels,
    ylabel=f"{name} [{units}]",
    plot_dim="year",
    xlabel="Year",
    # One entry per dataset; presumably member 0 is highlighted for the first
    # dataset and none for the second -- confirm against plot_ensemble_line
    highlight_member=[0, None],
    violin_xrange=(violin_start, violin_stop),
    violin_settings={"x": violin_stop + 2},
    add_legend=True,
)
fig, ax = xclim.plot.plot_ensemble_line(**ensemble_line_kwargs)


# # Plot the timeseries for CLM6 PPE
# for i, e in enumerate(clm6_ts.member):
#     label = None
#     if i == 0:
#         label = "CLM6 PPE"
#     clm6_ts[f"{name}_global_mean"].sel(member=e).plot(ax=ax, color="silver", alpha=0.2, lw=1, label=label, _labels=False, zorder=0)

# # Plot the violin plot for CLM6 PPE
# vp = xclim.plot._create_violin_plot(
#     ax,
#     clm6_ts[f"{name}_global_mean"].sel(year=YEAR_SLICE).mean(dim="year"),
#     YEAR_SLICE.stop + 2,
#     "silver",
#     "silver",
# )

# # Highlight the CLM6 PPE default
# clm6_ts[f"{name}_global_mean"].sel(param="default", minmax="max").plot(ax=ax, color="k", lw=1, ls="-", alpha=0.7, label="CLM6 PPE 0", _labels=False, zorder=1)
# ax.scatter(
#     YEAR_SLICE.stop + 5,
#     clm6_ts[f"{name}_global_mean"].sel(param="default", minmax="max").sel(year=YEAR_SLICE).mean(dim="year"),
#     s=15,
#     marker="o",
#     color="k",
# )


# # Add a legend
# ax.legend(loc="lower left", ncols=2, fontsize=8)
# ymin, ymax = ax.get_ylim()
# final_ylim = (ymin, ymax + (ymax - ymin) / 15)


# Title, attribution tag, and export. Layout and saving are called on `fig`
# explicitly so the result does not depend on which figure pyplot currently
# considers active.
fig.suptitle(f"Global Land Mean, Annual Mean Timeseries, {name} [{units}]", x=0.065, y=0.96, ha="left")
fig.text(y=1.025, x=0.975, s=TAG, fontsize=6, ha="right", va="center")
fig.tight_layout()
fig.savefig(f"a.global.lnd.{YEAR_SLICE.start}-{YEAR_SLICE.stop}.FHIST{le_tag}.{name}.png", dpi=300, bbox_inches="tight")
# Release the figure so repeated runs do not accumulate open figures
plt.close(fig)

TSA


This may cause some slowdown.
Consider scattering data ahead of time and using futures.


In [9]:
# Shut down the Dask cluster if one was created earlier in the session.
# The cell that assigns CLIENT_CLUSTER is commented out, so look the name up
# defensively instead of raising NameError on a fresh Restart & Run All.
client_cluster = globals().get("CLIENT_CLUSTER")
if client_cluster is not None:
    xclim.close_dask_cluster(client_cluster)