In [1]:
import sys, os, pygmt, importlib, re, time, glob
mod_path = '/home/581/da1339/AFIM/src/AFIM/src'
sys.path.insert(0, mod_path)
from sea_ice_toolbox      import SeaIceToolbox, SeaIceToolboxManager
from datetime             import timedelta, date, datetime
from pathlib              import Path
from dask.distributed     import Client, LocalCluster
from dask.diagnostics     import ProgressBar
from pyproj               import CRS, Transformer
from collections          import defaultdict
from scipy.interpolate    import interp1d
from tqdm                 import tqdm        
import numpy                as np
import pandas               as pd
import xarray               as xr
import xesmf                as xe
import matplotlib.pyplot    as plt
import matplotlib.dates     as mdates
import matplotlib.animation as animation
from IPython.display      import Image, HTML, Video
import warnings
warnings.filterwarnings("ignore", message="Sending large graph of size", category=UserWarning, module="distributed.client")

In [None]:
import sys
import importlib
for mod in list(sys.modules):
    if mod.startswith("sea_ice_toolbox") or mod.startswith("sea_ice_"):
        del sys.modules[mod]
import sea_ice_plotter
import sea_ice_classification
import sea_ice_icebergs
import sea_ice_observations
import sea_ice_metrics
import sea_ice_toolbox
importlib.reload(sea_ice_plotter)
importlib.reload(sea_ice_classification)
importlib.reload(sea_ice_icebergs)
importlib.reload(sea_ice_observations)
importlib.reload(sea_ice_metrics)
importlib.reload(sea_ice_toolbox)
from sea_ice_toolbox import SeaIceToolbox, SeaIceToolboxManager

# APPENDIX SECTION: Justification for Binary-days

## Which is the best method for computing fast ice: binary-day or rolling-mean?

+ create a dictionary of circumpolar fast ice area time series for one simulation (``elps-min``) for a range of different ``binary-day`` and ``rolling-mean`` configurations
+ calculate relevant statistical skills for each configuration against fast ice area observations (``AF2020``)
+ come up with a metric (normalisation-score) for determining the best performing configuration

In [None]:
FIA_dict    = {}
#FIV_dict    = {}
vars_load   = ['aice','tarea','hi']
sim_name    = "elps-min"
dt0_str     = "1994-01-01"
dtN_str     = "1999-12-31"
P_log       = Path(Path.home(), "logs", "paper1_sandbox.log")
SI_tool_mgr = SeaIceToolboxManager(P_log=P_log)
SI_tools    = SI_tool_mgr.get_toolbox(sim_name = sim_name,
                                      dt0_str  = dt0_str,
                                      dtN_str  = dtN_str,
                                      ice_speed_threshold = 5e-4)
CICE_all = SI_tools.load_cice_zarr( slice_hem = False, variables = vars_load, dt0_str = dt0_str, dtN_str = dtN_str)

In [4]:
for win in np.arange(7, 20):  # window sizes: 7 to 15 inclusive
    for cnt in np.arange(win, win - 3, -1):  # max, max-1, max-2 (e.g., 7,6,5)
        bin_name   = f"elps-min_bin-day_{cnt:d}of{win:d}"
        roll_name  = f"elps-min_bin-day_{cnt:d}of{win:d}"
        yr0_strs = [f"{y}-01-01" for y in range(1994, 2024)]
        yrN_strs = [f"{y}-12-31" for y in range(1994, 2024)]
        FI_bin_yr  = []
        FI_roll_yr = []
        for yr0_str, yrN_str in zip(yr0_strs, yrN_strs):
            yr_str = f"{yr0_str[:4]}"
            SI_tools.logger.info(f"looping year {yr_str}")
            _, FI_bin, FI_roll = SI_tools.classify_fast_ice(dt0_str               = yr0_str,
                                                                 dtN_str               = yrN_str,
                                                                 bin_win_days          = win,
                                                                 bin_min_days          = cnt,
                                                                 roll_win_days         = win,
                                                                 enable_rolling_output = True)
            FI_bin_yr.append(FI_bin)
            FI_roll_yr.append(FI_roll)
        FI_bin              = xr.concat(FI_bin_yr, dim="time").chunk(SI_tools.CICE_dict["FI_chunks"])
        FI_roll             = xr.concat(FI_roll_yr, dim="time").chunk(SI_tools.CICE_dict["FI_chunks"])
        CICE_SO             = SI_tools.slice_hemisphere(CICE_all)
        FI_bin_SO           = SI_tools.slice_hemisphere(FI_bin)
        FI_roll_SO          = SI_tools.slice_hemisphere(FI_roll)
        aice_bin            = CICE_SO['aice'].where(FI_bin_SO)
        aice_roll           = CICE_SO['aice'].where(FI_roll_SO)
        tarea_bin           = CICE_SO['tarea'].where(FI_bin_SO)
        tarea_roll          = CICE_SO['tarea'].where(FI_roll_SO)
        FIA_dict[bin_name]  = SI_tools.compute_hemisphere_ice_area(aice_bin, tarea_bin, ice_area_scale=SI_tools.FIC_scale)
        FIA_dict[roll_name] = SI_tools.compute_hemisphere_ice_area(aice_roll, tarea_roll, ice_area_scale=SI_tools.FIC_scale)

2025-09-19 09:34:12,371 - INFO - looping year 1994
2025-09-19 09:34:13,050 - INFO - Reusing regrid weights: /g/data/gv90/da1339/grids/weights/map_AOM2_u_to_t_0p25_bilinear_with_mask.nc
2025-09-19 09:34:20,287 - INFO - loading model data between 1993-12-25 and 1995-01-07
2025-09-19 09:34:20,976 - INFO - Loading Zarr groups between 1993-12-25 and 1995-01-07
2025-09-19 09:34:23,349 - INFO - computing ispd_B from uvel and vvel
2025-09-19 09:34:48,138 - INFO - creating composite sea ice speed 3D array ('ispd_BT') from:
2025-09-19 09:34:48,143 - INFO -    Ta
2025-09-19 09:34:48,153 - INFO -    Tx
2025-09-19 09:34:53,235 - INFO - Hemisphere slice applied on dims: nj.
2025-09-19 09:34:53,237 - INFO - Hemisphere slice applied on dims: nj.
2025-09-19 09:34:53,238 - INFO - FAST ICE (3D DATA ARRAY) MASK CREATION:
2025-09-19 09:34:53,239 - INFO -    1. masking sea ice concentration: 0.15 < 'aice_hem'
2025-09-19 09:34:53,240 - INFO -    2. masking sea ice speed        : 0 < 'ispd_hem' <= 5.0e-04
202

KeyboardInterrupt: 

2025-09-19 10:17:03,627 - distributed.nanny - ERROR - Worker process died unexpectedly
Process Dask Worker process (from Nanny):
Traceback (most recent call last):
2025-09-19 10:17:03,633 - distributed.nanny - ERROR - Worker process died unexpectedly
  File "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.05/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.05/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.05/lib/python3.11/site-packages/distributed/nanny.py", line 985, in run
    await worker.finished()
2025-09-19 10:17:03,633 - distributed.nanny - ERROR - Worker process died unexpectedly
  File "/g/data/xp65/public/apps/med_conda/envs/analysis3-25.05/lib/python3.11/site-packages/distrib

In [None]:
# AF_clim = SI_tools.load_AF2020_FIA_summary(start="1994-01-01", end="1999-12-31")
# obs_fia = SI_tools.AF2020_clim_to_model_time( FIA_dict['elps-min_bin-day_7of7'] , AF_clim["FIA_clim"].sel(region="circumpolar"))
# FIA_stats = {}
# for key in FIA_dict.keys():
#     print(key)
#     FIA_stats[key] = SI_tools.compute_skill_statistics( FIA_dict[key], obs_fia )

# APPENDIX SECTION: Justification for Ice Speed Threshold

In [None]:
# --- CONFIG (same as before) ---
SIM = "elps-min"
ARCH = Path("/home/581/da1339/AFIM_archive") / SIM / "zarr"
MET_BIN  = "FI_BT_bin_mets.zarr"
MET_ROLL = "FI_BT_roll_mets.zarr"   # or None
VAR_MAX = "FIA_Maximum Mean"
VAR_MIN = "FIA_Minimum Mean"
T_LIST = [1.0e-6, 3.0e-6, 5.0e-6, 7.0e-6, 9.0e-6,
          1.0e-5, 3.0e-5, 5.0e-5, 7.0e-5, 9.0e-5,
          1.0e-4, 3.0e-4, 5.0e-4, 7.0e-4, 9.0e-4,
          1.0e-3, 3.0e-3, 5.0e-3, 7.0e-3, 9.0e-3,
          1.0e-2, 3.0e-2, 5.0e-2, 7.0e-2, 9.0e-2]

OBS_MIN, OBS_MAX = 221, 601
BETA_MAIN = 5.0e-4
BETA_ANNOT = None
YMIN, YMAX = 60, 1000
OUTPNG = "ispd-thresh_vs_FIA-min-max_pygmt.png"
DEBUG_LIST_FIRST = True  # set False to silence var listing

# --------------------------------

def find_threshold_dirs(arch_root: Path, metric_dirname: str):
    """Return sorted list of (t_value, metric_path) that actually exist."""
    dirs = []
    for p in arch_root.glob("ispd_thresh_*"):
        m = re.search(r"ispd_thresh_([0-9.]+e[+-]?\d+)", p.name)
        if not m:
            continue
        t = float(m.group(1))
        metric_path = p / metric_dirname
        if metric_path.is_dir():
            dirs.append((t, metric_path))
    # sort by threshold
    dirs.sort(key=lambda x: x[0])
    return dirs

def read_scalar_component(metric_path: Path, varname: str) -> float:
    """
    Read a scalar metric when the layout is:
      <...>/<metric>.zarr/<varname>/
    and <varname> is a *Zarr array store* (has .zarray, .zattrs, chunk files).
    """
    comp = metric_path / varname
    if not comp.is_dir():
        raise FileNotFoundError(f"Missing variable dir: {comp}")
    arr = zarr.open(zarr.DirectoryStore(str(comp)), mode="r")   # <-- Zarr array
    return float(np.array(arr)[...].ravel()[0])

def read_scalar_any(metric_path: Path, varname: str) -> float:
    """
    First try opening the metric root as an xarray Dataset (vars with spaces work),
    else fall back to reading the component as a raw Zarr array.
    """
    try:
        ds = xr.open_zarr(metric_path, consolidated=False)
        return float(np.array(ds[varname]).ravel()[0])
    except Exception:
        return read_scalar_component(metric_path, varname)

def collect_series(arch_root: Path, metric_dirname: str):
    t_vals, v_max, v_min, fails = [], [], [], []
    for t, mpath in find_threshold_dirs(arch_root, metric_dirname):
        try:
            vmax = read_scalar_any(mpath, VAR_MAX)
            vmin = read_scalar_any(mpath, VAR_MIN)
            t_vals.append(t); v_max.append(vmax); v_min.append(vmin)
        except Exception as e:
            fails.append((t, str(e).splitlines()[0]))
    return (np.array(t_vals), np.array(v_max), np.array(v_min), fails)

# ---- gather ----
t_bin, max_bin, min_bin, fail_bin = collect_series(ARCH, MET_BIN)

t_roll = max_roll = min_roll = np.array([])
fail_roll = []
if MET_ROLL:
    t_roll, max_roll, min_roll, fail_roll = collect_series(ARCH, MET_ROLL)

if t_bin.size == 0 and t_roll.size == 0:
    msg = ["No data read. Sample failures:"]
    msg += [f"  BIN  t={t:.1e}: {r}" for t, r in fail_bin[:5]]
    msg += [f"  ROLL t={t:.1e}: {r}" for t, r in fail_roll[:5]]
    raise RuntimeError("\n".join(msg))

In [None]:
# ---- plot ----
fig = pygmt.Figure()

# x-range from whatever we actually read
x_all = np.r_[t_bin, t_roll] if t_roll.size else t_bin
xmin, xmax = float(x_all.min()), float(x_all.max())

fig.basemap(
    region=[0, 0.001, YMIN, 800],
    projection="X16c/10c",
    frame=['WSen', 'xaf+lIce Speed Threshold (m/s)', 'yaf+lFast Ice Area (@[1\\times10^3-\\mathrm{km}^2@[)'],
)

# AF2020 bounds
fig.plot(x=[xmin, xmax], y=[OBS_MIN, OBS_MIN], pen="1p,black,4_2:2p")
fig.plot(x=[xmin, xmax], y=[OBS_MAX, OBS_MAX], pen="1p,black,4_2:2p")

# binary-day
if t_bin.size:
    fig.plot(x=t_bin, y=min_bin, pen="1.5p,royalblue", label="FI_BT_bin – FIA min")
    fig.plot(x=t_bin, y=max_bin, pen="1.5p,gold",      label="FI_BT_bin – FIA max")

# rolling
if t_roll.size:
    fig.plot(x=t_roll, y=min_roll, pen="1.5p,forestgreen", label="FI_BT_roll – FIA min")
    fig.plot(x=t_roll, y=max_roll, pen="1.5p,red",         label="FI_BT_roll – FIA max")

# verticals
#if BETA_MAIN is not None and xmin < BETA_MAIN < xmax:
fig.plot(x=[4.0e-4,4.0e-4], y=[500, 700], pen="1p,gold,-")
fig.plot(x=[1.9e-4,1.9e-4], y=[100, 300], pen="1p,royalblue,-")
fig.plot(x=[3.4e-4,3.4e-4], y=[100, 300], pen="1p,forestgreen,-")
fig.plot(x=[6.5e-4,6.5e-4], y=[500, 700], pen="1p,red,-")
fig.plot(x=[5.0e-4,5.0e-4], y=[60, 800], pen="1p,grey,-")
# if BETA_ANNOT is not None and xmin < BETA_ANNOT < xmax:
#     fig.plot(x=[BETA_ANNOT, BETA_ANNOT], y=[YMIN, YMAX], pen="0.75p,gray,.-")
#     fig.text(x=BETA_ANNOT, y=OBS_MAX+25, text=f"{BETA_ANNOT:.1e}", font="10p,gray,Helvetica-Bold", angle=75)

fig.legend(position="JTL+jTL+o0.4c/0.4c", box="+gwhite+p0.5p", transparency=10)
#fig.text(x=xmin*1.05, y=YMAX-20,
#         text="FIA min/max vs Ice Speed Threshold (1994–1999, annual extrema mean)",
#         font="12p,Helvetica-Bold")
fig.show()
fig.savefig(OUTPNG, dpi=300)
print(f"Wrote {OUTPNG}")

# Parameter Modifications to Support Antarctic Fast Ice Growth

In [None]:
sim_name    = "notensnogi"
dt0_str     = "1994-01-01"
dtN_str     = "1999-12-31"
P_log       = Path(Path.home(), "logs", "paper1_sandbox2.log")
SI_tool_mgr = SeaIceToolboxManager(P_log=P_log)
skip_names  = {"pack_ice.zarr","ice_diag_summary.csv","CMEMS-ORAS","AOM2-ERA5","AFIM_archive_status.json"}
for item in ["notensnogi"]:#Path(SI_tools.D_dict["AFIM_out"]).iterdir():
    # if item.name in skip_names:
    #     continue
    sim_name = item#.name
    SI_tools = SI_tool_mgr.get_toolbox(dt0_str  = dt0_str,
                                       dtN_str  = dtN_str,
                                       sim_name = sim_name)
    SI_tools.pygmt_fastice_panel(fast_ice_variable = "FIA",   
                                 ice_class         = "FI_BT", 
                                 class_type        = "bin",   
                                 sim_name          = sim_name,
                                 font_annot_pri    = "26p,Times-Roman",
                                 font_lab          = "28p,Times-Bold",
                                 overwrite_fig     = True,
                                 show_fig          = True)
    # SI_tools.pygmt_fastice_panel(fast_ice_variable = "FIT",   
    #                              ice_class         = "FI_BT", 
    #                              class_type        = "bin",   
    #                              sim_name          = sim_name,
    #                              font_annot_pri    = "26p,Times-Roman",
    #                              font_lab          = "28p,Times-Bold",
    #                              overwrite_fig     = True,
    #                              show_fig          = False)
    # SI_tools.pygmt_fastice_panel(fast_ice_variable = "FIS",   
    #                              ice_class         = "FI_BT", 
    #                              class_type        = "bin",   
    #                              sim_name          = sim_name,
    #                              font_annot_pri    = "26p,Times-Roman",
    #                              font_lab          = "28p,Times-Bold",
    #                              overwrite_fig     = True,
    #                              show_fig          = False)
    # SI_tools.pygmt_fastice_panel(fast_ice_variable = "FIMAR",   
    #                              ice_class         = "FI_BT", 
    #                              class_type        = "bin",   
    #                              sim_name          = sim_name,
    #                              font_annot_pri    = "26p,Times-Roman",
    #                              font_lab          = "28p,Times-Bold",
    #                              water_clr         = "#F4A582",
    #                              overwrite_fig     = True,
    #                              show_fig          = False)
    # SI_tools.pygmt_fastice_panel(fast_ice_variable = "FIMVR",   
    #                              ice_class         = "FI_BT", 
    #                              class_type        = "bin",   
    #                              sim_name          = sim_name,
    #                              font_annot_pri    = "26p,Times-Roman",
    #                              font_lab          = "28p,Times-Bold",
    #                              water_clr         = "#F4A582",
    #                              overwrite_fig     = True,
    #                              show_fig          = False)
    # SI_tools.pygmt_fastice_panel(fast_ice_variable = "FITAR",   
    #                              ice_class         = "FI_BT", 
    #                              class_type        = "bin",   
    #                              sim_name          = sim_name,
    #                              font_annot_pri    = "26p,Times-Roman",
    #                              font_lab          = "28p,Times-Bold",
    #                              water_clr         = "#F4A582",
    #                              overwrite_fig     = True,
    #                              show_fig          = False)
    # SI_tools.pygmt_fastice_panel(fast_ice_variable = "FITVR",   
    #                              ice_class         = "FI_BT", 
    #                              class_type        = "bin",   
    #                              sim_name          = sim_name,
    #                              font_annot_pri    = "26p,Times-Roman",
    #                              font_lab          = "28p,Times-Bold",
    #                              water_clr         = "#F4A582",
    #                              overwrite_fig     = True,
    #                              show_fig          = False)

# Sensitivity of Fast Ice Area

In [None]:
sim_name    = "__dummy__"
dt0_str     = "1994-01-01"
dtN_str     = "1999-12-31"
P_log       = Path(Path.home(), "logs", "paper1_sandbox2.log")
SI_tool_mgr = SeaIceToolboxManager(P_log=P_log)
sim_names   = ["Cstar-max","gi-mid","Cstar-min"]#"elps-min",
comp_name   = "Cstar-comparison"
# "FI-heavy"         : ["elps-min","FI-heavy"]
# "GI-perturbation"  : ["gi-mid","gi-mid-gi1","gi-mid-gi2","gi-mid-gi3"]
# "GI-comparison"    : ["gi-max","gi-mid","gi-min","gi-nil","gi-nil-def","notensnogi"]
# "Cstar-comparison" : "@[\\texttt{gi-mid}\\ \\mathrm{C}^{\\ast}=20@["
# "Pstar-comparison" : "@[\\texttt{gi-mid}\\ \\mathrm{P}^{\\ast}=2.75\\times10^4@["
# "ktens-comparison" : "@[\\texttt{gi-mid}\\ k_{T}=0.2@[", ["ktens-ext","ktens-max","gi-mid","ktens-min","ktens-nil"]
# "ktens-comparison" : "@[\\texttt{gi-mid}\\ \\mathrm{e}=2.0@[", ["elps-max","gi-mid","elps-mid","elps-min","elps-ext"]
FIA_dict    = {}
FIT_dict    = {}
FITVR_dict  = {}
FITAR_dict  = {}
FIMAR_dict  = {}
for sim_name in sim_names:
    SI_tools = SI_tool_mgr.get_toolbox(sim_name = sim_name, dt0_str=dt0_str, dtN_str=dtN_str)
    FIA_dict['AF2020']  = {'FIA' : SI_tools.load_AF2020_FIA_summary()['FIA_obs']}
    P_zarr = Path(SI_tools.D_ispd_thresh, "FI_BT_bin_mets.zarr")
    SI_tools.logger.info(f"opening zarr met file: {P_zarr}")
    ds                   = xr.open_zarr(P_zarr, consolidated=False)
    FIA_dict[sim_name]   = {'FIA'   : ds['FIA']}
    FIT_dict[sim_name]   = {'FIT'   : ds['FIT']}
    FITVR_dict[sim_name] = {'FITVR' : ds['FITVR']}
    FITAR_dict[sim_name] = {'FITAR' : ds['FITAR']}
    FIMAR_dict[sim_name] = {'FIMAR' : ds['FIMAR']}

In [None]:
SI_tools.pygmt_timeseries(FIA_dict,
                          comp_name   = comp_name,
                          primary_key = "FIA",
                          climatology = True,
                          ylabel      = "Fast Ice Area (@[1\\times10^3\\ \\mathrm{km}^2@[)",
                          show_fig    = True, 
                          legend_pos  = "JTL+jTL+o0.2c+w7c")
SI_tools.pygmt_timeseries(FIT_dict, 
                          comp_name   = comp_name,
                          primary_key = "FIT",
                          climatology = True,
                          ylabel      = "Fast Ice Thickness (@[\\mathrm{m}@[)",
                          ylim        = [1,7],
                          ytick_pri   = 1,
                          ytick_sec   = 0.5,
                          show_fig    = True, 
                          legend_pos  = "JTR+jTR+o0.2c+w7c")
# SI_tools.pygmt_timeseries(FITVR_dict, 
#                           comp_name   = comp_name,
#                           primary_key = "FITVR",
#                           climatology = True,
#                           clim_smooth = 15, 
#                           ylabel      = "Fast Ice Thermodynamic Volume Rate (@[\\mathrm{m}/\\mathrm{s}@[)",
#                           ylim        = [-4,4],
#                           ytick_pri   = 1,
#                           ytick_sec   = 0.5,
#                           show_fig    = True, 
#                           legend_pos  = "JBC+jBC+o0.2c+w7c")
# SI_tools.pygmt_timeseries(FITAR_dict, 
#                           comp_name   = comp_name,
#                           primary_key = "FITAR",
#                           climatology = True,
#                           clim_smooth = 15,
#                           ylabel      = "Fast Ice Thermodynamic Area Rate (@[\\mathrm{m}/\\mathrm{s}@[)",
#                           ylim        = [-1,1],
#                           ytick_pri   = 1,
#                           ytick_sec   = 0.5,
#                           show_fig    = True, 
#                           legend_pos  = "JBC+jBC+o0.2c+w7c")
# SI_tools.pygmt_timeseries(FIMAR_dict, 
#                           comp_name   = comp_name,
#                           primary_key = "FIMAR",
#                           climatology = True,
#                           ylabel      = "Fast Ice Mechanical Area Rate (@[\\mathrm{m}/\\mathrm{s}@[)",
#                           ylim        = [-1,1],
#                           ytick_pri   = 1,
#                           ytick_sec   = 0.5,
#                           show_fig    = True, 
#                           legend_pos  = "JTC+jTC+o0.2c+w13c")

# ICE SPEED BIASES

created by running script: reG_OSISAF_ispd.py

In [None]:
def load_ispd_diffs(sim_name):
    D_search = Path(Path.home(), "seaice", "OSI_SAF", "ice_drift_455m")
    F_search = "ispd_diffs_pygmt_nn_{sim}_199*.nc".format(sim=sim_name)
    P_       = sorted(D_search.rglob(F_search))
    return xr.open_mfdataset(P_, combine="by_coords")

ISP_bias               = {}
ISP_rmse               = {}
ISP_ang                = {}
ISP_cos                = {}
ds                     = load_ispd_diffs("elps-min")
ISP_bias['elps-min']   = {'ispd_bias'  : ds['d_ispd_CICE'].mean(dim=['ny','nx']).compute()}
ISP_rmse['elps-min']   = {'ispd_rmse'  : ds['RMSE_CICE']}
ISP_ang['elps-min']    = {'ang_bias'   : ds['ANG_CICE_mean']}
ISP_cos['elps-min']    = {'cos_bias'   : ds['COS_CICE_mean']}
ISP_bias['AOM2-ERA5']  = {'ispd_bias'  : ds['d_ispd_AOM2'].mean(dim=['ny','nx']).compute()}
ISP_rmse['AOM2-ERA5']  = {'ispd_rmse'  : ds['RMSE_AOM2']}
ISP_ang['AOM2-ERA5']   = {'ang_bias'   : ds['ANG_AOM2_mean']}
ISP_cos['AOM2-ERA5']   = {'cos_bias'   : ds['COS_AOM2_mean']}
ISP_bias['ORAS']       = {'ispd_bias'  : ds['d_ispd_ORAS'].mean(dim=['ny','nx']).compute()}
ISP_rmse['ORAS']       = {'ispd_rmse'  : ds['RMSE_ORAS']}
ISP_ang['ORAS']        = {'ang_bias'   : ds['ANG_ORAS_mean']}
ISP_cos['ORAS']        = {'cos_bias'   : ds['COS_ORAS_mean']}
ds                     = load_ispd_diffs("notensnogi")
ISP_bias['notensnogi'] = {'ispd_bias'  : ds['d_ispd_CICE'].mean(dim=['ny','nx']).compute()}
ISP_rmse['notensnogi'] = {'ispd_rmse'  : ds['RMSE_CICE']}
ISP_ang['notensnogi']  = {'ang_bias'   : ds['ANG_CICE_mean']}
ISP_cos['notensnogi']  = {'cos_bias'   : ds['COS_CICE_mean']}

sim_name    = "elps-min"
dt0_str     = "1994-01-01"
dtN_str     = "1999-12-31"
P_log       = Path(Path.home(), "logs", "paper1.log")
SI_tool_mgr = SeaIceToolboxManager(P_log=P_log)
sim_tools   = SI_tool_mgr.get_toolbox(sim_name, dt0_str=dt0_str, dtN_str=dtN_str)
sim_tools.pygmt_timeseries(ISP_bias,
                        comp_name    = "ISP_ispd_bias",
                        primary_key  = "ispd_bias",
                        climatology  = True,
                           clim_smooth = 15,
                        ylabel       = "Sea Ice Speed Biases (m/s)",
                        ylim         = [-.1,.1],
                        ytick_pri    = .05,
                        ytick_sec    = .05,
                           legend_pos = "JBL+jBL+o0.2c+w5c",
                          show_fig   = True)

# FAST ICE SENSITIVITY TABLE

In [5]:
ARCHIVE_DIR = Path.home() / "AFIM_archive"
dt0_str     = "1994-01-01"
dtN_str     = "1999-12-31"
P_log       = Path(Path.home(), "logs", f"paper1_sandbox_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
# Metrics variable name mapping (left = what this script looks for, right = fallback names in your zarr)
VAR_CANDIDATES = {"bias":               ["bias", "fia_bias", "FIA_bias"],
                  "corr":               ["corr", "correlation", "pearson_r"],
                  "mae":                ["mae", "mean_abs_error"],
                  "rmse":               ["rmse", "root_mean_sq_error"],
                  "fipsi":              ["fipsi", "FIPSI", "persistence_stability_index"],
                  "fipmax":             ["fipmax", "FIP_max", "persistence_max_distance", "fip_max_km"],
                  "fia_max_area":       ["fia_max_area", "FIA_max_area", "fia_area_max", "fia_max_kkm2"],
                  "fia_min_area":       ["fia_min_area", "FIA_min_area", "fia_area_min", "fia_min_kkm2"],
                  "fia_max_onset_doy":  ["fia_max_onset_doy", "FIA_max_onset_doy", "onset_max_doy", "fia_onset_max"],
                  "fia_min_onset_doy":  ["fia_min_onset_doy", "FIA_min_onset_doy", "onset_min_doy", "fia_onset_min"],}
AREA_UNITS = "m2"  # one of {"kkm2", "km2", "m2"}
# Map archive folder names -> LaTeX \gls{} macros
GLOSS_MAP = {"elps-min":  r"\gls{elpsmin}",
             "elps-ext":  r"\gls{elpsext}",
             "elps-mid":  r"\gls{elpsmid}",
             "elps-max":  r"\gls{elpsmax}",
             "FI-heavy":  r"\gls{fiheavy}",
             "ry93":      r"\gls{ry}",
             "Pstar-max": r"\gls{pstarmax}",
             "Pstar-min": r"\gls{pstarmin}",
             "Cstar-max": r"\gls{cstarmax}",
             "Cstar-min": r"\gls{cstarmin}",
             "gi-max":    r"\gls{gimax}",
             "gi-mid":    r"\gls{gimid}",
             "gi-min":    r"\gls{gimin}",
             "gi-nil":    r"\gls{ginil}",
             "gi-nil-def":r"\gls{ginildef}",
             "notensnogi":r"\gls{nofi}",
             "ktens-max": r"\gls{ktensmax}",
             "ktens-min": r"\gls{ktensmin}",
             "ktens-nil": r"\gls{ktensnil}",
             "ktens-ext": r"\gls{ktensext}"}
# Preferred row order (match paper)
PREFERRED_ORDER = ["elps-min","elps-ext","elps-mid","elps-max","FI-heavy","ry93",
                   "Pstar-max","Pstar-min","Cstar-min","Cstar-max",
                   "gi-max","gi-mid","gi-min","gi-nil","gi-nil-def","notensnogi",
                   "ktens-max","ktens-min","ktens-nil","ktens-ext"]

def _get_scalar(ds: xr.Dataset, keys: list[str], default=np.nan):
    """Return a Python float from ds[key], trying keys in order; supports DataArray or 0-d variable."""
    for k in keys:
        if k in ds:
            v = ds[k]
            try:
                # xr.DataArray(…)-> .item(); if it has dims, reduce if size==1
                return float(v.item() if v.size == 1 else v.values.squeeze().item())
            except Exception:
                pass
    return float(default)
    
def _area_to_kkm2(val: float) -> float:
    if np.isnan(val):
        return np.nan
    if AREA_UNITS == "kkm2":  # already in 1e3 km^2
        return float(val)
    if AREA_UNITS == "km2":
        return float(val) / 1e3
    if AREA_UNITS == "m2":
        return float(val) / 1e9  # m^2 -> (1e3 km^2)
    return float(val)

def format_table_latex(df: pd.DataFrame, max_width="\\linewidth", label="tab:FIA_skill_metrics_ext") -> str:
    """
    Build a LaTeX tabular string with a units row, using booktabs.
    """
    # Column order & pretty headers
    cols    = ["Experiment", "bias", "corr", "mae", "rmse", "fipsi", "fipmax"           , "fia_max_area"                 , "fia_min_area"                 , "fia_max_onset_doy"  , "fia_min_onset_doy"]
    units   = [""          , ""    , ""    ,  ""  , ""    , ""     , r"\si{\kilo\metre}", r"$1\times10^3\ \mathrm{km}^2$", r"$1\times10^3\ \mathrm{km}^2$", r"\text{onset (DOY)}", r"\text{onset (DOY)}"]
    headers = [r"\textbf{Experiment}",
               r"\textbf{\gls{bias}}",
               r"\textbf{\gls{corr}}",
               r"\textbf{\gls{mae}}",
               r"\textbf{\gls{rmse}}",
               r"\textbf{\gls{fipsi}}",
               r"\textbf{\gls{fipmax}}",
               r"\textbf{\gls{fia}$_\mathrm{max}$}",
               r"\textbf{\gls{fia}$_\mathrm{min}$}",
               r"\textbf{\gls{fia}$_\mathrm{max}$}",
               r"\textbf{\gls{fia}$_\mathrm{min}$}"]
    # Alignment: 1 'l' + 10 'r'
    align = "l" + "r"*10
    # Format numbers per your table style
    df_fmt                      = df.copy()
    df_fmt["bias"]              = df_fmt["bias"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["corr"]              = df_fmt["corr"].map(lambda x: f"{x:.3f}" if pd.notna(x) else "")
    df_fmt["mae"]               = df_fmt["mae"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["rmse"]              = df_fmt["rmse"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["fipsi"]             = df_fmt["fipsi"].map(lambda x: f"{x:.3f}" if pd.notna(x) else "")
    df_fmt["fipmax"]            = df_fmt["fipmax"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["fia_max_area"]      = df_fmt["fia_max_area"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["fia_min_area"]      = df_fmt["fia_min_area"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["fia_max_onset_doy"] = df_fmt["fia_max_onset_doy"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    df_fmt["fia_min_onset_doy"] = df_fmt["fia_min_onset_doy"].map(lambda x: f"{x:.1f}" if pd.notna(x) else "")
    # Build LaTeX lines
    header_line = " & ".join(headers) + r" \\"
    units_line  = " & ".join(units)   + r" \\"
    rows = []
    for _, row in df_fmt[cols].iterrows():
        rows.append(" & ".join(str(v) for v in row.values) + r" \\")
    body = "\n".join(rows)
    latex = fr"""\begin{adjustbox}{{max width={max_width}}}
\begin{tabular}{{{align}}}
\toprule
{header_line}
{units_line}
\midrule
{body}
\bottomrule
\end{tabular}
\end{adjustbox}"""
    return latex


In [None]:
SI_tool_mgr = SeaIceToolboxManager(P_log=P_log)
SI_tools = SI_tool_mgr.get_toolbox(dt0_str="1994-01-01", dtN_str="1999-12-31", sim_name="elps-min")
FI_bin = SI_tools.load_classified_ice(ice_type="FI")
FI_bin

2025-09-19 16:17:33,166 - INFO - log file connected: /home/581/da1339/logs/paper1_sandbox_20250919_152914.log
2025-09-19 16:17:33,174 - INFO - Dask Client Connected
  Dashboard      : /proxy/8787/status
  Threads        : 0
  Threads/Worker : []
  Total Memory   : 0.00 GB

2025-09-19 16:17:33,175 - INFO - hemisphere initialised: SH
2025-09-19 16:17:33,176 - INFO - reading /g/data/gv90/da1339/afim_output/elps-min/ice_diag.d to construct /g/data/gv90/da1339/afim_output/elps-min/ice_in_AFIM_subset_elps-min.json
2025-09-19 16:17:33,178 - INFO -  self.ice_class defined as FI_BT
2025-09-19 16:17:33,180 - INFO - --- SeaIceToolbox Summary ---
2025-09-19 16:17:33,181 - INFO - Simulation Name     : elps-min
2025-09-19 16:17:33,182 - INFO - Analysis Start Date : 1994-01-01
2025-09-19 16:17:33,183 - INFO - Analysis End Date   : 1999-12-31
2025-09-19 16:17:33,184 - INFO - Speed Threshold     : 5.0e-04 m/s
2025-09-19 16:17:33,185 - INFO - Speed Type(s)       : BT
2025-09-19 16:17:33,186 - INFO - Ice

In [None]:
# Discover simulation names (folders) and exclude non-runs
EXCLUDE   = {"AOM2-ERA5"}  # add "*.json", "*.csv" are excluded automatically below
sim_names = sorted([p.name for p in ARCHIVE_DIR.iterdir() if p.is_dir() and (p.name not in EXCLUDE) and not p.name.endswith(".zarr")])
# Instantiate the toolbox manager (creates a toolbox per sim inside the loop)
SI_tool_mgr = SeaIceToolboxManager(P_log=P_log)
rows        = []
skipped     = []
for sim_name in sim_names:
    # Build toolbox for this sim & window
    SI_tools = SI_tool_mgr.get_toolbox(dt0_str=dt0_str, dtN_str=dtN_str, sim_name=sim_name)
    # Load the precomputed metrics Zarr (defaults are fine unless you need specific grid/type)
    ds_mets = SI_tools.load_computed_metrics(ice_type="FI_BT")
    # Extract scalars (robust to different var names)
    bias              = ds_mets["Bias"].values
    corr              = ds_mets["Corr"].values
    mae               = ds_mets["MAE"].values
    rmse              = ds_mets["RMSE"].values
    print(ds_mets)
    fipsi             = ds_mets["persistence_stability_index"].values
    fipmax            = ds_mets["persistence_max_distance"].values
    fia_max_area      = _area_to_kkm2(_get_scalar(ds_mets, VAR_CANDIDATES["fia_max_area"]))
    fia_min_area      = _area_to_kkm2(_get_scalar(ds_mets, VAR_CANDIDATES["fia_min_area"]))
    fia_max_onset_doy = _get_scalar(ds_mets, VAR_CANDIDATES["fia_max_onset_doy"])
    fia_min_onset_doy = _get_scalar(ds_mets, VAR_CANDIDATES["fia_min_onset_doy"])
    # Experiment label as \gls macro if we know it; else raw folder name
    exp_label = GLOSS_MAP.get(sim_name, sim_name)
    rows.append({"Experiment"        : exp_label,
                 "sim_name"          : sim_name,  # keep raw for ordering/sorting
                 "bias"              : bias,
                 "corr"              : corr,
                 "mae"               : mae,
                 "rmse"              : rmse,
                 "fipsi"             : fipsi,
                 "fipmax"            : fipmax,
                 "fia_max_area"      : fia_max_area,
                 "fia_min_area"      : fia_min_area,
                 "fia_max_onset_doy" : fia_max_onset_doy,
                 "fia_min_onset_doy" : fia_min_onset_doy})
df = pd.DataFrame(rows)

2025-09-19 16:05:55,688 - INFO - log file connected: /home/581/da1339/logs/paper1_sandbox_20250919_152914.log
2025-09-19 16:05:55,695 - INFO - Dask Client Connected
  Dashboard      : /proxy/8787/status
  Threads        : 0
  Threads/Worker : []
  Total Memory   : 0.00 GB

2025-09-19 16:05:55,696 - INFO - hemisphere initialised: SH
2025-09-19 16:05:55,698 - INFO - reading /g/data/gv90/da1339/afim_output/Cstar-max/ice_diag.d to construct /g/data/gv90/da1339/afim_output/Cstar-max/ice_in_AFIM_subset_Cstar-max.json
2025-09-19 16:05:55,700 - INFO -  self.ice_class defined as FI_BT
2025-09-19 16:05:55,702 - INFO - --- SeaIceToolbox Summary ---
2025-09-19 16:05:55,703 - INFO - Simulation Name     : Cstar-max
2025-09-19 16:05:55,704 - INFO - Analysis Start Date : 1994-01-01
2025-09-19 16:05:55,705 - INFO - Analysis End Date   : 1999-12-31
2025-09-19 16:05:55,707 - INFO - Speed Threshold     : 5.0e-04 m/s
2025-09-19 16:05:55,708 - INFO - Speed Type(s)       : BT
2025-09-19 16:05:55,709 - INFO -

<xarray.Dataset> Size: 53MB
Dimensions:                    (time: 2191, nj: 540, ni: 1440)
Coordinates:
    TLAT                       (nj, ni) float32 3MB ...
    TLON                       (nj, ni) float32 3MB ...
    ULAT                       (nj, ni) float32 3MB ...
    ULON                       (nj, ni) float32 3MB ...
    lat                        (nj, ni) float64 6MB ...
    lon                        (nj, ni) float64 6MB ...
  * time                       (time) datetime64[ns] 18kB 1994-01-01 ... 1999...
Dimensions without coordinates: nj, ni
Data variables: (12/43)
    Bias                       float64 8B -175.9
    Corr                       float64 8B 0.89
    FIA                        (time) float32 9kB ...
    FIA_DOY Max Mean           float64 8B ...
    FIA_DOY Max Std            float64 8B ...
    FIA_DOY Min Mean           float64 8B ...
    ...                         ...
    MAE                        float64 8B 175.9
    RMSE                       float64 8B 18

KeyError: "No variable named 'persistence_stability_index'. Did you mean one of ('persistence_max_distance', 'persistence_mean_distance')?"

In [14]:
df

Unnamed: 0,Experiment,sim_name,bias,corr,mae,rmse,fipsi,fipmax,fia_max_area,fia_min_area,fia_max_onset_doy,fia_min_onset_doy
0,\gls{cstarmax},Cstar-max,,,,,,222.8778,,,,
1,\gls{cstarmin},Cstar-min,,,,,,222.8778,,,,
2,\gls{fiheavy},FI-heavy,,,,,,223.24703,,,,
3,\gls{pstarmax},Pstar-max,,,,,,222.8778,,,,
4,\gls{pstarmin},Pstar-min,,,,,,222.8778,,,,
5,\gls{elpsext},elps-ext,,,,,,222.8778,,,,
6,\gls{elpsmax},elps-max,,,,,,222.8778,,,,
7,\gls{elpsmid},elps-mid,,,,,,222.8778,,,,
8,\gls{elpsmin},elps-min,,,,,,222.8778,,,,
9,\gls{gimax},gi-max,,,,,,179.901632,,,,


In [None]:
# Order rows to match your manuscript, with any extras appended alphabetically
order     = [s for s in PREFERRED_ORDER if s in df["sim_name"].values]
extras    = sorted(set(df["sim_name"].values) - set(order))
order_idx = order + extras
df        = df.set_index("sim_name").loc[order_idx].reset_index(drop=True)
# Reorder visible columns (drop sim_name)
df = df[["Experiment","bias","corr","mae","rmse","fipsi","fipmax","fia_max_area","fia_min_area","fia_max_onset_doy","fia_min_onset_doy"]]
# Show a quick preview
print(df)
latex_tabular = format_table_latex(df, max_width="\\linewidth", label="tab:FIA_skill_metrics_ext")
print(latex_tabular)
# save to tex file
OUT_TEX = Path.home() / "paper1_FIA_skill_metrics_table.tex"
OUT_TEX.write_text(latex_tabular)
print(OUT_TEX)

2025-09-19 15:29:20,977 - INFO - log file connected: /home/581/da1339/logs/paper1_sandbox_20250919_152914.log
2025-09-19 15:29:20,986 - INFO - Dask Client Connected
  Dashboard      : /proxy/8787/status
  Threads        : 0
  Threads/Worker : []
  Total Memory   : 0.00 GB

2025-09-19 15:29:20,988 - INFO - hemisphere initialised: SH
2025-09-19 15:29:20,989 - INFO - reading /g/data/gv90/da1339/afim_output/Cstar-max/ice_diag.d to construct /g/data/gv90/da1339/afim_output/Cstar-max/ice_in_AFIM_subset_Cstar-max.json
2025-09-19 15:29:21,082 - INFO -  self.ice_class defined as FI_BT
2025-09-19 15:29:21,083 - INFO - --- SeaIceToolbox Summary ---
2025-09-19 15:29:21,084 - INFO - Simulation Name     : Cstar-max
2025-09-19 15:29:21,085 - INFO - Analysis Start Date : 1994-01-01
2025-09-19 15:29:21,086 - INFO - Analysis End Date   : 1999-12-31
2025-09-19 15:29:21,087 - INFO - Speed Threshold     : 5.0e-04 m/s
2025-09-19 15:29:21,088 - INFO - Speed Type(s)       : BT
2025-09-19 15:29:21,089 - INFO -

KeyError: 'sim_name'