In [7]:
import os, sys, glob, subprocess, shlex
from pathlib import Path

In [17]:
INIT_CANDIDATES = [
    "/etc/profile.d/modules.sh",
    "/usr/share/Modules/init/bash",
    "/etc/profile.d/lmod.sh",
]
# Load NCO module into Python process' env
def import_module_env(mod="nco"):
    last_err = None
    for init in INIT_CANDIDATES:
        print(init)
        try:
            # Start a login-like shell, source modules init, load module, print env as NUL-separated pairs
            out = subprocess.check_output(
                ["bash", "-lc", f"source {init} 2>/dev/null || true; "
                                f"type module >/dev/null 2>&1 || return 127; "
                                f"module load {mod}; env -0"],
                stderr=subprocess.STDOUT
            )
            # Parse env -0
            for entry in out.split(b"\x00"):
                if not entry:
                    continue
                k, _, v = entry.partition(b"=")
                os.environ[k.decode()] = v.decode()
            return
        except subprocess.CalledProcessError as e:
            last_err = e
            continue
    raise RuntimeError("Failed to import module environment; adjust INIT_CANDIDATES") from last_err

# Use it once:
import_module_env("nco")

# Now plain calls work:
import subprocess
subprocess.run(["which", "ncrcat"], check=True)
subprocess.run(["ncrcat", "--version"], check=True)

/etc/profile.d/modules.sh
/data/apps/extern/nco/5.0.8/bin/ncrcat


NCO netCDF Operators version 5.0.8-alpha01 "QuickBlade" built by apate168 on login02 at May 20 2022 16:59:02
ncrcat version 5.0.8-alpha01


CompletedProcess(args=['ncrcat', '--version'], returncode=0)

In [18]:
# --------------------------
# CONFIGURE PATHS
# --------------------------
HOURLY_DIR = Path("/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly")      # directory with hourly files
QUARTER_DIR = Path("/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_qtrmean")
QUARTER_DIR.mkdir(parents=True, exist_ok=True)    # output: quarterly means

INIT = INIT_CANDIDATES[0]  # for subprocess calls

YEARS = range(1979, 2025)
QUARTERS = [
    ("01", "02", "03"),  # Q1
    ("04", "05", "06"),  # Q2
    ("07", "08", "09"),  # Q3
    ("10", "11", "12"),  # Q4
]

# Hourly filename pattern:
# One file per month:    YYYY_MM.nc
PATTERN_PER_MONTH = "{y}_{m}.nc"   # for one file per month


# --------------------------
# CONFIGURE ENVIRONMENT
# --------------------------
os.environ.setdefault("HDF5_USE_FILE_LOCKING", "FALSE")
os.environ["NETCDF_HDF5_FILE_LOCKING"] = "FALSE"

# --------------------------
# HELPERS
# --------------------------
def run(cmd):
    print("$", cmd)
    subprocess.run(cmd, shell=True, check=True)

def time_is_unlimited(nc_path: Path) -> bool:
    """Return True if 'time' is UNLIMITED in the file."""
    chk = f'ncks -m "{nc_path}" | grep -q "time = UNLIMITED"'
    res = subprocess.run(["bash","-lc", f"source {INIT} 2>/dev/null || true; module load nco; {chk}"])
    return res.returncode == 0

def make_time_unlimited(nc_path: Path):
    """Make the time dimension unlimited in-place."""
    tmp = nc_path.with_suffix(nc_path.suffix + ".tmp")
    run(f'ncks -O -4 -L 1 --mk_rec_dmn time "{nc_path}" "{tmp}" && mv "{tmp}" "{nc_path}"')

In [13]:
for y in YEARS:
    y = int(y)
    for m in MONTHS:
        # Find inputs for this month
        monthly_glob = HOURLY_DIR / PATTERN_PER_MONTH.format(y=y, m=m)
        files = sorted(glob.glob(str(monthly_glob)))
        if not files:
            print(f"[skip] No hourly files for {y}-{m}")
            continue
        # --------------------------
        # 1) HOURLY → MONTHLY (hourly-weighted)
        #    - If multiple hourly files per month exist, we first ncrcat them.
        #    - Then ncra over time to monthly mean.
        # --------------------------

        # If multiple files, concat to a temp stack; else use the single file directly
        tmp_stack = MONTHLY_DIR / f"{y}_{m}_all.nc"
        if len(files) == 1:
            stack_in = files[0]
        else:
            # Concatenate along time
            run(["ncrcat", "-O", *files, str(tmp_stack)])
            stack_in = str(tmp_stack)

        # Monthly mean (hourly-weighted)
        mon_out = MONTHLY_DIR / f"{y}_{m}_monmean.nc"
        run(["ncra", *NCO_FLAGS, stack_in, str(mon_out)])

        # Cleanup temp stack if we created it
        if str(stack_in) == str(tmp_stack) and tmp_stack.exists():
            tmp_stack.unlink()

print("Monthly means complete →", MONTHLY_DIR)

NameError: name 'MONTHLY_DIR' is not defined

In [None]:
# --------------------------
# 2) QUARTERLY (hourly-weighted)
#    Concatenate ALL hourly files for the three months, then ncra once.
# --------------------------
for y in YEARS:
    y = int(y)
    for q_idx, (m1, m2, m3) in enumerate(QUARTERS, start=1):
        # Gather all hourly files for the 3 months
        f1 = sorted(glob.glob(str(HOURLY_DIR / PATTERN_PER_MONTH.format(y=y, m=m1))))
        f2 = sorted(glob.glob(str(HOURLY_DIR / PATTERN_PER_MONTH.format(y=y, m=m2))))
        f3 = sorted(glob.glob(str(HOURLY_DIR / PATTERN_PER_MONTH.format(y=y, m=m3))))
        all_files = f1 + f2 + f3

        if not all_files:
            print(f"[skip] No hourly files for {y} Q{q_idx}")
            continue

        tmp_q_stack = QUARTER_DIR / f"{y}_Q{q_idx}_all.nc"
        q_out       = QUARTER_DIR / f"{y}_Q{q_idx}_qmean.nc"

        # Concatenate all hours in the quarter
        run(["ncrcat", "-O", *all_files, str(tmp_q_stack)])

        # Quarterly mean (hourly-weighted)
        run(["ncra", *NCO_FLAGS, str(tmp_q_stack), str(q_out)])

        # Cleanup
        if tmp_q_stack.exists():
            tmp_q_stack.unlink()

print("Quarterly means complete →", QUARTER_DIR)

In [19]:
for year in range(1970, 2025):
    for qi, (m1,m2,m3) in enumerate(QUARTERS, start=1):
        
        for f in (m1, m2, m3):
            monthly_file = HOURLY_DIR / f"{year}_{f}.nc"
            if not time_is_unlimited(monthly_file):
                print(f"Making time unlimited in {monthly_file}")
                make_time_unlimited(monthly_file)
            else:
                print(f"✓ Record dim OK: {f.name}")

        qtmp = QUARTER_DIR / f"{year}_Q{qi}_all.nc"
        qout = QUARTER_DIR / f"{year}_Q{qi}_qmean.nc"
        # concat 3 monthly-hourly files → one quarterly stack
        run(f'ncrcat -O "{HOURLY_DIR}/{year}_{m1}.nc" "{HOURLY_DIR}/{year}_{m2}.nc" "{HOURLY_DIR}/{year}_{m3}.nc" "{qtmp}"')
        # time-mean over all hours in the quarter
        run(f'ncra -O --mro -4 -L 1 "{qtmp}" "{qout}"')
        qtmp.unlink(missing_ok=True)

Making time unlimited in /vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc
$ ncks -O -4 -L 1 --mk_rec_dmn time "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc" "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc.tmp" && mv "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc.tmp" "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc"


/bin/sh: line 1: 1221761 Killed                  ncks -O -4 -L 1 --mk_rec_dmn time "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc" "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc.tmp"


CalledProcessError: Command 'ncks -O -4 -L 1 --mk_rec_dmn time "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc" "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc.tmp" && mv "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc.tmp" "/vast/bzaitch1/trp_climate_model_data/era5land_1970_2024_hourly/1970_01.nc"' returned non-zero exit status 137.