Environment setup:
```
conda create -n earth2studio python=3.12 -y 
conda activate earth2studio
pip install uv
export UV_CACHE_DIR="/projectnb/eb-general/wade/uv_cache"
uv pip install "earth2studio @ git+https://github.com/NVIDIA/earth2studio.git@0.10.0"
uv pip install "earth2studio[fcn]"
uv pip install numpy matplotlib pandas xarray cartopy cmocean tqdm
uv pip install "makani @ git+https://github.com/NVIDIA/modulus-makani.git@28f38e3e929ed1303476518552c64673bbd6f722"
uv pip install earth2studio[sfno]
```


# Running inference with SFNO checkpoints

In [1]:
import os
import subprocess
from dotenv import load_dotenv

from earth2studio.io import ZarrBackend

# from earth2studio.run import deterministic
# from earth2studio.models.px import SFNO
### Updated source code for checkpoint selection and 
### specific variable saving functionality:
from deterministic_update import deterministic
from SFNO_update import SFNO

import earth2studio.data as data
from earth2studio.models.auto import Package
from utils import filename_to_year, datetime_range, open_hdf5 # these aren't used in this script currently

from datetime import datetime, timedelta
import json
import xarray as xr
from typing import List
import shutil
import sys
import gc
import numpy as np
import time

import torch

  import pynvml  # type: ignore[import]
  from .autonotebook import tqdm as notebook_tqdm


Should be using earth2studio version 0.10.0 and makani version 0.2.0.

In [2]:
# print versions of makani and earth2studio
import earth2studio
print(f"earth2studio version: {earth2studio.__version__}")
import makani
print(f"makani version: {makani.__version__}")


earth2studio version: 0.10.0rc0
makani version: 0.2.0


In [3]:
# Check if CUDA (GPU support) is available
is_available = torch.cuda.is_available()
print(f"Is CUDA available? {is_available}")

if is_available:
    # Get the number of available GPUs
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs available: {gpu_count}")

    # Get the ID of the current GPU
    current_gpu = torch.cuda.current_device()
    print(f"Current GPU ID: {current_gpu}")

    # Get the name of the current GPU
    gpu_name = torch.cuda.get_device_name(current_gpu)
    print(f"Current GPU Name: {gpu_name}")

    print(f"Memory (VRAM):      {torch.cuda.get_device_properties(current_gpu).total_memory / 1e9:.2f} GB")
else:
    print("CUDA is not available. Running on CPU.")

time_start = time.time()


Is CUDA available? True
Number of GPUs available: 1
Current GPU ID: 0
Current GPU Name: NVIDIA RTX A6000
Memory (VRAM):      47.70 GB


In [4]:
############# CONFIG FOR INFERENCE RUN #############

start_datetime = "2022-12-22T00:00:00" #"2019-09-03T00:00:00" #"2022-09-24T00:00:00" # "2021_09_20T00:00:00" # 
variables_to_select = ['tcwv', 'u10m', 'v10m'] #['msl'] #Only save selected variables - it slows down inference SIGNIFICANTLY to save all 74 variables
experiment_number = 1 # which experiment directory to output to
n_steps = 20  # number of 6hr steps to forecast
epochs_to_run = np.arange(1,90,2) # List or array of epochs/checkpoint numbers to run inference on

# boring =  False
ema = False

# Create the inference name based on the start datetime and number of steps
inference_name = datetime.fromisoformat(start_datetime).strftime("%Y_%m_%dT%H")+'_nsteps'+str(n_steps)
data_create_fp = "/projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/init_files/Initialize_"+inference_name+".nc" # "/projectnb/eb-general/wade/sfno/inference_runs/Ian/Initialize_data/Initialize_"+inference_name+".nc"

# Calculate the final datetime based from the start datetime and number of steps
final_datetime = (datetime.fromisoformat(start_datetime) + timedelta(hours = int(n_steps*6))).isoformat() 

# Directories
results_out_dir = f"/projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment{str(experiment_number)}/{final_datetime[:10].replace('-', '_')}/"

#################################################

In [5]:
data_create_fp, final_datetime, results_out_dir

('/projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/init_files/Initialize_2022_12_22T00_nsteps20.nc',
 '2022-12-27T00:00:00',
 '/projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/')

In [None]:
if os.path.exists(data_create_fp):
    print(f"Data already preprocessed: {data_create_fp}")
else:
    sys.exit(f"Data not found use Create_Initial_Data.ipynb to create: {data_create_fp}")

# make this xarray into a dataarray file for earth2studio
initial_data = data.DataArrayFile(data_create_fp)

fine_tuning_start_epoch = 71 # the epoch where fine-tuning starts (important for correctly accessing the checkpoints)

time_1 = time.time()
print(f"Data loaded in {time_1 - time_start:.2f} seconds")

for n_epoch in epochs_to_run: 
    time_2 = time.time()

    if ema:
        results_out_fp = results_out_dir+f"EMA_Checkpoint{n_epoch}_{inference_name}.nc"
    else:
        results_out_fp =  results_out_dir+"Checkpoint"+str(n_epoch)+"_"+inference_name+'.nc' 
    
    # Check if the results file already exists
    if os.path.exists(results_out_fp):
        print(f"Results file {results_out_fp} already exists. Skipping to next epoch.")
        continue  # Skip the rest of the loop and go to the next iteration
    else:
        os.makedirs(os.path.dirname(results_out_fp), exist_ok=True)

        load_dotenv()

        if n_epoch < fine_tuning_start_epoch: # pre-fine-tuning phase epochs are numbered 1-70
            src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/"
            checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch)+'.tar'
        else:
            src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/"
            n_epoch_multistep2 = n_epoch - (fine_tuning_start_epoch - 1) # fine-tuning phase epochs are numbered from 1-20
            checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch_multistep2)+'.tar'

        print(f"Loading model from {src_dir} with checkpoint {checkpoint_name}")
        # Load the model package from storage
        model_package = Package(src_dir, cache = False)
        model = SFNO.load_model(model_package, 
        checkpoint_name = checkpoint_name, EMA = ema
        )

        # Create the IO handler, store in memory
        io = ZarrBackend()
        
        with torch.no_grad():
            # run inference
            io = deterministic([start_datetime], n_steps, model, initial_data, io, 
            variables_list=variables_to_select
            )

        print(io.root.tree())

        # save results to netcdf
        # Open the Zarr group from the in-memory store using xarray
        ds = xr.open_zarr(io.root.store)
        
        # SANITY CHECKING...
        print("Dataset times:", ds["time"].values)
        print("Dataset dimensions:", {dim: ds.dims[dim] for dim in ds.dims})
        print("Lead times", ds["lead_time"].values)

        # Convert the 'time' coordinate in ds to datetime64 format
        ds["time"] = ds["time"].astype("datetime64[ns]")

        # Convert lead_time from nanoseconds to timedelta64[ns]
        base_time = ds["time"].values  # shape (n_time,)
        lead_timedelta = ds["lead_time"].values.astype("timedelta64[ns]")  # shape (n_lead_time,)
        # Broadcast to 2D: (time, lead_time)
        valid_timesteps = (base_time[:, None] + lead_timedelta[None, :]).flatten() 
        # Drop the old lead_time coordinate
        ds = ds.drop_vars("lead_time")

        # Assume ds has dimensions (time, lead_time, lat, lon) and only one time
        initial_time = str(ds["time"].values[0])  # Save the initial time as a string
        # Remove the time dimension by selecting the first (and only) time
        ds = ds.isel(time=0).drop_vars("time")
        # Add the initial time as a global attribute
        ds.attrs["initial_time"] = initial_time

        # Create valid_time by adding lead_timedelta to base_time
        ds = ds.rename({"lead_time": "valid_time"})
        # Assign valid_time as a coordinate
        ds = ds.assign_coords(valid_time=(("valid_time",), valid_timesteps))

        lead_times_to_save = np.arange(1,9,2) # in days
        lead_times_to_save = lead_times_to_save[lead_times_to_save <= n_steps//4] # only keep lead times that are within the n_steps range

        # Calculate the specific timestamps for these days
        start_dt = datetime.fromisoformat(start_datetime)
        target_timestamps = []
        for d in lead_times_to_save:
            ts = start_dt + timedelta(days=d)
            target_timestamps.append(np.datetime64(ts))

        print(f"Attempting to save forecasts for days: {lead_times_to_save}")

        available_times = ds["valid_time"].values
        times_to_save = [t for t in target_timestamps if t in available_times] # overlap between target lead times and available times

        if len(times_to_save) > 0:
            ds_subset = ds.sel(valid_time=times_to_save)
            ds_subset = ds_subset[variables_to_select]
            ds_subset.to_netcdf(results_out_fp, mode="w", format="NETCDF4")
            print(f"Results saved to {results_out_fp} with {len(times_to_save)} timesteps.")
        else:
            print(f"ERROR: None of the target odd days were found in the output. available_times: {available_times}")

        # # only save the final time step
        # if np.datetime64(final_datetime) in ds["valid_time"].values:
        #     ds = ds.sel(valid_time=[final_datetime])
        #     ds = ds[variables_to_select]
        #     ds.to_netcdf(results_out_fp, mode="w", format="NETCDF4")
        #     print(f"Results saved to {results_out_fp}")
        # else:
        #     print(f"ERROR: final_datetime {final_datetime} not found in ds['valid_time']. No file saved.")

        #some cleanup
        torch.cuda.empty_cache()
        del model_package
        del model
        del io
        del ds
        gc.collect()
        time_3 = time.time()
        print(f"Epoch {n_epoch} done: {time_3 - time_2:.2f} seconds")


Data already preprocessed: /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/init_files/Initialize_2022_12_22T00_nsteps20.nc
Data loaded in 5.40 seconds
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch1.tar
[32m2026-01-05 14:27:21.286[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:27:21.314[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cpu[0m
[32m2026-01-05 14:27:22.057[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m92[0m - [32m[1mFetched data from DataArrayFile[0m
[32m2026-01-05 14:27:22.300[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m122[0m - [1mInference starting![0m




# Code with GPU utilization and timing monitoring

In [7]:
if os.path.exists(data_create_fp):
    print(f"Data already preprocessed: {data_create_fp}")
else:
    sys.exit(f"Data not found use Create_Initial_Data.ipynb to create: {data_create_fp}")

# make this xarray into a dataarray file for earth2studio
initial_data = data.DataArrayFile(data_create_fp)

fine_tuning_start_epoch = 71 

time_1 = time.time()
print(f"Data loaded in {time_1 - time_start:.2f} seconds")

for n_epoch in epochs_to_run: 
    time_2 = time.time()
    
    # --- MONITORING START: Reset peak memory tracker for this epoch ---
    torch.cuda.reset_peak_memory_stats()
    # ----------------------------------------------------------------

    if ema:
        results_out_fp = results_out_dir+f"EMA_Checkpoint{n_epoch}_{inference_name}.nc"
    else:
        results_out_fp =  results_out_dir+"Checkpoint"+str(n_epoch)+"_"+inference_name+'.nc' 
    
    # Check if the results file already exists
    if os.path.exists(results_out_fp):
        print(f"Results file {results_out_fp} already exists. Skipping to next epoch.")
        continue 
    else:
        os.makedirs(os.path.dirname(results_out_fp), exist_ok=True)

        load_dotenv()

        if n_epoch < fine_tuning_start_epoch: 
            src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/"
            checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch)+'.tar'
        else:
            src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/"
            n_epoch_multistep2 = n_epoch - (fine_tuning_start_epoch - 1) 
            checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch_multistep2)+'.tar'

        print(f"Loading model from {src_dir} with checkpoint {checkpoint_name}")
        model_package = Package(src_dir, cache = False)
        model = SFNO.load_model(model_package, 
        checkpoint_name = checkpoint_name, EMA = ema
        )

        io = ZarrBackend()
        
        with torch.no_grad():
            io = deterministic([start_datetime], n_steps, model, initial_data, io, 
            variables_list=variables_to_select
            )

        # --- MONITORING: Capture GPU stats immediately after inference ---
        peak_mem = torch.cuda.max_memory_allocated() / 1e9 # Convert to GB
        try:
            # Quick snapshot of GPU utilization via nvidia-smi
            gpu_stats = subprocess.check_output(
                ["nvidia-smi", "--query-gpu=utilization.gpu", "--format=csv,noheader,nounits"], 
                encoding='utf-8'
            ).strip()
            gpu_util = f"{gpu_stats}%"
        except:
            gpu_util = "N/A"
        # ---------------------------------------------------------------

        # save results to netcdf
        ds = xr.open_zarr(io.root.store)
        
        ds["time"] = ds["time"].astype("datetime64[ns]")

        base_time = ds["time"].values  
        lead_timedelta = ds["lead_time"].values.astype("timedelta64[ns]")  
        valid_timesteps = (base_time[:, None] + lead_timedelta[None, :]).flatten() 
        ds = ds.drop_vars("lead_time")

        initial_time = str(ds["time"].values[0]) 
        ds = ds.isel(time=0).drop_vars("time")
        ds.attrs["initial_time"] = initial_time

        ds = ds.rename({"lead_time": "valid_time"})
        ds = ds.assign_coords(valid_time=(("valid_time",), valid_timesteps))

        lead_times_to_save = np.arange(1,9,2) # in days
        lead_times_to_save = lead_times_to_save[lead_times_to_save <= n_steps//4] 

        start_dt = datetime.fromisoformat(start_datetime)
        target_timestamps = []
        for d in lead_times_to_save:
            ts = start_dt + timedelta(days=int(d))
            target_timestamps.append(np.datetime64(ts))

        available_times = ds["valid_time"].values
        times_to_save = [t for t in target_timestamps if t in available_times] 

        if len(times_to_save) > 0:
            ds_subset = ds.sel(valid_time=times_to_save)
            ds_subset = ds_subset[variables_to_select]
            ds_subset.to_netcdf(results_out_fp, mode="w", format="NETCDF4")
            print(f"Results saved to {results_out_fp} with {len(times_to_save)} timesteps.")
        else:
            print(f"ERROR: None of the target odd days were found. available_times: {available_times}")

        # Cleanup
        torch.cuda.empty_cache()
        del model_package
        del model
        del io
        del ds
        gc.collect()
        
        # --- MONITORING: Final timing prints ---
        time_3 = time.time()
        epoch_dur = time_3 - time_2
        total_elapsed = time_3 - time_start
        print(f"Epoch {n_epoch} Stats:")
        print(f"  Duration: {epoch_dur:.2f}s | Total Elapsed: {total_elapsed/60:.2f} min")
        print(f"  Peak VRAM: {peak_mem:.2f} GB | GPU Util Snapshot: {gpu_util}")
        print("-" * 50)
        # ---------------------------------------

Data already preprocessed: /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/init_files/Initialize_2022_12_22T00_nsteps20.nc
Data loaded in 464.76 seconds
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch1.tar
[32m2026-01-05 14:46:16.397[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:46:16.397[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m2026-01-05 14:46:16.688[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m92[0m - [32m[1mFetched data from DataArrayFile[0m
[32m2026-01-05 14:46:16.723[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m122[0m - [1mInference starting![

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:46:32.608[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint1_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 1 Stats:
  Duration: 26.27s | Total Elapsed: 8.18 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch3.tar
[32m2026-01-05 14:47:11.388[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:47:11.389[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m2026

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:47:27.692[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint3_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 3 Stats:
  Duration: 53.41s | Total Elapsed: 9.07 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch5.tar
[32m2026-01-05 14:48:04.863[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:48:04.863[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m20

Running inference: 100%|██████████| 21/21 [00:16<00:00,  1.31it/s]


[32m2026-01-05 14:48:21.284[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint5_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 5 Stats:
  Duration: 53.62s | Total Elapsed: 9.97 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch7.tar
[32m2026-01-05 14:49:00.508[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:49:00.508[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m2026

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.31it/s]


[32m2026-01-05 14:49:16.969[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint7_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 7 Stats:
  Duration: 55.65s | Total Elapsed: 10.90 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch9.tar
[32m2026-01-05 14:49:54.315[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:49:54.315[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m202

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:50:10.680[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint9_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 9 Stats:
  Duration: 53.72s | Total Elapsed: 11.79 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 68
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch11.tar
[32m2026-01-05 14:50:49.906[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:50:49.907[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m2

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:51:06.286[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint11_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 11 Stats:
  Duration: 55.67s | Total Elapsed: 12.72 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch13.tar
[32m2026-01-05 14:51:44.716[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:51:44.716[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:52:01.085[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint13_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 13 Stats:
  Duration: 54.69s | Total Elapsed: 13.63 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch15.tar
[32m2026-01-05 14:52:37.758[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:52:37.759[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:52:54.093[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint15_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 15 Stats:
  Duration: 53.08s | Total Elapsed: 14.51 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch17.tar
[32m2026-01-05 14:53:30.926[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:53:30.926[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:53:47.257[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint17_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 17 Stats:
  Duration: 53.08s | Total Elapsed: 15.40 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch19.tar
[32m2026-01-05 14:54:26.028[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:54:26.028[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:54:42.308[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint19_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 19 Stats:
  Duration: 55.13s | Total Elapsed: 16.32 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch21.tar
[32m2026-01-05 14:55:20.088[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:55:20.088[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:55:36.435[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint21_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 21 Stats:
  Duration: 54.04s | Total Elapsed: 17.22 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch23.tar
[32m2026-01-05 14:56:13.300[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:56:13.300[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:56:29.646[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint23_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 23 Stats:
  Duration: 53.29s | Total Elapsed: 18.11 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch25.tar
[32m2026-01-05 14:57:09.361[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:57:09.361[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:57:25.674[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint25_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 25 Stats:
  Duration: 55.94s | Total Elapsed: 19.04 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch27.tar
[32m2026-01-05 14:58:02.331[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:58:02.332[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:58:18.656[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint27_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 27 Stats:
  Duration: 53.07s | Total Elapsed: 19.92 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch29.tar
[32m2026-01-05 14:58:57.047[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:58:57.047[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[3

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 14:59:13.385[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint29_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 29 Stats:
  Duration: 54.66s | Total Elapsed: 20.83 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch31.tar
[32m2026-01-05 14:59:52.021[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 14:59:52.021[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[3

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:00:08.375[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint31_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 31 Stats:
  Duration: 55.06s | Total Elapsed: 21.75 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch33.tar
[32m2026-01-05 15:00:46.029[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:00:46.029[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:01:02.398[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint33_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 33 Stats:
  Duration: 54.01s | Total Elapsed: 22.65 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch35.tar
[32m2026-01-05 15:01:39.036[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:01:39.036[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.31it/s]


[32m2026-01-05 15:01:55.444[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint35_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 35 Stats:
  Duration: 52.99s | Total Elapsed: 23.54 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch37.tar
[32m2026-01-05 15:02:34.534[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:02:34.534[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[3

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:02:50.902[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint37_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 37 Stats:
  Duration: 55.52s | Total Elapsed: 24.46 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch39.tar
[32m2026-01-05 15:03:28.694[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:03:28.694[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:03:45.052[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint39_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 39 Stats:
  Duration: 54.10s | Total Elapsed: 25.36 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 98
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch41.tar
[32m2026-01-05 15:04:21.225[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:04:21.226[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:04:37.545[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint41_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 41 Stats:
  Duration: 52.54s | Total Elapsed: 26.24 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch43.tar
[32m2026-01-05 15:05:14.623[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:05:14.624[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[3

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:05:30.964[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint43_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 43 Stats:
  Duration: 53.36s | Total Elapsed: 27.13 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch45.tar
[32m2026-01-05 15:06:07.735[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:06:07.735[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:06:24.090[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint45_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 45 Stats:
  Duration: 53.22s | Total Elapsed: 28.01 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 99
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch47.tar
[32m2026-01-05 15:07:01.978[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:07:01.979[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:07:18.358[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint47_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 47 Stats:
  Duration: 54.16s | Total Elapsed: 28.92 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch49.tar
[32m2026-01-05 15:07:55.257[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:07:55.257[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.31it/s]


[32m2026-01-05 15:08:11.670[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint49_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 49 Stats:
  Duration: 53.39s | Total Elapsed: 29.81 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch51.tar
[32m2026-01-05 15:08:48.214[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:08:48.214[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:09:04.587[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint51_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 51 Stats:
  Duration: 52.93s | Total Elapsed: 30.69 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch53.tar
[32m2026-01-05 15:09:41.490[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:09:41.490[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[3

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:09:57.859[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint53_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 53 Stats:
  Duration: 53.24s | Total Elapsed: 31.58 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 99
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch55.tar
[32m2026-01-05 15:10:38.391[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:10:38.391[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32

Running inference: 100%|██████████| 21/21 [00:16<00:00,  1.31it/s]


[32m2026-01-05 15:10:54.837[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint55_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 55 Stats:
  Duration: 57.00s | Total Elapsed: 32.53 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch57.tar
[32m2026-01-05 15:11:31.628[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:11:31.628[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:11:47.951[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint57_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 57 Stats:
  Duration: 53.04s | Total Elapsed: 33.41 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch59.tar
[32m2026-01-05 15:12:26.879[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:12:26.879[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:12:43.241[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint59_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 59 Stats:
  Duration: 55.37s | Total Elapsed: 34.33 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 98
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch61.tar
[32m2026-01-05 15:13:18.905[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:13:18.905[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:13:35.182[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint61_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 61 Stats:
  Duration: 51.87s | Total Elapsed: 35.20 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch63.tar
[32m2026-01-05 15:14:11.698[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:14:11.699[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[3

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:14:27.982[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint63_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 63 Stats:
  Duration: 52.87s | Total Elapsed: 36.08 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 94
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch65.tar
[32m2026-01-05 15:15:05.212[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:15:05.213[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:15:21.485[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint65_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 65 Stats:
  Duration: 53.43s | Total Elapsed: 36.97 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch67.tar
[32m2026-01-05 15:15:58.445[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:15:58.445[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:16:14.714[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint67_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 67 Stats:
  Duration: 53.30s | Total Elapsed: 37.86 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/ with checkpoint ckpt_mp0_epoch69.tar
[32m2026-01-05 15:16:50.569[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:16:50.569[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference device: cuda[0m
[32m

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:17:06.842[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint69_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 69 Stats:
  Duration: 52.06s | Total Elapsed: 38.73 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch1.tar
[32m2026-01-05 15:17:45.227[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:17:45.228[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference de

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:18:01.514[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint71_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 71 Stats:
  Duration: 54.74s | Total Elapsed: 39.64 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch3.tar
[32m2026-01-05 15:18:40.345[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:18:40.346[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference de

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:18:56.665[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint73_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 73 Stats:
  Duration: 55.21s | Total Elapsed: 40.56 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 83
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch5.tar
[32m2026-01-05 15:19:23.406[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:19:23.406[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference d

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:19:39.765[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint75_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 75 Stats:
  Duration: 43.12s | Total Elapsed: 41.28 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch7.tar
[32m2026-01-05 15:20:00.413[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:20:00.414[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference de

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:20:16.764[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint77_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 77 Stats:
  Duration: 36.93s | Total Elapsed: 41.89 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch9.tar
[32m2026-01-05 15:20:40.723[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:20:40.723[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference 

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:20:57.072[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint79_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 79 Stats:
  Duration: 40.35s | Total Elapsed: 42.56 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 83
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch11.tar
[32m2026-01-05 15:21:16.373[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:21:16.373[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference 

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:21:32.722[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint81_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 81 Stats:
  Duration: 35.61s | Total Elapsed: 43.16 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 98
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch13.tar
[32m2026-01-05 15:22:00.469[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:22:00.469[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference 

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:22:16.808[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint83_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 83 Stats:
  Duration: 44.10s | Total Elapsed: 43.89 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 99
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch15.tar
[32m2026-01-05 15:22:37.168[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:22:37.168[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference 

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:22:53.549[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint85_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 85 Stats:
  Duration: 36.71s | Total Elapsed: 44.51 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch17.tar
[32m2026-01-05 15:23:12.874[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:23:12.874[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference d

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:23:29.280[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint87_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 87 Stats:
  Duration: 35.74s | Total Elapsed: 45.10 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 0
0
0
0
0
0
0
0%
--------------------------------------------------
Loading model from /projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/ with checkpoint ckpt_mp0_epoch19.tar
[32m2026-01-05 15:23:49.317[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m61[0m - [1mRunning simple workflow![0m
[32m2026-01-05 15:23:49.317[0m | [1mINFO    [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m68[0m - [1mInference d

Running inference: 100%|██████████| 21/21 [00:15<00:00,  1.32it/s]


[32m2026-01-05 15:24:05.745[0m | [32m[1mSUCCESS [0m | [36mdeterministic_update[0m:[36mdeterministic[0m:[36m146[0m - [32m[1mInference complete[0m
Results saved to /projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment1/2022_12_27/Checkpoint89_2022_12_22T00_nsteps20.nc with 3 timesteps.
Epoch 89 Stats:
  Duration: 36.45s | Total Elapsed: 45.71 min
  Peak VRAM: 16.25 GB | GPU Util Snapshot: 100
0
0
0
0
0
0
0%
--------------------------------------------------


In [3]:
# Compare the output of my forecast to Becca's saved forecast
my_forecast ='/projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment0/2022_09_29/Checkpoint70_2022_09_24T00_nsteps20.nc' 
beccas_forecast = '/projectnb/eb-general/wade/sfno/inference_runs/Ian/leadtime_fivedays/Checkpoint70_2022_09_24T00_nsteps20.nc'
my_ds = xr.open_dataset(my_forecast)
beccas_ds = xr.open_dataset(beccas_forecast)
print(my_ds)
print(beccas_ds)

<xarray.Dataset> Size: 12MB
Dimensions:     (valid_time: 1, lat: 721, lon: 1440)
Coordinates:
  * valid_time  (valid_time) datetime64[ns] 8B 2022-09-29
  * lat         (lat) float64 6kB 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0
  * lon         (lon) float64 12kB 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8
Data variables:
    msl         (valid_time, lat, lon) float32 4MB ...
    u10m        (valid_time, lat, lon) float32 4MB ...
    v10m        (valid_time, lat, lon) float32 4MB ...
Attributes:
    initial_time:  2022-09-24T00:00:00.000000000
<xarray.Dataset> Size: 12MB
Dimensions:     (valid_time: 1, lat: 721, lon: 1440)
Coordinates:
  * valid_time  (valid_time) datetime64[ns] 8B 2022-09-29
  * lat         (lat) float64 6kB 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0
  * lon         (lon) float64 12kB 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8
Data variables:
    msl         (valid_time, lat, lon) float32 4MB ...
    u10m        (valid_time, lat, lon) float32 4MB ...
    v10m

To check what epoch a checkpoint is (e.g. if epoch number is not in the filepath):

In [38]:
import torch
import os

dir='/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/training_checkpoints/' # step 1 of training (epochs 1-70)
dir2='/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/training_checkpoints/' # step 2 of training (epochs 71-90

# List of files to check
files_to_check = [dir + "best_ckpt_mp0.tar", 
                  dir + "ckpt_mp0.tar",
                  dir2 +  "best_ckpt_mp0.tar",
                    dir2 + "ckpt_mp0.tar"
                    ]
                
for filename in files_to_check:
    # Load the checkpoint
    # map_location='cpu' allows you to inspect this even without a GPU
    # weights_only=False allows loading the full dictionary structure
    checkpoint = torch.load(filename, map_location='cpu', weights_only=False)
    epoch = checkpoint.get('epoch', 'N/A')
    
    print(f"{filename:<25}")
    print(f'epoch: {str(epoch)}')
    print()

/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/training_checkpoints/best_ckpt_mp0.tar
epoch: 70

/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/training_checkpoints/ckpt_mp0.tar
epoch: 70

/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/training_checkpoints/best_ckpt_mp0.tar
epoch: 19

/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/training_checkpoints/ckpt_mp0.tar
epoch: 20



the above output shows that the best_ckpt is the best ckpt *for each phase*, and ckpt_mp0 is the *final* ckpt for that phase
- checkpoint numbering resets for each phase so phase 2 is numbered 1-20


# Original script:

In [None]:
import os
import subprocess
from dotenv import load_dotenv

from earth2studio.io import ZarrBackend
from SFNO_update import SFNO
import earth2studio.data as data
from earth2studio.models.auto import Package
from utils import filename_to_year, datetime_range, open_hdf5
from deterministic_update import deterministic

from datetime import datetime, timedelta
import json
import xarray as xr
from typing import List
import shutil
import sys
import gc
import numpy as np
import time

import torch

# Check if CUDA (GPU support) is available
is_available = torch.cuda.is_available()
print(f"Is CUDA available? {is_available}")

if is_available:
    # Get the number of available GPUs
    gpu_count = torch.cuda.device_count()
    print(f"Number of GPUs available: {gpu_count}")

    # Get the ID of the current GPU
    current_gpu = torch.cuda.current_device()
    print(f"Current GPU ID: {current_gpu}")

    # Get the name of the current GPU
    gpu_name = torch.cuda.get_device_name(current_gpu)
    print(f"Current GPU Name: {gpu_name}")

    print(f"Memory (VRAM):      {torch.cuda.get_device_properties(current_gpu).total_memory / 1e9:.2f} GB")
else:
    print("CUDA is not available. Running on CPU.")

time_start = time.time()

############# Double check these before running the script #############
#select start datetime and n_steps, each n_step = 6hrs
start_datetime = "2021-09-20T00:00:00" # "2021_09_20T00:00:00"
variables_to_select = ['tcwv'] #Only save selected variables - it slows down inference SIGNIFICANTLY to save all 74 variables
experiment_number = 0
n_steps = 20  #number of 6hr steps to forecast

boring = False
ema = False

# Create the inference name based on the start datetime and number of steps
inference_name = datetime.fromisoformat(start_datetime).strftime("%Y_%m_%dT%H")+'_nsteps'+str(n_steps)
data_create_fp = "/projectnb/eb-general/wade/sfno/inference_runs/Ian/Initialize_data/Initialize_"+inference_name+".nc"

# Calculate the final datetime based from the start datetime and number of steps
final_datetime = (datetime.fromisoformat(start_datetime) + timedelta(hours = int(n_steps*6))).isoformat() 

# Directories
results_out_dir = f"/projectnb/eb-general/wade/sfno/inference_runs/sandbox/Experiment{str(experiment_number)}/{final_datetime[:10].replace('-', '_')}/"

############# Double check these before running the script #############


if os.path.exists(data_create_fp):
    print(f"Data already preprocessed: {data_create_fp}")
else:
    sys.exit(f"Data not found use Create_Initial_Data.ipynb to create: {data_create_fp}")

#make this xarray into a dataarray file for earth2studio
initial_data = data.DataArrayFile(data_create_fp)

time_1 = time.time()
print(f"Data loaded in {time_1 - time_start:.2f} seconds")


for n_epoch in np.arange(1,3): #70,1):
    time_2 = time.time()
    # if boring:
    #     # Create the final datetime string in the desired format
    #     
    # else:# Create the final datetime string in the desired format

    if ema:
        results_out_fp = results_out_dir+f"EMA_Checkpoint{n_epoch}_{inference_name}.nc"
    else:
        results_out_fp = results_out_fp = results_out_dir+"/Checkpoint"+str(n_epoch)+"_"+inference_name+'.nc' 
    
    # Check if the results file already exists
    if os.path.exists(results_out_fp):
        print(f"Results file {results_out_fp} already exists. Skipping to next epoch.")
        continue  # Skip the rest of the loop and go to the next iteration
    else:
        os.makedirs(os.path.dirname(results_out_fp), exist_ok=True)

        load_dotenv()  

        # Make temporary folder with all the metadata in it.
        src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/"
        # Load the model package from storage
        model_package = Package(src_dir, cache = False)
        model = SFNO.load_model(model_package, checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch)+'.tar', EMA = ema)

        # Create the IO handler, store in memory
        io = ZarrBackend()
        
        with torch.no_grad():
            # run inference
            io = deterministic([start_datetime], n_steps, model, initial_data, io, variables_list=variables_to_select)

        print(io.root.tree())


        # save results to netcdf
        # Open the Zarr group from the in-memory store using xarray
        ds = xr.open_zarr(io.root.store)

        # Convert the 'time' coordinate in ds to datetime64 format
        ds["time"] = ds["time"].astype("datetime64[ns]")

        # Convert lead_time from nanoseconds to timedelta64[ns]
        base_time = ds["time"].values  # shape (n_time,)
        lead_timedelta = ds["lead_time"].values.astype("timedelta64[ns]")  # shape (n_lead_time,)
        # Broadcast to 2D: (time, lead_time)
        valid_timesteps = (base_time[:, None] + lead_timedelta[None, :]).flatten() 
        # Drop the old lead_time coordinate
        ds = ds.drop_vars("lead_time")

        # Assume ds has dimensions (time, lead_time, lat, lon) and only one time
        initial_time = str(ds["time"].values[0])  # Save the initial time as a string
        # Remove the time dimension by selecting the first (and only) time
        ds = ds.isel(time=0).drop_vars("time")
        # Add the initial time as a global attribute
        ds.attrs["initial_time"] = initial_time

        # Create valid_time by adding lead_timedelta to base_time
        ds = ds.rename({"lead_time": "valid_time"})
        # Assign valid_time as a coordinate
        ds = ds.assign_coords(valid_time=(("valid_time",), valid_timesteps))

        # only save the final time step
        if np.datetime64(final_datetime) in ds["valid_time"].values:
            ds = ds.sel(valid_time=[final_datetime])
            ds = ds[variables_to_select]
            ds.to_netcdf(results_out_fp, mode="w", format="NETCDF4")
            print(f"Results saved to {results_out_fp}")
        else:
            print(f"ERROR: final_datetime {final_datetime} not found in ds['valid_time']. No file saved.")


        #some cleanup
        torch.cuda.empty_cache()
        del model_package
        del model
        del io
        del ds
        gc.collect()
        time_3 = time.time()
        print(f"Epoch {n_epoch} done: {time_3 - time_2:.2f} seconds")



#     for n_epoch in np.arange(36,71,1):
#         time_2 = time.time()
#         if boring:
#             # Create the final datetime string in the desired format
#             results_out_fp = "/barnes-engr-scratch2/C837824079/Experiment"+str(experiment_number)+"/Forecasts_Boring/"+final_datetime[:10].replace("-", "_")+"/Checkpoint"+str(n_epoch)+"_"+inference_name+'.nc'
#         else:# Create the final datetime string in the desired format
#             if ema:
#                 results_out_fp = f"/barnes-engr-scratch2/C837824079/Experiment{str(experiment_number)}/Forecast/EMA_9/Checkpoint{n_epoch}_{inference_name}.nc"         
#             else:
#                 results_out_fp = "/projectnb/eb-general/rbaiman/SFNO/Example_Inference/Example_Forecast/Checkpoint"+str(n_epoch)+"_"+inference_name+'.nc'

#         # Check if the results file already exists
#         if os.path.exists(results_out_fp):
#             print(f"Results file {results_out_fp} already exists. Skipping to next epoch.")
#             continue  # Skip the rest of the loop and go to the next iteration
#         else:
#             os.makedirs(os.path.dirname(results_out_fp), exist_ok=True)

#             load_dotenv()  

#             # Make temporary folder with all the metadata in it.
#             src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999/"

#             # Load the model package from storage
#             model_package = Package(src_dir, cache = False)
#             model = SFNO.load_model(model_package, checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch)+'.tar', EMA = ema)

#             # Create the IO handler, store in memory
#             io = ZarrBackend()
            
#             with torch.no_grad():
#                 # run inference
#                 io = deterministic([start_datetime], n_steps, model, initial_data, io, variables_list=variables_to_select)

#             print(io.root.tree())


#             # save results to netcdf
#             # Open the Zarr group from the in-memory store using xarray
#             ds = xr.open_zarr(io.root.store)

#             # Convert the 'time' coordinate in ds to datetime64 format
#             ds["time"] = ds["time"].astype("datetime64[ns]")

#             # Convert lead_time from nanoseconds to timedelta64[ns]
#             base_time = ds["time"].values  # shape (n_time,)
#             lead_timedelta = ds["lead_time"].values.astype("timedelta64[ns]")  # shape (n_lead_time,)
#             # Broadcast to 2D: (time, lead_time)
#             valid_timesteps = (base_time[:, None] + lead_timedelta[None, :]).flatten() 
#             # Drop the old lead_time coordinate
#             ds = ds.drop_vars("lead_time")

#             # Assume ds has dimensions (time, lead_time, lat, lon) and only one time
#             initial_time = str(ds["time"].values[0])  # Save the initial time as a string
#             # Remove the time dimension by selecting the first (and only) time
#             ds = ds.isel(time=0).drop_vars("time")
#             # Add the initial time as a global attribute
#             ds.attrs["initial_time"] = initial_time

#             # Create valid_time by adding lead_timedelta to base_time
#             ds = ds.rename({"lead_time": "valid_time"})
#             # Assign valid_time as a coordinate
#             ds = ds.assign_coords(valid_time=(("valid_time",), valid_timesteps))

#             # only save the final time step
#             if np.datetime64(final_datetime) in ds["valid_time"].values:
#                 ds = ds.sel(valid_time=[final_datetime])
#                 ds = ds[variables_to_select]
#                 ds.to_netcdf(results_out_fp, mode="w", format="NETCDF4")
#                 print(f"Results saved to {results_out_fp}")
#             else:
#                 print(f"ERROR: final_datetime {final_datetime} not found in ds['valid_time']. No file saved.")


#             #some cleanup
#             torch.cuda.empty_cache()
#             del model_package
#             del model
#             del io
#             del ds
#             gc.collect()
#             time_3 = time.time()
#             print(f"Epoch {n_epoch} done: {time_3 - time_2:.2f} seconds")


# for n_epoch in np.arange(1,21,1):
#     time_2 = time.time()
#     # Create the final datetime string in the desired format
#     if boring:
#         # Create the final datetime string in the desired format
#         results_out_fp = "/barnes-engr-scratch2/C837824079/Experiment"+str(experiment_number)+"/Forecasts_Boring/"+final_datetime[:10].replace("-", "_")+"/Checkpoint"+str(n_epoch+70)+"_"+inference_name+'.nc'
#     else:# Create the final datetime string in the desired format
#         if ema:
#             results_out_fp = f"/barnes-engr-scratch2/C837824079/Experiment{str(experiment_number)}/Forecast/EMA_9/Checkpoint{n_epoch+70}_{inference_name}.nc"
#         else:
#             results_out_fp = "/projectnb/eb-general/rbaiman/SFNO/Example_Inference/Example_Forecast/Checkpoint"+str(n_epoch+70)+"_"+inference_name+'.nc'

    
#     # Check if the results file already exists
#     if os.path.exists(results_out_fp):
#         print(f"Results file {results_out_fp} already exists. Skipping to next epoch.")
#         continue  # Skip the rest of the loop and go to the next iteration
#     else:
#         os.makedirs(os.path.dirname(results_out_fp), exist_ok=True)

#         load_dotenv()  

#         # Make temporary folder with all the metadata in it.
#         src_dir = "/projectnb/eb-general/shared_data/data/processed/FourCastNet_sfno/Checkpoints_SFNO/multistep_sfno_linear_74chq_sc3_layers8_edim384_dt6h_wstgl2/v0.1.0-seed999-multistep2/"

#         # Load the model package from storage
#         model_package = Package(src_dir, cache = False)
#         model = SFNO.load_model(model_package, checkpoint_name = 'ckpt_mp0_epoch'+str(n_epoch)+'.tar', EMA = ema)

#         # Create the IO handler, store in memory
#         io = ZarrBackend()

#         print(f"Running inference for {inference_name}")
#         with torch.no_grad():
#             # run inference
#             io = deterministic([start_datetime], n_steps, model, initial_data, io, variables_list=variables_to_select)

#         # print(io.root.tree())

#         # save results to netcdf
#         # Open the Zarr group from the in-memory store using xarray
#         ds = xr.open_zarr(io.root.store)

#         # Convert the 'time' coordinate in ds to datetime64 format
#         ds["time"] = ds["time"].astype("datetime64[ns]")

#         # Convert lead_time from nanoseconds to timedelta64[ns]
#         base_time = ds["time"].values  # shape (n_time,)
#         lead_timedelta = ds["lead_time"].values.astype("timedelta64[ns]")  # shape (n_lead_time,)
#         # Broadcast to 2D: (time, lead_time)
#         valid_timesteps = (base_time[:, None] + lead_timedelta[None, :]).flatten() 
#         # Drop the old lead_time coordinate
#         ds = ds.drop_vars("lead_time")

#         # Assume ds has dimensions (time, lead_time, lat, lon) and only one time
#         initial_time = str(ds["time"].values[0])  # Save the initial time as a string
#         # Remove the time dimension by selecting the first (and only) time
#         ds = ds.isel(time=0).drop_vars("time")
#         # Add the initial time as a global attribute
#         ds.attrs["initial_time"] = initial_time

#         # Create valid_time by adding lead_timedelta to base_time
#         ds = ds.rename({"lead_time": "valid_time"})
#         # Assign valid_time as a coordinate
#         ds = ds.assign_coords(valid_time=(("valid_time",), valid_timesteps))

#         # only save the final time step
#         if np.datetime64(final_datetime) in ds["valid_time"].values:
#             ds = ds.sel(valid_time=[final_datetime])
#             ds = ds[variables_to_select]
#             ds.to_netcdf(results_out_fp, mode="w", format="NETCDF4")
#             print(f"Results saved to {results_out_fp}")
#         else:
#             print(f"ERROR: final_datetime {final_datetime} not found in ds['valid_time']. No file saved.")


#         #some cleanup
#         torch.cuda.empty_cache()
#         del model_package
#         del model
#         del io
#         del ds
#         gc.collect()
#         time_3 = time.time()
#         print(f"Epoch {n_epoch+70} done: {time_3 - time_2:.2f} seconds")

