In [1]:
import xarray as xr
from deepsensor.data import DataProcessor
from deepsensor_greatlakes.preprocessor import SeasonalCycleProcessor
from deepsensor_greatlakes.utils import standardize_dates
import os
import joblib
import gc
import json
from datetime import datetime

In [2]:
# ---------- Paths ----------
# Storage root: the raw Zarr inputs are read from here and the processed
# Zarr outputs are written back here by the processing cells.
turbo_dir = "/nfs/turbo/seas-dannes/SST-sensor-placement-input"
os.makedirs(turbo_dir, exist_ok=True)

# Preprocessing artefacts (DataProcessor config, seasonal cycles, manifest)
# are stored under the user's home directory.
config_dir = os.path.expanduser("~/deepsensor-greatlakes/deepsensor_config")
dp_dir = os.path.join(config_dir, "data_processor")
seasonal_dir = os.path.join(config_dir, "seasonal_cycles")
os.makedirs(dp_dir, exist_ok=True)
os.makedirs(seasonal_dir, exist_ok=True)

# Raw dataset paths. Derived from turbo_dir rather than repeating the root
# literal, so relocating the storage root only requires one change.
glsea_path = os.path.join(turbo_dir, "GLSEA_combined.zarr")
ice_path = os.path.join(
    turbo_dir, "NSIDC", "ice_concentration_interp_on_glsea_grid.zarr"
)

# Record of what this run produced; the processing cells fill in the two
# sub-dictionaries and the final cell writes the whole thing out as JSON.
manifest = {
    "created": str(datetime.now()),
    "seasonal_cycle": {},
    "data_processor": {}
}

In [3]:
# ---------- Step 1: Process GLSEA ----------
# Pipeline for this cell: open raw GLSEA -> standardize dates -> fit and save
# the seasonal cycle -> convert to anomalies -> run through the DataProcessor
# -> write the result to Zarr. Large intermediates are deleted as soon as
# they are no longer needed to keep kernel memory down.
print("🔹 Processing GLSEA...")
glsea = xr.open_zarr(glsea_path)

# Standardize dates
glsea = standardize_dates(glsea)

# 1A. Seasonal anomaly
seasonal_processor = SeasonalCycleProcessor()
seasonal_processor.calculate(glsea)
# save() returns a mapping of the written file paths; two of its entries are
# recorded in the manifest below.
seasonal_paths = seasonal_processor.save(seasonal_dir)
# Rename the variable so downstream consumers can tell anomalies from raw SST.
glsea_anom = seasonal_processor.compute_anomalies(glsea).rename({"sst": "sst_anom"})
del glsea
gc.collect()

# Save seasonal cycle info in manifest
manifest["seasonal_cycle"]["glsea"] = {
    "id": seasonal_processor.metadata["id"],
    "cycle_path": seasonal_paths["seasonal_cycle_path"],
    "metadata_path": seasonal_paths["metadata_path"]
}

# 1B. Fit and apply DataProcessor
# `dp` is intentionally kept alive after this cell: the ice cell calls it
# again and only then saves it to disk.
dp = DataProcessor(x1_name="lat", x2_name="lon")
glsea_proc = dp(glsea_anom)

# Save GLSEA output (mode="w" overwrites any store left by a previous run)
glsea_out = os.path.join(turbo_dir, "glsea_anom_processed.zarr")
glsea_proc.to_zarr(glsea_out, mode="w")
del glsea_anom, glsea_proc
gc.collect()

print("✅ GLSEA done.")

🔹 Processing GLSEA...
✅ GLSEA done.


In [4]:
# ---------- Step 2: Process ICE (no anomalies) ----------
# Ice concentration is passed through the same `dp` instance created in the
# GLSEA step, with method="min_max", and no seasonal-cycle removal is applied.
print("🔹 Processing ice_concentration...")
ice = xr.open_zarr(ice_path)
ice = standardize_dates(ice)  # Just in case it's not already done
ice_proc = dp(ice, method="min_max")
# mode="w" overwrites any store left by a previous run.
ice_out = os.path.join(turbo_dir, "ice_concentration_processed.zarr")
ice_proc.to_zarr(ice_out, mode="w")
del ice, ice_proc
gc.collect()

# Save processor
# Done here, after both datasets have been passed through `dp`, rather than
# in the GLSEA cell.
dp.save(dp_dir)
# NOTE(review): assumes dp.save() writes "data_processor_config.json" inside
# dp_dir — confirm against the installed deepsensor version.
manifest["data_processor"]["config_path"] = os.path.join(dp_dir, "data_processor_config.json")

print("✅ Ice done.")

🔹 Processing ice_concentration...
✅ Ice done.


In [5]:
# ---------- Save manifest ----------
# Persist the manifest assembled by the earlier cells next to the other
# preprocessing artefacts in config_dir.
manifest_path = os.path.join(config_dir, "manifest.json")
# Explicit encoding so the file does not depend on the machine's locale.
with open(manifest_path, "w", encoding="utf-8") as f:
    json.dump(manifest, f, indent=4)

print("📝 Saved manifest to:", manifest_path)
print("🎉 Preprocessing complete. Only GLSEA and ICE processed.")

📝 Saved manifest to: /home/dannes/deepsensor-greatlakes/deepsensor_config/manifest.json
🎉 Preprocessing complete. Only GLSEA and ICE processed.
