In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import json
import geopandas as gpd
import rioxarray

In [None]:
# Compute MAE of the forecast averaged over all time steps, latitudes, and longitudes per month

In [None]:
# Mean absolute error collapsed over time and both horizontal dimensions
def compute_allpoints_alltime_mae(truth, pred):
    """
    Return the mean absolute error between ``truth`` and ``pred``.

    The absolute difference is reduced with a plain (unweighted) mean over
    the "time", "latitude" and "longitude" dimensions, so every grid point
    and time step contributes equally.
    """
    absolute_error = np.abs(truth - pred)
    reduce_over = ["time", "latitude", "longitude"]
    return absolute_error.mean(dim=reduce_over)

def subset_northern_temperate(ds):
    """
    Keep only the Northern Temperate Zone (35°N to 60°N, bounds included).

    Points outside the band are masked with ``where`` and then dropped.
    """
    lat = ds.latitude
    in_band = (lat >= 35) & (lat <= 60)
    return ds.where(in_band, drop=True)

def subset_southern_temperate(ds):
    """
    Subset dataset to the Southern Temperate Zone: 35°S to 60°S (bounds included).

    Latitudes are compared as signed degrees, i.e. the band is
    -60 <= latitude <= -35. Points outside the band are masked with
    ``where`` and then dropped.
    """
    lat = ds.latitude
    in_band = (lat <= -35) & (lat >= -60)
    return ds.where(in_band, drop=True)

# Subpolar and polar caps of both hemispheres
def subset_polar_regions(ds):
    """
    Keep only the (sub-)polar zones: 60°N to 90°N and 60°S to 90°S.

    A point is kept when its latitude is >= 60 or <= -60; everything in
    between is masked with ``where`` and then dropped.
    """
    lat = ds.latitude
    northern_cap = lat >= 60
    southern_cap = lat <= -60
    return ds.where(northern_cap | southern_cap, drop=True)

def subset_tropics(ds):
    """
    Keep only the Tropics: latitudes from -23.5° to 23.5° (bounds included).

    Points outside the band are masked with ``where`` and then dropped.
    """
    lat = ds.latitude
    in_band = (lat >= -23.5) & (lat <= 23.5)
    return ds.where(in_band, drop=True)

def subset_subtropics(ds):
    """
    Keep only the two subtropical bands (bounds included):

    - northern band: 23.5°N to 35°N
    - southern band: 23.5°S to 35°S

    Points outside both bands are masked with ``where`` and then dropped.
    """
    lat = ds.latitude
    northern_band = (lat >= 23.5) & (lat <= 35)
    southern_band = (lat <= -23.5) & (lat >= -35)
    return ds.where(northern_band | southern_band, drop=True)

def subset_africa(ds, africa_gdf):
    """
    Clip an xarray dataset to the geometries of ``africa_gdf``.

    The dataset is first tagged with the EPSG:4326 CRS on a copy
    (``inplace=False``), then clipped with rioxarray against the
    GeoDataFrame's geometries, interpreted in the GeoDataFrame's own CRS.
    Cells outside the geometries are dropped.
    """
    # NOTE(review): assumes the dataset's longitudes use the same convention as
    # the polygon coordinates (e.g. -180..180) — confirm against the input files.
    tagged = ds.rio.write_crs("EPSG:4326", inplace=False)
    return tagged.rio.clip(
        africa_gdf.geometry,
        crs=africa_gdf.crs,
        drop=True,
    )

# Preprocess function for unit conversion
def preprocess(ds):
    """
    Return a unit-converted version of ``ds`` without modifying the input.

    - ``t2m``: 273.15 is subtracted and ``units`` is set to "Celsius"
      (assumes the input is in Kelvin — confirm against the source files).
    - ``msl``: divided by 100 and ``units`` is set to "hPa"
      (assumes the input is in Pascal — confirm against the source files).
    - ``tp``: removed from the dataset if present.

    Variables that are absent are skipped. Always use the returned dataset;
    the one passed in is left untouched, so calling this function again on
    the same input does not convert the units twice.
    """
    # Shallow copy: the converted variables are rebound on the copy only, so the
    # caller's object keeps its original values and attributes.
    ds = ds.copy()
    if "t2m" in ds:
        ds["t2m"] = ds["t2m"] - 273.15
        ds["t2m"].attrs["units"] = "Celsius"
    if "msl" in ds:
        ds["msl"] = ds["msl"] / 100.0
        ds["msl"].attrs["units"] = "hPa"
    if "tp" in ds:
        ds = ds.drop_vars("tp")
    return ds

# Africa outline read from GeoJSON and reprojected to EPSG:4326, the same CRS
# that subset_africa writes onto datasets before clipping.
africa_gdf = (
    gpd.read_file("Africa_outline.geojson")
    .to_crs("EPSG:4326")
)

In [None]:
# Compute MAE of IFS-HRES (apply needed subset function depending on required geographic region)

In [None]:
# Root directory where all monthly folders are stored
base_dir = "../Surface Variables/"

# Define monthly date range and variables
months = pd.date_range("2024-01-01", "2024-12-01", freq="MS")
variables = ["u10", "v10", "t2m", "msl", "q"]

# Output file for the IFS-HRES monthly MAE. The folder is created up front so the
# final write cannot fail after every month has already been processed.
output_path = "SouthernTemperate/PerMonth/SouthernTemperate_marsfc_MAE_monthly.nc"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Collect monthly MAE datasets
monthly_mae_datasets = []

for month in months:
    # Monthly folders and files are named after the first day of the month (YYYYMM01)
    folder_str = month.strftime("%Y%m01")
    forecast_path = os.path.join(base_dir, folder_str, f"{folder_str}_marsfc_sv_q.nc")
    truth_path = os.path.join(base_dir, folder_str, f"{folder_str}_era5_fc_sv_q.nc")
    print(f"starting for {folder_str}")

    # The context managers close both NetCDF files at the end of every iteration
    # instead of leaving two more handles open per month.
    with xr.open_dataset(forecast_path) as fc_file, \
            xr.open_dataset(truth_path) as era5_file:
        fc = preprocess(fc_file)
        era5 = preprocess(era5_file)
        # Give the truth data the same time dimension name as the forecast
        era5 = era5.rename({"valid_time": "time"})

        print(f"preprocessed and aligned for {folder_str}")

        # Subset to southern temperate
        fc = subset_southern_temperate(fc)
        era5 = subset_southern_temperate(era5)
        print(f"subset computed for {folder_str}")

        mae_dict = {var: compute_allpoints_alltime_mae(era5[var], fc[var]) for var in variables}
        # .load() guarantees the values are in memory before the files are closed
        mae_ds = xr.Dataset(mae_dict).load()

    # Tag the result with its calendar month number (1..12)
    mae_ds = mae_ds.expand_dims(month=[month.month])

    monthly_mae_datasets.append(mae_ds)
    print(f"computed monthly MAE for {folder_str}")

# Concatenate and save
annual_mae_ds = xr.concat(monthly_mae_datasets, dim="month")
# Month index 13 holds the unweighted mean of the monthly MAE values
mae_mean = annual_mae_ds.mean(dim="month")
mae_mean = mae_mean.expand_dims(month=[13])
annual_mae_ds_with_mean = xr.concat([annual_mae_ds, mae_mean], dim="month")

annual_mae_ds_with_mean.to_netcdf(output_path)
print(f"MAE calculations complete, including 13th-month average. Saved to '{output_path}'")

In [None]:
# Compute MAE of AIFS (apply needed subset function depending on required geographic region)

In [None]:
# Root directory where all monthly folders are stored
base_dir = "../Surface Variables/"

# Define monthly date range and variables.
# Only March-December 2024 are processed for this model, so the month-13 mean
# below averages 10 months (the other two models' cells average 12).
months = pd.date_range("2024-03-01", "2024-12-01", freq="MS")
variables = ["u10", "v10", "t2m", "msl", "q"]

# Output file for the AIFS monthly MAE. The folder is created up front so the
# final write cannot fail after every month has already been processed.
output_path = "SouthernTemperate/PerMonth/SouthernTemperate_marsai_MAE_monthly.nc"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Collect monthly MAE datasets
monthly_mae_datasets = []

for month in months:
    # Monthly folders and files are named after the first day of the month (YYYYMM01)
    folder_str = month.strftime("%Y%m01")
    forecast_path = os.path.join(base_dir, folder_str, f"{folder_str}_marsai_sv_q.nc")
    truth_path = os.path.join(base_dir, folder_str, f"{folder_str}_era5_gcai_sv_q.nc")
    print(f"starting for {folder_str}")

    # The context managers close both NetCDF files at the end of every iteration
    # instead of leaving two more handles open per month.
    with xr.open_dataset(forecast_path) as ai_file, \
            xr.open_dataset(truth_path) as era5_file:
        ai = preprocess(ai_file)
        era5 = preprocess(era5_file)
        # Give the truth data the same time dimension name as the forecast
        era5 = era5.rename({"valid_time": "time"})

        print(f"preprocessed and aligned for {folder_str}")

        # Subset to southern temperate
        ai = subset_southern_temperate(ai)
        era5 = subset_southern_temperate(era5)
        print(f"subset computed for {folder_str}")

        mae_dict = {var: compute_allpoints_alltime_mae(era5[var], ai[var]) for var in variables}
        # .load() guarantees the values are in memory before the files are closed
        mae_ds = xr.Dataset(mae_dict).load()

    # Tag the result with its calendar month number (3..12)
    mae_ds = mae_ds.expand_dims(month=[month.month])

    monthly_mae_datasets.append(mae_ds)
    print(f"computed monthly MAE for {folder_str}")

# Concatenate and save
annual_mae_ds = xr.concat(monthly_mae_datasets, dim="month")
# Month index 13 holds the unweighted mean of the available monthly MAE values
mae_mean = annual_mae_ds.mean(dim="month")
mae_mean = mae_mean.expand_dims(month=[13])
annual_mae_ds_with_mean = xr.concat([annual_mae_ds, mae_mean], dim="month")

annual_mae_ds_with_mean.to_netcdf(output_path)
print(f"MAE calculations complete, including 13th-month average. Saved to '{output_path}'")

In [None]:
# Compute MAE of GraphCast (apply needed subset function depending on required geographic region)

In [None]:
# Root directory where all monthly folders are stored
base_dir = "../Surface Variables/"

# Define monthly date range and variables
months = pd.date_range("2024-01-01", "2024-12-01", freq="MS")
variables = ["u10", "v10", "t2m", "msl", "q"]

# Output file for the GraphCast monthly MAE. The folder is created up front so the
# final write cannot fail after every month has already been processed.
output_path = "SouthernTemperate/PerMonth/SouthernTemperate_gc_MAE_monthly.nc"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Collect monthly MAE datasets
monthly_mae_datasets = []

for month in months:
    # Monthly folders and files are named after the first day of the month (YYYYMM01)
    folder_str = month.strftime("%Y%m01")
    forecast_path = os.path.join(base_dir, folder_str, f"{folder_str}_gc_sv_q.nc")
    truth_path = os.path.join(base_dir, folder_str, f"{folder_str}_era5_gcai_sv_q.nc")
    print(f"starting for {folder_str}")

    # The context managers close both NetCDF files at the end of every iteration
    # instead of leaving two more handles open per month.
    with xr.open_dataset(forecast_path) as gc_file, \
            xr.open_dataset(truth_path) as era5_file:
        gc = preprocess(gc_file)
        era5 = preprocess(era5_file)
        era5 = era5.rename({"valid_time": "time"})

        # Re-index the forecast by valid time: any existing "time" coordinate is
        # removed first so that "valid_time" can take over that name, then the
        # "step" dimension is swapped for "valid_time".
        if "time" in gc.coords:
            gc = gc.drop_vars("time")
        gc = gc.swap_dims({"step": "valid_time"})
        gc = gc.rename({"valid_time": "time"})

        print(f"preprocessed and aligned for {folder_str}")

        # Subset to southern temperate
        gc = subset_southern_temperate(gc)
        era5 = subset_southern_temperate(era5)
        print(f"subset computed for {folder_str}")

        mae_dict = {var: compute_allpoints_alltime_mae(era5[var], gc[var]) for var in variables}
        # .load() guarantees the values are in memory before the files are closed
        mae_ds = xr.Dataset(mae_dict).load()

    # Tag the result with its calendar month number (1..12)
    mae_ds = mae_ds.expand_dims(month=[month.month])

    monthly_mae_datasets.append(mae_ds)
    print(f"computed monthly MAE for {folder_str}")

# Concatenate and save
annual_mae_ds = xr.concat(monthly_mae_datasets, dim="month")
# Month index 13 holds the unweighted mean of the monthly MAE values
mae_mean = annual_mae_ds.mean(dim="month")
mae_mean = mae_mean.expand_dims(month=[13])
annual_mae_ds_with_mean = xr.concat([annual_mae_ds, mae_mean], dim="month")

annual_mae_ds_with_mean.to_netcdf(output_path)
print(f"MAE calculations complete, including 13th-month average. Saved to '{output_path}'")

In [None]:
# Merge all models for MAE together

In [None]:
# Load datasets.
# xr.load_dataset reads each file fully into memory and closes it right away, so
# no handle stays open on files that the per-model cells overwrite when re-run.
airmae = xr.load_dataset("SouthernTemperate/PerMonth/SouthernTemperate_marsai_MAE_monthly.nc")
gcmae = xr.load_dataset("SouthernTemperate/PerMonth/SouthernTemperate_gc_MAE_monthly.nc")
fcmae = xr.load_dataset("SouthernTemperate/PerMonth/SouthernTemperate_marsfc_MAE_monthly.nc")

# Ensure all datasets have the full month range 1 to 13
# (1..12 are calendar months, 13 is the mean over the available months)
full_months = np.arange(1, 14)

# Reindex to include all months, filling missing with NaN
airmae = airmae.reindex(month=full_months)
gcmae = gcmae.reindex(month=full_months)
fcmae = fcmae.reindex(month=full_months)

# Drop any unrelated extra coordinates to match structure
drop_coords = ["meanSea", "surface", "isobaricInhPa", "number"]
gcmae = gcmae.drop_vars([c for c in drop_coords if c in gcmae])
airmae = airmae.drop_vars([c for c in drop_coords if c in airmae])
fcmae = fcmae.drop_vars([c for c in drop_coords if c in fcmae])

# Stack them into a new 'model' dimension
combined = xr.concat([airmae, gcmae, fcmae], dim="model")

# Add model labels (same order as the list passed to xr.concat above)
combined = combined.assign_coords(model=["marsai", "gc", "marsfc"])

# Save the merged dataset (optional)
combined.to_netcdf("SouthernTemperate/PerMonth/SouthernTemperate_MAE_monthly_allmodels.nc")

# Print confirmation
print("Merged dataset created with shape:", combined.sizes)
print("Models:", combined.model.values)