In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import os
import geopandas as gpd
import rioxarray

In [None]:
# Compute the per-month RMSE of each forecast, averaged over all time steps, latitudes, and longitudes

In [None]:
# RMSE function
def compute_allpoints_alltime_rmse(truth, pred):
    """Return the root-mean-square error between ``truth`` and ``pred``.

    The squared difference is averaged jointly over the ``time``,
    ``latitude`` and ``longitude`` dimensions, so the difference of the two
    inputs must carry all three. Every grid point contributes with equal
    weight; no area (latitude) weighting is applied.
    """
    squared_error = (truth - pred) ** 2
    mean_squared_error = squared_error.mean(dim=["time", "latitude", "longitude"])
    return np.sqrt(mean_squared_error)

# Preprocess function for unit conversion
def preprocess(ds):
    """Convert units to the conventions used in this notebook and drop ``tp``.

    - ``t2m`` (if present) has 273.15 subtracted and its ``units`` attribute
      set to "Celsius" (i.e. the input is assumed to be in Kelvin).
    - ``msl`` (if present) is divided by 100 and its ``units`` attribute set
      to "hPa" (i.e. the input is assumed to be in Pa).
    - ``tp`` (if present) is removed.

    The conversion is applied to a shallow copy, so the dataset passed in is
    never modified. Previously the input was mutated in place, and whether
    the returned object was the same as the input depended on ``tp`` being
    present.

    Returns the converted dataset.
    """
    # Shallow copy: cheap, and re-assigning variables below then only affects
    # the copy rather than the caller's dataset.
    ds = ds.copy()
    if "t2m" in ds:
        ds["t2m"] = ds["t2m"] - 273.15
        ds["t2m"].attrs["units"] = "Celsius"
    if "msl" in ds:
        ds["msl"] = ds["msl"] / 100.0
        ds["msl"].attrs["units"] = "hPa"
    if "tp" in ds:
        ds = ds.drop_vars("tp")
    return ds

def subset_northern_temperate(ds):
    """Keep only the Northern Temperate Zone: 35°N to 60°N (bounds inclusive).

    Latitudes outside the band are removed via ``ds.where(..., drop=True)``.
    """
    lat = ds.latitude
    in_band = (lat >= 35) & (lat <= 60)
    return ds.where(in_band, drop=True)

def subset_southern_temperate(ds):
    """Keep only the Southern Temperate Zone: 35°S to 60°S (bounds inclusive).

    In signed latitude this is -60 <= latitude <= -35. Latitudes outside the
    band are removed via ``ds.where(..., drop=True)``.
    """
    lat = ds.latitude
    in_band = (lat <= -35) & (lat >= -60)
    return ds.where(in_band, drop=True)

# Function to subset to subpolar and polar regions
def subset_polar_regions(ds):
    """Keep only the (sub-)polar zones of both hemispheres.

    Retains latitudes at or poleward of 60°, i.e. latitude >= 60 or
    latitude <= -60; everything in between is removed via
    ``ds.where(..., drop=True)``.
    """
    lat = ds.latitude
    is_polar = (lat >= 60) | (lat <= -60)
    return ds.where(is_polar, drop=True)

def subset_tropics(ds):
    """Keep only the Tropics: 23.5°S to 23.5°N (bounds inclusive).

    Latitudes outside the band are removed via ``ds.where(..., drop=True)``.
    """
    lat = ds.latitude
    in_tropics = (lat >= -23.5) & (lat <= 23.5)
    return ds.where(in_tropics, drop=True)

def subset_subtropics(ds):
    """
    Keep only the subtropical belts of both hemispheres (bounds inclusive):
    - Northern Subtropics: 23.5°N to 35°N
    - Southern Subtropics: 23.5°S to 35°S

    Latitudes outside both belts are removed via ``ds.where(..., drop=True)``.
    """
    lat = ds.latitude
    northern_belt = (lat >= 23.5) & (lat <= 35)
    southern_belt = (lat <= -23.5) & (lat >= -35)
    return ds.where(northern_belt | southern_belt, drop=True)

def subset_africa(ds, africa_gdf):
    """
    Clip an xarray dataset to the geometry held in ``africa_gdf``.

    The dataset is first tagged with CRS "EPSG:4326" (on a copy, since
    ``inplace=False``) and then clipped with the GeoDataFrame's geometry and
    CRS; data outside the clipped extent is dropped (``drop=True``).
    """
    georeferenced = ds.rio.write_crs("EPSG:4326", inplace=False)
    clipper = georeferenced.rio
    return clipper.clip(africa_gdf.geometry, africa_gdf.crs, drop=True)

# Load the Africa outline once and express it in EPSG:4326, the same CRS that
# subset_africa writes onto the dataset before clipping.
_africa_outline = gpd.read_file("Africa_outline.geojson")
africa_gdf = _africa_outline.to_crs("EPSG:4326")

In [None]:
# Compute the RMSE of IFS-HRES (apply the subset function that matches the required geographic region)

In [None]:
# Root directory where all monthly folders are stored
base_dir = "../Surface Variables/"

# Region to evaluate. Change these two lines together to switch region; the
# subset calls and the output path below follow automatically.
# For Africa use: subset_fn = lambda ds: subset_africa(ds, africa_gdf)
subset_fn = subset_southern_temperate
region_name = "SouthernTemperate"

# Define monthly date range and variables
months = pd.date_range("2024-01-01", "2024-12-01", freq="MS")
variables = ["u10", "v10", "t2m", "msl", "q"]

# Create the output folder up front so a missing directory cannot make
# to_netcdf fail after all months have already been processed.
output_path = f"{region_name}/PerMonth/{region_name}_marsfc_RMSE_monthly.nc"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Collect monthly RMSE datasets
monthly_rmse_datasets = []

for month in months:
    folder_str = month.strftime("%Y%m01")
    forecast_path = os.path.join(base_dir, folder_str, f"{folder_str}_marsfc_sv_q.nc")
    truth_path = os.path.join(base_dir, folder_str, f"{folder_str}_era5_fc_sv_q.nc")
    print(f" Start for {folder_str}")

    # The context managers close both NetCDF files once the month is done,
    # so file handles do not accumulate over the loop.
    with xr.open_dataset(forecast_path) as fc_raw, xr.open_dataset(truth_path) as era5_raw:
        fc = preprocess(fc_raw)
        # ERA5 stores its time axis as "valid_time"; rename it so both
        # datasets share the "time" dimension the RMSE function averages over.
        era5 = preprocess(era5_raw).rename({"valid_time": "time"})
        print(f"preprocessed for {folder_str}")

        # Subset to the selected region
        fc = subset_fn(fc)
        era5 = subset_fn(era5)
        print(f"subset computed for {folder_str}")

        rmse_dict = {var: compute_allpoints_alltime_rmse(era5[var], fc[var]) for var in variables}
        rmse_ds = xr.Dataset(rmse_dict)
        # The month number (1 to 12) is used as the label instead of a datetime.
        # .load() makes sure the values are in memory before the files close.
        rmse_ds = rmse_ds.expand_dims(month=[month.month]).load()

    monthly_rmse_datasets.append(rmse_ds)

    print(f"computed monthly rmse for {folder_str}\n")

# Concatenate all monthly datasets
annual_rmse_ds = xr.concat(monthly_rmse_datasets, dim="month")

# Average the monthly RMSE values over all processed months. Note this is the
# mean of the monthly RMSEs, not an RMSE computed over the pooled year.
rmse_mean = annual_rmse_ds.mean(dim="month")
rmse_mean = rmse_mean.expand_dims(month=[13])  # Use a fake "13th" month

# Concatenate the mean as the 13th entry
annual_rmse_ds_with_mean = xr.concat([annual_rmse_ds, rmse_mean], dim="month")

# Save the result
annual_rmse_ds_with_mean.to_netcdf(output_path)
print(f"RMSE calculations complete, including 13th-month average. Saved to '{output_path}'")

In [None]:
# Compute the RMSE of AIFS (apply the subset function that matches the required geographic region)

In [None]:
# Root directory where all monthly folders are stored
base_dir = "../Surface Variables/"

# Region to evaluate. Change these two lines together to switch region; the
# subset calls and the output path below follow automatically.
# For Africa use: subset_fn = lambda ds: subset_africa(ds, africa_gdf)
subset_fn = subset_southern_temperate
region_name = "SouthernTemperate"

# Define monthly date range and variables.
# This range starts in March, so only 10 months (3 to 12) are processed.
months = pd.date_range("2024-03-01", "2024-12-01", freq="MS")
variables = ["u10", "v10", "t2m", "msl", "q"]

# Create the output folder up front so a missing directory cannot make
# to_netcdf fail after all months have already been processed.
output_path = f"{region_name}/PerMonth/{region_name}_marsai_RMSE_monthly.nc"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Collect monthly RMSE datasets
monthly_rmse_datasets = []

for month in months:
    folder_str = month.strftime("%Y%m01")
    forecast_path = os.path.join(base_dir, folder_str, f"{folder_str}_marsai_sv_q.nc")
    truth_path = os.path.join(base_dir, folder_str, f"{folder_str}_era5_gcai_sv_q.nc")
    print(f"starting for {folder_str}")

    # The context managers close both NetCDF files once the month is done,
    # so file handles do not accumulate over the loop.
    with xr.open_dataset(forecast_path) as ai_raw, xr.open_dataset(truth_path) as era5_raw:
        ai = preprocess(ai_raw)
        # ERA5 stores its time axis as "valid_time"; rename it so both
        # datasets share the "time" dimension the RMSE function averages over.
        era5 = preprocess(era5_raw).rename({"valid_time": "time"})

        print(f"preprocessed, renamed in era5 for {folder_str}")

        # Subset to the selected region
        ai = subset_fn(ai)
        era5 = subset_fn(era5)
        print(f"subset computed for {folder_str}")

        rmse_dict = {var: compute_allpoints_alltime_rmse(era5[var], ai[var]) for var in variables}
        rmse_ds = xr.Dataset(rmse_dict)
        # The month number (3 to 12) is used as the label instead of a datetime.
        # .load() makes sure the values are in memory before the files close.
        rmse_ds = rmse_ds.expand_dims(month=[month.month]).load()

    monthly_rmse_datasets.append(rmse_ds)

    print(f"computed monthly rmse for {folder_str}")

# Concatenate all monthly datasets
annual_rmse_ds = xr.concat(monthly_rmse_datasets, dim="month")

# Average the monthly RMSE values over the processed months (March to
# December, i.e. 10 months, not 12). Note this is the mean of the monthly
# RMSEs, not an RMSE computed over the pooled period.
rmse_mean = annual_rmse_ds.mean(dim="month")
rmse_mean = rmse_mean.expand_dims(month=[13])  # Use a fake "13th" month

# Concatenate the mean as the entry labelled 13
annual_rmse_ds_with_mean = xr.concat([annual_rmse_ds, rmse_mean], dim="month")

# Save the result
annual_rmse_ds_with_mean.to_netcdf(output_path)
print(f"RMSE calculations complete, including 13th-month average. Saved to '{output_path}'")

In [None]:
# Compute the RMSE of GraphCast (apply the subset function that matches the required geographic region)

In [None]:
# Root directory where all monthly folders are stored
base_dir = "../Surface Variables/"

# Region to evaluate. Change these two lines together to switch region; the
# subset calls and the output path below follow automatically, e.g.
#   subset_fn = subset_polar_regions
#   subset_fn = lambda ds: subset_africa(ds, africa_gdf)
subset_fn = subset_southern_temperate
region_name = "SouthernTemperate"

# Define monthly date range and variables
months = pd.date_range("2024-01-01", "2024-12-01", freq="MS")
variables = ["u10", "v10", "t2m", "msl", "q"]

# Create the output folder up front so a missing directory cannot make
# to_netcdf fail after all months have already been processed.
output_path = f"{region_name}/PerMonth/{region_name}_gc_RMSE_monthly.nc"
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Collect monthly RMSE datasets
monthly_rmse_datasets = []

for month in months:
    folder_str = month.strftime("%Y%m01")
    forecast_path = os.path.join(base_dir, folder_str, f"{folder_str}_gc_sv_q.nc")
    truth_path = os.path.join(base_dir, folder_str, f"{folder_str}_era5_gcai_sv_q.nc")
    print(f"starting for {folder_str}")

    # The context managers close both NetCDF files once the month is done,
    # so file handles do not accumulate over the loop.
    with xr.open_dataset(forecast_path) as gc_raw, xr.open_dataset(truth_path) as era5_raw:
        gc = preprocess(gc_raw)
        era5 = preprocess(era5_raw).rename({"valid_time": "time"})

        # Drop the existing "time" coordinate to avoid a name clash with the
        # rename below (presumably the forecast initialisation time; confirm).
        if "time" in gc.coords:
            gc = gc.drop_vars("time")

        # Make "valid_time" the dimension in place of "step", then rename it
        # to "time" so it matches the ERA5 dataset.
        gc = gc.swap_dims({"step": "valid_time"})
        gc = gc.rename({"valid_time": "time"})

        print(f"preprocessed, renamed in era5, swapped and renamed in gc for {folder_str}")

        # Subset to the selected region
        gc = subset_fn(gc)
        era5 = subset_fn(era5)
        print(f"subset computed for {folder_str}")

        rmse_dict = {var: compute_allpoints_alltime_rmse(era5[var], gc[var]) for var in variables}
        rmse_ds = xr.Dataset(rmse_dict)
        # The month number (1 to 12) is used as the label instead of a datetime.
        # .load() makes sure the values are in memory before the files close.
        rmse_ds = rmse_ds.expand_dims(month=[month.month]).load()

    monthly_rmse_datasets.append(rmse_ds)

    print(f"computed monthly rmse for {folder_str}")

# Concatenate all monthly datasets
annual_rmse_ds = xr.concat(monthly_rmse_datasets, dim="month")

# Average the monthly RMSE values over all processed months. Note this is the
# mean of the monthly RMSEs, not an RMSE computed over the pooled year.
rmse_mean = annual_rmse_ds.mean(dim="month")
rmse_mean = rmse_mean.expand_dims(month=[13])  # Use a fake "13th" month

# Concatenate the mean as the 13th entry
annual_rmse_ds_with_mean = xr.concat([annual_rmse_ds, rmse_mean], dim="month")

# Save the result
annual_rmse_ds_with_mean.to_netcdf(output_path)

print(f"RMSE calculations complete, including 13th-month average. Saved to '{output_path}'")