In [1]:
import xarray as xr
import numpy as np

In [None]:
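# Notebook to compute the Mean Bias Error (MBE) of IFS-HRES, AIFS, and GraphCast per grid cell and per lead time,
# averaged over the monthly 2024 forecasts (all 12 months for IFS-HRES and GraphCast; March-December only for AIFS)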

In [2]:
# Preprocess function to convert units
def preprocess(ds):
    """Convert units of selected surface variables and discard precipitation.

    * ``t2m``: 273.15 is subtracted and the ``units`` attribute set to
      ``"Celsius"`` (presumably the input is in Kelvin).
    * ``msl``: divided by 100 and the ``units`` attribute set to ``"hPa"``
      (presumably the input is in Pa).
    * ``tp``: removed via ``drop_vars`` when present.

    Variables that are absent are skipped. Note that the ``t2m``/``msl``
    conversions are written back into ``ds`` itself, so the caller's object
    is modified; only the ``tp`` removal produces a new object.

    Returns the converted dataset.
    """
    # (variable name, conversion, unit label) applied in this order.
    conversions = (
        ("t2m", lambda field: field - 273.15, "Celsius"),
        ("msl", lambda field: field / 100.0, "hPa"),
    )
    for name, convert, unit in conversions:
        if name not in ds:
            continue
        ds[name] = convert(ds[name])
        ds[name].attrs["units"] = unit

    return ds.drop_vars("tp") if "tp" in ds else ds

In [None]:
# IFS-HRES - Compute the Mean Bias Error per lead time per grid cell across all 12 months

In [None]:
# One IFS-HRES forecast file and one ERA5 truth file per month of 2024.
# The directory and file prefix are the 1st of each month (YYYYMM01), so
# generating them in month order gives the same list as sorting the paths.
forecast_files = [
    f"../../Surface Variables/2024{month:02d}01/2024{month:02d}01_marsfc_sv_q.nc"
    for month in range(1, 13)
]
truth_files = [
    f"../../Surface Variables/2024{month:02d}01/2024{month:02d}01_era5_fc_sv_q.nc"
    for month in range(1, 13)
]

# zip() stops at the shorter list, which would silently drop months;
# require a one-to-one forecast/truth pairing before doing any work.
if len(forecast_files) != len(truth_files):
    raise ValueError(
        f"{len(forecast_files)} forecast files but {len(truth_files)} truth files"
    )

variables = ['t2m', 'q', 'u10', 'v10', 'msl']
results = {}

for var in variables:
    print(f"starting for {var}")
    errors = []
    for f_path, t_path in zip(forecast_files, truth_files):
        print(f"starting for {f_path}")
        # Open forecast and truth files. The context managers close both
        # handles as soon as this month's error field is in memory, instead
        # of leaving every opened file dangling for the life of the kernel.
        with xr.open_dataset(f_path) as raw_f, xr.open_dataset(t_path) as raw_t:
            ds_f = preprocess(raw_f)
            ds_t = preprocess(raw_t)
            print("preprocessing done")

            # The truth files name their time axis "valid_time".
            ds_t = ds_t.rename({"valid_time": "time"})

            # Replace datetime stamps with 0..n-1 so forecast/truth (and,
            # later, the different months) are matched by position along
            # the time axis rather than by calendar date.
            ds_f = ds_f.assign_coords(time=np.arange(len(ds_f.time)).astype("float64"))
            ds_t = ds_t.assign_coords(time=np.arange(len(ds_t.time)).astype("float64"))

            forecast, truth = ds_f[var], ds_t[var]
            # Default inner join: only coordinates present in both survive.
            forecast, truth = xr.align(forecast, truth)
            if forecast.size == 0:
                # Mismatched grids/time axes would otherwise yield an empty
                # bias field without any error.
                raise ValueError(
                    f"no overlapping coordinates for {var} between {f_path} and {t_path}"
                )

            # .load() pulls data and any lazily-read coordinates into memory
            # so the result stays usable after the files are closed.
            errors.append((forecast - truth).load())
        print("bias computed")

    # Stack the monthly error fields along a new "month" axis and average
    # over it to get the mean bias per grid cell and time step.
    err_stack = xr.concat(errors, dim='month')
    bias = err_stack.mean(dim='month')
    results[var] = bias
    print("mean bias computed")

xr.Dataset(results).to_netcdf('Global_marsfc_MBE_MAP_leadtimes.nc')
print("Bias dataset saved as 'Global_marsfc_MBE_MAP_leadtimes.nc'")

In [None]:
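# AIFS - Compute the Mean Bias Error per lead time per grid cell (March-December 2024 only; January and February are excluded below)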

In [3]:
# One AIFS forecast file and one ERA5 truth file per month, named by the
# 1st of each month (YYYYMM01); month order equals sorted path order.
# NOTE(review): January and February are deliberately left out here, so the
# result is a 10-month mean, not 12 - confirm whether AIFS files for those
# months are unavailable and extend the range to (1, 13) if they exist.
forecast_files = [
    f"../../Surface Variables/2024{month:02d}01/2024{month:02d}01_marsai_sv_q.nc"
    for month in range(3, 13)
]
truth_files = [
    f"../../Surface Variables/2024{month:02d}01/2024{month:02d}01_era5_gcai_sv_q.nc"
    for month in range(3, 13)
]

# zip() stops at the shorter list, which would silently drop months;
# require a one-to-one forecast/truth pairing before doing any work.
if len(forecast_files) != len(truth_files):
    raise ValueError(
        f"{len(forecast_files)} forecast files but {len(truth_files)} truth files"
    )

variables = ['t2m', 'q', 'u10', 'v10', 'msl']
results = {}

for var in variables:
    print(f"starting for {var}")
    errors = []
    for f_path, t_path in zip(forecast_files, truth_files):
        print(f"starting for {f_path}")
        # Open forecast and truth files. The context managers close both
        # handles as soon as this month's error field is in memory, instead
        # of leaving every opened file dangling for the life of the kernel.
        with xr.open_dataset(f_path) as raw_f, xr.open_dataset(t_path) as raw_t:
            ds_f = preprocess(raw_f)
            ds_t = preprocess(raw_t)
            print("preprocessing done")

            # The truth files name their time axis "valid_time".
            ds_t = ds_t.rename({"valid_time": "time"})

            # Replace datetime stamps with 0..n-1 so forecast/truth (and,
            # later, the different months) are matched by position along
            # the time axis rather than by calendar date.
            ds_f = ds_f.assign_coords(time=np.arange(len(ds_f.time)).astype("float64"))
            ds_t = ds_t.assign_coords(time=np.arange(len(ds_t.time)).astype("float64"))

            forecast, truth = ds_f[var], ds_t[var]
            # Default inner join: only coordinates present in both survive.
            forecast, truth = xr.align(forecast, truth)
            if forecast.size == 0:
                # Mismatched grids/time axes would otherwise yield an empty
                # bias field without any error.
                raise ValueError(
                    f"no overlapping coordinates for {var} between {f_path} and {t_path}"
                )

            # .load() pulls data and any lazily-read coordinates into memory
            # so the result stays usable after the files are closed.
            errors.append((forecast - truth).load())
        print("bias computed")

    # Stack the monthly error fields along a new "month" axis and average
    # over it to get the mean bias per grid cell and time step.
    err_stack = xr.concat(errors, dim='month')
    bias = err_stack.mean(dim='month')
    results[var] = bias
    print("mean bias computed")

xr.Dataset(results).to_netcdf('Global_marsai_MBE_MAP_leadtimes.nc')
print("Bias dataset saved as 'Global_marsai_MBE_MAP_leadtimes.nc'")

starting for t2m
starting for ../../Surface Variables/20240301/20240301_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240401/20240401_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240501/20240501_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240601/20240601_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240701/20240701_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240801/20240801_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240901/20240901_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20241001/20241001_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20241101/20241101_marsai_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20241201/20241201_m

In [None]:
# GraphCast - Compute the Mean Bias Error per lead time per grid cell across all 12 months

In [4]:
# One GraphCast forecast file and one ERA5 truth file per month of 2024.
# The directory and file prefix are the 1st of each month (YYYYMM01), so
# generating them in month order gives the same list as sorting the paths.
forecast_files = [
    f"../../Surface Variables/2024{month:02d}01/2024{month:02d}01_gc_sv_q.nc"
    for month in range(1, 13)
]
truth_files = [
    f"../../Surface Variables/2024{month:02d}01/2024{month:02d}01_era5_gcai_sv_q.nc"
    for month in range(1, 13)
]

# zip() stops at the shorter list, which would silently drop months;
# require a one-to-one forecast/truth pairing before doing any work.
if len(forecast_files) != len(truth_files):
    raise ValueError(
        f"{len(forecast_files)} forecast files but {len(truth_files)} truth files"
    )

variables = ['t2m', 'q', 'u10', 'v10', 'msl']
results = {}

for var in variables:
    print(f"starting for {var}")
    errors = []
    for f_path, t_path in zip(forecast_files, truth_files):
        print(f"starting for {f_path}")
        # Open forecast and truth files. The context managers close both
        # handles as soon as this month's error field is in memory, instead
        # of leaving every opened file dangling for the life of the kernel.
        with xr.open_dataset(f_path) as raw_f, xr.open_dataset(t_path) as raw_t:
            ds_f = preprocess(raw_f)
            ds_t = preprocess(raw_t)
            print("preprocessing done")

            # The truth files name their time axis "valid_time".
            ds_t = ds_t.rename({"valid_time": "time"})

            # Drop the existing "time" coordinate to avoid conflicts with
            # the rename of "valid_time" to "time" below.
            if "time" in ds_f.coords:
                ds_f = ds_f.drop_vars("time")

            # The forecast is dimensioned by "step"; make "valid_time" the
            # dimension instead and call it "time" to match the truth data.
            ds_f = ds_f.swap_dims({"step": "valid_time"})
            ds_f = ds_f.rename({"valid_time": "time"})

            # Replace datetime stamps with 0..n-1 so forecast/truth (and,
            # later, the different months) are matched by position along
            # the time axis rather than by calendar date.
            ds_f = ds_f.assign_coords(time=np.arange(len(ds_f.time)).astype("float64"))
            ds_t = ds_t.assign_coords(time=np.arange(len(ds_t.time)).astype("float64"))

            forecast, truth = ds_f[var], ds_t[var]
            # Default inner join: only coordinates present in both survive.
            forecast, truth = xr.align(forecast, truth)
            if forecast.size == 0:
                # Mismatched grids/time axes would otherwise yield an empty
                # bias field without any error.
                raise ValueError(
                    f"no overlapping coordinates for {var} between {f_path} and {t_path}"
                )

            # .load() pulls data and any lazily-read coordinates into memory
            # so the result stays usable after the files are closed.
            errors.append((forecast - truth).load())
        print("bias computed")

    # Stack the monthly error fields along a new "month" axis and average
    # over it to get the mean bias per grid cell and time step.
    err_stack = xr.concat(errors, dim='month')
    bias = err_stack.mean(dim='month')
    results[var] = bias
    print("mean bias computed")

xr.Dataset(results).to_netcdf('Global_gc_MBE_MAP_leadtimes.nc')
print("Bias dataset saved as 'Global_gc_MBE_MAP_leadtimes.nc'")

starting for t2m
starting for ../../Surface Variables/20240101/20240101_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240201/20240201_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240301/20240301_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240401/20240401_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240501/20240501_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240601/20240601_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240701/20240701_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240801/20240801_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20240901/20240901_gc_sv_q.nc
preprocessing done
bias computed
starting for ../../Surface Variables/20241001/20241001_gc_sv_q.nc
preprocessing done
bias co