In [None]:
import xarray as xr
import numpy as np

In [None]:
# Merge the surface variables (t2m, u10, v10, msl) with the pressure-level variable (q) for GraphCast, IFS-HRES, AIFS, and ERA5.
# The DataProcessing_TimeFix script must be run before this one.
# Each merging cell also checks that the longitudes are in the required -180 to 180 range and converts them if they are not.
# Each merging cell also changes the unit of specific humidity (q) from kg/kg to g/kg.

In [None]:
# Merge GraphCast surface variables with q
# Run this once for every month, changing the date in the file paths of the next cell

In [None]:
# Open the two GraphCast inputs: surface variables (gc) and specific humidity (gcq)
gc = xr.open_dataset(
    "Surface Variables/20241201/20241201_gc_sv.nc"
)
gcq = xr.open_dataset(
    "Pressure Variables/20241201/20241201_gc_q.nc"
)

# --- Step 1: Ensure longitudes are in [-180, 180] ---
def to_180(longitudes):
    """Wrap longitudes given in the 0..360 convention into the -180..180 one.

    When no value is greater than 180 the input object is returned untouched.
    Otherwise every value is mapped through ``((lon + 180) % 360) - 180``
    (so 180 itself becomes -180). A status message is printed in both cases.
    """
    needs_wrap = np.any(longitudes > 180)
    if not needs_wrap:
        print("Longitudes already in [-180, 180] range.")
        return longitudes

    print("Converting longitudes to [-180, 180] range.")
    shifted = (longitudes + 180) % 360
    return shifted - 180

# Wrap both longitude axes into the -180..180 convention and sort them ascending
# (q file first, then the surface file).
gcq = gcq.assign_coords(longitude=to_180(gcq["longitude"]))
gcq = gcq.sortby("longitude")
gc = gc.assign_coords(longitude=to_180(gc["longitude"]))
gc = gc.sortby("longitude")

# Give both datasets the same (step, latitude, longitude) dimension order,
# then interpolate q onto the longitude axis of the surface dataset.
dim_order = ("step", "latitude", "longitude")
gc = gc.transpose(*dim_order)
q_aligned = gcq["q"].transpose(*dim_order)
q_reindexed = q_aligned.interp(longitude=gc["longitude"])

# Rescale specific humidity from kg/kg to g/kg: carry over the metadata of the
# interpolated field and relabel the units.
q_converted = q_reindexed * 1000
q_converted.attrs = {**q_reindexed.attrs, "units": "g/kg"}

# Attach q to the GraphCast surface dataset and write the merged file
gc["q"] = q_converted
gc.to_netcdf("Surface Variables/20241201/20241201_gc_sv_q.nc")

In [None]:
# Merge AIFS surface variables with q
# Run this once for every month, changing the date in the file paths of the next cell

In [None]:
# Open the two AIFS inputs: surface variables (aisv) and specific humidity (aiq)
aisv = xr.open_dataset(
    "Surface Variables/20240301/20240301_marsai_sv.nc"
)
aiq = xr.open_dataset(
    "Pressure Variables/20240301/20240301_marsai_q.nc"
)

# --- Step 1: Ensure longitudes are in [-180, 180] ---
def to_180(longitudes):
    """Wrap longitudes given in the 0..360 convention into the -180..180 one.

    When no value is greater than 180 the input object is returned untouched.
    Otherwise every value is mapped through ``((lon + 180) % 360) - 180``
    (so 180 itself becomes -180). A status message is printed in both cases.
    """
    needs_wrap = np.any(longitudes > 180)
    if not needs_wrap:
        print("Longitudes already in [-180, 180] range.")
        return longitudes

    print("Converting longitudes to [-180, 180] range.")
    shifted = (longitudes + 180) % 360
    return shifted - 180

# Wrap both longitude axes into the -180..180 convention and sort them ascending
aiq = aiq.assign_coords(longitude=to_180(aiq.longitude)).sortby("longitude")
aisv = aisv.assign_coords(longitude=to_180(aisv.longitude)).sortby("longitude")

# --- Step 2: Align longitudes (interpolate if necessary) ---
# Compare the axis lengths before calling np.allclose: for grids of different
# size np.allclose raises a ValueError (shapes cannot be broadcast) or, for a
# length-1 axis, silently broadcasts. A size mismatch always means "interpolate".
q_lon = aiq.longitude.values
sv_lon = aisv.longitude.values
lon_grids_match = q_lon.shape == sv_lon.shape and np.allclose(q_lon, sv_lon)

if not lon_grids_match:
    print("Interpolating aiq to match aisv longitude grid.")
    q_interp = aiq["q"].interp(longitude=aisv.longitude).transpose("time", "latitude", "longitude")
else:
    print("Longitudes already aligned. No interpolation needed.")
    q_interp = aiq["q"].transpose("time", "latitude", "longitude")

aisv = aisv.transpose("time", "latitude", "longitude")

# NOTE(review): only the longitude axis is reconciled here; latitude and time
# are assumed to carry identical labels in both files. xarray aligns on
# assignment, so mismatching labels would presumably show up as NaN rather
# than as an error — confirm for the input files.

# --- Step 3: Convert units from kg/kg to g/kg ---
q_g_per_kg = q_interp * 1000
q_g_per_kg.attrs = q_interp.attrs.copy()  # keep the metadata of the source field
q_g_per_kg.attrs["units"] = "g/kg"        # and relabel the units

# --- Step 4: Add q to aisv dataset ---
aisv["q"] = q_g_per_kg

# Save to file
aisv.to_netcdf("Surface Variables/20240301/20240301_marsai_sv_q.nc")

In [None]:
# Merge IFS-HRES surface variables with q
# Run this once for every month, changing the date in the file paths of the next cell

In [None]:
# Open the two IFS-HRES inputs: surface variables (fcsv) and specific humidity (fcq)
fcsv = xr.open_dataset(
    "Surface Variables/20241101/20241101_marsfc_sv.nc"
)
fcq = xr.open_dataset(
    "Pressure Variables/20241101/20241101_marsfc_q.nc"
)

# --- Step 1: Ensure longitudes are in [-180, 180] ---
def to_180(longitudes):
    """Wrap longitudes given in the 0..360 convention into the -180..180 one.

    When no value is greater than 180 the input object is returned untouched.
    Otherwise every value is mapped through ``((lon + 180) % 360) - 180``
    (so 180 itself becomes -180). A status message is printed in both cases.
    """
    needs_wrap = np.any(longitudes > 180)
    if not needs_wrap:
        print("Longitudes already in [-180, 180] range.")
        return longitudes

    print("Converting longitudes to [-180, 180] range.")
    shifted = (longitudes + 180) % 360
    return shifted - 180

# Wrap both longitude axes into the -180..180 convention and sort them ascending
fcq = fcq.assign_coords(longitude=to_180(fcq.longitude)).sortby("longitude")
fcsv = fcsv.assign_coords(longitude=to_180(fcsv.longitude)).sortby("longitude")

# --- Step 2: Align longitudes (interpolate if necessary) ---
# Compare the axis lengths before calling np.allclose: for grids of different
# size np.allclose raises a ValueError (shapes cannot be broadcast) or, for a
# length-1 axis, silently broadcasts. A size mismatch always means "interpolate".
q_lon = fcq.longitude.values
sv_lon = fcsv.longitude.values
lon_grids_match = q_lon.shape == sv_lon.shape and np.allclose(q_lon, sv_lon)

if not lon_grids_match:
    # The message names the dataset actually used as the target grid (fcsv).
    print("Interpolating fcq to match fcsv longitude grid.")
    q_interp = fcq["q"].interp(longitude=fcsv.longitude).transpose("time", "latitude", "longitude")
else:
    print("Longitudes already aligned. No interpolation needed.")
    q_interp = fcq["q"].transpose("time", "latitude", "longitude")

fcsv = fcsv.transpose("time", "latitude", "longitude")

# NOTE(review): only the longitude axis is reconciled here; latitude and time
# are assumed to carry identical labels in both files. xarray aligns on
# assignment, so mismatching labels would presumably show up as NaN rather
# than as an error — confirm for the input files.

# --- Step 3: Convert units from kg/kg to g/kg ---
q_g_per_kg = q_interp * 1000
q_g_per_kg.attrs = q_interp.attrs.copy()  # keep the metadata of the source field
q_g_per_kg.attrs["units"] = "g/kg"        # and relabel the units

# --- Step 4: Add q to fcsv dataset ---
fcsv["q"] = q_g_per_kg

# Save to file
fcsv.to_netcdf("Surface Variables/20241101/20241101_marsfc_sv_q.nc")

In [None]:
# Merge ERA5 (time version for GraphCast and AIFS) surface variables with q
# Run this once for every month, changing the date in the file paths of the next cell

In [None]:
# Open the two ERA5 (gcai) inputs: surface variables (erasv) and specific humidity (eraq)
erasv = xr.open_dataset(
    "Surface Variables/20240301/20240301_era5_gcai_sv.nc"
)
eraq = xr.open_dataset(
    "Pressure Variables/20240301/20240301_era5_gcai_q.nc"
)

# --- Step 1: Ensure longitudes are in [-180, 180] ---
def to_180(longitudes):
    """Wrap longitudes given in the 0..360 convention into the -180..180 one.

    When no value is greater than 180 the input object is returned untouched.
    Otherwise every value is mapped through ``((lon + 180) % 360) - 180``
    (so 180 itself becomes -180). A status message is printed in both cases.
    """
    needs_wrap = np.any(longitudes > 180)
    if not needs_wrap:
        print("Longitudes already in [-180, 180] range.")
        return longitudes

    print("Converting longitudes to [-180, 180] range.")
    shifted = (longitudes + 180) % 360
    return shifted - 180

# Wrap both longitude axes into the -180..180 convention and sort them ascending
# (q file first, then the surface file).
eraq = eraq.assign_coords(longitude=to_180(eraq["longitude"]))
eraq = eraq.sortby("longitude")
erasv = erasv.assign_coords(longitude=to_180(erasv["longitude"]))
erasv = erasv.sortby("longitude")

# Bring q onto the layout of the surface dataset:
#   1. drop the pressure_level dimension (expected to hold a single value),
#   2. order the dimensions as (valid_time, latitude, longitude),
#   3. interpolate onto the surface dataset's longitudes.
dim_order = ("valid_time", "latitude", "longitude")
q_aligned = eraq["q"].squeeze(dim="pressure_level", drop=True)
q_aligned = q_aligned.transpose(*dim_order)
q_reindexed = q_aligned.interp(longitude=erasv["longitude"])
erasv = erasv.transpose(*dim_order)

# Rescale specific humidity from kg/kg to g/kg: carry over the metadata of the
# interpolated field and relabel the units.
q_converted = q_reindexed * 1000
q_converted.attrs = {**q_reindexed.attrs, "units": "g/kg"}

# Attach q to the ERA5 surface dataset and write the merged file
erasv["q"] = q_converted
erasv.to_netcdf("Surface Variables/20240301/20240301_era5_gcai_sv_q.nc")

In [None]:
# Merge ERA5 (time version for IFS-HRES) surface variables with q
# Run this once for every month, changing the date in the file paths of the next cell

In [None]:
# Open the two ERA5 (fc) inputs: surface variables (erasv) and specific humidity (eraq)
erasv = xr.open_dataset(
    "Surface Variables/20240301/20240301_era5_fc_sv.nc"
)
eraq = xr.open_dataset(
    "Pressure Variables/20240301/20240301_era5_fc_q.nc"
)

# --- Step 1: Ensure longitudes are in [-180, 180] ---
def to_180(longitudes):
    """Wrap longitudes given in the 0..360 convention into the -180..180 one.

    When no value is greater than 180 the input object is returned untouched.
    Otherwise every value is mapped through ``((lon + 180) % 360) - 180``
    (so 180 itself becomes -180). A status message is printed in both cases.
    """
    needs_wrap = np.any(longitudes > 180)
    if not needs_wrap:
        print("Longitudes already in [-180, 180] range.")
        return longitudes

    print("Converting longitudes to [-180, 180] range.")
    shifted = (longitudes + 180) % 360
    return shifted - 180

# Wrap both longitude axes into the -180..180 convention and sort them ascending
# (q file first, then the surface file).
eraq = eraq.assign_coords(longitude=to_180(eraq["longitude"]))
eraq = eraq.sortby("longitude")
erasv = erasv.assign_coords(longitude=to_180(erasv["longitude"]))
erasv = erasv.sortby("longitude")

# Bring q onto the layout of the surface dataset:
#   1. drop the pressure_level dimension (expected to hold a single value),
#   2. order the dimensions as (valid_time, latitude, longitude),
#   3. interpolate onto the surface dataset's longitudes.
dim_order = ("valid_time", "latitude", "longitude")
q_aligned = eraq["q"].squeeze(dim="pressure_level", drop=True)
q_aligned = q_aligned.transpose(*dim_order)
q_reindexed = q_aligned.interp(longitude=erasv["longitude"])
erasv = erasv.transpose(*dim_order)

# Rescale specific humidity from kg/kg to g/kg: carry over the metadata of the
# interpolated field and relabel the units.
q_converted = q_reindexed * 1000
q_converted.attrs = {**q_reindexed.attrs, "units": "g/kg"}

# Attach q to the ERA5 surface dataset and write the merged file
erasv["q"] = q_converted
erasv.to_netcdf("Surface Variables/20240301/20240301_era5_fc_sv_q.nc")