<a href="https://colab.research.google.com/github/costpetrides/FAIRMODE-WG5/blob/main/JRC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install netCDF4

Collecting netCDF4
  Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m84.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


In [6]:
import netCDF4 as nc
import numpy as np
import os

# === Settings ===
input_dir = "/content/XGBSF"
output_dir = "/content/Results"
variables_to_fix = ["SURF_ug_NO2", "SURF_ppb_O3", "SURF_ug_PM25_rh50"]
correct_dims = ("time", "lat", "lon")
os.makedirs(output_dir, exist_ok=True)

reshaped_count = 0
copied_count = 0


def _read_attrs(nc_obj):
    """Return all netCDF attributes of a Dataset or Variable as a plain dict."""
    return {attr: nc_obj.getncattr(attr) for attr in nc_obj.ncattrs()}


def _transpose_axes(dims, target_dims):
    """Return the axis order that rearranges ``dims`` into ``target_dims``.

    Returns None when ``dims`` is not a permutation of ``target_dims``, i.e.
    when no pure transpose can produce the requested layout.
    """
    if sorted(dims) != sorted(target_dims):
        return None
    return tuple(dims.index(name) for name in target_dims)


def _write_variable(dst, name, datatype, dims, data, attrs, default_fill=None):
    """Create variable ``name`` in ``dst``, then copy its attributes and data.

    ``_FillValue`` is taken out of ``attrs`` and handed to ``createVariable``
    instead of being set as an ordinary attribute; ``default_fill`` is used
    when the source carried none (None leaves the library default in place).
    """
    attrs = dict(attrs)  # work on a copy so the caller's dict is not mutated
    fill_value = attrs.pop("_FillValue", default_fill)
    var_out = dst.createVariable(name, datatype, dims, fill_value=fill_value)
    # Attributes are set before the data is assigned, as in the original flow.
    for attr, value in attrs.items():
        var_out.setncattr(attr, value)
    # One full slice per dimension: [:] for coordinates, [:, :, :] for fields.
    var_out[(slice(None),) * len(dims)] = data


# sorted() makes the processing order (and therefore the log) deterministic;
# os.listdir() returns entries in arbitrary order.
for file in sorted(os.listdir(input_dir)):
    if not file.endswith(".nc"):
        continue

    input_path = os.path.join(input_dir, file)
    output_path = os.path.join(output_dir, file)

    print(f"\nProcessing: {file}")

    with nc.Dataset(input_path, "r") as src:
        lon = src.variables["lon"][:]
        lat = src.variables["lat"][:]
        time = src.variables["time"][:]
        # Remember each coordinate's dtype and attributes so the output keeps
        # them (e.g. any units/calendar attributes) instead of being written
        # as bare float32 arrays.
        coord_meta = {
            name: (src.variables[name].dtype, _read_attrs(src.variables[name]))
            for name in ("lon", "lat", "time")
        }
        global_attrs = _read_attrs(src)

        reshaped_vars = {}
        copied_vars = {}

        for var_name in variables_to_fix:
            if var_name not in src.variables:
                print(f"  ⚠ {var_name} not found in {file}. Skipping this variable.")
                continue

            src_var = src.variables[var_name]
            dims = src_var.dimensions
            var_attrs = _read_attrs(src_var)

            if dims == correct_dims:
                # === Already in the right layout: plain copy ===
                print(f"  ✅ {var_name} already correct. Copying as-is.")
                copied_vars[var_name] = (src_var[:], var_attrs)
                continue

            # === Wrong layout: work out the transpose from the dim names ===
            axes = _transpose_axes(dims, correct_dims)
            if axes is None:
                print(f"  ⚠ Unknown dims {dims} for {var_name}. Skipping this variable.")
                continue

            print(f"  🔄 Reshaping {var_name}: {dims} -> {correct_dims}")
            reshaped_vars[var_name] = (np.transpose(src_var[:], axes), var_attrs)

    # === Create the output NetCDF ===
    with nc.Dataset(output_path, "w", format="NETCDF4") as dst:
        dst.createDimension("lon", len(lon))
        dst.createDimension("lat", len(lat))
        dst.createDimension("time", None)  # unlimited

        for coord_name, coord_values in (("lon", lon), ("lat", lat), ("time", time)):
            coord_dtype, coord_attrs = coord_meta[coord_name]
            _write_variable(dst, coord_name, coord_dtype, (coord_name,), coord_values, coord_attrs)

        # === Global attributes ===
        for attr, value in global_attrs.items():
            dst.setncattr(attr, value)

        # === Write the reshaped variables ===
        for var_name, (data_reshaped, var_attrs) in reshaped_vars.items():
            _write_variable(dst, var_name, "f4", correct_dims, data_reshaped, var_attrs, default_fill=-9999.0)
            reshaped_count += 1

        # === Write the variables that were already correct ===
        for var_name, (data_original, var_attrs) in copied_vars.items():
            _write_variable(dst, var_name, "f4", correct_dims, data_original, var_attrs, default_fill=-9999.0)
            copied_count += 1

    print(f"  📦 File written to: {output_path}")

# === Final summary ===
print("\n✅ Batch process completed!")
print(f"  ➡ Variables reshaped: {reshaped_count}")
print(f"  ➡ Variables copied (already correct): {copied_count}")



Processing: BaseCase_UoA_NO2_SCA.Neigh.Add.XGB_CORR_YEARLY.nc
  🔄 Reshaping SURF_ug_NO2: ('lon', 'lat', 'time') -> ('time', 'lat', 'lon')
  ⚠ SURF_ppb_O3 not found in BaseCase_UoA_NO2_SCA.Neigh.Add.XGB_CORR_YEARLY.nc. Skipping this variable.
  ⚠ SURF_ug_PM25_rh50 not found in BaseCase_UoA_NO2_SCA.Neigh.Add.XGB_CORR_YEARLY.nc. Skipping this variable.
  📦 File written to: /content/Results/BaseCase_UoA_NO2_SCA.Neigh.Add.XGB_CORR_YEARLY.nc

Processing: BaseCase_UoA_O3_SCA.Neigh.Add.XGB_CORR_YEARLY.nc
  ⚠ SURF_ug_NO2 not found in BaseCase_UoA_O3_SCA.Neigh.Add.XGB_CORR_YEARLY.nc. Skipping this variable.
  🔄 Reshaping SURF_ppb_O3: ('lon', 'lat', 'time') -> ('time', 'lat', 'lon')
  ⚠ SURF_ug_PM25_rh50 not found in BaseCase_UoA_O3_SCA.Neigh.Add.XGB_CORR_YEARLY.nc. Skipping this variable.
  📦 File written to: /content/Results/BaseCase_UoA_O3_SCA.Neigh.Add.XGB_CORR_YEARLY.nc

Processing: Scen_UoA_NO2_SCA.Neigh.Add.XGB_CORR_YEARLY.nc
  🔄 Reshaping SURF_ug_NO2: ('lon', 'lat', 'time') -> ('time', '