In [1]:
# !pip install cfgrib xarray matplotlib pandas
# !pip install dask

In [1]:
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import cfgrib

### ERA5_Monthly_averaged_reanalysis_by_hour_of-day_data_on_single_levels_from_2013-2023.grib

#### Combine Datasets

In [3]:
file_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\ERA5_Monthly_averaged_reanalysis_by_hour_of-day_data_on_single_levels_from_2013-2023.grib"

# Replace backslashes with forward slashes
updated_path = file_path.replace('\\', '/')

# Print the updated path
print("Original Path:", file_path)
print("Updated Path:", updated_path)

# Use open_datasets and NO open_dataset(from Error)
datasets = cfgrib.open_datasets(updated_path)
print(datasets)


Original Path: C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\ERA5_Monthly_averaged_reanalysis_by_hour_of-day_data_on_single_levels_from_2013-2023.grib
Updated Path: C:/Users/giann/OneDrive/Desktop/Thesis/Copernicus_Data/ERA5_Monthly_averaged_reanalysis_by_hour_of-day_data_on_single_levels_from_2013-2023.grib
[<xarray.Dataset> Size: 220MB
Dimensions:              (time: 528, latitude: 153, longitude: 341)
Coordinates:
    number               int32 4B 0
  * time                 (time) datetime64[ns] 4kB 2013-01-01 ... 2023-12-01T...
    step                 timedelta64[ns] 8B 00:00:00
    depthBelowLandLayer  float64 8B 0.0
  * latitude             (latitude) float64 1kB 72.0 71.75 71.5 ... 34.25 34.0
  * longitude            (longitude) float64 3kB -25.0 -24.75 ... 59.75 60.0
    valid_time           (time) datetime64[ns] 4kB 2013-01-01 ... 2023-12-01T...
Data variables:
    swvl1                (time, latitude, longitude) float32 110MB ...
    stl1                 (time, lati

#### Data Cleaning

In [4]:

# Define how to rename variables for clarity
rename_dict = {
    "t2m": "temperature_2m",
    "tp": "total_precipitation",
    "u10": "wind_u_component",
    "v10": "wind_v_component",
    "d2m": "dewpoint_temperature_2m",
    "sp": "surface_pressure",
    "swvl1": "volumetric_soil_water_layer_1",
    "swvl2": "volumetric_soil_water_layer_2",
    "stl1": "soil_temperature_level_1",
    "smlt": "snowmelt",
    "pev": "potential_evaporation",
    "e": "evaporation",
    "ro": "runoff",
    "slt": "soil_type"
}

# Process each dataset
for i, ds in enumerate(datasets):  # Loop through all datasets
    print(f"\nProcessing Dataset {i}...")  # Print which dataset is being processed

    # Fill missing values
    ds = ds.fillna(value=float("nan"))  # Replace any missing values with NaN
    print("Filled missing values.")

    # Remove duplicate time entries
    time_index = pd.Index(ds.time.values)  # Convert the time data to a pandas index so it works
    if time_index.duplicated().any():  # Check for duplicates
        ds = ds.sel(time=~time_index.duplicated())  # Keep only unique time entries
        print("Removed duplicate time steps.")

    # Validate variable ranges (only for specific variables)
    for var in ds.variables:  # Loop through all variables in the dataset
        if var in rename_dict:  # Check if the variable is in the rename list
            if "temperature" in rename_dict[var]:  # For temperature variables
                ds[var] = ds[var].where((ds[var] >= 200) & (ds[var] <= 350), float("nan"))  # Keep valid ranges
                print(f"Step 3: Validated range for {rename_dict[var]} (200K to 350K).")
            elif "precipitation" in rename_dict[var]:  # For precipitation variables
                ds[var] = ds[var].where(ds[var] >= 0, float("nan"))  # Precipitation must be non-negative
                print(f"Step 3: Validated range for {rename_dict[var]} (>= 0).")

    # Convert units for certain variables
    if "t2m" in ds.variables:  # Convert temperature from Kelvin to Celsius
        ds["t2m"] = ds["t2m"] - 273.15
        print("Converted temperature to Celsius.")

    if "sp" in ds.variables:  # Convert surface pressure from Pa to hPa
        ds["sp"] = ds["sp"] / 100
        print("Converted surface pressure to hPa.")

    if "u10" in ds.variables and "v10" in ds.variables:  # Calculate wind speed
        ds["wind_speed"] = (ds["u10"]**2 + ds["v10"]**2)**0.5
        print("Calculated wind speed.")

    # Add missing time steps (if any)
    expected_time = pd.date_range(start=str(ds.time.min().values), end=str(ds.time.max().values), freq="1M")
    if not ds.time.equals(expected_time):  # Check if any time steps are missing
        ds = ds.reindex(time=expected_time)  # Add the missing time steps, filling with NaN
        print("Fixed missing time steps.")

    # Resample data to monthly averages
    ds = ds.resample(time="ME").mean()  # Group data by month and calculate averages
    print("Resampled data to monthly averages.")

    # Rename variables
    existing_vars = set(ds.variables)  # Get the list of variables in this dataset
    rename_vars = {k: v for k, v in rename_dict.items() if k in existing_vars}  # Find variables to rename
    ds = ds.rename(rename_vars)  # Apply the renaming
    print("Renamed variables.")

    # Update metadata for renamed variables
    for var in ds.data_vars:  # Go through all variables in the dataset
        if var in rename_dict.values():  # If the variable was renamed
            ds[var].attrs["description"] = f"Cleaned variable: {var}"  # Add a description
    print("Updated metadata.")

    # Print the summary of the processed dataset
    print(f"\nProcessed Dataset {i} Summary:")  # Display a summary
    print(ds)



Processing Dataset 0...
Filled missing values.
Step 3: Validated range for soil_temperature_level_1 (200K to 350K).
Fixed missing time steps.
Resampled data to monthly averages.
Renamed variables.
Updated metadata.

Processed Dataset 0 Summary:
<xarray.Dataset> Size: 55MB
Dimensions:                        (time: 131, latitude: 153, longitude: 341)
Coordinates:
  * latitude                       (latitude) float64 1kB 72.0 71.75 ... 34.0
  * longitude                      (longitude) float64 3kB -25.0 -24.75 ... 60.0
    number                         int32 4B 0
    step                           timedelta64[ns] 8B 00:00:00
    depthBelowLandLayer            float64 8B 0.0
  * time                           (time) datetime64[ns] 1kB 2013-01-31 ... 2...
Data variables:
    volumetric_soil_water_layer_1  (time, latitude, longitude) float32 27MB n...
    soil_temperature_level_1       (time, latitude, longitude) float32 27MB n...
Attributes:
    GRIB_edition:            1
    GRIB_centre

#### Data Cleaning

### Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2013-2020

#### Combine Datasets & Cleaning 

In [5]:

folder_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2013-2020"

# Find all .nc files in the folder
file_list = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".nc")]

# Fix the 'step' dimension and ensure it's unique
def preprocess(ds):
    if "step" in ds.coords and "valid_time" in ds.coords:
        # Replace 'step' with 'valid_time'
        ds = ds.assign_coords(step=("step", ds["valid_time"].values))
    
    # Deduplicate or aggregate 'step'
    if "step" in ds.coords:
        # Remove duplicates, keeping the first occurrence
        _, index = np.unique(ds["step"], return_index=True)
        # ds = ds.isel(step=index) 
        ds = ds.groupby("step").mean()
    return ds

# Categorize files by member size
files_by_member_size = {25: [], 51: []}

for file in file_list:
    ds = xr.open_dataset(file)
    member_size = ds.sizes.get('member', None)
    if member_size == 25:
        files_by_member_size[25].append(file)
    elif member_size == 51:
        files_by_member_size[51].append(file)

# Combine datasets
combined_datasets = {}

for member_size, files in files_by_member_size.items():
    if files:
        # Use open_mfdataset to process files and reduce memory usage
        
        # Chunking divides the dataset into manageable parts based on one or more dimensions
        # Each "chunk" is a smaller portion of the dataset, and operations are performed on these
        # chunks sequentially or in parallel, which helps reduce memory usage and allows processing 
        # of datasets that are too large to fit into RAM.
        ds = xr.open_mfdataset(
            files,
            preprocess=preprocess,
            combine="nested", #assumes that the files are ordered logically in the same way as the desired concatenation order
            concat_dim="member",
            chunks={"step": 10}  
        )
        
        # Sort the dataset by 'step' in ascending order
        ds = ds.sortby("step", ascending=True)

        
        # Fill missing values
        ds = ds.fillna(float("nan"))  # Replace missing data with NaN
        print(f"Filled missing values for dataset with {member_size} members.")

        # Validate data ranges
        if "discharge" in ds.variables:  # Ensure river discharge is non-negative
            ds["discharge"] = ds["discharge"].where(ds["discharge"] >= 0, float("nan"))
            print("Validated river discharge values (>= 0).")

        if "temperature" in ds.variables:  # Ensure temperature is within a realistic range
            ds["temperature"] = ds["temperature"].where((ds["temperature"] >= 200) & (ds["temperature"] <= 350), float("nan"))
            print("Validated temperature range (200K to 350K).")

        # Convert units
        if "temperature" in ds.variables:  # Convert temperature from Kelvin to Celsius
            ds["temperature"] = ds["temperature"] - 273.15
            print("Converted temperature to Celsius.")

        if "precipitation" in ds.variables:  # Convert precipitation to millimeters
            ds["precipitation"] = ds["precipitation"] * 1000
            print("Converted precipitation to millimeters.")

        # Rename variables for clarity
        rename_dict = {
            "discharge": "river_discharge",
            "temperature": "air_temperature",
            "precipitation": "total_precipitation"
        }
        existing_vars = set(ds.variables)  # Get all variables in the dataset
        rename_vars = {k: v for k, v in rename_dict.items() if k in existing_vars}  # Find variables to rename
        ds = ds.rename(rename_vars)  # Apply the renaming
        print("Renamed variables for clarity.")

        # Update metadata
        for var in ds.data_vars:  # Add descriptions to all variables
            ds[var].attrs["description"] = f"Cleaned variable: {var}"
        print("Updated metadata.")

        
        combined_datasets[member_size] = ds
        print(f"Combined dataset with {member_size} members:")
        print(ds)
    else:
        print(f"No datasets found with {member_size} members.")


Filled missing values for dataset with 25 members.
Renamed variables for clarity.
Updated metadata.
Combined dataset with 25 members:
<xarray.Dataset> Size: 1TB
Dimensions:                       (step: 54, y: 950, x: 1000, member: 1200)
Coordinates:
  * y                             (y) float64 8kB 5.498e+06 ... 7.525e+05
  * x                             (x) float64 8kB 2.502e+06 ... 7.498e+06
  * step                          (step) datetime64[ns] 432B 2013-01-31 ... 2...
    number                        (member) int64 10kB dask.array<chunksize=(25,), meta=np.ndarray>
Dimensions without coordinates: member
Data variables:
    rdis                          (step, y, x, member) float32 246GB dask.array<chunksize=(1, 950, 1000, 25), meta=np.ndarray>
    latitude                      (member, step, y, x) float32 246GB dask.array<chunksize=(25, 1, 950, 1000), meta=np.ndarray>
    longitude                     (member, step, y, x) float32 246GB dask.array<chunksize=(25, 1, 950, 1000), met

### Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2021-2023

#### See Dimension of Dataset

In [6]:

folder_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2021-2023"

# Get the list of all .nc files in the folder
file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".nc")]

# Go through each file and show its dimensions
for idx, file_path in enumerate(file_list):
    try:
        # Open the dataset
        ds = xr.open_dataset(file_path)
        
        # Print the file name and its dimensions
        print(f"Dataset {idx + 1}: {os.path.basename(file_path)}")
        print(ds.dims)  # Show dataset dimensions
        print("-" * 40)  # Add a separator for clarity

        
        # Close the dataset to free space in memory
        ds.close()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")


Dataset 1: fairCRPSS_seas5_EFAS_01_v1.nc
----------------------------------------
Dataset 2: fairCRPSS_seas5_EFAS_02_v1.nc
----------------------------------------
Dataset 3: fairCRPSS_seas5_EFAS_03_v1.nc
----------------------------------------
Dataset 4: fairCRPSS_seas5_EFAS_04_v1.nc
----------------------------------------
Dataset 5: fairCRPSS_seas5_EFAS_05_v1.nc
----------------------------------------
Dataset 6: fairCRPSS_seas5_EFAS_06_v1.nc
----------------------------------------
Dataset 7: fairCRPSS_seas5_EFAS_07_v1.nc
----------------------------------------
Dataset 8: fairCRPSS_seas5_EFAS_08_v1.nc
----------------------------------------
Dataset 9: fairCRPSS_seas5_EFAS_09_v1.nc
----------------------------------------
Dataset 10: fairCRPSS_seas5_EFAS_10_v1.nc
----------------------------------------
Dataset 11: fairCRPSS_seas5_EFAS_11_v1.nc
----------------------------------------
Dataset 12: fairCRPSS_seas5_EFAS_12_v1.nc
----------------------------------------
Dataset 13: r

#### Combine Datasets & Cleaning 

In [7]:

# # Path to the folder containing the .nc files
# folder_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2021-2023"

# # Get a list of all .nc files in the folder
# file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".nc")]

# # Function to preprocess each dataset before combining
# def preprocess(ds):
#     # Ensure 'longitude' and 'latitude' are coordinates (not just variables)
#     if "longitude" not in ds.coords and "longitude" in ds.variables:
#         ds = ds.set_coords("longitude")
#     if "latitude" not in ds.coords and "latitude" in ds.variables:
#         ds = ds.set_coords("latitude")

#     # If 'step' dimension is missing but 'time' exists, create a dummy 'step'
#     if "step" not in ds.sizes and "time" in ds.sizes:
#         ds["step"] = ("time", np.arange(ds.sizes["time"]))  # Add sequential numbers as 'step'
#         ds = ds.swap_dims({"time": "step"})  # Use 'step' as the main dimension instead of 'time'

#     # If 'step' exists, adjust its values to be in days
#     if "step" in ds.coords:
#         step_values = ds["step"].values
#         # Convert 'step' values to days
#         step_values_in_days = (step_values - step_values.min()) / (60 * 60 * 24 * 1e9)
#         ds = ds.assign_coords(step=("step", step_values_in_days.astype("float64")))

#         # Sort by 'step' and remove duplicates
#         ds = ds.sortby("step")
#         _, index = np.unique(ds["step"], return_index=True)
#         ds = ds.isel(step=index)

#     return ds

# # Function to combine multiple datasets into one
# def combine_datasets(files):
#     try:
#         # Use xarray to open and combine multiple files
#         combined = xr.open_mfdataset(
#             files,
#             preprocess=preprocess,  # Preprocess each file before combining
#             combine="by_coords",    # Match data using their coordinates (like latitude, longitude)
#             chunks={"step": 10}     # Break data into smaller pieces for faster processing
#         )
#         print("Datasets combined successfully!")
#         return combined
#     except Exception as e:
#         print(f"Error combining datasets: {e}")
#         return None

# # Combine the first half of the files
# ds_combined1 = combine_datasets(file_list[:16])  # Process first 16 files
# if ds_combined1:
#     print("First combined dataset:")
#     print(ds_combined1)

# # Combine the second half of the files
# ds_combined2 = combine_datasets(file_list[16:])  # Process remaining files
# if ds_combined2:
#     print("Second combined dataset:")
#     print(ds_combined2)








# Path to the folder with all .nc files
folder_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2021-2023"

# Get a list of all .nc files in the folder
file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".nc")]

# Function to clean and prepare each dataset
def preprocess(ds):
    # Make sure 'longitude' and 'latitude' are treated as coordinates
    if "longitude" not in ds.coords and "longitude" in ds.variables:
        ds = ds.set_coords("longitude")
    if "latitude" not in ds.coords and "latitude" in ds.variables:
        ds = ds.set_coords("latitude")

    # If 'step' is missing, create it from 'time'
    if "step" not in ds.sizes and "time" in ds.sizes:
        ds["step"] = ("time", np.arange(ds.sizes["time"]))  # Add step values
        ds = ds.swap_dims({"time": "step"})  # Use 'step' instead of 'time'

    # Convert 'step' values to days
    if "step" in ds.coords:
        step_values = ds["step"].values
        step_values_in_days = (step_values - step_values.min()) / (60 * 60 * 24 * 1e9)  # Convert to days
        ds = ds.assign_coords(step=("step", step_values_in_days.astype("float64")))
        ds = ds.sortby("step")  # Sort steps by value

    # Replace missing values with 0
    ds = ds.fillna(0)

    # Remove duplicate steps
    if "step" in ds.dims:
        _, index = np.unique(ds["step"], return_index=True)
        ds = ds.isel(step=index)

    # Make sure values are within reasonable ranges
    for var in ds.data_vars:
        ds[var] = ds[var].clip(min=0)  # Set all negative values to 0

    # Remove outliers (extreme values)
    for var in ds.data_vars:
        if isinstance(ds[var].data, np.ndarray):  # Only apply if not using dask
            lower = ds[var].quantile(0.01).compute()
            upper = ds[var].quantile(0.99).compute()
            ds[var] = ds[var].where((ds[var] >= lower) & (ds[var] <= upper), np.nan)

    # Make sure latitude and longitude values are valid
    if "latitude" in ds.coords and "longitude" in ds.coords:
        valid_lat = (ds["latitude"] >= -90) & (ds["latitude"] <= 90)
        valid_lon = (ds["longitude"] >= -180) & (ds["longitude"] <= 180)
        ds = ds.where(valid_lat.compute() & valid_lon.compute(), drop=True)  # Keep only valid coordinates

    # Add metadata to show the dataset was cleaned
    ds.attrs["processed"] = "True"
    ds.attrs["description"] = "Cleaned dataset with missing values fixed and outliers removed."

    return ds

# Function to combine multiple datasets into one
def combine_datasets(files):
    try:
        # Open and combine datasets
        combined = xr.open_mfdataset(
            files,
            preprocess=preprocess,  # Clean each dataset before combining
            combine="by_coords",    # Match datasets by their coordinates
            chunks={}               # Avoid chunking issues
        )
        # Fix chunking along spatial dimensions to avoid errors
        combined = combined.chunk({"y": -1, "x": -1})
        print("Datasets combined successfully!")
        return combined
    except Exception as e:
        print(f"Error combining datasets: {e}")
        return None

# Combine the first half of the datasets
ds_combined1 = combine_datasets(file_list[:16])  # Process the first set of files
if ds_combined1:
    print("First combined dataset:")
    print(ds_combined1)

# Combine the second half of the datasets
ds_combined2 = combine_datasets(file_list[16:])  # Process the second set of files
if ds_combined2:
    print("Second combined dataset:")
    print(ds_combined2)





Datasets combined successfully!
First combined dataset:
<xarray.Dataset> Size: 5GB
Dimensions:                       (time: 1, y: 950, x: 1000, step: 24,
                                   member: 51)
Coordinates:
  * time                          (time) datetime64[ns] 8B 2016-01-01
  * y                             (y) int32 4kB 5497500 5492500 ... 752500
  * x                             (x) int32 4kB 2502500 2507500 ... 7497500
  * step                          (step) float64 192B -8.031e+04 ... 9.956e+04
    latitude                      (y, x) float32 4MB dask.array<chunksize=(950, 1000), meta=np.ndarray>
    longitude                     (y, x) float32 4MB dask.array<chunksize=(950, 1000), meta=np.ndarray>
    number                        (member) int64 408B dask.array<chunksize=(51,), meta=np.ndarray>
    valid_time                    (step) datetime64[ns] 192B dask.array<chunksize=(7,), meta=np.ndarray>
Dimensions without coordinates: member
Data variables:
    fairCRPSS      

### Climate_indicators_for_Europe_from_2013_to_2023_derived_from_reanalysis

#### Combine Datasets

In [9]:
folder_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\Climate_indicators_for_Europe_from_2013_to_2023_derived_from_reanalysis"

# Find all .nc files in the folder
file_list = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".nc")]

# Open and combine multiple datasets using nested merging
combined_dataset = xr.open_mfdataset(file_list, combine='nested', concat_dim='step', coords='minimal')
# Combine files in order
# Merge along the 'step' dimension
# Use only needed coordinates

print("Datasets combined successfully!")


Datasets combined successfully!


#### Data Cleaning

In [10]:
# Fill missing values
combined_dataset = combined_dataset.fillna(float("nan"))  # Replace missing values with NaN
print("Filled missing values.")

# Validate data ranges. 
if "temperature" in combined_dataset.variables:
    # Most global atmospheric temperatures fall between 200 K (-73°C) and 350 K (77°C)
    combined_dataset["temperature"] = combined_dataset["temperature"].where(
        (combined_dataset["temperature"] >= 200) & (combined_dataset["temperature"] <= 350), float("nan")
    )
    print("Validated temperature range (200K to 350K).")

if "precipitation" in combined_dataset.variables:
    # Precipitation cannot be negative
    combined_dataset["precipitation"] = combined_dataset["precipitation"].where(
        combined_dataset["precipitation"] >= 0, float("nan")
    )
    print("Validated precipitation (>= 0).")

# Convert units
if "temperature" in combined_dataset.variables:
    # Convert temperature from Kelvin to Celsius
    combined_dataset["temperature"] = combined_dataset["temperature"] - 273.15
    combined_dataset["temperature"].attrs["units"] = "Celsius"
    print("Converted temperature to Celsius.")

if "precipitation" in combined_dataset.variables:
     # Convert precipitation to millimeters
    combined_dataset["precipitation"] = combined_dataset["precipitation"] * 1000
    combined_dataset["precipitation"].attrs["units"] = "mm"
    print("Converted precipitation to millimeters.")

# Rename variables for clarity
# Example rename dictionary
rename_dict = {
    "temperature": "air_temperature",
    "precipitation": "total_precipitation",
    "discharge": "river_discharge"
}
# Get all variables in the dataset
existing_vars = set(combined_dataset.variables)
# Find variables to rename
rename_vars = {k: v for k, v in rename_dict.items() if k in existing_vars}
combined_dataset = combined_dataset.rename(rename_vars)
print("Renamed variables for clarity.")

# Update metadata
# Add descriptions to each variable
for var in combined_dataset.data_vars:
    combined_dataset[var].attrs["description"] = f"Cleaned variable: {var}"
print("Updated metadata.")

# Print the summary of the cleaned dataset
print("\nCleaned Dataset Summary:")
print(combined_dataset)


Filled missing values.
Renamed variables for clarity.
Updated metadata.

Cleaned Dataset Summary:
<xarray.Dataset> Size: 36GB
Dimensions:      (step: 20, time: 1008, lat: 185, lon: 271)
Coordinates:
  * time         (time) datetime64[ns] 8kB 1940-01-01 1940-02-01 ... 2023-12-01
  * lat          (lat) float64 1kB 26.5 26.75 27.0 27.25 ... 72.0 72.25 72.5
  * lon          (lon) float64 2kB -22.0 -21.75 -21.5 -21.25 ... 45.0 45.25 45.5
    realization  int64 8B 0
Dimensions without coordinates: step
Data variables:
    t2m          (step, time, lat, lon) float64 8GB dask.array<chunksize=(1, 1008, 185, 271), meta=np.ndarray>
    data         (step, time, lat, lon) float64 8GB dask.array<chunksize=(1, 1008, 185, 271), meta=np.ndarray>
    tp           (step, time, lat, lon) float32 4GB dask.array<chunksize=(1, 1008, 185, 271), meta=np.ndarray>
    cdd          (step, time, lat, lon) float64 8GB dask.array<chunksize=(1, 1008, 185, 271), meta=np.ndarray>
    fwi          (step, time, lat, lon

#### See Dimension of Dataset

In [11]:
folder_path = r"C:\Users\giann\OneDrive\Desktop\Thesis\Copernicus_Data\Multi-model_seasonal_reforecasts_of_river_discharge_for Europe_2021-2023"

# Get the list of all .nc files in the folder
file_list = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".nc")]

# Loop through each file and print its dimensions
for idx, file_path in enumerate(file_list):
    try:
        # Open the dataset
        ds = xr.open_dataset(file_path)
        
        # Print the Dataset name and its dimensions
        print(f"Dataset {idx + 1}: {os.path.basename(file_path)}")
        print(ds.dims)  # Show dataset dimensions
        print("-" * 40)  # Add a separator for clarity
        
         # Close the dataset to save memory
        ds.close()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")

Dataset 1: fairCRPSS_seas5_EFAS_01_v1.nc
----------------------------------------
Dataset 2: fairCRPSS_seas5_EFAS_02_v1.nc
----------------------------------------
Dataset 3: fairCRPSS_seas5_EFAS_03_v1.nc
----------------------------------------
Dataset 4: fairCRPSS_seas5_EFAS_04_v1.nc
----------------------------------------
Dataset 5: fairCRPSS_seas5_EFAS_05_v1.nc
----------------------------------------
Dataset 6: fairCRPSS_seas5_EFAS_06_v1.nc
----------------------------------------
Dataset 7: fairCRPSS_seas5_EFAS_07_v1.nc
----------------------------------------
Dataset 8: fairCRPSS_seas5_EFAS_08_v1.nc
----------------------------------------
Dataset 9: fairCRPSS_seas5_EFAS_09_v1.nc
----------------------------------------
Dataset 10: fairCRPSS_seas5_EFAS_10_v1.nc
----------------------------------------
Dataset 11: fairCRPSS_seas5_EFAS_11_v1.nc
----------------------------------------
Dataset 12: fairCRPSS_seas5_EFAS_12_v1.nc
----------------------------------------
Dataset 13: r