# Aggregate hourly netCDF data to daily values (sum, mean, max, or min)

### Single file

In [None]:
import xarray as xr
import pandas as pd
from pathlib import Path

# Ask for the hourly netCDF file; whitespace around a pasted path is dropped
netcdf_file_path = input("Enter the path to the hourly netCDF file: ").strip()
if not netcdf_file_path:
    raise ValueError("No netCDF file path was entered.")

# Build the output name from the file name only:
# "<stem>_daily_aggregated<suffix>".
# A plain str.replace('.nc', ...) would also rewrite '.nc' inside directory
# names, and would return the input path unchanged when the name contains no
# '.nc' at all, so the result would be written over the source file.
source_path = Path(netcdf_file_path)
output_daily_file_path = source_path.with_name(
    f"{source_path.stem}_daily_aggregated{source_path.suffix}"
)

# The context manager closes the source file even if a later step raises
with xr.open_dataset(source_path) as ds:
    # Ensure the 'time' coordinate is in datetime format
    ds['time'] = pd.to_datetime(ds['time'].values)

    # Resample data to daily frequency, summing the values for each day
    ds_daily = ds.resample(time='1D').sum()

    # Save the daily aggregated dataset to a new netCDF file
    ds_daily.to_netcdf(output_daily_file_path)
    ds_daily.close()

print(f"Daily aggregated data saved to {output_daily_file_path}")


### Loop through all files in a directory and aggregate each hourly file to daily values

In [None]:
import xarray as xr
import pandas as pd
import os
from pathlib import Path

def get_aggregation_method(file_name):
    """Pick the daily aggregation ("sum", "mean", "max" or "min") for a file.

    The decision is made purely by case-sensitive substring matching on
    ``file_name``; the checks run in order and the first match wins.
    Names that match nothing fall back to "sum".
    """
    # Variables whose daily value is the total over the day
    summed_markers = (
        "evaporation",
        "total_precipitation",
        "surface_net_solar_radiation",
    )
    if any(marker in file_name for marker in summed_markers):
        return "sum"

    # Plain 2 m temperature (no maximum/minimum anywhere in the name)
    if "2m_temperature" in file_name:
        mentions_extreme = "maximum" in file_name or "minimum" in file_name
        if not mentions_extreme:
            return "mean"

    if "maximum_2m_temperature" in file_name:
        return "max"

    if "minimum_2m_temperature" in file_name:
        return "min"

    # Default to sum if the variable is not recognized
    return "sum"

def process_nc_file(file_path):
    """Aggregate one hourly netCDF file to daily values and write the result.

    The aggregation (sum, mean, max or min) is chosen by
    ``get_aggregation_method`` from the lower-cased file name. The output is
    written next to the input as
    ``<stem>_daily_aggregated_<method><suffix>``.

    Args:
        file_path: Path to the hourly netCDF file (``str`` or
            ``pathlib.Path``).

    Raises:
        ValueError: If the dataset has neither a 'time' nor a 'valid_time'
            coordinate, or if the selected aggregation method is not one of
            sum/mean/max/min.
    """
    # Accept plain strings as well as Path objects
    file_path = Path(file_path)

    # The context manager closes the source file even if a later step raises
    with xr.open_dataset(file_path) as source:
        if 'time' in source.coords:
            print("time coordinate exists")
            # Ensure the 'time' coordinate is in datetime format
            source['time'] = pd.to_datetime(source['time'].values)
            ds = source
        elif 'valid_time' in source.coords:
            print("valid_time coordinate exists")
            # Convert 'valid_time' to datetime and rename it to 'time'
            source['valid_time'] = pd.to_datetime(source['valid_time'].values)
            ds = source.rename({'valid_time': 'time'})
        else:
            print("time coordinate does not exist")
            # Resampling needs a time axis; fail here with a clear message
            # instead of letting resample() raise an unrelated error.
            raise ValueError(
                f"{file_path} has no 'time' or 'valid_time' coordinate; "
                "cannot aggregate to daily values"
            )

        # Determine the aggregation method based on the file name
        agg_method = get_aggregation_method(file_path.name.lower())
        if agg_method not in ("sum", "mean", "max", "min"):
            raise ValueError(f"Unsupported aggregation method: {agg_method!r}")

        # Resample to daily frequency and apply the reduction of that name
        ds_daily = getattr(ds.resample(time='1D'), agg_method)()

        # Generate output file path
        output_file_path = file_path.with_name(
            f"{file_path.stem}_daily_aggregated_{agg_method}{file_path.suffix}"
        )

        # Save the daily aggregated dataset to a new netCDF file
        ds_daily.to_netcdf(output_file_path)
        ds_daily.close()

    print(f"Daily aggregated data saved to {output_file_path} using {agg_method} method")

def main():
    """Prompt for a folder and aggregate every hourly .nc file in it.

    Files whose name already contains "_daily_aggregated" are treated as
    output of an earlier run and skipped, so re-running on the same folder
    does not aggregate the aggregated files again. A failure on one file is
    reported and the remaining files are still processed.
    """
    # Get the folder path from the user (whitespace from pasting is dropped)
    folder_path = input("Enter the path to the folder containing .nc files: ").strip()
    folder = Path(folder_path)

    # Check if the folder exists
    if not folder.is_dir():
        print(f"The folder {folder_path} does not exist.")
        return

    # Collect all .nc files; sorted so the processing order is deterministic
    candidates = sorted(folder.glob('*.nc'))
    if not candidates:
        print(f"No .nc files found in {folder_path}")
        return

    # Leave out files produced by a previous aggregation run
    nc_files = [path for path in candidates if '_daily_aggregated' not in path.stem]
    skipped_count = len(candidates) - len(nc_files)
    if skipped_count:
        print(f"Skipping {skipped_count} previously aggregated file(s).")
    if not nc_files:
        print(f"No hourly .nc files left to process in {folder_path}")
        return

    failed_files = []
    for file_path in nc_files:
        print(f"Processing {file_path}...")
        try:
            process_nc_file(file_path)
        except Exception as exc:
            # Top-level batch boundary: report and carry on with the rest
            print(f"Failed to process {file_path}: {exc}")
            failed_files.append(file_path)

    if failed_files:
        print(f"{len(failed_files)} of {len(nc_files)} file(s) could not be processed.")
    else:
        print("All files processed successfully.")


# Run only when executed as a script or notebook cell, not when imported
if __name__ == "__main__":
    main()
