# ERA5 Data to Daily Resolution

@author: Caroline Gasten
This script aggregates all ERA5 data needed to calculate the SPI (Standardized Precipitation Index) and the SPEI (Standardized Precipitation Evapotranspiration Index) from hourly to daily time steps.
- For temperature and dewpoint temperature, daily minima and maxima are calculated
- Hourly precipitation, surface net solar and surface net thermal radiation values are summed over 24h
- For mean sea level pressure and wind components the mean is used

## Settings

In [1]:
# importing necessary packages
import xarray as xr
import os
import rioxarray
import cftime

In [2]:
# paths
# NOTE: both assignments are placeholders with an empty right-hand side; each
# must be given a directory path (string) before the notebook can run.
path_input = # path to raw ERA5 data: directory holding the hourly input files 'era5_<variable>_<year>.nc'
path_output = # path to directory with daily ERA5 data: the 'era5_<name>_<year>_daily.nc' files are written here

In [3]:
# Variable names as they appear in the input file names
# ('era5_<variable>_<year>.nc'), grouped by the daily aggregation applied.
variables_minmax = ['t2m', '2d']  # aggregated to daily minimum and daily maximum
variables_sum = ['ssr', 'tp', 'str']  # aggregated to daily sum
variables_mean = ['msl', 'u10', 'v10']  # aggregated to daily mean

## Daily minimum and maximum of temperature data and dewpoint temperature data

Hourly ERA5 data that is not instantaneous is always stamped at the end of the time step it covers. For example, the total precipitation data for 2002 covers the period from 2001-12-31 23:00:00 until 2002-12-31 23:00:00. Therefore, the first hour of the following year is always included and the first hour of the current year is discarded when compiling the data to daily resolution, so that each daily interval starts at 12 AM and runs until 12 AM the next day.

In [None]:
# For every year and every min/max variable: read the hourly file, append the
# first time step of the following year's file, reduce to daily maxima and
# minima and write one output file per statistic.
for year in range(1993, 2023):
    print(year)

    # daily time labels kept in the output: 1 January to 31 December of this year
    first_day = cftime.DatetimeGregorian(year, 1, 1, 0, 0, 0, 0, has_year_zero=False)
    last_day = cftime.DatetimeGregorian(year, 12, 31, 0, 0, 0, 0, has_year_zero=False)
    year_window = slice(first_day, last_day)

    for variable in variables_minmax:
        print(variable)

        # input files: current year and following year
        hourly_path = os.path.join(path_input, 'era5_%s_%4d.nc' % (variable, year))
        next_year_path = os.path.join(path_input, 'era5_%s_%4d.nc' % (variable, year + 1))

        # output files: one for the daily maximum, one for the daily minimum
        max_path = os.path.join(path_output, 'era5_%s_%4d_daily.nc' % (variable + 'max', year))
        min_path = os.path.join(path_output, 'era5_%s_%4d_daily.nc' % (variable + 'min', year))

        # hourly data of the year plus the first time step of the following year
        hourly = rioxarray.open_rasterio(hourly_path, mask_and_scale=True)
        next_first_hour = rioxarray.open_rasterio(next_year_path, mask_and_scale=True).isel(time=0)
        hourly = xr.concat([hourly, next_first_hour], dim='time')

        # right-closed daily bins, then keep only the days of the current year
        daily_max = hourly.resample(time='D', closed='right').max()
        daily_max = daily_max.sel(time=year_window)
        daily_min = hourly.resample(time='D', closed='right').min()
        daily_min = daily_min.sel(time=year_window)

        # save output
        daily_max.to_netcdf(max_path)
        daily_min.to_netcdf(min_path)

## Daily sum of total precipitation, surface net solar and surface net thermal radiation

In [None]:
# For every year and every summed variable: read the hourly file, append the
# first time step of the following year's file, and write the daily sums of
# the current year to 'era5_<variable>_<year>_daily.nc'.
for year in range(1993, 2023):
    print(year)

    # daily time labels kept in the output: 1 January to 31 December of this year
    year_window = slice(
        cftime.DatetimeGregorian(year, 1, 1, 0, 0, 0, 0, has_year_zero=False),
        cftime.DatetimeGregorian(year, 12, 31, 0, 0, 0, 0, has_year_zero=False),
    )

    for variable in variables_sum:
        print(variable)

        #file paths for input and output files
        file_in = os.path.join(path_input, 'era5_%s_%4d.nc' % (variable, year))
        file_in2 = os.path.join(path_input, 'era5_%s_%4d.nc' % (variable, year + 1))
        file_out = os.path.join(path_output, 'era5_%s_%4d_daily.nc' % (variable, year))

        #open hourly datasets for year and subsequent year to get the first hour of the first day from the subsequent year
        ds = rioxarray.open_rasterio(file_in, mask_and_scale=True)
        ds2 = rioxarray.open_rasterio(file_in2, mask_and_scale=True).isel(time=0)

        #concatenate datasets
        ds = xr.concat([ds, ds2], dim='time')

        #fill below zero precipitation values with zero (small negative values in dataset result from closing the energy balance in ERA5)
        if variable == 'tp':
            ds = xr.where(ds < 0, 0, ds)

        #aggregation to daily
        # closed='right' puts the 00:00 value of the following day into the
        # current day's bin and moves the 00:00 value of 1 January into the
        # previous year's last bin (removed by the selection). Without it the
        # appended hour of the following year was never part of any kept sum.
        ds_sum = ds.resample(time='D', closed='right').sum().sel(time=year_window)

        #save output
        ds_sum.to_netcdf(file_out)
        

## Daily mean of mean sea level pressure and wind components

In [None]:
# For every year and every averaged variable: read the hourly file, append the
# first time step of the following year's file, and write the daily means of
# the current year to 'era5_<variable>_<year>_daily.nc'.
for year in range(1993, 2023):
    print(year)

    # daily time labels kept in the output: 1 January to 31 December of this year
    year_window = slice(
        cftime.DatetimeGregorian(year, 1, 1, 0, 0, 0, 0, has_year_zero=False),
        cftime.DatetimeGregorian(year, 12, 31, 0, 0, 0, 0, has_year_zero=False),
    )

    # iterate over variables_mean itself; the loop bound used to be taken from
    # variables_sum, which only worked while both lists had the same length
    for variable in variables_mean:
        print(variable)

        #file paths for input and output files
        file_in = os.path.join(path_input, 'era5_%s_%4d.nc' % (variable, year))
        file_in2 = os.path.join(path_input, 'era5_%s_%4d.nc' % (variable, year + 1))
        file_out = os.path.join(path_output, 'era5_%s_%4d_daily.nc' % (variable, year))

        #open hourly datasets for year and subsequent year to get the first hour of the first day from the subsequent year
        ds = rioxarray.open_rasterio(file_in, mask_and_scale=True)
        ds2 = rioxarray.open_rasterio(file_in2, mask_and_scale=True).isel(time=0)

        #concatenate datasets
        ds = xr.concat([ds, ds2], dim='time')

        #aggregation to daily
        # NOTE(review): no closed='right' is passed here, unlike in the min/max
        # cell. With the default daily binning the appended hour presumably
        # ends up in the following year's first day and is removed by the
        # selection -- confirm which day boundary is intended for the means.
        ds_mean = ds.resample(time='D').mean().sel(time=year_window)

        #save output
        ds_mean.to_netcdf(file_out)