In [3]:
import xarray as xr

import glob
import os

import time
import tqdm.notebook as tq

### Resample hourly data to daily values

In [5]:
# Root folder; its sub-folders are globbed below as "product" folders, each
# containing one sub-folder per variable with *.nc files inside.
# A raw string keeps the Windows backslashes literal: '\E' and '\C' are
# invalid escape sequences in an ordinary string literal.
parent_folder = r'E:\ERA5_data\CONUS'

# Product folders. The 'daily' folder is the output location written by the
# resampling loop further down; it lives under the same root, so it has to be
# skipped or re-running this cell would find three entries and break the
# two-way unpacking below. Sorted because glob returns entries in arbitrary
# order and the folders are assigned positionally.
my_folder = sorted(
    folder for folder in glob.glob(os.path.join(parent_folder, '*'))
    if os.path.isdir(folder) and os.path.basename(folder) != 'daily'
)

# list of variable folders, one list per product folder
my_variables = [sorted(glob.glob(os.path.join(folder, '*'))) for folder in my_folder]
if len(my_variables) != 2:
    raise ValueError(
        'Expected exactly two product folders in %s, found %d: %s'
        % (parent_folder, len(my_folder), my_folder))

# first folder (in sorted order) -> single_level, second -> prelim_level
single_level, prelim_level = my_variables

# list of variables (names), taken from the variable folder names
variables = [os.path.basename(var) for var in single_level]

# The two products are paired by position, so both must expose the same
# number of variable folders; otherwise files of different variables would be
# merged under one name (or an IndexError would be raised).
if len(prelim_level) != len(single_level):
    raise ValueError(
        'Variable folders do not match: %d in %s vs %d in %s'
        % (len(single_level), my_folder[0], len(prelim_level), my_folder[1]))

# merge files of both products into the same category:
# variable name -> prelim files followed by single-level files
list_of_files = dict()
for name, single_dir, prelim_dir in zip(variables, single_level, prelim_level):
    list_of_files[name] = (
        sorted(glob.glob(os.path.join(prelim_dir, '*.nc')))
        + sorted(glob.glob(os.path.join(single_dir, '*.nc')))
    )

In [6]:
def read_netcdf(net_cdf_on_disk):
    """
    Open a NetCDF file and resample its data variable to daily values.

    The file is opened with ``chunks='auto'`` and the last *data* variable of
    the dataset is selected (coordinates are ignored, so the result does not
    depend on where coordinate variables are stored in the file).

    Parameters
    ----------
    net_cdf_on_disk : path of the NetCDF file on disk

    Returns
    -------
    tuple ``(variable_name, daily_min, daily_max, daily_value)``
        For ``'t2m'`` and ``'msdrswrf'`` the last three items are the daily
        minimum, maximum and mean. For every other variable ``daily_min`` and
        ``daily_max`` are ``None`` and ``daily_value`` is the daily sum.

    Raises
    ------
    ValueError
        If the dataset contains no data variables.
    """
    dataset = xr.open_dataset(net_cdf_on_disk,
                              chunks='auto')
    try:
        data_variable_names = list(dataset.data_vars)
        if not data_variable_names:
            raise ValueError(
                'No data variables found in %s' % net_cdf_on_disk)
        variable_name = data_variable_names[-1]
        variable_data = dataset[variable_name]
    finally:
        # always release the handle, even if variable selection failed
        dataset.close()
    # NOTE(review): the arrays returned below are evaluated by the caller
    # after the dataset has been closed; this presumably relies on xarray
    # re-opening the file on demand -- confirm for the backend in use.

    # calculate daily values; the resampler is built once and reused
    daily = variable_data.resample(time='1D')
    if variable_name in ('t2m', 'msdrswrf'):
        variable_data_mean = daily.mean()
        variable_data_min = daily.min()
        variable_data_max = daily.max()
        return variable_name, variable_data_min, variable_data_max, variable_data_mean

    return variable_name, None, None, daily.sum()

In [None]:
# loop for file resampling: every file of every variable is converted to
# daily values and written under <parent_folder>/daily/<variable folder>
for var in list_of_files.keys():
    start_time = time.time()
    # message: "Converting variable %s to daily values"
    print('Переносим в суточные переменные %s\n' % var)

    # Output folders depend only on the variable, so the paths are built once
    # per variable. os.path.join replaces the '\{}' format strings, which
    # contained invalid escape sequences.
    directory = os.path.join(parent_folder, 'daily', var)
    min_dir = directory + '_min'
    max_dir = directory + '_max'

    # progress-bar label: "Reading files for %s"
    for file_on_disk in tq.tqdm(list_of_files[var], 'Читаем файлы по %s' % var):

        variable_name, net_cdf_transfer_min, net_cdf_transfer_max, net_cdf_transfer = read_netcdf(
            file_on_disk)

        # assumes the file name ends with '<4-character year>.nc' -- TODO confirm
        year = file_on_disk[-7:-3]

        os.makedirs(directory, exist_ok=True)

        # daily minimum / maximum files are written only for 2 m temperature
        if var == '2m_temperature':
            os.makedirs(min_dir, exist_ok=True)
            os.makedirs(max_dir, exist_ok=True)

            net_cdf_transfer_min.to_netcdf(
                os.path.join(min_dir, '{}_min_{}.nc'.format(variable_name, year)))
            net_cdf_transfer_max.to_netcdf(
                os.path.join(max_dir, '{}_max_{}.nc'.format(variable_name, year)))

        # main daily output: mean or sum, as decided by read_netcdf
        net_cdf_transfer.to_netcdf(
            os.path.join(directory, '{}_{}.nc'.format(variable_name, year)))

    # Taken after the inner loop so it is always defined, even when a variable
    # has no files (previously a NameError or a stale value from the previous
    # variable).
    end_time = time.time()

    # message: "Conversion finished in %.2f minutes"
    print("Перенос завершён за %.2f минут" % ((end_time - start_time) / 60))
