In [8]:
import glob
import multiprocessing as mp
import os
from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
import xarray as xr

def load_and_organize_precip_file(path, lon_bounds=(180, 330), lat_bounds=(20, 80)):
    """Read the ``VAR_2T`` variable from one file and crop it to a lon/lat box.

    Despite the "precip" in the function name, the variable read here is
    ``VAR_2T`` (presumably 2 m temperature, judging by the variable name --
    confirm against the data documentation). The name is kept for callers.

    Parameters
    ----------
    path : str
        File to open with ``xarray.open_dataset``. It must contain ``VAR_2T``
        with ``time``, ``latitude`` and ``longitude`` dimensions.
    lon_bounds : tuple, optional
        ``(min_lon, max_lon)`` passed to a label-based slice on ``longitude``.
        Defaults to ``(180, 330)``.
    lat_bounds : tuple, optional
        ``(min_lat, max_lat)`` passed to a label-based slice on ``latitude``.
        Defaults to ``(20, 80)``.

    Returns
    -------
    xarray.DataArray
        Array with dimensions ``('time', 'lat', 'lon')`` whose values are held
        in memory, carrying the attributes of the source variable.
    """
    min_lon, max_lon = lon_bounds
    min_lat, max_lat = lat_bounds

    # The context manager closes the underlying file once the values have been
    # read; previously the handle stayed open for the life of the worker.
    with xr.open_dataset(path) as dataset:
        # Sort latitude ascending so slice(min_lat, max_lat) selects the
        # intended band regardless of the order stored in the file.
        cropped = dataset.VAR_2T.sortby('latitude').sel(
            longitude=slice(min_lon, max_lon),
            latitude=slice(min_lat, max_lat),
        )
        # `.values` forces the read while the file is still open.
        data = xr.DataArray(
            cropped.transpose('time', 'latitude', 'longitude').values,
            coords={
                'time': cropped.time,
                'lat': cropped.latitude.values,
                'lon': cropped.longitude.values,
            },
            dims=['time', 'lat', 'lon'],
            attrs=cropped.attrs,
        )
    return data

def process_and_save(file, out_dir=None, stats=('mean',)):
    """Reduce one file to daily statistics and write each one to NetCDF.

    The file is loaded with ``load_and_organize_precip_file``, resampled to
    one-day bins along ``time``, and every requested statistic is written to
    ``<out_dir>/<prefix>_<stamp>.nc``, where ``<stamp>`` is the second-to-last
    dot-separated token of the file *name* (not of the whole path, so dots in
    directory names cannot leak into the output name).

    Parameters
    ----------
    file : str
        Path of the input file.
    out_dir : str, optional
        Destination directory. When omitted, the module-level ``path_out`` is
        used, which is how the function behaves when called through
        ``Pool.map`` with a single argument.
    stats : iterable of str, optional
        Any of ``'mean'``, ``'max'``, ``'min'``. Defaults to ``('mean',)``.
        Output prefixes / variable names are ``TMean``/``MeanTemp``,
        ``TMax``/``MaxTemp`` and ``TMin``/``MinTemp``.

    Raises
    ------
    ValueError
        If ``stats`` contains an unsupported statistic name.
    """
    # statistic -> (file-name prefix, variable name inside the NetCDF file)
    labels = {
        'mean': ('TMean', 'MeanTemp'),
        'max': ('TMax', 'MaxTemp'),
        'min': ('TMin', 'MinTemp'),
    }
    unknown = [stat for stat in stats if stat not in labels]
    if unknown:
        raise ValueError(f"Unsupported statistics: {unknown}; choose from {sorted(labels)}")

    if out_dir is None:
        out_dir = path_out  # module-level default set by the driver cell

    # Use only the file name so directory components never affect the stamp.
    stamp = os.path.basename(str(file)).split('.')[-2]

    data_temp = load_and_organize_precip_file(file)
    for stat in stats:
        prefix, var_name = labels[stat]
        # e.g. data_temp.resample(time='1D').mean()
        daily = getattr(data_temp.resample(time='1D'), stat)()
        daily.to_dataset(name=var_name).to_netcdf(
            os.path.join(out_dir, f"{prefix}_{stamp}.nc")
        )

In [9]:
# Root directory of the source collection; the glob below looks in its
# subdirectories whose names start with the year.
path_era5 = '/glade/campaign/collections/rda/data/d633000/e5.oper.an.sfc/'

# One glob pattern per year from 1981 through 2020 (range() excludes 2021).
year_patterns = [
    f'{path_era5}{year}*/e5.oper.an.sfc.128_167_2t.ll025sc.*.nc'
    for year in range(1981, 2021)
]

# Flatten the per-year matches into one sequence and sort it, yielding a
# NumPy array of paths.
file_paths = np.sort(
    [match for pattern in year_patterns for match in glob.glob(pattern)]
)

In [10]:
# Directory that receives the output NetCDF files. process_and_save reads this
# module-level name, so it must be defined before the pool is started.
path_out = '/glade/derecho/scratch/jhayron/Data4Predictability/TEMP_ERA5_Daily/'

# Use multiprocessing Pool
if __name__ == '__main__':
    # Create the destination up front so the workers do not all fail on write.
    os.makedirs(path_out, exist_ok=True)

    # mp.cpu_count() reports every CPU in the machine, not the ones this
    # process is allowed to use; prefer the affinity mask where it exists.
    try:
        n_workers = len(os.sched_getaffinity(0))
    except AttributeError:  # os.sched_getaffinity is not available on all platforms
        n_workers = mp.cpu_count()
    # No point starting more workers than files; Pool requires at least one.
    n_workers = max(1, min(n_workers, len(file_paths)))

    with mp.Pool(n_workers) as pool:
        # map() blocks until every file has been processed.
        results = pool.map(process_and_save, file_paths)

    print("Processing complete.")

Processing complete.
