In [1]:
import glob
from datetime import datetime
from datetime import timedelta
import numpy as np
import pandas as pd
import xarray as xr
import multiprocessing as mp

In [2]:
def weighted_merge_datasets(full_dataset, ds_temp, weight_full=0.25, weight_temp=0.75,
                            var_name='PrecipitationRate'):
    """Append ``ds_temp`` to ``full_dataset`` along ``time``, blending overlapping timestamps.

    Timestamps present in both inputs get a weighted combination of the two
    values of ``var_name``::

        weight_full * full_dataset[var_name] + weight_temp * ds_temp[var_name]

    Timestamps present in only one input are carried over unchanged.

    Parameters
    ----------
    full_dataset : xarray.Dataset
        Dataset accumulated so far. Must have a ``time`` coordinate and the
        variable ``var_name``.
    ds_temp : xarray.Dataset
        Dataset to merge in. Same requirements as ``full_dataset``.
    weight_full : float, default 0.25
        Weight applied to ``full_dataset`` values at overlapping timestamps.
    weight_temp : float, default 0.75
        Weight applied to ``ds_temp`` values at overlapping timestamps.
    var_name : str, default 'PrecipitationRate'
        Name of the data variable that is blended on overlapping timestamps.

    Returns
    -------
    xarray.Dataset
        Concatenation of both inputs along ``time`` with each timestamp
        appearing once. The time axis keeps concatenation order
        (``full_dataset`` first, then the new timestamps of ``ds_temp``); it
        is not re-sorted.

    Notes
    -----
    Only ``var_name`` is blended. For any other variable, the first occurrence
    of a duplicated timestamp is kept, i.e. the value from ``full_dataset``.
    The weights are applied as given and are not normalised.
    """
    # Timestamps present in both inputs (intersect1d returns them sorted and unique).
    overlapping_times = np.intersect1d(full_dataset['time'].values, ds_temp['time'].values)

    # Stack both datasets, then keep the first occurrence of every timestamp so
    # the time index is unique before any label-based assignment.
    combined = xr.concat([full_dataset, ds_temp], dim='time')
    combined = combined.drop_duplicates(dim='time', keep='first')

    if overlapping_times.size:
        # Blend all overlapping timestamps in one vectorised operation instead
        # of looping over them one at a time.
        blended = (
            full_dataset[var_name].sel(time=overlapping_times) * weight_full
            + ds_temp[var_name].sel(time=overlapping_times) * weight_temp
        )
        combined[var_name].loc[{'time': overlapping_times}] = blended

    return combined

In [3]:
# Directory (with trailing slash) that holds the individual daily precipitation NetCDF files.
path_files = '/glade/derecho/scratch/jhayron/Data4Predictability/PRECIP_ERA5_Daily/'
# Every .nc file directly inside that directory, as a lexicographically sorted array of paths.
files = np.sort(glob.glob(path_files + '*.nc'))

In [4]:
# Fail with an explicit message when the glob matched nothing, instead of an
# opaque IndexError on files[0].
if len(files) == 0:
    raise FileNotFoundError(f'No .nc files found in {path_files}')

# xr.load_dataset reads each file fully into memory and closes it right away,
# so no NetCDF file handle stays open while the merged dataset grows.
full_dataset = xr.load_dataset(files[0])
for file in files[1:]:
    ds_temp = xr.load_dataset(file)
    # Fold the next file into the running result; overlapping timestamps are
    # blended by weighted_merge_datasets.
    full_dataset = weighted_merge_datasets(full_dataset, ds_temp)

In [5]:
# Write the merged dataset to a single NetCDF file.
full_dataset.to_netcdf(
    path='/glade/derecho/scratch/jhayron/Data4Predictability/PRECIP_ERA5_Daily.nc'
)