In [1]:
import os
import numpy as np
import xarray as xr

In [2]:
# Locate the ERA5 pressure-level files to load: for every numbered
# sub-directory of `era_dir`, pick one NetCDF file per variable in `names`.
era_dir = '/gpfs/fs1/collections/rda/data/ds633.0/e5.oper.an.pl'

# Only purely numeric entries are kept, so a stray README or hidden file in
# the archive root does not crash the int() conversion.
# NOTE(review): the variable is called `years`, but the recorded output shows
# values such as 197901 / 202202, which look like YYYYMM -- confirm.
years = [int(x) for x in sorted(os.listdir(era_dir)) if x.isdigit()]
if not years:
    raise FileNotFoundError(f'No numbered sub-directories found in {era_dir}')

print(f'Found {len(years)} years of ERA5 data, starting with {years[0]} and ending with {years[-1]}.')

# Variable tags as they appear in the file names (matched as '_<tag>.').
names = ['u', 'v']

ds_fnames = []
missing = []      # (directory, variable) pairs with no matching file
n_ambiguous = 0   # (directory, variable) pairs with more than one matching file
for year in years:
    year_dir = f'{era_dir}/{year}'
    # Sorted so that the file picked below does not depend on the arbitrary
    # order returned by os.listdir.
    fnames = sorted(x for x in os.listdir(year_dir) if x.endswith('.nc'))

    for name in names:
        matches = [x for x in fnames if f'_{name.lower()}.' in x]
        if not matches:
            # Record and carry on instead of dying with a bare IndexError, so
            # the summary below can actually report a shortfall.
            missing.append((year_dir, name))
            continue
        if len(matches) > 1:
            n_ambiguous += 1
        # NOTE(review): only the first (alphabetically) matching file is used.
        # If a directory holds several files per variable, the others are
        # ignored -- confirm that this is intended.
        fname = matches[0]
        ds_fnames.append(f'{year_dir}/{fname}')

print(f'Found {len(ds_fnames)} files out of an expected {len(years) * len(names)}.')
if missing:
    print(f'Missing {len(missing)} file(s); first missing: variable {missing[0][1]!r} in {missing[0][0]}.')
if n_ambiguous:
    print(f'{n_ambiguous} directory/variable pair(s) had more than one matching file; only the first was used.')

Found 518 years of ERA5 data, starting with 197901 and ending with 202202.
Found 1036 files out of an expected 1036.


In [3]:
# Open all files lazily as one dataset.  This is deliberately NOT done inside
# a `with` block: `ds` is still lazy when this cell finishes and is only
# evaluated by later cells, so the underlying files must stay open beyond
# this cell.  (A `with` block would close them on exit while the derived,
# still-unevaluated `ds` keeps referring to them.)  Call `ds_raw.close()`
# once the results have been computed/saved if the handles need releasing.
ds_raw = xr.open_mfdataset(ds_fnames, combine='by_coords')

# Lower-case the wind variable names and drop the auxiliary `utc_date`
# variable.  (The former 'level' -> 'level' entry was a no-op and is omitted.)
ds = ds_raw.rename({
    'U' : 'u',
    'V' : 'v',
}).drop('utc_date')

# resample to daily means (1-day bins along `time`)
ds = ds.resample(time='1d').mean()
display(ds)

<xarray.Dataset>
Dimensions:    (latitude: 721, level: 37, longitude: 1440, time: 15755)
Coordinates:
  * time       (time) datetime64[ns] 1979-01-02 1979-01-03 ... 2022-02-19
  * latitude   (latitude) float64 90.0 89.75 89.5 89.25 ... -89.5 -89.75 -90.0
  * level      (level) float64 1.0 2.0 3.0 5.0 7.0 ... 925.0 950.0 975.0 1e+03
  * longitude  (longitude) float64 0.0 0.25 0.5 0.75 ... 359.0 359.2 359.5 359.8
Data variables:
    u          (time, level, latitude, longitude) float32 dask.array<shape=(15755, 37, 721, 1440), chunksize=(11, 37, 721, 1440)>
    v          (time, level, latitude, longitude) float32 dask.array<shape=(15755, 37, 721, 1440), chunksize=(11, 37, 721, 1440)>

In [4]:
# Split each wind component into its mean over longitude (`*_bar`) and the
# deviation from that mean (`*_prime`).
u = ds['u']
v = ds['v']

u_bar, v_bar = (wind.mean('longitude') for wind in (u, v))

u_prime = u - u_bar
v_prime = v - v_bar

In [5]:
# EKE (presumably eddy kinetic energy): half the sum of the squared
# deviations of u and v from their longitude means.
eddy_speed_sq = u_prime**2 + v_prime**2
EKE = eddy_speed_sq / 2

In [6]:
# Attach the derived fields to the dataset under their output variable names.
for var_name, field in (("up", u_prime), ("vp", v_prime), ("EKE", EKE)):
    ds[var_name] = field

In [7]:
# Average every variable in the dataset over the longitude dimension.
# NOTE(review): `up` and `vp` were built as deviations from the longitude
# mean, so their longitude means should come out ~0 -- confirm that they are
# meant to be averaged and saved alongside `EKE`.
ds = ds.mean(dim='longitude')

In [None]:
# Force evaluation of the lazily-defined dataset so that `ds` holds concrete
# in-memory arrays from here on.
ds = ds.compute()

  x = np.divide(x1, x2, out)


In [None]:
# Write the dataset to a NetCDF file.
out_path = "/glade/u/home/cvalva/AMandEnergy/eddy_daily_data.nc"
ds.to_netcdf(out_path)