# Compile ET Monthly Datasets

Below is the Python code needed to download and process six ET datasets with monthly time steps. On my Dell Precision 3570 with a 12th Gen Intel i7-1255U 1.70 GHz processor and a 250 Mb/s internet connection, each of these datasets takes less than 15 minutes to download and process, except for the WBET dataset. The WBET dataset can take ~24 hours to fully process due to its high resolution and large file sizes. While the other datasets use at most 1 GB of disk space each, the WBET dataset will use ~100 GB at peak usage and ~50 GB for the final processed file. This can be reduced if the full date range is not utilized.

## TerraClimate Dataset

In [None]:
# TerraClimate Dataset
import fsspec
import xarray as xr
import os
import hvplot.xarray

# Make sure the working directory for the TerraClimate files exists
os.makedirs('terraclimate', exist_ok=True)

# If compiled netcdf is not made from downloads, make it
if not os.path.isfile('terraclimate/TerraClimate_AET.nc'):
    fs = fsspec.filesystem('https', timeout=3600)
    url = 'https://climate.northwestknowledge.net/TERRACLIMATE-DATA/'

    years = range(1958, 2023)

    # One file per year; the same file names are used for the remote URLs and the local copies
    filenames = [f"TerraClimate_aet_{year}.nc" for year in years]
    local_files = ['terraclimate/' + name for name in filenames]

    # Download all of the individual year files
    fs.get([url + name for name in filenames], 'terraclimate/')

    # Open the files and combine. The context manager closes the yearly files once the compiled
    #   netcdf has been written, so they are not still held open when they are deleted below.
    with xr.open_mfdataset(local_files, engine='netcdf4',
                           chunks={'lat': 116, 'lon': 288, 'time': -1}) as source:
        # Only keep CONUS range of data
        ds = source.sel(lat=slice(53, 24))
        ds = ds.sel(lon=slice(-126, -66))

        # Rechunk to appropriate size
        ds = ds.chunk({'lat': 116, 'lon': 288, 'time': -1})

        # Replace unicode characters in summary (degree symbol). Every occurrence of the two
        #   characters found at positions 64-65 of the summary is replaced by ' deg'.
        # NOTE(review): this relies on the degree symbol sitting at that position - confirm if the
        #   source metadata ever changes.
        summary = ds.attrs['summary']
        ds.attrs['summary'] = summary.replace(summary[64:66], ' deg')

        # Update aet units to include time span
        ds['aet'].attrs['units'] = 'mm.month-1'
        ds['aet'].attrs['long_name'] = 'Total Actual Evapotranspiration'

        ds['time'].attrs['unit'] = 'month'
        ds['time'].attrs['description'] = 'Monthly time step indicated by the first day of the month.'

        # Save xarray dataset to netcdf
        ds.to_netcdf(path='terraclimate/TerraClimate_AET.nc', engine='netcdf4', format='NETCDF4')

    # Remove downloaded files to reduce storage, as the data is now in the combined netcdf
    for path in local_files:
        os.remove(path)

# Open the compiled netcdf for inspection
ds = xr.open_dataset('terraclimate/TerraClimate_AET.nc', engine='netcdf4', chunks={'lat': 116, 'lon': 288, 'time': -1})
ds
#ds.aet.hvplot(groupby='time')

## ERA-5 Dataset

> NOTE: You will need a [Climate Data Store](https://cds.climate.copernicus.eu/cdsapp) (CDS) [account](https://cds.climate.copernicus.eu/user/register) to access this data. Once you have an account, make sure to [configure `cdsapi`](https://github.com/ecmwf/cdsapi#configure) for the download to work.

In [None]:
# ERA-5 Dataset
import xarray as xr
import cdsapi
import os
import zipfile
import hvplot.xarray

# Client for the Climate Data Store API (reads the user's cdsapi configuration)
c = cdsapi.Client()

# Make sure the working directory for the ERA5 files exists
os.makedirs('era5', exist_ok=True)

# If compiled netcdf is not made from the download, make it
if not os.path.isfile('era5/ERA5_AET.nc'):
    # This request is based on the command generated from Climate Data Store
    #   (https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land-monthly-means?tab=overview)
    c.retrieve(
        'reanalysis-era5-land-monthly-means',
        {
            'product_type': 'monthly_averaged_reanalysis',
            'variable': 'total_evaporation',
            # Years '1950' through '2022' and months '01' through '12' as strings
            'year': [str(year) for year in range(1950, 2023)],
            'month': [f"{month:02d}" for month in range(1, 13)],
            'time': '00:00',
            'format': 'netcdf.zip',
            'area': [53, -126, 24, -66],
        },
        'era5/ERA5_AET.netcdf.zip')

    # Extract the contents of zip file and remove zip file. The archive is closed before it is
    #   deleted so the removal does not depend on garbage collection releasing the handle.
    with zipfile.ZipFile('era5/ERA5_AET.netcdf.zip') as archive:
        archive.extract('data.nc', 'era5/')
    os.remove('era5/ERA5_AET.netcdf.zip')

    # Data values are negative to indicate inverse of precipitation (see docs) and in meters.
    #   We want to switch to positive values and mm, along with setting any then negative values to 0.
    #   Additionally, from the documentation, the monthly means have units that include "per day". We want "per month".
    #   So, we need to multiply each month by the number of days in it.
    # The context manager closes data.nc after the processed file is written so it can be deleted.
    with xr.open_dataset('era5/data.nc', engine='netcdf4',
                         chunks={'longitude': 601, 'latitude': 97, 'time': -1}) as source:
        ds = -1e3 * source
        ds = ds.where(~(ds < 0), 0)
        # Reshape to (time, 1, 1) so that each time step is scaled by its own day count
        days_in_month = ds.get_index('time').days_in_month.values
        ds = ds * days_in_month.reshape(len(days_in_month), 1, 1)

        # Add new metadata attributes
        ds['e'].attrs['units'] = 'mm.month-1'
        ds['e'].attrs['description'] = "Accumulated amount of water that has evaporated from the Earth's surface, "+ \
                                       'including a simplified representation of transpiration (from vegetation), '+ \
                                       'into vapour in the air above.'
        ds['e'].attrs['long_name'] = 'Total Evaporation'
        ds['e'].attrs['dimensions'] = 'lon lat time'

        # Rename the coordinates to a common format and add some metadata attributes
        ds = ds.rename({'longitude': 'lon','latitude': 'lat', 'e': 'aet'})
        ds['lat'].attrs['description'] = 'Latitude of the center of the grid cell'
        ds['lat'].attrs['standard_name'] = 'latitude'
        ds['lat'].attrs['axis'] = 'Y'

        ds['lon'].attrs['description'] = 'Longitude of the center of the grid cell'
        ds['lon'].attrs['standard_name'] = 'longitude'
        ds['lon'].attrs['axis'] = 'X'

        ds['time'].attrs['standard_name'] = 'time'
        ds['time'].attrs['unit'] = 'month'
        ds['time'].attrs['axis'] = 'T'
        ds['time'].attrs['description'] = 'Monthly time step indicated by the first day of the month.'

        # Chunk and save
        ds = ds.chunk({'lon': 601, 'lat': 97, 'time': -1})
        ds.to_netcdf(path='era5/ERA5_AET.nc', engine='netcdf4', format='NETCDF4')

    # Remove extracted file to reduce storage, as the data is now in the processed netcdf
    os.remove('era5/data.nc')

# Open the processed netcdf for inspection
ds = xr.open_dataset('era5/ERA5_AET.nc', engine='netcdf4', chunks={'lon': 601, 'lat': 97, 'time': -1})
ds
#ds.aet.hvplot(groupby='time')

## NLDAS Dataset

> NOTE: You will need an [EarthData Login](https://wiki.earthdata.nasa.gov/display/EL/How+To+Register+For+an+EarthData+Login+Profile) to access this data. Once you have a login, make sure to [link the login to the NASA GESDISC Data Archive](https://disc.gsfc.nasa.gov/earthdata-login) for the download to work.

In [None]:
#NLDAS Dataset
import fsspec
import xarray as xr
import aiohttp
import os
import hvplot.xarray

# Make sure the working directory for the NLDAS files exists
os.makedirs('nldas', exist_ok=True)

# If compiled netcdf is not made from downloads, make it
if not os.path.isfile('nldas/NLDAS_AET.nc'):

    # Requires an account to access the data
    # Username and Password are given as OS environmental variables (NASA_EARTHDATA_USERNAME and NASA_EARTHDATA_PASSWORD)
    fs = fsspec.filesystem('https', timeout=3600, client_kwargs={'auth': aiohttp.BasicAuth(os.environ['NASA_EARTHDATA_USERNAME'],
                                                                                           password=os.environ['NASA_EARTHDATA_PASSWORD'])})
    base_url = 'https://data.gesdisc.earthdata.nasa.gov/data/NLDAS/NLDAS_NOAH0125_M.2.0/'

    months = range(1, 13)
    years = range(1979, 2023)

    # Make matching lists of remote URLs (grouped in per-year folders) and local file paths
    year_months = [(year, month) for year in years for month in months]
    remote_paths = [f"{base_url}{year}/NLDAS_NOAH0125_M.A{year}{month:02d}.020.nc" for year, month in year_months]
    local_files = [f"nldas/NLDAS_NOAH0125_M.A{year}{month:02d}.020.nc" for year, month in year_months]

    fs.get(remote_paths, 'nldas/')

    # Open first month to get list of variables to drop (everything but 'Evap'); close it right away
    with xr.open_dataset(local_files[0], engine='netcdf4', chunks={}) as first_month:
        drop_vars = [var for var in first_month.data_vars if var != 'Evap']

    # Open all files and combine. Use one chunk as file is only 200MB total.
    #   The context manager closes the monthly files once the compiled netcdf has been written,
    #   so they are not still held open when they are deleted below.
    with xr.open_mfdataset(local_files, drop_variables=drop_vars, engine='netcdf4',
                           chunks={'lat': -1, 'lon': -1, 'time': -1}) as source:

        # Drop January of 1979 as it starts on the 2nd. See NLDAS docs for details.
        ds = source.where(source.time != source.time[0], drop=True)

        # Rename variable and coords to common names and add attributes
        ds = ds.rename({'Evap': 'aet'})
        # Units are in kg.m-2.month-1, which is equivalent to mm.month-1 assuming a water density of 1g.cm-3
        #   (mm = kg.m-2 / g.cm-3 * 1e3g.kg-1 * 1e-6m3.cm-3 * 1e3mm.m-1)
        ds['aet'].attrs['units'] = 'mm.month-1'
        ds['aet'].attrs['description'] = 'Actual Total Evapotranspiration'
        ds['aet'].attrs['dimensions'] = 'lon lat time'

        # Add some metadata attributes
        ds['lat'].attrs['description'] = 'Latitude of the center of the grid cell'
        ds['lat'].attrs['axis'] = 'Y'

        ds['lon'].attrs['description'] = 'Longitude of the center of the grid cell'
        ds['lon'].attrs['axis'] = 'X'

        ds['time'].attrs['standard_name'] = 'time'
        ds['time'].attrs['unit'] = 'month'
        ds['time'].attrs['axis'] = 'T'
        ds['time'].attrs['description'] = 'Monthly time step indicated by the first day of the month.'
        # Remove the per-file time attributes; pop with a default so an absent attribute does not abort the run
        for stale_attr in ('begin_date', 'begin_time', 'end_date', 'end_time', 'bounds'):
            ds['time'].attrs.pop(stale_attr, None)

        # Save dataset to netcdf
        ds.to_netcdf(path='nldas/NLDAS_AET.nc', format='NETCDF4', engine='netcdf4')

    # Remove downloaded files to reduce storage, as the data is now in the combined netcdf
    for path in local_files:
        os.remove(path)

# Open the compiled netcdf for inspection
ds = xr.open_dataset('nldas/NLDAS_AET.nc', engine='netcdf4', chunks={'lat': 224, 'lon': 232, 'time': -1})
ds
#ds.aet.hvplot(groupby='time')

## GLEAM Dataset

> NOTE: You will need a [GLEAM Login](https://www.gleam.eu/#downloads) to access this data. Once you have a login, you can use it to access the sftp site.

In [None]:
#GLEAM v3.7b Dataset
import fsspec
import xarray as xr
import os
import pandas as pd
import hvplot.xarray

# SFTP server and credentials; the credentials are read from OS environmental variables
#   (GLEAM_USERNAME, GLEAM_PASSWORD, and GLEAM_PORT)
sftp_host = 'sftp://hydras.ugent.be'
gleam_creds_sftp = dict(
    username = os.environ["GLEAM_USERNAME"],
    password = os.environ["GLEAM_PASSWORD"],
    port = int(os.environ["GLEAM_PORT"])
    )

# Make sure the working directory for the GLEAM files exists
os.makedirs('gleam', exist_ok=True)

# Download the GLEAM data
if not os.path.isfile('gleam/GLEAM_AET.nc'):
    # host input excludes the sftp prefix
    fs = fsspec.filesystem('sftp', host=sftp_host[len('sftp://'):], **gleam_creds_sftp)

    filepath = "/data/v3.7b/monthly/E_2003-2022_GLEAM_v3.7b_MO.nc"
    path = sftp_host + filepath
    # Local copy keeps the remote file name
    local_file = 'gleam/' + os.path.basename(filepath)

    fs.get(path, 'gleam/')

    # Open the file. The context manager closes the download once the processed netcdf has been
    #   written, so it is not still held open when it is deleted below.
    with xr.open_dataset(local_file, engine='netcdf4', chunks={'lon': 480, 'lat': 240, 'time': -1}) as source:

        # Only keep CONUS range of data and rechunk
        ds = source.sel(lat=slice(53, 24))
        ds = ds.sel(lon=slice(-126, -66))
        ds = ds.chunk({'lat': -1, 'lon': -1, 'time': -1})

        # Adjust month dates to be first of month rather than end of month for consistency with other datasets.
        #   Each new label is the original label shifted back to a month start; 'backfill' then fills it
        #   from the next original time step at or after that label.
        month_starts = ds.get_index('time').shift(periods=-1, freq='MS')
        ds = ds.reindex({'time': month_starts}, method='backfill')

        # Rename variable to common name and add new metadata attributes
        ds = ds.rename({'E': 'aet'})
        ds['aet'].attrs['description'] = 'Actual total evaporation from GLEAM 3.7b'
        ds['aet'].attrs['long_name'] = 'Actual evaporation'
        ds['aet'].attrs['dimensions'] = 'lon lat time'

        # Add some coordinate metadata attributes
        ds['lat'].attrs['units'] = 'degrees_north'
        ds['lat'].attrs['description'] = 'Latitude of the center of the grid cell'
        ds['lat'].attrs['long_name'] = 'latitude'
        ds['lat'].attrs['standard_name'] = 'latitude'
        ds['lat'].attrs['axis'] = 'Y'

        ds['lon'].attrs['units'] = 'degrees_east'
        ds['lon'].attrs['description'] = 'Longitude of the center of the grid cell'
        ds['lon'].attrs['long_name'] = 'longitude'
        ds['lon'].attrs['standard_name'] = 'longitude'
        ds['lon'].attrs['axis'] = 'X'

        ds['time'].attrs['long_name'] = 'time'
        ds['time'].attrs['standard_name'] = 'time'
        ds['time'].attrs['unit'] = 'month'
        ds['time'].attrs['description'] = 'Monthly time step indicated by the first day of the month.'
        ds['time'].attrs['axis'] = 'T'

        ds.to_netcdf(path='gleam/GLEAM_AET.nc', format='NETCDF4', engine='netcdf4')

    # Remove downloaded file to reduce storage, as the data is now in the new netcdf
    os.remove(local_file)

# Open the processed netcdf for inspection
ds = xr.open_dataset('gleam/GLEAM_AET.nc', engine='netcdf4', chunks={'lat': 116, 'lon': 288, 'time': -1})
ds
#ds.aet.hvplot(groupby='time')

## WBET (Reitz et al. 2023) Dataset

> NOTE: You will need a [ScienceBase Account](https://www.sciencebase.gov/directory/newUser/create) to download this data via the Python code. If you don't have an account, you will have to manually download [the data](https://www.sciencebase.gov/catalog/item/64135576d34eb496d1ce3d2e).

In [None]:
import sciencebasepy
import os
import re
import rioxarray
import xarray as xr
import pandas as pd
import zipfile_deflate64 as zipfile
import io
import hvplot.xarray

# Make sure the working directory for the WBET files exists
os.makedirs('wbet', exist_ok=True)

# If compiled netcdf is not made from downloads, make it
if not os.path.isfile('wbet/WBET_AET.nc'):
    # ScienceBase item holding the WBET data
    item_id = '64135576d34eb496d1ce3d2e'

    # Establish a session.
    sb = sciencebasepy.SbSession()

    # Login required to access cloud files via sciencebasepy
    sb.login(os.environ["SCIENCEBASE_USERNAME"], os.environ["SCIENCEBASE_PASSWORD"])

    # Get list of files for monthly ET data
    file_list = sb.get_item_file_info(sb.get_item(item_id))
    filenames = [i['name'] for i in file_list if re.search(r'ET.*_monthly\.zip', i['name']) is not None]

    # Download the files (these files are big (6GB a piece), so this will take a while...)
    _ = sb.download_cloud_files(filenames, sb.generate_S3_download_links(item_id, filenames), 'wbet')

    # Intermediate netcdf written for each downloaded zip file (zip name with '.zip' swapped for '.nc')
    decade_files = ['wbet/' + name[:-4] + '.nc' for name in filenames]

    # Convert the GeoTIFF files of each zip file to one netcdf
    for zippedfiles, decade_file in zip(filenames, decade_files):

        # Select the GeoTIFF files from the archive and extract them. The archive is closed before
        #   it is deleted so the removal is not attempted on a file that is still open.
        with zipfile.ZipFile('wbet/' + zippedfiles) as zfile:
            gtif_files = [member for member in zfile.namelist() if member.endswith('.tif')]
            zfile.extractall('wbet/', gtif_files)

        # Delete downloaded file to save disk space, since files are now extracted
        os.remove('wbet/'+zippedfiles)

        # Start a fresh list of monthly Datasets for this zip file
        ds_monthly_list = []
        for gtif in gtif_files:
            # Read in each extracted GeoTIFF
            ds_month = rioxarray.open_rasterio('wbet/'+gtif, chunks={}, band_as_variable=True)

            # 5th-8th characters of file indicate year, 10th-11th indicate month (index start at 0, characters at 1)
            year, month = gtif[4:8], gtif[9:11]
            date = year+'-'+month

            # Rename coords to corresponding names, assign the date to the Dataset, and remove spatial_ref coord
            ds_month = ds_month.rename({'x': 'lon','y': 'lat', 'band_1': 'aet'}).assign_coords(time=pd.to_datetime(date)).expand_dims(dim="time")
            ds_month = ds_month.drop_vars('spatial_ref')

            # Stack the monthly Datasets to list for concatenating
            ds_monthly_list.append(ds_month)
            ds_month.close()

        # Concatenate and save to netcdf
        ds = xr.concat(ds_monthly_list, dim='time')
        ds.to_netcdf(path=decade_file, format='NETCDF4', engine='netcdf4',
                     encoding={'aet':{'zlib': True, 'complevel': 4}})

        # Delete extracted files to save disk space, since files are now compiled to netcdf
        for gtif in gtif_files:
            os.remove('wbet/'+gtif)

    # Open processed decade files as single dataset. The context manager closes them once the final
    #   netcdf has been written, so they are not still held open when they are deleted below.
    with xr.open_mfdataset(decade_files, engine='netcdf4',
                           chunks={'lat': 69, 'lon': 281, 'time': -1}) as source:

        #   From the metadata xml file, the monthly data are in units of mm.day-1. We want mm.month-1.
        #   So, we need to multiply each month by the number of days in it.
        #   Reshape to (time, 1, 1) so that each time step is scaled by its own day count.
        days_in_month = source.get_index('time').days_in_month.values
        ds = source * days_in_month.reshape(len(days_in_month), 1, 1)

        # Add new metadata attributes to variable and coordinates
        #   ('units' rather than 'unit' on aet, matching the other compiled datasets)
        ds['aet'].attrs['units'] = 'mm.month-1'
        ds['aet'].attrs['description'] = 'Actual Total Evapotranspiration via WBET from Reitz+2023'
        ds['aet'].attrs['long_name'] = 'Actual Evapotranspiration'
        ds['aet'].attrs['standard_name'] = 'Actual Evapotranspiration'
        ds['aet'].attrs['dimensions'] = 'lon lat time'

        ds['lat'].attrs['units'] = 'degrees_north'
        ds['lat'].attrs['description'] = 'Latitude of the center of the grid cell'
        ds['lat'].attrs['long_name'] = 'latitude'
        ds['lat'].attrs['standard_name'] = 'latitude'
        ds['lat'].attrs['axis'] = 'Y'

        ds['lon'].attrs['units'] = 'degrees_east'
        ds['lon'].attrs['description'] = 'Longitude of the center of the grid cell'
        ds['lon'].attrs['long_name'] = 'longitude'
        ds['lon'].attrs['standard_name'] = 'longitude'
        ds['lon'].attrs['axis'] = 'X'

        ds['time'].attrs['long_name'] = 'time'
        ds['time'].attrs['standard_name'] = 'time'
        ds['time'].attrs['unit'] = 'month'
        ds['time'].attrs['description'] = 'Monthly time step indicated by the first day of the month.'
        ds['time'].attrs['axis'] = 'T'

        # Save dataset
        ds.to_netcdf(path='wbet/WBET_AET.nc', format='NETCDF4', engine='netcdf4',
                     encoding={'aet':{'zlib': True, 'complevel': 9}})

    # Remove processed decade files, as the data is now in the final netcdf
    for decade_file in decade_files:
        os.remove(decade_file)


# Open the saved netcdf
ds = xr.open_dataset('wbet/WBET_AET.nc', engine='netcdf4', chunks={'lat': 69, 'lon': 281, 'time': -1})
ds
#ds.aet.sel(time='2010-08', method='nearest').hvplot()

## SSEBop MODIS Dataset

In [None]:
# SSEBop Dataset
import fsspec
import rioxarray
import xarray as xr
import pandas as pd
import zipfile
import io
import os
import hvplot.xarray
import numpy as np

fs = fsspec.filesystem('https', timeout=3600)
url = 'https://edcintl.cr.usgs.gov/downloads/sciweb1/shared/uswem/web/conus/eta/modis_eta/monthly/downloads/'

# Month starts for every month that is downloaded
dates = pd.date_range('2000-01-01', '2022-12-31', freq='MS')

# Make a directory for holding and extracting the zip files
os.makedirs('ssebop', exist_ok=True)

# If compiled netcdf is not made from downloads, make it
if not os.path.isfile('ssebop/SSEBop_AET.nc'):
    # Download each month and convert its GeoTIFF to an intermediate monthly netcdf
    for date in dates:
        stamp = date.strftime('%Y%m')
        zip_path = 'ssebop/m' + stamp + '.zip'

        # Download the zipfiles
        # (Do this individually vs all files in a list as server may disconnect downloading for list)
        fs.get(url + 'm' + stamp + '.zip', 'ssebop/')

        # Read the GeoTIFF bytes straight from the zip file without extracting to disk. The archive
        #   is closed before it is deleted so the removal is not attempted on a file that is still open.
        with zipfile.ZipFile(zip_path) as zfile:
            tif_bytes = zfile.read('m' + stamp + '.modisSSEBopETactual.tif')

        # Read in GeoTIFF
        ds_monthly = rioxarray.open_rasterio(io.BytesIO(tif_bytes), chunks={}, band_as_variable=True)

        # Rename coords to corresponding names. Assign the date to the Dataset
        ds_monthly = ds_monthly.rename({'x': 'lon','y': 'lat', 'band_1': 'aet'}).assign_coords(time=date).expand_dims(dim="time")

        # Save monthly compiled dataset
        ds_monthly.to_netcdf(path='ssebop/'+date.strftime('%Y-%m')+'.modisSSEBopETactual.nc', engine='netcdf4')

        # Delete downloaded zipfile as we no longer need it
        os.remove(zip_path)


    # Read in monthly netcdf Datasets into full Dataset and chunk
    #   Dates after 2016-07 have less indices values than those before. Read these date groups in seperately, slice,
    #   and align for concatenating into single file. Additionally after 2016-07, bodies of water have a fill value
    #   of 0 rather than NaN. Replace the 0 fill with NaN using a mask from before 2016-07 as 0 can be present on
    #   land surface in winter.
    # NOTE(review): 'dates' starts at 2000-01 but the first group below starts at 2001-01, so the
    #   2000 files are downloaded, converted, and deleted without being part of the compiled
    #   dataset. Confirm whether 2000 should be included or the download should start in 2001.
    pre_201608 = pd.date_range('2001-01-01', '2016-7-31', freq='MS')
    post_201608 = pd.date_range('2016-08-01', '2022-12-31', freq='MS')
    pre_files = ['ssebop/'+month+'.modisSSEBopETactual.nc' for month in pre_201608.strftime('%Y-%m')]
    post_files = ['ssebop/'+month+'.modisSSEBopETactual.nc' for month in post_201608.strftime('%Y-%m')]

    # The context managers close the monthly files once the full Dataset has been written, so they
    #   are not still held open when they are deleted below.
    with xr.open_mfdataset(pre_files, engine='netcdf4') as early, \
         xr.open_mfdataset(post_files, engine='netcdf4') as late:

        # reindex lat and lon to match, values are off by floating point rounding errors
        # Match lat/lon range
        ds1 = early.sel(lon=late['lon'], lat=late['lat'], method='nearest', tolerance=1e-10)
        ds1, ds2 = xr.align(ds1, late, join='override', exclude='time')

        # Concatenate, remove spatial_ref var, and chunk
        ds = xr.concat([ds1, ds2], dim='time')
        ds = ds.drop_vars('spatial_ref')
        ds = ds.chunk({'lon': 348, 'lat': 218, 'time': -1})

        # Set 0 fill values to NaNs (cells that are NaN in the first time step are masked in every time step)
        ds = ds.where(~np.isnan(ds.aet.isel(time=0)))

        # Add new metadata attributes
        ds['aet'].attrs['description'] = 'Actual evaporation from SSEBop MODIS, monthly total'
        ds['aet'].attrs['dimensions'] = 'lon lat time'
        ds['aet'].attrs['standard_name'] = 'Actual evaporation'
        ds['aet'].attrs['long_name'] = 'Actual evaporation'
        ds['aet'].attrs['units'] = 'mm.month-1'

        # Add some coordinate metadata attributes
        ds['lat'].attrs['units'] = 'degrees_north'
        ds['lat'].attrs['description'] = 'Latitude of the center of the grid cell'
        ds['lat'].attrs['long_name'] = 'latitude'
        ds['lat'].attrs['standard_name'] = 'latitude'
        ds['lat'].attrs['axis'] = 'Y'

        ds['lon'].attrs['units'] = 'degrees_east'
        ds['lon'].attrs['description'] = 'Longitude of the center of the grid cell'
        ds['lon'].attrs['long_name'] = 'longitude'
        ds['lon'].attrs['standard_name'] = 'longitude'
        ds['lon'].attrs['axis'] = 'X'

        ds['time'].attrs['long_name'] = 'time'
        ds['time'].attrs['standard_name'] = 'time'
        ds['time'].attrs['description'] = 'Monthly time step indicated by the first day of the month.'
        ds['time'].attrs['unit'] = 'month'
        ds['time'].attrs['axis'] = 'T'

        # Save full Dataset
        ds.to_netcdf(path='ssebop/SSEBop_AET.nc', format='NETCDF4', engine='netcdf4', encoding={'aet':{'zlib': True, 'complevel': 4}})

    # Remove intermediate monthly files (all downloaded months, including those not compiled)
    for month in dates.strftime('%Y-%m'):
        os.remove('ssebop/'+month+'.modisSSEBopETactual.nc')

# Open the compiled netcdf for inspection
ds = xr.open_dataset('ssebop/SSEBop_AET.nc', engine='netcdf4', chunks={'lon': 348, 'lat': 218, 'time': -1})
ds
#ds.aet.hvplot(groupby='time')