Data Download Daily LOCA2 to Monthly 
----------------------------------------------------------------

**Download LOCA2 data for one simulation and convert the daily values to monthly values.**

In [None]:
# Outside the Cal-Adapt Analytics Engine JupyterHub, install the intake-esm
# and s3fs packages before running this notebook.
import intake

# Location of the intake-esm catalog (JSON) describing the available data sets.
catalog_url = 'https://cadcat.s3.amazonaws.com/cae-collection.json'

# Open the catalog; `cat` is queried in the cells that follow.
cat = intake.open_esm_datastore(catalog_url)

Query parameters used to select data from the catalog:

- `table_id` (time scale): {"monthly": "mon", "daily": "day", "hourly": "1hr"}
- `variable_id`: pr, tasmax, tasmin
- `grid_label` (spatial resolution; we want 3 km): {"45 km": "d01", "9 km": "d02", "3 km": "d03"}
- `activity_id`: "LOCA2"
- `experiment_id`: "ssp370" for right now.

In [None]:
# Human-readable labels mapped to the identifiers used in the catalog.

# Time scale label -> catalog `table_id`
dict_table_id = {
    "monthly": "mon",
    "daily": "day",
    "hourly": "1hr",
}

# Grid resolution label -> catalog `grid_label`
dict_grid_label = {
    "45 km": "d01",
    "9 km": "d02",
    "3 km": "d03",
}

Get the catalog info for LOCA2 by querying for experiment and all other variables.

In [None]:
import geopandas

# Switch: True selects the historical experiment, False the future scenario.
do_historical = True

# Identifiers for the run to download.
historical_id = "historical"
simulation_id = "ssp370"
member_id = "r1i1p1f1"
source_id = "ACCESS-CM2"

# Fixed query settings: LOCA2 activity, daily time scale, 3 km grid.
activity_id = "LOCA2"
table_id = dict_table_id["daily"]
variable_id = ["pr", "tasmax", "tasmin"]
grid_label = dict_grid_label["3 km"]

# Bounding box around the watershed of interest, as [min, max] pairs.
latitude = [34.775317, 42.432494]
longitude = [-123.097421, -117.980799]

# Year range and experiment go together: pick both from the switch above.
if do_historical:
    time_slice = (1950, 2014)   # Historical
    experiment_id = historical_id
else:
    time_slice = (2015, 2100)   # Simulation
    experiment_id = simulation_id

# Narrow the full catalog down to the entries matching every setting above.
search_params = {
    "activity_id": activity_id,
    "table_id": table_id,
    "variable_id": variable_id,
    "experiment_id": experiment_id,
    "grid_label": grid_label,
    "member_id": member_id,
    "source_id": source_id,
}
cat_loca = cat.search(**search_params)

# Display the paths of the matching catalog entries.
cat_loca.unique()["path"]


**The output above gives you an idea of the available query parameters that can be entered to retrieve a particular set of data. The cell above is a sample query against the whole catalog that refines the catalog entries to those of interest.**

Load data, convert to monthly, and clip to area of interest.

In [None]:
#Bounding box
from shapely.geometry import mapping
import xarray as xr

# Open every catalog entry that matches the query as an xarray dataset
# (anonymous S3 access, consolidated metadata).
cat_subset = cat_loca.search(member_id=member_id)
data_dict = cat_subset.to_dataset_dict(
    xarray_open_kwargs={'consolidated': True},
    storage_options={'anon': True})

# Fail early with a readable message; otherwise the final lookup below would
# raise an opaque KeyError on the empty dataset name.
if not data_dict:
    raise ValueError(
        "No datasets matched the catalog query; check the search parameters."
    )

dsname = ""

# NOTE(review): `.rio` below is the rioxarray accessor. Nothing visible in this
# notebook imports rioxarray explicitly -- confirm it is registered in the
# target environment (e.g. via `import rioxarray`).

# There should only be one dataset to work with at this point. If more than one
# matched, each is processed and `dsname` ends up naming the last one.
# Iterate over a snapshot so the dictionary can be updated inside the loop.
for dname, ds_daily in list(data_dict.items()):
    ds_daily = ds_daily.assign_coords({"simulation": ds_daily.attrs["source_id"]})

    # Time slice (years given as strings select whole calendar years).
    ds_daily = ds_daily.sel(
        time=slice(str(time_slice[0]), str(time_slice[1]))
    )

    # Convert daily values to monthly: precipitation is accumulated,
    # temperature is averaged.
    # min_count=1 keeps cells that have no valid daily value as NaN; a plain
    # NaN-skipping sum would return 0 there, which reads as "no precipitation".
    # NOTE(review): the sum is a monthly total only if `pr` is a per-day
    # amount -- confirm the variable's units.
    # NOTE(review): newer pandas releases prefer the "ME" alias over "M" for
    # month-end; "M" is kept for compatibility -- confirm for your environment.
    pr_monthly = ds_daily['pr'].resample(time="M").sum(min_count=1)
    temp_monthly = ds_daily[['tasmin', 'tasmax']].resample(time="M").mean()

    # Merge the monthly variables back into one dataset.
    ds_monthly = xr.merge([pr_monthly, temp_monthly])

    # This needs to be done for the clipping: tell rioxarray which dimensions
    # are spatial and which CRS the coordinates are in.
    ds_monthly.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
    ds_monthly.rio.write_crs("EPSG:4326", inplace=True)

    # Get the subset of data for the watershed bounding box.
    ds_clipped = ds_monthly.rio.clip_box(
        minx=longitude[0],
        miny=latitude[0],
        maxx=longitude[1],
        maxy=latitude[1],
        crs=4326,
    )

    dsname = dname
    # Replace the daily dataset with the monthly, clipped result.
    data_dict[dname] = ds_clipped

data_dict[dsname]

Save output to netCDF file.

In [None]:

import os

# Output name: the dataset key with "day" replaced by "<member_id>.mon",
# followed by the start year, end year and a "box_clip" suffix.
monthly_name = str(dsname).replace("day", "%s.mon" % member_id)
filout = f"{monthly_name}_{time_slice[0]}_{time_slice[1]}_box_clip.nc"

# Uncomment and set the path to a local directory.
#filout = os.path.join(r"C:\Data\RandD\Visual Studio Code\DWR", filout)

# Write the monthly, clipped dataset to a netCDF file.
data_dict[dsname].to_netcdf(filout)