In [None]:
import datetime

import geopandas as gpd
import intake
import numpy as np
from shapely.geometry import Point, Polygon

In [None]:
# Relative path to the shapefile holding the basin polygons; it is read with
# geopandas further down in the notebook.
basin_path = "Clip/CalSimIII_AllWatersheds_20200417_WGS84.shp"

In [None]:
def get_dataset(dataset_params):
    """Search the cae-collection intake-esm catalog and return one dataset.

    Parameters
    ----------
    dataset_params : dict
        Must provide the keys ``activity_id``, ``institution_id``,
        ``table_id``, ``variable_id``, ``experiment_id``, ``grid_label``,
        ``member_id`` and ``source_id``. Each value is passed to the catalog
        search as the facet of the same name.

    Returns
    -------
    The entry of the dataset dictionary stored under the key
    ``activity_id.institution_id.source_id.experiment_id.table_id.grid_label``.

    Raises
    ------
    KeyError
        If a required key is missing from ``dataset_params`` or the dataset
        dictionary has no entry under the composed key.
    """
    # The catalog description is fetched from this URL on every call.
    catalog = intake.open_esm_datastore('https://cadcat.s3.amazonaws.com/cae-collection.json')

    # Facets forwarded one-to-one to the catalog search.
    search_facets = (
        "activity_id",
        "institution_id",
        "table_id",
        "variable_id",
        "experiment_id",
        "grid_label",
        "member_id",
        "source_id",
    )
    query = {facet: dataset_params[facet] for facet in search_facets}
    matches = catalog.search(**query)

    datasets = matches.to_dataset_dict(
        xarray_open_kwargs={'consolidated': True},
        storage_options={'anon': True},
    )

    # Only these six facets, in this order, make up the dictionary key.
    key_facets = (
        'activity_id',
        'institution_id',
        'source_id',
        'experiment_id',
        'table_id',
        'grid_label',
    )
    dataset_key = "{}.{}.{}.{}.{}.{}".format(
        *(dataset_params[facet] for facet in key_facets)
    )

    return datasets[dataset_key]

In [None]:
# Catalog facets identifying the dataset to load. get_dataset() reads every
# one of these keys, so none of them may be omitted.
dataset_params = {
    "activity_id": "LOCA2",
    "institution_id": "UCSD",
    "table_id": "day",
    "variable_id": "pr",
    "experiment_id": "ssp245",
    "member_id": "r1i1p1f2",
    "source_id": "CNRM-ESM2-1",
    "grid_label": "d03",
}
# Opens the remote catalog and returns the matching dataset.
ds = get_dataset(dataset_params)

In [None]:
# Reference (nDarrays tutorial, aggregation chapter): https://geohackweek.github.io/nDarrays/05-aggregation/

In [None]:
# time slice
# Restrict the dataset to the labels between the two years in `time_slice`;
# the years are converted to strings before being used as slice bounds.
# NOTE: this rebinds `ds`, so the unsliced dataset is no longer reachable
# under that name after this cell has run.
time_slice = (2022, 2023)
ds = ds.sel(time=slice(str(time_slice[0]), str(time_slice[1])))

In [None]:
# First element of `pr` along its leading dimension (presumably time — TODO
# confirm). It is passed to create_mask() further down as the grid template.
ts = ds.pr[0]

In [None]:
# average for months
# NOTE: rebinds `ds` to the resampled result.
# NOTE(review): recent pandas releases deprecate the "M" alias in favour of
# "ME" — confirm against the installed version.
ds = ds.resample(time="M").mean() # where M is for months

In [None]:
# Show the 'title' attribute of the dataset.
ds.attrs['title']

In [None]:
# SPATIAL MASKING

In [None]:
def intersect(lat, lon, polygons):
    """Return the OBJECTID of the first polygon that contains a point.

    Parameters
    ----------
    lat, lon
        Coordinates of the point. The point is built as ``Point(lon, lat)``,
        i.e. longitude is the x value and latitude the y value.
    polygons
        Table whose rows expose ``geometry`` and ``OBJECTID`` (iterated with
        ``iterrows()``).

    Returns
    -------
    The ``OBJECTID`` of the first row, in iteration order, whose geometry
    contains the point, or ``None`` when no geometry contains it.
    """
    pt = Point(lon, lat)
    for _, row in polygons.iterrows():
        if row.geometry.contains(pt):
            # First match wins; later rows are not examined.
            return row.OBJECTID
    return None

In [None]:
def create_mask(time_slice, polygons, bbox):
    """Label every grid cell with the OBJECTID of the polygon containing it.

    Parameters
    ----------
    time_slice
        Object exposing ``sizes`` and ``coords`` with ``'lat'`` and ``'lon'``
        entries (the notebook passes one element of ``pr``). Its grid defines
        the shape of the mask.
    polygons
        Table with ``OBJECTID`` and ``geometry``; forwarded unchanged to
        ``intersect``.
    bbox : dict
        Keys ``"top"``, ``"bottom"``, ``"left"`` and ``"right"``. Only points
        strictly inside this box are tested against the polygons.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(n_lat, n_lon)``. Row ``n_lat - 1 - j`` holds
        the results for the ``j``-th latitude, so rows run in the opposite
        order to ``time_slice.coords['lat']``; columns follow
        ``time_slice.coords['lon']``. Cells outside ``bbox``, or inside it but
        not contained in any polygon, are ``None``.
    """
    # Size the mask from the argument rather than the notebook-global `ds`, so
    # the result depends only on what the caller passes in.
    n_lat = time_slice.sizes['lat']
    n_lon = time_slice.sizes['lon']

    # Every cell starts as None; only cells inside the bounding box are tested.
    mask = np.full((n_lat, n_lon), None, dtype="object")

    lats = time_slice.coords['lat'].values
    lons = time_slice.coords['lon'].values

    for i, lon in enumerate(lons):
        # The longitude test does not depend on latitude, so a column that is
        # outside the box can be skipped as a whole.
        if not (bbox["left"] < lon < bbox["right"]):
            continue

        for j, lat in enumerate(lats):
            if bbox["top"] > lat > bbox["bottom"]:
                # Rows are written in reverse latitude order (see Returns).
                mask[(n_lat - 1) - j, i] = intersect(lat, lon, polygons)

    return mask

In [None]:
# Load the basin shapefile and keep only the two columns intersect() reads.
data = gpd.read_file(basin_path)
polygons = data[['OBJECTID', 'geometry']]
# Bounding box for the polygon search: create_mask() compares "top"/"bottom"
# with latitude and "left"/"right" with longitude (strict inequalities).
bbox = {
    "top": 42.5,
    "bottom": 34.7,
    "left": -123.1,
    "right": -117.9,
}
# Build the mask on the grid of `ts`.
mask = create_mask(ts, polygons, bbox)

In [None]:
# Inspect the freshly built mask.
mask

In [None]:
# Write the mask to disk. The array has object dtype, so NumPy stores it as a
# pickle inside the .npy file.
with open("Clip/mask.npy", "wb") as f:
    np.save(f, mask)

In [None]:
# Read the saved mask back. allow_pickle=True is required for an object-dtype
# array; unpickling can execute arbitrary code, so only load trusted files.
with open("Clip/mask.npy", "rb") as g:
   shp_mask = np.load(g, allow_pickle=True)

In [None]:
# Inspect the mask as reloaded from disk.
shp_mask

In [None]:
# Spot-check a few cells of `mask`: the 2x2 block at the top-left corner and
# the single cell at row 494, column 558.
print(f"{mask[0, 0]} {mask[0, 1]}")
print(f"{mask[1, 0]} {mask[1, 1]}")
print()
print(f"{mask[494, 558]}")

In [None]:
# create_mask() writes its rows in reverse latitude order, so the array has to
# be reversed along axis 0 (lat) to line up with a (lat, lon) grid. Flipping
# axis 1 would mirror the longitudes and leave the latitudes reversed.
flipped_shp_mask = np.flip(shp_mask, axis=0)

In [None]:
# NOTE: rebinds `mask` to the flipped array, then repeats the same spot checks
# (top-left 2x2 block and cell [494, 558]) for comparison.
mask = flipped_shp_mask
print(f"{mask[0, 0]} {mask[0, 1]}")
print(f"{mask[1, 0]} {mask[1, 1]}")
print()
print(f"{mask[494, 558]}")

In [None]:
# Attach the flipped mask to the dataset as a 2-D coordinate named 'mask' on
# the (lat, lon) dimensions.
ds.coords['mask'] = (('lat', 'lon'), flipped_shp_mask)

In [None]:
# `ds.mask is not None` is an identity test on the coordinate object itself and
# is always True, so it masked nothing. notnull() gives the elementwise
# condition: keep only cells whose mask entry is not None.
ds['pr'].where(ds['mask'].notnull()).plot()

In [None]:
# Print the current wall-clock time (`datetime` is imported in the first cell).
print(datetime.datetime.now())

In [None]:
%matplotlib inline

# Select the 2022-01-31 step of `pr` and keep only the cells whose mask value
# equals 166 (mask entries are the OBJECTID values returned by intersect()).
# Alternative selections kept for reference:
#map_data = ds['pr'].sel(time='2022-01-31').where(ds.mask != None)
map_data = ds['pr'].sel(time='2022-01-31').where(ds.mask == 166)
#map_data = ds['pr'].sel(time='2022-11-30')
map_data.plot()

In [None]:
# Check what kind of object the selection produced.
type(map_data)

In [None]:
# Write the masked selection to a NetCDF file in the working directory.
map_data.to_netcdf("test_166.nc")

In [None]:
# List the dataset's coordinates (includes 'mask' once it has been attached).
ds.coords

In [None]:
# Keep only the mask entries that are not None; the comparison against
# np.array(None) is meant to be evaluated element by element.
mask_values = flipped_shp_mask[flipped_shp_mask != np.array(None)]

In [None]:
# Distinct, sorted OBJECTID values present in the mask.
oids = np.unique(mask_values)

In [None]:
# Check the element type of the IDs; requires at least 11 distinct values.
type(oids[10])

In [None]:
# Show all distinct IDs.
oids

In [None]:
# NOTE: rebinds `time_slice` — earlier the (start, end) year tuple — to the
# first element of `pr`; re-running the time-slicing cell after this one would
# therefore no longer see the tuple.
time_slice = ds.pr[0]

In [None]:
# Convert the slice into a DataFrame.
df = time_slice.to_dataframe()

In [None]:
# Preview the first rows.
df.head()

In [None]:
# Turn the index levels into ordinary columns (rebinds `df`).
df = df.reset_index()

In [None]:
# Preview the frame again after the index has been reset.
df.head()

In [None]:
# A Dataset cannot be indexed by position: `ds[0]` looks up a variable named 0
# and raises KeyError. Select the variable first, then take its first element.
point_data = ds.pr[0].to_dataframe()

In [None]:
# Preview the first rows of the per-point table.
point_data.head()

In [None]:
# Metadata attributes of the latitude coordinate.
ds.lat.attrs

In [None]:
# Metadata attributes of the longitude coordinate.
ds.lon.attrs