In [None]:
import csv
import datetime
from io import StringIO
import json
import os
import tempfile
import zipfile

import pandas as pd
import intake
from shapely.geometry import Point, Polygon
import geopandas as gpd
import numpy as np
import xarray as xr
import dask
import panel as pn
# Initialize Panel's notebook extension.
pn.extension()

# Keep attributes (such as units) on the results of xarray operations by default.
xr.set_options(keep_attrs=True)
# Ask dask to split oversized chunks that slicing would otherwise create.
dask.config.set({"array.slicing.split_large_chunks": True})


Load a dask cluster for faster computing. Note: this will take a while, but in the long run processing should be faster when `compute` is called.

In [None]:
# NOTE(review): `progress` and `Client` are imported but not used in the cells visible here.
from dask.distributed import progress
from dask.distributed import Client
from climakitae.cluster import Cluster
# Create the cluster and allow it to scale between 0 and 16
# (presumably dask workers via adaptive scaling — confirm against climakitae's Cluster).
cluster = Cluster()
cluster.adapt(minimum=0, maximum=16)
# Client object for the cluster; it is displayed in a following cell.
client = cluster.get_client()


Get client link.

In [None]:
client

In [None]:
# VARIABLES
# Bounding box used to clip the data around the watershed of interest.
# Values are longitude (x) / latitude (y); the clip is done with CRS EPSG:4326.
# latitude = [34.775317,42.432494]
# longitude = [-123.097421,-117.980799]
bbox = {
    "maxy": 42.432494,
    "miny": 34.775317,
    "minx": -123.097421,
    "maxx": -117.980799,
}
# CSV file whose rows are the model runs to process (read by get_model_params).
run_list_path = "data/GCM_Run_List_All.csv"
# intake-esm catalog that is searched for each model run.
esm_datastore = "https://cadcat.s3.amazonaws.com/cae-collection.json"
# Folder that receives the zip archive of CSV results.
output_folder = "outputs"
# NumPy file with the region-id mask that is attached on the (lat, lon) dimensions.
mask_path = "mask/mask.npy"

In [None]:
# Regions to aggregate over.
# Key: integer region id, compared against the values of the dataset's `mask` coordinate.
# 'region_name': human-readable label (not used in the computations visible in this notebook).
# 'weighting_factor': multiplier applied to the region's monthly means to build the weighted columns.
# NOTE(review): the factors listed here sum to about 1.0187 rather than 1; the excess
# matches the factor of region 18 (0.01866...) — confirm this is intended.
region_dict = {
    0:  {'region_name': 'Goose Lake', 'weighting_factor': 0.0},
    11: {'region_name': 'Westside Streams of SJR', 'weighting_factor': 0.002758980030193925},
    16: {'region_name': 'Other Rim Inflows of Sac', 'weighting_factor': 0.06520559638738632},
    9:  {'region_name': 'Eastside Streams of Delta', 'weighting_factor': 0.047054700553417206},
    17: {'region_name': 'Other Rim Inflows of SJR', 'weighting_factor': 0.008974390104413033},
    5:  {'region_name': 'Upper Stanislaus River', 'weighting_factor': 0.03673909977078438},
    8:  {'region_name': 'Lake Millerton', 'weighting_factor': 0.05571430176496506},
    10: {'region_name': 'Westside Streams of Sac', 'weighting_factor': 0.0789882019162178},
    12: {'region_name': 'Valley Floor of Sac', 'weighting_factor': 0.06745839864015579},
    18: {'region_name': 'Lower Yuba-Bear Rim Inflow', 'weighting_factor': 0.018660200759768486},
    14: {'region_name': 'Tulare Basin', 'weighting_factor': 0.0},
    1:  {'region_name': 'Lake Shasta', 'weighting_factor': 0.1778690069913864},
    15: {'region_name': 'Lake Trinity', 'weighting_factor': 0.04051230102777481},
    2:  {'region_name': 'Upper Feather River', 'weighting_factor': 0.13809999823570251},
    13: {'region_name': 'Valley Floor of SJR', 'weighting_factor': 0.008356500416994095},
    3:  {'region_name': 'Upper Yuba River', 'weighting_factor': 0.07005230337381363},
    19: {'region_name': 'Delta', 'weighting_factor': 0.026663200929760933},
    4:  {'region_name': 'Upper American River', 'weighting_factor': 0.08627369999885559},
    # Region 99 is disabled, so it is never looked up in the mask.
    # 99: {'region_name': 'Diversion from Echo Lake', 'weighting_factor': 0.0},
    6:  {'region_name': 'Upper Tuolumne River', 'weighting_factor': 0.05876690149307251},
    7:  {'region_name': 'Upper Merced River', 'weighting_factor': 0.030512800440192223}
}

In [None]:
def get_model_params(run_list_path):
    """Read the model-run parameters from a CSV file.

    Each data row becomes one dictionary keyed by the CSV header names.
    Values are returned as strings, exactly as stored in the file.

    Parameters
    ----------
    run_list_path : str or path-like
        Path to the CSV run list.

    Returns
    -------
    list of dict
        One dictionary per data row, in file order. The list is empty when
        the file contains no data rows.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted fields
    # are rewritten by universal-newline translation.
    with open(run_list_path, "r", newline="") as src:
        return list(csv.DictReader(src))

In [None]:
def get_dataset(esm_datastore, model_params):
    """Load one model run from the catalog and return it as monthly data.

    The catalog at ``esm_datastore`` is searched with the facets found in
    ``model_params`` for the variables ``pr``, ``tasmax`` and ``tasmin``.
    The matching dataset is restricted to ``start_year``..``end_year`` and
    then passed through ``convert_daily_to_monthly_dataset``.

    Parameters
    ----------
    esm_datastore : str
        Location of the intake-esm catalog description.
    model_params : mapping
        Must provide ``activity_id``, ``institution_id``, ``table_id``,
        ``experiment_id``, ``grid_label``, ``member_id``, ``source_id``,
        ``start_year`` and ``end_year``.

    Returns
    -------
    xarray.Dataset
        Result of ``convert_daily_to_monthly_dataset`` for the selected years.
    """
    catalog = intake.open_esm_datastore(esm_datastore)

    # Facets identifying the run, plus the three variables used downstream.
    query = {
        "activity_id": model_params["activity_id"],
        "institution_id": model_params["institution_id"],
        "table_id": model_params["table_id"],
        "variable_id": ['pr', 'tasmax', 'tasmin'],
        "experiment_id": model_params["experiment_id"],
        "grid_label": model_params["grid_label"],
        "member_id": model_params["member_id"],
        "source_id": model_params["source_id"],
    }
    matches = catalog.search(**query)

    # Open every matching entry; anonymous access to the object store.
    datasets = matches.to_dataset_dict(
        xarray_open_kwargs={'consolidated': True},
        storage_options={'anon': True},
    )

    # The dictionary is keyed by these six facets joined with dots
    # (member_id is not part of the key).
    key_fields = (
        'activity_id',
        'institution_id',
        'source_id',
        'experiment_id',
        'table_id',
        'grid_label',
    )
    dataset_key = "{}.{}.{}.{}.{}.{}".format(
        *(model_params[field] for field in key_fields)
    )

    # Restrict to the requested years, then aggregate to monthly values.
    in_window = slice_by_time_years_dataset(
        datasets[dataset_key],
        model_params['start_year'],
        model_params['end_year'],
    )
    return convert_daily_to_monthly_dataset(in_window)

In [None]:
def add_mask_to_dataset(mask_path, ds):
    """Attach the array stored at ``mask_path`` to ``ds`` as coordinate ``mask``.

    The array is registered on the dimensions ``('lat', 'lon')``. ``ds`` is
    modified in place and the same object is returned.

    Parameters
    ----------
    mask_path : str or path-like
        File readable by ``numpy.load``.
    ds : xarray.Dataset
        Dataset that receives the ``mask`` coordinate.

    Returns
    -------
    xarray.Dataset
        The ``ds`` object that was passed in.
    """
    # NOTE(review): allow_pickle=True lets numpy unpickle arbitrary objects,
    # which can execute code; only load mask files from a trusted source.
    with open(mask_path, 'rb') as mask_file:
        region_mask = np.load(mask_file, allow_pickle=True)

    mask_dims = ('lat', 'lon')
    ds.coords['mask'] = (mask_dims, region_mask)
    return ds

In [None]:
def convert_daily_to_monthly_dataset(ds):
    """Aggregate ``pr``, ``tasmin`` and ``tasmax`` to monthly values.

    Precipitation is multiplied by 86400 and summed per month; the two
    temperature variables are averaged per month and shifted by -273.15.
    The input dataset is not modified.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``time`` dimension and the variables ``pr``,
        ``tasmin`` and ``tasmax``.

    Returns
    -------
    xarray.Dataset
        Monthly ``pr`` (units attribute ``mm/mon``) merged with monthly
        ``tasmin`` and ``tasmax`` (units attribute ``degC``).
    """
    # Derive a new array instead of assigning back into ``ds`` so the caller's
    # dataset keeps its original precipitation values and units.
    # 86400 s/day: 1 kg m-2 of water is a 1 mm layer, so a flux in kg m-2 s-1
    # times 86400 is mm/day (assumes the source ``pr`` is in kg m-2 s-1 — TODO confirm).
    pr_daily = ds['pr'] * 86400
    pr_daily.attrs["units"] = 'mm/day'

    # Monthly total precipitation.
    pr_monthly = pr_daily.resample(time="M").sum()
    pr_monthly.attrs["units"] = 'mm/mon'

    # Monthly mean temperatures, shifted from Kelvin to Celsius
    # (assumes the source temperatures are in Kelvin — TODO confirm).
    temp_monthly = ds[['tasmin', 'tasmax']].resample(time="M").mean() - 273.15
    temp_monthly.tasmin.attrs["units"] = 'degC'
    temp_monthly.tasmax.attrs["units"] = 'degC'

    # Recombine precipitation and temperature into a single dataset.
    return xr.merge([pr_monthly, temp_monthly])

In [None]:
def slice_by_time_years_dataset(ds,startyear,endyear):
    """Return ``ds`` restricted to the span ``startyear`` .. ``endyear``.

    Both bounds are converted to strings and used as a label slice on the
    ``time`` coordinate.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a ``time`` coordinate.
    startyear, endyear : int or str
        First and last year of the window.

    Returns
    -------
    xarray.Dataset
        The selection returned by ``ds.sel``.
    """
    year_window = slice(str(startyear), str(endyear))
    return ds.sel(time=year_window)

In [None]:
def trim_dataset_to_bbox(ds, bbox):
    """Clip ``ds`` to the rectangle described by ``bbox``.

    The spatial dimensions (``lon`` as x, ``lat`` as y) and the CRS
    EPSG:4326 are first registered on ``ds`` in place, which is required
    for clipping. The clipped result is returned.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with ``lat`` and ``lon`` dimensions.
    bbox : mapping
        Must provide ``minx``, ``miny``, ``maxx`` and ``maxy``.

    Returns
    -------
    xarray.Dataset
        Result of ``ds.rio.clip_box``.
    """
    # NOTE(review): the ``.rio`` accessor is provided by rioxarray, which is
    # not imported in the visible cells — presumably registered by another
    # import; confirm.
    ds.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
    ds.rio.write_crs("EPSG:4326", inplace=True)

    # Only the four corner values are forwarded; extra keys in bbox are ignored.
    corners = {edge: bbox[edge] for edge in ("minx", "miny", "maxx", "maxy")}
    return ds.rio.clip_box(crs=4326, **corners)

In [None]:
def get_output_file_name_monthly(model_params,end_part):
    """Build the CSV file name for one model run.

    The name is ``<source_id>_<experiment_id>_<member_id>_<end_part>.csv``.

    Parameters
    ----------
    model_params : mapping
        Must provide ``source_id``, ``experiment_id`` and ``member_id``.
    end_part : object
        Final name component (the main loop passes the region id).

    Returns
    -------
    str
        The file name, without any directory.
    """
    name_parts = (
        model_params['source_id'],
        model_params['experiment_id'],
        model_params['member_id'],
        end_part,
    )
    return '%s_%s_%s_%s.csv' % name_parts

In [None]:
def load_dataset_with_mask(esm_datastore_in, model_params_in):
    """Load one model run, attach the region mask and clip it to ``bbox``.

    Uses the notebook-level ``mask_path`` and ``bbox`` settings. The returned
    dataset is not computed here; call ``.compute()`` on it when needed.

    Parameters
    ----------
    esm_datastore_in : str
        Location of the intake-esm catalog description.
    model_params_in : mapping
        Parameters of the model run, as expected by ``get_dataset``.

    Returns
    -------
    xarray.Dataset
        Monthly dataset with a ``mask`` coordinate, clipped to ``bbox``.
    """
    ds = get_dataset(esm_datastore_in, model_params_in)
    # add_mask_to_dataset takes (mask_path, ds); calling it with the dataset
    # alone raised TypeError for the missing argument.
    ds = add_mask_to_dataset(mask_path, ds)
    ds = trim_dataset_to_bbox(ds, bbox)
    return ds
    

In [None]:
%matplotlib inline

In [None]:
# main loop
# For each model run: load the monthly dataset, attach the region mask, clip it
# to the bounding box, then build one monthly table per region plus a table of
# the same values multiplied by the region's weighting factor.
all_model_params = get_model_params(run_list_path)

for model_params in all_model_params:
    # Timestamp printed at the start of each model run.
    print(datetime.datetime.now())

    ds = get_dataset(esm_datastore, model_params)
    ds = add_mask_to_dataset(mask_path, ds)
    ds = trim_dataset_to_bbox(ds, bbox)
    # Materialize the dataset once so the per-region steps below work on loaded data.
    ds = ds.compute()
    
    # Both dictionaries are re-created for every model run, so they only ever
    # hold the results of the run processed most recently.
    results_dict = {}
    weighted_results_dict = {}
    
    # NOTE(review): df_w is not used anywhere in the visible cells.
    df_w = None
    
    for id_region, v in region_dict.items():
        print(id_region)
        
        # Keep only the grid cells whose mask value equals this region id.
        map_data = ds.where(ds.mask == id_region)
        
        # Unweighted mean over lat/lon of the monthly precipitation.
        results_precip = map_data.pr.mean(['lat','lon'])
        results_precip.attrs["units"]  = 'mm/mon'
        
        # Unweighted mean over lat/lon of the monthly minimum temperature.
        results_tasmin = map_data.tasmin.mean(['lat','lon'])
        results_tasmin.attrs["units"]  = 'degC'
        
        # Unweighted mean over lat/lon of the monthly maximum temperature.
        results_tasmax = map_data.tasmax.mean(['lat','lon'])
        results_tasmax.attrs["units"]  = 'degC'
        
        # Combine the three regional series into one dataset.
        ds_all= xr.merge([results_precip,results_tasmax,results_tasmin])
        
        # Convert to a pandas DataFrame (the code below uses its index as a datetime index).
        df = ds_all.to_pandas()

        # Drop the 'spatial_ref' column (presumably the CRS coordinate added by
        # rio.write_crs — confirm).
        df.drop('spatial_ref',axis=1, inplace=True)

        # Year as 4-digit text and month as abbreviated name, taken from the time index.
        df['Year'] = df.index.strftime('%Y')
        df['Month'] = df.index.strftime('%b')
        # Average temperature is the mean of the monthly max and min.
        df['Tave (degC)'] = df[['tasmax','tasmin']].mean(axis=1)
        df.rename({'pr': 'Pr (mm)','tasmax':'Tasmax (degC)','tasmin' : 'Tasmin (degC)'}, axis=1,inplace=True)
        
        # Reorder by position: columns 3 and 4 first, then 0-2, then 5.
        # NOTE(review): this assumes exactly three data columns occupy positions
        # 0-2 so that Year, Month and Tave sit at 3, 4 and 5 — confirm the order.
        df_r = df.iloc[:,[3,4,0,1,2,5]]
        # Replace the time index with a plain integer index and discard the time column.
        df_n = df_r.reset_index()
        df_n.drop('time' , axis=1, inplace=True)
        
        # Results are keyed by the CSV file name they will later be written under.
        output_filename = get_output_file_name_monthly(model_params, id_region)
        results_dict[output_filename] = df_n
        
        # Weighted results
        weighting_factor = v['weighting_factor']
        
        df_weighted = df_n.copy(deep=True)
        df_weighted['Pr (mm) Weighted'] = df_weighted['Pr (mm)'] * weighting_factor
        df_weighted['Tasmax (degC) Weighted'] = df_weighted['Tasmax (degC)'] * weighting_factor
        df_weighted['Tasmin (degC) Weighted'] = df_weighted['Tasmin (degC)'] * weighting_factor
        df_weighted['Tave (degC) Weighted'] = df_weighted['Tave (degC)'] * weighting_factor
        
        # Store the Year/Month columns once, taken from the first region processed.
        if "dates" not in weighted_results_dict:
            df_dates = df_weighted[['Year', 'Month']]
            weighted_results_dict["dates"] = df_dates
        
        # Keep only the four weighted columns for this region.
        df_weighted = df_weighted.drop(['Year', 'Month', 'Pr (mm)', 'Tasmax (degC)', 'Tasmin (degC)', 'Tave (degC)'], axis=1)
        weighted_results_dict[id_region]=df_weighted
        
    # NOTE(review): this break stops after the first row of the run list, so only
    # one model run is processed — confirm whether it is a leftover from testing.
    break
# Timestamp printed once processing has finished.
print(datetime.datetime.now())    
    

In [None]:
# Pull one result table for inspection.
# NOTE(review): the key is hard-coded (ACCESS-CM2 / historical / r1i1p1f1, region 1)
# and raises KeyError unless that run is the one held in results_dict.
df = results_dict['ACCESS-CM2_historical_r1i1p1f1_1.csv']

In [None]:
len(results_dict)

In [None]:
len(weighted_results_dict)

In [None]:
# Collect the per-region weighted tables that will be summed.
# The "dates" entry holds the Year/Month text columns rather than weighted
# values, so it is left out: adding it to numeric tables with fill_value=0
# would try to add text to numbers.
weighted_df_list = [
    frame for key, frame in weighted_results_dict.items() if key != "dates"
]

In [None]:
from functools import reduce

In [None]:
# Add all tables in weighted_df_list together, pairwise from left to right.
# fill_value=0 substitutes 0 where only one of the two tables has a value.
# NOTE(review): reduce() without an initial value raises TypeError on an empty list.
weighted_result_df = reduce(lambda a, b: a.add(b, fill_value=0), weighted_df_list)

In [None]:
weighted_result_df

In [None]:
# Print the file name under which each result table is stored.
for k, v in results_dict.items():
    print(k)

In [None]:
# Write every result table into a single zip archive, one CSV entry per table.
zip_path = os.path.join(output_folder, "test4.zip")

# ZipFile does not create missing parent directories, so make sure the output
# folder exists before opening the archive.
if output_folder:
    os.makedirs(output_folder, exist_ok=True)

with zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    for csv_name, frame in results_dict.items():
        # to_csv() without a target returns the CSV text; it is stored in the
        # archive under the table's file name, without the index column.
        zf.writestr(csv_name, frame.to_csv(index=False))