In [None]:
import csv
import json
import os
import tempfile
import zipfile
import pandas as pd
import intake
from shapely.geometry import Point, Polygon
import geopandas as gpd
import numpy as np
import xarray as xr
import dask
import panel as pn
# Initialise the Panel extension so Panel objects can render in the notebook.
pn.extension()

# Keep variable attributes (e.g. units) when xarray operations build new objects.
xr.set_options(keep_attrs=True)
# Ask dask to split oversized chunks produced by slicing instead of keeping them whole.
dask.config.set({"array.slicing.split_large_chunks": True})


Start a dask cluster for faster computing. Note: this will take a while, but in the long run processing should be faster when `compute` is called.

In [None]:
from dask.distributed import progress
from dask.distributed import Client
from climakitae.cluster import Cluster
# Create the climakitae cluster and allow it to scale adaptively between the
# given minimum and maximum (presumably a worker count — confirm).
cluster = Cluster()
cluster.adapt(minimum=0, maximum=16)
# Client used by the rest of the notebook; it is closed near the end.
client = cluster.get_client()


Get client link.

In [None]:
client

In [None]:
# VARIABLES
# Bounding box used to clip around the watershed of interest.
# Each list is [min, max]: trim_to_lat_lon_dataset reads index 0 as the lower
# bound and index 1 as the upper bound.
latitude = [34.775317,42.432494]
longitude = [-123.097421,-117.980799]
# CSV with one row of model parameters per run (read by get_model_params).
run_list_path = "data/GCM_Run_List_Test.csv"
# intake-esm catalog opened by get_dataset.
esm_datastore = "https://cadcat.s3.amazonaws.com/cae-collection.json"
# Folder that the CSV and zip outputs are written to.
output_folder = "outputs"
# NumPy file holding the mask array loaded by add_mask_to_dataset.
mask_path = "mask/mask.npy"

In [None]:
def get_model_params(run_list_path):
    """Read each set of model parameters from a CSV file.

    Parameters
    ----------
    run_list_path : str or path-like
        CSV file whose header row names the model parameters.

    Returns
    -------
    list of dict
        One dictionary per data row, keyed by the header names. Values are
        the strings produced by ``csv.DictReader``.
    """
    # newline="" leaves newline handling to the csv module, so quoted fields
    # that contain line breaks are returned unchanged.
    with open(run_list_path, "r", newline="") as src:
        return list(csv.DictReader(src))

In [None]:
def get_dataset(esm_datastore, model_params):
    """Return the monthly xarray dataset for one set of model parameters.

    The intake-esm catalog at ``esm_datastore`` is searched for the ``pr``,
    ``tasmax`` and ``tasmin`` variables of the run described by
    ``model_params``. The matching entry is restricted to the
    ``start_year``/``end_year`` window and converted from daily to monthly
    values. No spatial clipping is done here.
    """
    catalog = intake.open_esm_datastore(esm_datastore)

    # Search facets; everything except the variable list comes from the run row.
    query = {
        "activity_id": model_params["activity_id"],
        "institution_id": model_params["institution_id"],
        "table_id": model_params["table_id"],
        "variable_id": ['pr', 'tasmax', 'tasmin'],
        "experiment_id": model_params["experiment_id"],
        "grid_label": model_params["grid_label"],
        "member_id": model_params["member_id"],
        "source_id": model_params["source_id"],
    }
    matches = catalog.search(**query)

    # Open the matching entries with anonymous storage access.
    datasets = matches.to_dataset_dict(storage_options={'anon': True})

    # Dotted key identifying the wanted entry in the returned dictionary
    # (presumably mirrors how the catalog names its datasets — confirm).
    key_fields = (
        'activity_id',
        'institution_id',
        'source_id',
        'experiment_id',
        'table_id',
        'grid_label',
    )
    dataset_key = ".".join(str(model_params[field]) for field in key_fields)

    # Restrict to the requested years, then aggregate daily values to monthly.
    in_window = slice_by_time_years_dataset(
        datasets[dataset_key],
        model_params['start_year'],
        model_params['end_year'],
    )
    return convert_daily_to_monthly_dataset(in_window)

In [None]:
def add_mask_to_dataset(ds):
    """Attach the mask as a ``mask`` coordinate and clip to the bounding box.

    The mask array is read from the module-level ``mask_path`` and attached
    on the ``(lat, lon)`` dimensions, so it has to match those dimensions of
    ``ds`` as passed in (i.e. before clipping). The coordinate is added to a
    new dataset rather than to the caller's object.

    Returns the dataset produced by ``trim_to_lat_lon_dataset``.
    """
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
    # so only load mask files from a trusted source. A plain numeric mask
    # should also load with allow_pickle=False — confirm and tighten.
    with open(mask_path, 'rb') as f:
        mask = np.load(f, allow_pickle=True)

    # assign_coords returns a new dataset, so the caller's `ds` does not gain
    # a `mask` coordinate as a side effect.
    masked = ds.assign_coords(mask=(('lat', 'lon'), mask))
    return trim_to_lat_lon_dataset(masked)

In [None]:
def convert_daily_to_monthly_dataset(ds):
    """Aggregate daily ``pr``, ``tasmin`` and ``tasmax`` to monthly values.

    Precipitation is multiplied by 86400, summed per month and labelled
    ``mm/mon``. Temperatures are averaged per month, shifted by -273.15 and
    labelled ``degC``. The input dataset is left untouched, so calling this
    twice on the same dataset does not scale precipitation twice.

    Returns a new dataset holding the merged ``pr``, ``tasmin`` and
    ``tasmax`` variables.
    """
    seconds_per_day = 86400

    # 1 kg/m2 of water is 1 mm depth, so a kg/m2/s flux times 86400 s is mm/day.
    # assumes the source `pr` is in kg m-2 s-1 — TODO confirm against the catalog.
    pr_daily = ds['pr'] * seconds_per_day
    pr_daily.attrs["units"] = 'mm/day'
    pr_monthly = pr_daily.resample(time="M").sum()
    pr_monthly.attrs["units"] = 'mm/mon'

    # Monthly mean temperature, then Kelvin -> Celsius.
    temp_monthly = ds[['tasmin', 'tasmax']].resample(time="M").mean()
    temp_monthly = temp_monthly[['tasmin', 'tasmax']] - 273.15
    temp_monthly.tasmin.attrs["units"] = 'degC'
    temp_monthly.tasmax.attrs["units"] = 'degC'

    # Merge precipitation and temperature back into one dataset.
    return xr.merge([pr_monthly, temp_monthly])

In [None]:
def slice_by_time_years_dataset(ds,startyear,endyear):
    """Return ``ds`` restricted to the ``startyear``..``endyear`` time window.

    Both bounds are converted to strings and passed to ``ds.sel`` as a label
    slice on the ``time`` coordinate.
    """
    year_window = slice(str(startyear), str(endyear))
    return ds.sel(time=year_window)

In [None]:
def trim_to_lat_lon_dataset(ds):
    """Clip ``ds`` to the notebook's latitude/longitude bounding box.

    The spatial dimensions (``lon``/``lat``) and the EPSG:4326 CRS are
    written onto ``ds`` in place, which the clip needs. The clipped dataset
    is returned. Bounds come from the module-level ``longitude`` and
    ``latitude`` lists.
    """
    # NOTE(review): the `.rio` accessor comes from rioxarray, which this
    # notebook never imports directly — presumably another import pulls it
    # in; confirm.
    ds.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
    ds.rio.write_crs("EPSG:4326", inplace=True)

    # Name the box edges before clipping to the watershed subset.
    west, east = longitude[0], longitude[1]
    south, north = latitude[0], latitude[1]
    return ds.rio.clip_box(minx=west, miny=south, maxx=east, maxy=north)

In [None]:
def get_output_file_name_monthly(model_params,end_part):
    """Build the CSV file name for one run.

    The name is ``<source_id>_<experiment_id>_<member_id>_<end_part>.csv``,
    with the first three parts taken from ``model_params``.
    """
    name_parts = [
        model_params['source_id'],
        model_params['experiment_id'],
        model_params['member_id'],
        end_part,
    ]
    return "_".join(str(part) for part in name_parts) + ".csv"

In [None]:
def get_load_dataset_with_mask(esm_datastore_in, model_params_in):
    """Build, mask, clip and load the dataset for one run.

    Chains ``get_dataset`` and ``add_mask_to_dataset``, then calls
    ``compute()`` on the result and returns it.
    """
    masked = add_mask_to_dataset(get_dataset(esm_datastore_in, model_params_in))
    return masked.compute()
    

In [None]:
def process_one(esm_datastore_in, model_params_in):
    """Load the masked dataset for one run (processing steps still to do).

    Only the load is implemented, so the function currently returns ``None``.
    """
    loaded = get_load_dataset_with_mask(esm_datastore_in, model_params_in)
    # TODO for each sub-basin:
    #   - process outputs without weights and save to CSV
    #   - process the 30-year rolling average and save to CSV
    # TODO process with weights (should be one value for all sub-basins)
    # TODO process the 30-year rolling average
        

In [None]:
# Read every run definition from the run list and work with the first one.
all_model_params = get_model_params(run_list_path)
model_params = all_model_params[0]
print(model_params)

In [None]:
# Build the monthly dataset for the selected run (not yet masked or clipped).
ds = get_dataset(esm_datastore, model_params)

In [None]:
ds.coords

In [None]:
# Units are set on each variable by convert_daily_to_monthly_dataset, so list
# them per variable; .get avoids a KeyError when an attribute is missing.
{name: ds[name].attrs.get("units") for name in ds.data_vars}

Add the mask to the dataset. The mask is attached here and the resulting dataset is clipped to the bounding box.

In [None]:
# NOTE(review): this rebinds `ds` to the clipped result, so the cell is not
# safe to re-run on its own — presumably the mask would no longer match the
# clipped lat/lon shape. Re-run the get_dataset cell first.
ds = add_mask_to_dataset(ds)

Load the dataset here. This takes a while.

In [None]:
# Compute the dataset now so later cells work on the computed result.
ds = ds.compute()

In [None]:
ds.mask

In [None]:
%matplotlib inline

In [None]:
# map_data = ds['pr'].where(ds.mask != -1) # 14 Tulare
# map_data.plot() 

In [None]:
# Preview the data with cells whose mask value is -1 blanked out
# (presumably -1 marks cells outside every sub-basin — confirm).
ds.where(ds.mask != -1)

In [None]:
#map_data = ds['pr'].where(ds.mask == 17)

In [None]:
# Mask id used for the selection below and in the output file name.
id_subbasin = -1

# With `!=` this keeps every cell whose mask differs from id_subbasin;
# the commented `==` variant would keep only that one sub-basin instead.
map_data = ds.where(ds.mask != id_subbasin)
#map_data = ds.where(ds.mask == id_subbasin)
map_data

Grant, can we pull this from the shapefile, or do we need to add a separate CSV here? We also need the weight for each sub-basin.

In [None]:
# Hand-maintained lookup of sub-basin id -> name
# (presumably the ids are values found in the mask — confirm).
basin_dict = {
    17: "UpperYuba",
    18: "Test",
}

In [None]:
# Sub-basin ids to iterate over (a live view of basin_dict's keys).
oids = basin_dict.keys()
oids

In [None]:
#for oid in oids:
    # mask the data
    # get our number
#    result[oid] = precip_value

In [None]:
# Placeholder for per-sub-basin results; nothing in the visible cells fills it yet.
results = {}

In [None]:
# Quick look at precipitation for the first index along the leading dimension (presumably time).
map_data['pr'][0].plot() 

In [None]:
# Quick look at tasmax at index 20 along the leading dimension (presumably time).
map_data['tasmax'][20].plot() 

In [None]:
# Quick look at tasmin at index 20 along the leading dimension (presumably time).
map_data['tasmin'][20].plot() 

In [None]:
#results_precip = mask_test.pr.mean(['lat','lon'])
# Unweighted mean of precipitation over the lat/lon dimensions.
results_precip = map_data.pr.mean(['lat','lon'])
results_precip.attrs["units"]  = 'mm/mon'

In [None]:
# Unweighted mean of tasmin over the lat/lon dimensions.
results_tasmin = map_data.tasmin.mean(['lat','lon'])
results_tasmin.attrs["units"]  = 'degC'

In [None]:
# Unweighted mean of tasmax over the lat/lon dimensions.
results_tasmax = map_data.tasmax.mean(['lat','lon'])
results_tasmax.attrs["units"]  = 'degC'

In [None]:
# Combine the three spatial means into one dataset (order: pr, tasmax, tasmin).
ds_all= xr.merge([results_precip,results_tasmax,results_tasmin])

In [None]:
ds_all

In [None]:
# Format the output.

In [None]:
import pandas
# Convert the merged dataset to a DataFrame and discard the `spatial_ref`
# column, then show the remaining axes.
df = ds_all.to_pandas().drop('spatial_ref', axis=1)

df.axes

Format output to what we want.

In [None]:
# Add calendar columns and the mean of the daily max/min temperature.
df['Year'] = df.index.strftime('%Y')
df['Month'] = df.index.strftime('%b')
df['Tave (degC)'] = df[['tasmax','tasmin']].mean(axis=1)
# Give each variable its own label: tasmax -> Tasmax, tasmin -> Tasmin
# (tasmax was previously mislabelled as 'Tasmin (degC)', duplicating the column name).
df.rename({'pr': 'Pr (mm)','tasmax':'Tasmax (degC)','tasmin' : 'Tasmin (degC)'}, axis=1,inplace=True)

In [None]:
# Put the columns at positions 3 and 4 first, then positions 0-2, then 5.
# NOTE(review): positional selection depends on the exact column order
# produced by the earlier cells — confirm if those change.
df_r = df.iloc[:, [3, 4, 0, 1, 2, 5]]
# Replace the `time` index with a plain integer index instead of keeping it as a column.
df_n = df_r.reset_index(drop=True)

In [None]:
df_n

In [None]:
# Output CSV path: <output_folder>/<source>_<experiment>_<member>_<id_subbasin>.csv
csv_Path_Test = os.path.join(output_folder,get_output_file_name_monthly(model_params,id_subbasin)) #os.path.join(output_folder, "test2.csv")

Grant: This was just a test. Can you find a way to store the data in memory instead of in a CSV file, and then write it to the zip file? Also, for some reason the index is being written out to the CSV.

In [None]:
# Create the output folder if it does not exist yet; to_csv will not create it.
os.makedirs(os.path.dirname(csv_Path_Test) or ".", exist_ok=True)
# index=False keeps the DataFrame index out of the file.
df_n.to_csv(csv_Path_Test,index=False)

In [None]:
# Close the dask client.
# NOTE(review): only the client is closed here; the `cluster` object created
# earlier is not — confirm whether it needs an explicit shutdown too.
client.close()

In [None]:
# Zip archive written by the next cell.
zip_path = os.path.join(output_folder, "test.zip")

In [None]:
# Placeholder payloads standing in for CSV text held in memory.
data1 = "test\ntest"
data2 = "test\ntest"
# (archive member name, text content) pairs to store in the zip.
csv_data = [
    ("scenario1.csv", data1),
    ("scenario2.csv", data2)
]

# writestr stores each string directly as an archive member, so no
# intermediate file is written to disk.
with zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
    for filename, data in csv_data:
        zf.writestr(filename, data)