In [253]:
! pip install xclim

import xesmf
import xarray as xr
import numpy as np
from science_validation_manual import *
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import rhg_compute_tools.kubernetes as rhgk

# Obtain a client/cluster pair from the rhg_compute_tools Kubernetes helper
# (presumably a Dask distributed client and cluster -- confirm against rhgk docs).
client, cluster = rhgk.get_micro_cluster()

# Ask the cluster to scale to 100 (presumably workers), then leave `cluster`
# as a bare expression so a notebook renders its status widget.
cluster.scale(100)
cluster

# Teardown: restart the client, then close the client and the cluster.
# NOTE(review): when this file runs top to bottom, these lines execute before
# the data loading below; presumably this was a notebook cell run last -- confirm.
client.restart()
client.close()
cluster.close()

# Playing with our raw ERA-5 data and regridding

def lon360(lon180):
    """Convert a longitude to its equivalent in the [0, 360) convention.

    Args:
        lon180: Longitude in degrees as a number, e.g. in [-180, 180].

    Returns:
        The same longitude wrapped into the range [0, 360).
    """
    full_circle = 360
    wrapped = lon180 % full_circle
    # Shift and wrap a second time: a float modulo can round up to exactly
    # 360.0 for tiny negative inputs, and this folds that back to 0.
    return (full_circle + wrapped) % full_circle

def lon180(lon360):
    """Convert a scalar longitude from the [0, 360] to the (-180, 180] convention.

    Args:
        lon360: Longitude in degrees as a single number (not an array).

    Returns:
        ``lon360 - 360.0`` when the input is greater than 180.0, otherwise
        the input unchanged.
    """
    # Only values strictly east of the 180-degree meridian are shifted.
    return lon360 - 360.0 if lon360 > 180.0 else lon360

# Load the ERA-5 precipitation versions being compared.
# ERA-5 "coarse" and "fine" (both at 1/4 degree) used in downscaling;
# these are rechunked intermediate outputs stored in a scratch bucket.
pr_coarse_ref = read_gcs_zarr('gs://scratch-170cd6ec/91da8e47-b396-4360-b397-ece89f1b777e/e2e-miroc6-pr-8rn7f-2846959676/rechunked.zarr')
pr_fine_ref = read_gcs_zarr('gs://scratch-170cd6ec/91da8e47-b396-4360-b397-ece89f1b777e/e2e-miroc6-pr-8rn7f-587431548/rechunked.zarr')
# ERA-5 at regular Gaussian resolution, "cleaned" by renaming variable/dims
# (variable 'pr', dims 'lon'/'lat' as used in the selection below).
pr_cleaned_ref = read_gcs_zarr('gs://clean-b1dbca25/reanalysis/ERA-5/F320/pr.1995-2015.F320.zarr')
# ERA-5 at regular Gaussian resolution, raw source data
# (variable 'tp', dims 'longitude'/'latitude' as used in the selection below).
# NOTE(review): the store names suggest raw spans 1994-2015 while cleaned spans
# 1995-2015 -- confirm the time ranges match before comparing annual sums.
pr_raw_ref = read_gcs_zarr('gs://impactlab-data/climate/source_data/ERA-5/downscaling/pr.1994-2015.F320.v5.zarr')
# Seattle lat/lon in degrees; longitude is in the [-180, 180] convention.
target_lat = 47.608013
target_lon = -122.335167 

# Extract the nearest-grid-point Seattle time series from each ERA-5 version
# and load it into memory.
# Two of the datasets (cleaned and raw) use [0, 360] longitudes, so the target
# longitude is converted with lon360() for those selections.
pr_seattle_pipeline = pr_cleaned_ref['pr'].sel(lon=lon360(target_lon), lat=target_lat, method="nearest").load()
pr_seattle_pipeline_coarse = pr_coarse_ref['pr'].sel(lon=target_lon, lat=target_lat, method="nearest").load()
pr_seattle_pipeline_fine = pr_fine_ref['pr'].sel(lon=target_lon, lat=target_lat, method="nearest").load()
# NOTE(review): the units of raw 'tp' are not visible here; verify they match
# the 'pr' series before reading the plots below as mm.
pr_seattle_pipeline_raw = pr_raw_ref['tp'].sel(longitude=lon360(target_lon), latitude=target_lat, method="nearest").load()

# Plot annual precipitation totals at the Seattle grid point, one line per
# ERA-5 version, in a fixed order so the line colours stay the same.
plt.figure(figsize=(14, 4))
annual_plot_specs = [
    (pr_seattle_pipeline, {'label': 'our pipeline, cleaned'}),
    (pr_seattle_pipeline_raw, {'label': 'our pipeline, raw', 'linestyle': ':'}),
    (pr_seattle_pipeline_coarse, {'label': 'our pipeline, coarse'}),
    (pr_seattle_pipeline_fine, {'label': 'our pipeline, fine'}),
]
for daily_series, line_kwargs in annual_plot_specs:
    # Sum the series within each calendar year before plotting.
    yearly_total = daily_series.groupby('time.year').sum()
    yearly_total.plot(**line_kwargs)
# Anchor the legend just outside the top-right corner of the axes.
plt.legend(bbox_to_anchor=(1.1, 1.05))
plt.ylabel('precip (mm)')
plt.title('Seattle annual precip, longitude corrected for raw & cleaned')

# Seattle series to summarise, keyed by a short label for each ERA-5 version.
dict_of_da = {
    'cleaned': pr_seattle_pipeline,
    'regridded_coarse': pr_seattle_pipeline_coarse,
    'regridded_fine': pr_seattle_pipeline_fine,
}

# Print a dictionary of summary statistics (rounded to 3 decimals) per series.
for key, da in dict_of_da.items():
    print(key)
    # Annual totals are computed once and reused for the min/mean/max below.
    annual_totals = da.groupby('time.year').sum()
    res = {
        # Fraction (0-1, not a percentage despite the key name) of time steps
        # whose value is NaN or below 0.254 (presumably mm, i.e. 0.01 inch --
        # confirm the units).
        'zeros_pct': da.where(da >= 0.254).isnull().sum().values.item() / len(da.time.values),
        'min': da.min().values.item(),
        'q1': da.quantile(0.01).values.item(),
        'q10': da.quantile(0.10).values.item(),
        'q50': da.quantile(0.5).values.item(),
        # Fixed: this previously used quantile(0.10) and so duplicated 'q10'.
        'q90': da.quantile(0.90).values.item(),
        'q99': da.quantile(0.99).values.item(),
        'max': da.max().values.item(),
        'mean': da.mean().values.item(),
        'year_sum_min': annual_totals.min().values.item(),
        'year_sum_mean': annual_totals.mean().values.item(),
        'year_sum_max': annual_totals.max().values.item(),
    }
    res = {stat: round(value, 3) for stat, value in res.items()}
    print(res)

# Distribution plots of the values in each series of dict_of_da.
import seaborn
# Histogram with 500 bins; the x-axis is limited to 0-10 to show the low end.
pl = seaborn.displot(dict_of_da, bins=500)
pl.set(xlim=(0,10))

# Kernel density estimate of the same data; the x-axis is limited to 0-30.
pl = seaborn.displot(dict_of_da, kind='kde')
pl.set(xlim=(0,30))