In [None]:
###############################################
### TO RUN ON MICROSOFT PLANETARY COMPUTER ####
###############################################

In [27]:
import planetary_computer
import pystac_client

import numpy as np
import xarray as xr

import getpass
import azure.storage.blob
import zarr

### Preliminaries

In [45]:
# Historical period: 1980 through 2014 inclusive, as zero-padded strings
years = list(map(str, range(1980, 2015)))
months = ['{:02d}'.format(month_number) for month_number in range(1, 13)]


def make_datetime(year, month):
    """Join a year string and a month string into a 'YYYY-MM' label."""
    return '-'.join((year, month))


# One 'YYYY-MM' label per month, year-major: all of 1980 first, then 1981, ...
datetimes = [make_datetime(yr, mo) for yr in years for mo in months]

In [48]:
# Planetary Computer STAC API endpoint
STAC_API_URL = "https://planetarycomputer.microsoft.com/api/stac/v1/"

# Catalog client; the per-month searches further down go through this object
catalog = pystac_client.Client.open(STAC_API_URL)

In [87]:
#########
# Dask
#########
import dask_gateway

# Cluster sizing, gathered in one place so it is easy to tune.
# NOTE(review): worker_memory is presumably in GiB -- confirm against the
# gateway's option definitions.
N_WORKERS = 20
WORKER_SETTINGS = {"worker_memory": 16, "worker_cores": 1}

gateway = dask_gateway.Gateway()

# Start from the gateway's default options and override the per-worker settings
cluster_options = gateway.cluster_options()
for option_name, option_value in WORKER_SETTINGS.items():
    cluster_options[option_name] = option_value

# Launch the cluster, attach a client to it, then request the workers
cluster = gateway.new_cluster(cluster_options)
client = cluster.get_client()
cluster.scale(N_WORKERS)

# Dashboard link for monitoring
print(cluster.dashboard_link)

https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.7c4d462a034c43f78b0fc0ce190a6f6f/status


# Temperature

In [82]:
# Name of the ERA5 asset / data variable holding 2 m air temperature
ERA5_T2M = 'air_temperature_at_2_metres'

# (output variable name, resample reduction) pairs, in the order they are combined
DAILY_REDUCTIONS = (('tasavg', 'mean'), ('tasmax', 'max'), ('tasmin', 'min'))


def _daily_temperature(month_label):
    """Return one month's daily tasavg/tasmax/tasmin as a single dataset.

    `month_label` is an entry of `datetimes` (a 'YYYY-MM' string). It is used
    as the `datetime` filter of a search on the "era5-pds" collection,
    restricted to items whose "era5:kind" equals "an". The first item
    returned is signed and its 2 m air temperature asset is opened with the
    open kwargs the asset itself advertises.
    """
    month_search = catalog.search(
        collections=["era5-pds"],
        query={"era5:kind": {"eq": "an"}},
        datetime=month_label,
    )

    # Only the first matching item is used; an empty result raises IndexError
    first_item = month_search.get_all_items()[0]
    t2m_asset = planetary_computer.sign(first_item).assets[ERA5_T2M]

    source = xr.open_dataset(
        t2m_asset.href, **t2m_asset.extra_fields["xarray:open_kwargs"]
    )

    # Resample to 1-day bins once per statistic and rename the variable to match
    daily_stats = [
        getattr(source.resample(time='1D'), reduction)().rename({ERA5_T2M: out_name})
        for out_name, reduction in DAILY_REDUCTIONS
    ]

    # join="exact" makes the merge fail rather than silently realign coordinates
    return xr.combine_by_coords(daily_stats, join="exact")


# Final dataset: every month in `datetimes`, concatenated along time
ds_final = xr.concat(
    [_daily_temperature(month_label) for month_label in datetimes],
    dim='time',
)

In [91]:
# Rechunk for the quantile calculations: the whole time axis in one chunk
# (-1), split into 50 x 50 tiles over lat/lon
quantile_chunks = dict(time=-1, lat=50, lon=50)
ds_final = ds_final.chunk(quantile_chunks)

In [None]:
%%time
# Get the 95th and 99th quantiles of every variable along the time axis.
# Each .compute() evaluates the result and stores it in ds_q95 / ds_q99.
ds_q95 = ds_final.quantile(q=0.95, dim='time').compute()
# q must be 0.99 here; with 0.95 this would just duplicate ds_q95
ds_q99 = ds_final.quantile(q=0.99, dim='time').compute()