In [None]:
import gcsfs
import os
import xarray as xr
from dask.distributed import Client

In [None]:
# Execution target: set to True when running in the Pangeo binder,
# leave False when running on a laptop.
is_pangeo_data = False
if not is_pangeo_data:
    # Laptop: use a local dask cluster with a handful of workers.
    from dask.distributed import LocalCluster as Cluster
    n_workers = 4
else:
    # Pangeo binder: use the Kubernetes-backed cluster class with more workers.
    from dask_kubernetes import KubeCluster as Cluster
    n_workers = 10

In [None]:
# Instantiate whichever cluster class was bound to `Cluster` by the
# environment switch, with the worker count chosen there.
cluster = Cluster(n_workers=n_workers)
# Attach a dask distributed client to that cluster.
client = Client(cluster)
# Bare trailing expression: lets the notebook display the cluster object.
cluster

In [None]:
# Execute the project's helper script in this notebook's namespace.
# NOTE(review): the get_*_masks, get_path, get_precipitation and get_pet helpers
# used below are not defined in this notebook — presumably they come from this script.
%run ../python/misc.py

In [None]:
# Locate the mask directory for the current environment and collect the
# entries whose name starts with '0' as the list of labels.
if is_pangeo_data:
    mask_path = 'gs://pangeo-data/gross/ws_mask/amazonas'
    fs = gcsfs.GCSFileSystem(project='pangeo-data')
    # Strip a trailing '/' only when one is present. Unconditionally dropping the
    # last character (path[:-1]) would truncate the label whenever the listing
    # returns entries without a trailing slash (this may depend on the gcsfs version).
    gcs_names = [os.path.basename(path.rstrip('/')) for path in fs.ls('pangeo-data/gross/ws_mask/amazonas')]
    all_labels = [name for name in gcs_names if name.startswith('0')]
else:
    mask_path = 'ws_mask/amazonas'
    # Reuse mask_path so the listed directory and the path passed on to the
    # mask helpers cannot drift apart.
    all_labels = [fname for fname in os.listdir(mask_path) if fname.startswith('0')]

In [None]:
# Get the TRMM masks for all labels, cast to float32 and chunk along
# 'label' in groups of 10.
da_trmm_mask = get_trmm_masks(mask_path, all_labels).astype('float32').chunk({'label': 10})
# Persist as a zarr store under the variable name 'mask'.
# mode='w' overwrites a store left by a previous run; with the default mode
# the write raises if the target already exists, so the cell could not be re-run.
da_trmm_mask.to_dataset(name='mask').to_zarr('ws_mask/amazonas/trmm_mask', mode='w')

In [None]:
# Get the GPM masks for all labels, cast to float32 and chunk along
# 'label' in groups of 10.
da_gpm_mask = get_gpm_masks(mask_path, all_labels).astype('float32').chunk({'label': 10})
# Persist as a zarr store under the variable name 'mask'.
# mode='w' overwrites a store left by a previous run; with the default mode
# the write raises if the target already exists, so the cell could not be re-run.
da_gpm_mask.to_dataset(name='mask').to_zarr('ws_mask/amazonas/gpm_mask', mode='w')

In [None]:
# Get the PET masks for all labels, cast to float32 and chunk along
# 'label' one label per chunk (unlike the TRMM/GPM masks, which use 10).
da_pet_mask = get_pet_masks(mask_path, all_labels).astype('float32').chunk({'label': 1})
# Persist as a zarr store under the variable name 'mask'.
# mode='w' overwrites a store left by a previous run; with the default mode
# the write raises if the target already exists, so the cell could not be re-run.
da_pet_mask.to_dataset(name='mask').to_zarr('ws_mask/amazonas/pet_mask', mode='w')

In [None]:
# Copy the three locally written zarr mask stores (TRMM, GPM, PET) to the
# amazonas mask folder of the pangeo-data GCS bucket.
# -m parallelizes the transfer; -r copies each store directory recursively.
# Note: these commands run regardless of the value of is_pangeo_data.
!gsutil -m cp -r ws_mask/amazonas/trmm_mask gs://pangeo-data/gross/ws_mask/amazonas
!gsutil -m cp -r ws_mask/amazonas/gpm_mask gs://pangeo-data/gross/ws_mask/amazonas
!gsutil -m cp -r ws_mask/amazonas/pet_mask gs://pangeo-data/gross/ws_mask/amazonas

In [None]:
# Intended to be run with is_pangeo_data = True.
# Choose where the zarr mask stores are read from: the GCS copies when in
# Pangeo, the local copies otherwise.
if is_pangeo_data:
    trmm_mask_path, gpm_mask_path, pet_mask_path = (
        'gs://pangeo-data/gross/ws_mask/amazonas/trmm_mask',
        'gs://pangeo-data/gross/ws_mask/amazonas/gpm_mask',
        'gs://pangeo-data/gross/ws_mask/amazonas/pet_mask',
    )
else:
    trmm_mask_path, gpm_mask_path, pet_mask_path = (
        'ws_mask/amazonas/trmm_mask',
        'ws_mask/amazonas/gpm_mask',
        'ws_mask/amazonas/pet_mask',
    )

# Reopen each store and keep only its 'mask' variable.
# NOTE(review): get_path is not defined in this notebook — presumably it maps
# the path string to something xr.open_zarr accepts; confirm in misc.py.
da_trmm_mask = xr.open_zarr(get_path(trmm_mask_path))['mask']
da_gpm_mask = xr.open_zarr(get_path(gpm_mask_path))['mask']
da_pet_mask = xr.open_zarr(get_path(pet_mask_path))['mask']

# Date range handed to the precipitation (and, further down, PET) helper.
d0 = '2000-03-01 12:00:00'
d1 = '2018-12-31'
# The last argument is presumably the output location: the ws_precipitation
# directory is what gets copied to GCS at the end of the notebook.
get_precipitation(d0, d1, da_trmm_mask, da_gpm_mask, 'ws_precipitation/amazonas')

In [None]:
# Run the PET helper over the same d0..d1 range using the PET masks.
# NOTE(review): get_pet is not defined in this notebook; 'ws_pet/amazonas' is
# presumably its output location (ws_pet is copied to GCS below) — confirm.
pet = get_pet(d0, d1, da_pet_mask, 'ws_pet/amazonas')

In [None]:
# Copy the local ws_precipitation and ws_pet directories to the pangeo-data
# GCS bucket under gross/.
# -m parallelizes the transfer; -r copies each directory recursively.
!gsutil -m cp -r ws_precipitation gs://pangeo-data/gross/
!gsutil -m cp -r ws_pet gs://pangeo-data/gross/