In [None]:
import xarray as xr
import numpy as np
import os
from tqdm import tqdm
import gcsfs

In [None]:
def get_mask(mask_path, labels, pix_deg_flow=1 / 1200):
    """Mosaic per-label mask tiles into one array covering their joint extent.

    For every entry of ``labels`` the Zarr store at ``{mask_path}/{label}`` is
    opened through ``gcsfs.GCSMap`` and its ``mask`` variable is loaded into
    memory. The tiles are then added into a single ``uint8`` array, so a pixel
    covered by several tiles holds the sum of their values.

    Parameters
    ----------
    mask_path : str
        Prefix under which one Zarr store per label is found.
    labels : iterable of str
        Names of the stores to load.
    pix_deg_flow : float, optional
        Pixel size in degrees, used both to size the output grid and to place
        each tile on it. Defaults to 1/1200.

    Returns
    -------
    xarray.DataArray
        2-D ``uint8`` array with dims ``('lat', 'lon')``. ``lat`` starts at the
        largest first-latitude found among the tiles and decreases by
        ``pix_deg_flow``; ``lon`` starts at the smallest first-longitude and
        increases by ``pix_deg_flow``.

    Raises
    ------
    ValueError
        If ``labels`` yields no entries (there is no extent to build).

    Notes
    -----
    NOTE(review): the extent logic assumes every tile stores ``lat`` in
    decreasing order and ``lon`` in increasing order, on a common grid of
    spacing ``pix_deg_flow`` -- confirm against the stored masks.
    """
    das = []
    lat0, lat1, lon0, lon1 = -np.inf, np.inf, np.inf, -np.inf
    for label in tqdm(labels):
        ds = xr.open_zarr(gcsfs.GCSMap(f'{mask_path}/{label}'))
        da = ds['mask'].compute()
        das.append(da)
        # Grow the bounding box: first/last coordinate of each axis are taken
        # as that tile's edges.
        lat0 = max(lat0, da.lat.values[0])
        lat1 = min(lat1, da.lat.values[-1])
        lon0 = min(lon0, da.lon.values[0])
        lon1 = max(lon1, da.lon.values[-1])
    if not das:
        # Without this guard the infinite initial bounds reach int(round(...))
        # below and fail with an unhelpful OverflowError.
        raise ValueError('labels is empty: no mask tiles to assemble')
    # +1 because both edge pixels are included in the extent.
    nlat = int(round((lat0 - lat1) / pix_deg_flow + 1))
    nlon = int(round((lon1 - lon0) / pix_deg_flow + 1))
    # Build the coordinates from the integer pixel counts so their lengths
    # always match the array shape (a float-step arange has no such guarantee).
    lat = lat0 - pix_deg_flow * np.arange(nlat)
    lon = lon0 + pix_deg_flow * np.arange(nlon)
    a = np.zeros((nlat, nlon), dtype=np.uint8)
    for da in das:
        # Offset of the tile's top-left pixel inside the mosaic, in pixels.
        dlat = int(round((lat0 - da.lat.values[0]) / pix_deg_flow))
        dlon = int(round((da.lon.values[0] - lon0) / pix_deg_flow))
        a[dlat:dlat+da.shape[0], dlon:dlon+da.shape[1]] += da.values
    return xr.DataArray(a, coords=[lat, lon], dims=['lat', 'lon'])

In [None]:
# Bucket prefix whose entries are used as the mask labels passed to get_mask.
mask_path = 'pangeo-data/gross/ws_mask/amazonas'
fs = gcsfs.GCSFileSystem(project='pangeo-data')
# Strip a trailing '/' if present instead of unconditionally dropping the last
# character: when the listing returns paths without a trailing slash, slicing
# with [:-1] would silently truncate every label.
labels = [os.path.basename(path.rstrip('/')) for path in fs.ls(mask_path)]
len(labels)

In [None]:
# Assemble every listed mask into a single DataArray.
da = get_mask(mask_path=mask_path, labels=labels)