In [52]:
import os
from os.path import basename, exists, expanduser, join
from urllib.request import urlretrieve

import geopandas as gpd
import numpy as np
import planetary_computer
import pystac_client
import rasterio as rio
import rioxarray as rxr
import stackstac
import xarray as xr
from dask.distributed import Client
from pyproj import Proj, Transformer, transform
from rioxarray.merge import merge_arrays
from shapely.geometry import shape

In [2]:
# Create a dask.distributed Client with default settings and print the URL of
# its diagnostics dashboard.
client = Client()  # This starts the Dask client
print(client.dashboard_link)

/user/gbrencher/proxy/8787/status


In [54]:
# Area of interest as a GeoJSON polygon. The ring is a closed rectangle whose
# vertices are (longitude, latitude) pairs in degrees.
aoi = {
    "type": "Polygon",
    "coordinates": [
        [
            [-122.27508544921875, 47.54687159892238],
            [-121.96128845214844, 47.54687159892238],
            [-121.96128845214844, 47.745787772920934],
            [-122.27508544921875, 47.745787772920934],
            [-122.27508544921875, 47.54687159892238],
        ]
    ],
}

# Same polygon as a GeoDataFrame. The CRS is declared explicitly so the
# geometry (and `total_bounds`) is known to be lon/lat rather than CRS-less.
aoi_gpd = gpd.GeoDataFrame({'geometry': [shape(aoi)]}, crs="EPSG:4326")

In [9]:
# "start/end" date intervals passed as the `datetime` filter of the STAC
# searches for the snow-on and snow-off periods.
snowon_date_range = "2024-03-01/2024-04-01"
snowoff_date_range = "2023-09-01/2023-10-01"

In [5]:
# Open the Planetary Computer STAC API. `planetary_computer.sign_inplace` is
# registered as the client's modifier (presumably so returned asset hrefs are
# signed for access — confirm against the planetary_computer docs).
stac = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1",
    modifier=planetary_computer.sign_inplace,
)

## grab S1 data

In [7]:
# Find Sentinel-1 RTC scenes that intersect the AOI during the snow-on period.
search = stac.search(
    intersects=aoi,
    datetime=snowon_date_range,
    collections=["sentinel-1-rtc"],
)

items = search.item_collection()
print(len(items))

# Stack the scenes into one lazy (time, band, y, x) array at 50 m resolution.
snowon_s1_ds = (
    stackstac.stack(
        items,
        resolution=50
    )
    .where(lambda x: x > 0, other=np.nan)  # sentinel-1 uses 0 as nodata
)

# keep only acquisitions whose timestamp hour is after 11
# NOTE(review): the original comment called this "morning (ascending orbit)";
# timestamps are presumably UTC — confirm the orbit direction for this AOI.
snowon_s1_ds = snowon_s1_ds.where(snowon_s1_ds.time.dt.hour > 11, drop=True)

# clip to aoi
# The AOI bounds are lon/lat degrees, so they are declared as EPSG:4326 and
# rioxarray transforms them into the raster's CRS. Passing the raster's own CRS
# here would treat the degree values as projected coordinates.
# The clip runs before the band split so it operates on the stacked array
# (assumes that array still exposes its CRS to rioxarray — TODO confirm).
snowon_s1_ds = snowon_s1_ds.rio.clip_box(*aoi_gpd.total_bounds, crs="EPSG:4326")

# band dimension to data variable
snowon_s1_ds = snowon_s1_ds.to_dataset(dim='band')

9


  times = pd.to_datetime(


In [10]:
# Find Sentinel-1 RTC scenes that intersect the AOI during the snow-off period.
search = stac.search(
    intersects=aoi,
    datetime=snowoff_date_range,
    collections=["sentinel-1-rtc"],
)

items = search.item_collection()
print(len(items))

# Stack the scenes into one lazy (time, band, y, x) array at 50 m resolution.
snowoff_s1_ds = (
    stackstac.stack(
        items,
        resolution=50
    )
    .where(lambda x: x > 0, other=np.nan)  # sentinel-1 uses 0 as nodata
)

# keep only acquisitions whose timestamp hour is after 11
# NOTE(review): the original comment called this "morning (ascending orbit)";
# timestamps are presumably UTC — confirm the orbit direction for this AOI.
snowoff_s1_ds = snowoff_s1_ds.where(snowoff_s1_ds.time.dt.hour > 11, drop=True)

# clip to aoi
# The AOI bounds are lon/lat degrees, so they are declared as EPSG:4326 and
# rioxarray transforms them into the raster's CRS. This also removes the
# copy-pasted dependency on `snowon_s1_ds` that the original line had.
# The clip runs before the band split so it operates on the stacked array
# (assumes that array still exposes its CRS to rioxarray — TODO confirm).
snowoff_s1_ds = snowoff_s1_ds.rio.clip_box(*aoi_gpd.total_bounds, crs="EPSG:4326")

# band dimension to data variable
snowoff_s1_ds = snowoff_s1_ds.to_dataset(dim='band')

11


  times = pd.to_datetime(


In [11]:
# Temporal median of each Sentinel-1 stack.
# Rechunk first: 1024-pixel spatial tiles, with the whole time axis kept in a
# single chunk (-1), then reduce over time and drop any length-1 dimensions.
snowon_s1_ds = (
    snowon_s1_ds
    .chunk({"x": 1024, "y": 1024, "time": -1})
    .median(dim='time')
    .squeeze()
)
snowoff_s1_ds = (
    snowoff_s1_ds
    .chunk({"x": 1024, "y": 1024, "time": -1})
    .median(dim='time')
    .squeeze()
)

In [12]:
# Prefix the polarisation variables with their season so the snow-on and
# snow-off datasets do not collide when they are merged later.
snowon_s1_ds = snowon_s1_ds.rename(dict(vv='snowon_vv', vh='snowon_vh'))
snowoff_s1_ds = snowoff_s1_ds.rename(dict(vv='snowoff_vv', vh='snowoff_vh'))

## grab s2 data

In [13]:
# Find Sentinel-2 L2A scenes over the AOI during the snow-on period, keeping
# only items whose `eo:cloud_cover` property is below 25.
search = stac.search(
    intersects=aoi,
    datetime=snowon_date_range,
    collections=["sentinel-2-l2a"],
    query={"eo:cloud_cover": {"lt": 25}},
)

items = search.item_collection()
print(len(items))

# Stack the scenes into one lazy (time, band, y, x) array at 50 m resolution.
s2_ds = (
    stackstac.stack(
        items,
        resolution=50,
    )
    .where(lambda x: x > 0, other=np.nan)  # sentinel-2 uses 0 as nodata
)

# clip to aoi
# The AOI bounds are lon/lat degrees, so they are declared as EPSG:4326 and
# rioxarray transforms them into the raster's CRS. This also removes the
# dependency on the CRS state of `snowon_s1_ds`.
# The clip runs before the band split so it operates on the stacked array
# (assumes that array still exposes its CRS to rioxarray — TODO confirm).
s2_ds = s2_ds.rio.clip_box(*aoi_gpd.total_bounds, crs="EPSG:4326")

# Convert the 'band' coordinate to data variables
s2_ds = s2_ds.to_dataset(dim='band')

3


  times = pd.to_datetime(


In [14]:
# Temporal median of the Sentinel-2 stack: rechunk to 1024-pixel spatial tiles
# with the whole time axis in one chunk (-1), reduce over time, then drop any
# length-1 dimensions.
s2_ds = (
    s2_ds
    .chunk({"x": 1024, "y": 1024, "time": -1})
    .median(dim='time')
    .squeeze()
)

## grab cop30 data

In [38]:
# Find the Copernicus GLO-30 DEM tiles that intersect the AOI.
search = stac.search(
    collections=["cop-dem-glo-30"],
    intersects=aoi
)

items = search.item_collection()
print(len(items))

# Open every tile's `data` asset and mosaic the tiles into a single array.
data = []
for item in items:
    dem_path = planetary_computer.sign(item.assets['data']).href
    data.append(rxr.open_rasterio(dem_path))
cop30_da = merge_arrays(data)
# Name the mosaic 'elevation', drop length-1 dims and promote it to a dataset.
cop30_ds = cop30_da.rename('elevation').squeeze().to_dataset()

# clip to aoi
# The AOI bounds are lon/lat degrees, so they are declared as EPSG:4326
# instead of borrowing whatever CRS `snowon_s1_ds` reports at this point.
cop30_ds = cop30_ds.rio.clip_box(*aoi_gpd.total_bounds, crs="EPSG:4326")

2


## grab fcf data

In [40]:
def url_download(url, out_fp, overwrite = False):
    """Download ``url`` to ``out_fp`` unless that file is already present.

    Parameters
    ----------
    url : str
        Address of the resource to fetch.
    out_fp : str or path-like
        Destination file path.
    overwrite : bool, optional
        When true, download even if ``out_fp`` already exists. Default False.

    Returns
    -------
    None

    Raises
    ------
    Whatever ``urllib.request.urlretrieve`` raises when the transfer fails;
    in that case ``out_fp`` is left untouched.
    """
    # if already exists, skip download.
    if exists(out_fp) and not overwrite:
        print('file already exists, skipping')
        return

    # Download next to the target and rename only on success, so an interrupted
    # transfer never leaves a truncated file at out_fp that a later run would
    # skip as "already exists".
    partial_fp = f'{out_fp}.part'
    try:
        urlretrieve(url, partial_fp)
        os.replace(partial_fp, out_fp)
    finally:
        # Only present here if the download or the rename failed.
        if exists(partial_fp):
            os.remove(partial_fp)

In [41]:
def download_fcf(out_fp):
    """Download the global tree cover fraction GeoTIFF to ``out_fp``.

    The file is the PROBA-V LC100 2019 tree cover fraction layer hosted on
    Zenodo (the URL referenced by Lievens et al. 2021). The download is
    delegated to ``url_download``, which skips files that already exist.
    """
    url_download(
        'https://zenodo.org/record/3939050/files/PROBAV_LC100_global_v3.0.1_2019-nrt_Tree-CoverFraction-layer_EPSG-4326.tif',
        out_fp,
    )

In [44]:
# Local path for the global forest cover fraction raster; download_fcf fetches
# the file to this location.
fcf_path ='/tmp/fcf_global.tif'
download_fcf(fcf_path)

In [61]:
# open the global forest cover fraction raster as a DataArray
fcf_ds = rxr.open_rasterio(fcf_path)

# clip to aoi
# The AOI bounds are lon/lat degrees, so they are declared as EPSG:4326
# instead of borrowing whatever CRS `snowon_s1_ds` reports at this point.
fcf_ds = fcf_ds.rio.clip_box(*aoi_gpd.total_bounds, crs="EPSG:4326")

# name the variable 'fcf', drop length-1 dims and promote to dataset
fcf_ds = fcf_ds.rename('fcf').squeeze().to_dataset()

## TODO: deal with weird fcf values above 100!!

## combine datasets

In [None]:
# reproject to match snowon s1
# Every layer is resampled onto the snow-on Sentinel-1 grid so the later merge
# can combine them cell for cell.
snowoff_s1_ds = snowoff_s1_ds.rio.reproject_match(snowon_s1_ds)
s2_ds = s2_ds.rio.reproject_match(snowon_s1_ds)
# Reproject the clipped dataset that holds the named 'elevation' variable.
# The original reprojected `cop30_da`, the raw unnamed mosaic, which xr.merge
# cannot convert to a dataset.
cop30_ds = cop30_ds.rio.reproject_match(snowon_s1_ds)
cop30_da = cop30_ds  # the merge cell below reads the `cop30_da` name
fcf_ds = fcf_ds.rio.reproject_match(snowon_s1_ds)

In [None]:
# Combine all layers into one dataset. With compat/join set to 'override' the
# inputs are not checked for conflicts or re-aligned, so they are expected to
# share the same grid already.
ds_list = [
    snowon_s1_ds,
    snowoff_s1_ds,
    s2_ds,
    cop30_da,
    fcf_ds,
]
ds = xr.merge(ds_list, join='override', compat='override').squeeze()

## calculate additional data variables

In [6]:
# decibel conversion used for the radar variables and cross ratios
def db_scale(x, epsilon=1e-10):
    """Convert values to decibels (``10 * log10(x)``), mapping exact zeros to 0.

    Parameters
    ----------
    x : array-like or labelled array
        Input values. NumPy arrays and scalars return a NumPy array. Objects
        that provide a ``where(cond, other)`` method (for example xarray or
        pandas objects) return the same kind of object, so dimensions,
        coordinates and index are preserved and the result can be assigned
        straight back into a dataset.
    epsilon : float, optional
        Placeholder substituted for zeros before taking the logarithm so that
        ``log10(0)`` is never evaluated. It does not appear in the output.

    Returns
    -------
    Same kind of container as ``x`` holding ``10 * log10(x)``, with 0 wherever
    ``x`` was exactly 0. The input is not modified.
    """
    zero_mask = x == 0

    if hasattr(x, 'where'):
        # Labelled input: stay inside the object's own API instead of dropping
        # to a bare NumPy array, which would lose the labels.
        nonzero = x.where(~zero_mask, epsilon)
        return (10 * np.log10(nonzero)).where(~zero_mask, 0)

    # Plain NumPy / scalar input.
    nonzero = np.where(zero_mask, epsilon, x)
    # Set the positions where x was originally zero back to zero.
    return np.where(zero_mask, 0, 10 * np.log10(nonzero))

In [40]:
# radar data variables
# convert to decibels
# Each backscatter variable is overwritten in place with its db_scale result,
# so re-running this cell applies the conversion a second time.
ds['snowon_vv'] = db_scale(ds['snowon_vv'])
ds['snowon_vh'] = db_scale(ds['snowon_vh'])
ds['snowoff_vv'] = db_scale(ds['snowoff_vv'])
ds['snowoff_vh'] = db_scale(ds['snowoff_vh'])

# calculate variables
# Cross ratio = VH minus VV per season (both already in dB at this point);
# delta_cr is the snow-on cross ratio minus the snow-off cross ratio.
ds['snowon_cr'] = ds['snowon_vh'] - ds['snowon_vv']
ds['snowoff_cr'] = ds['snowoff_vh'] - ds['snowoff_vv']
ds['delta_cr'] = ds['snowon_cr'] - ds['snowoff_cr']

NameError: name 'db_scale' is not defined

In [None]:
# s2 band indices
# Each index is a normalized difference of two bands: (a - b) / (a + b).
# ndvi uses B08 and B04, ndsi uses B03 and B11, ndwi uses B03 and B08.
ds['ndvi'] = (ds['B08'] - ds['B04'])/(ds['B08'] + ds['B04'])
ds['ndsi'] = (ds['B03'] - ds['B11'])/(ds['B03'] + ds['B11'])
ds['ndwi'] = (ds['B03'] - ds['B08'])/(ds['B03'] + ds['B08'])

In [None]:
# latitude, longitude
# define projections
utm_proj = Proj(proj='utm', zone='10', ellps='WGS84') ## NOTE hardcoded utm for now, adjust before use
wgs84_proj = Proj(proj='latlong', datum='WGS84')

# pyproj.transform is deprecated; a Transformer is the supported replacement.
# always_xy=True fixes the axis order to (x, y) in and (lon, lat) out.
utm_to_wgs84 = Transformer.from_proj(utm_proj, wgs84_proj, always_xy=True)

# Build full 2-D coordinate grids and convert every cell to lon/lat.
x, y = np.meshgrid(ds['x'].values, ds['y'].values)
lon, lat = utm_to_wgs84.transform(x, y)
ds['latitude'] = (('y', 'x'), lat)
ds['longitude'] = (('y', 'x'), lon)

In [None]:
# dowy
def calc_dowy(doy):
    """Calculate day of water year from day of year.

    Day-of-year 274 is treated as the start of the water year and maps to 0.
    Earlier days are shifted past the end of the previous calendar year, which
    is taken to have 365 days.
    """
    water_year_start = 274
    if doy >= water_year_start:
        shift = -water_year_start
    elif doy < water_year_start:
        shift = 365 - water_year_start
    return doy + shift

## NOTE think about date and fix this
# NOTE(review): `pd`, `fn`, `torch` and `aso_sd` are not imported or defined in
# any visible cell of this notebook, so this cell cannot run as written —
# confirm where they are meant to come from.
# The date is parsed from the fifth '_'-separated token of `fn`, converted to
# day of water year, and a tensor shaped like `aso_sd` is filled with it.
dowy_1d = calc_dowy(pd.to_datetime(fn.split('_')[4]).dayofyear)
dowy = torch.full_like(aso_sd, dowy_1d)

## write out to zarr file

In [None]:
# Write the combined dataset to a zarr store under `{data_path}/combined`.
# NOTE(review): `data_path` is not defined in any visible cell, and the dates
# in the file name do not match the snow-on / snow-off date ranges defined
# earlier in this notebook — confirm both before running.
ds.to_zarr(f'{data_path}/combined/stack_20230725_20230925.zarr')

In [None]:
# Shut down the Dask client created at the top of the notebook.
client.close()