In [1]:
import xarray as xr
import hvplot.xarray
import hvplot.pandas
import numpy as np
import pandas as pd
import holoviews as hv
import geoviews as gv
import matplotlib as mpl


import geopandas as gpd
from pathlib import Path

import warnings

# Silence every warning raised by the libraries used below.
# NOTE(review): this blanket filter also hides deprecation and
# SettingWithCopy warnings — consider narrowing it.
warnings.filterwarnings('ignore')

# Render HoloViews / hvPlot output with the Bokeh backend.
hv.extension('bokeh')

## Select the reservoir

In [2]:
SAVE = True
RESERVOIR = '0810'  # tmsos_id of the reservoir processed by this notebook
ALG_VERSION = 'v0.2' # calculate overall estimate of elevation for reservoir, discard rest
# ALG_VERSION = 'v0.1.1' # remove temporal resampling

# Pass ids whose observations are dropped for a given reservoir id (rows whose
# `pass_ids` value appears in the list are removed before plotting and saving).
# Comma-joined entries presumably denote observations combined from several
# passes — TODO confirm against the data producer.
bad_geom_passes = {
    '0936': ['468', '317', '289', '11,190', '11,468', '11,317'],
    '0810': ['90'],
    '0518': ['51'],
    '1078': ['511', '205,218', '218', '496,511'],
    '0503': ['79'],
    '0930': ['11,190'],
    '1097': ['121'],
    '1388': ['453', '453,466', '466'],
    '1426': ['425'],
    '0529': ['398'],
    '0931': ['11,190', '11']
}

# Output / input roots, relative to the notebook's working directory.
RESULTS_DIR = Path('../results/')
DATA_DIR = Path('../data/')

In [3]:
# Validation reservoirs: point locations and polygon outlines.
# NOTE(review): absolute, machine-specific paths — consider deriving them from DATA_DIR.
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/100-validation-reservoirs-grand-polys.geojson'))

selected_reservoirs = val_pts['tmsos_id'].tolist()
# tmsos_id -> reservoir name lookup
res_names = val_pts.set_index('tmsos_id')['name'].to_dict()

RESERVOIR_NAME = res_names[RESERVOIR]
print(f'{RESERVOIR}: {RESERVOIR_NAME}')

# Restrict both layers to the selected reservoirs (computed once; the original
# cell repeated these two statements verbatim).
val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

# All validation reservoirs on an OSM basemap, with the configured one drawn in red on top.
global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    ) * val_res_pt[val_res_pt['tmsos_id'] == RESERVOIR].hvplot(
        geo=True, color='red', size=100,
    )
).opts(
    title=f"Locations of validation reservoirs. {RESERVOIR_NAME}, highlighted in red"
)

global_map

0810: Noi, Th


In [4]:
# conv_hull_df = val_res_poly.copy()
# conv_hull_df['geometry'] = conv_hull_df['geometry'].convex_hull
# conv_hull_df.to_file('../data/validation-locations/earthdata-convhull-res-poly.geojson')

In [5]:
# Outline of the configured reservoir on an OSM basemap, titled with its name.
val_res_poly.loc[val_res_poly['tmsos_id'] == RESERVOIR].hvplot(
    geo=True,
    tiles='OSM',
    shared_axes=False,
).opts(
    title=f"{RESERVOIR_NAME}",
)

In [6]:
BUFFER_M = 800 # m

# Estimate a UTM CRS for the reservoir so the buffer distance can be given in metres.
utm_crs = val_polys[val_polys['tmsos_id'] == RESERVOIR].estimate_utm_crs()
print(f"UTM area of use: \n{utm_crs.area_of_use}")

# Buffer in UTM (metres) and keep that result under the `_utm` name; the
# original converted back to EPSG:4326 before assigning it, so the name was
# misleading and the second `to_crs` was a no-op.
buffered_roi_utm = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR].to_crs(utm_crs).buffer(BUFFER_M)
# Geographic (lon/lat) version used by the rest of the notebook.
buffered_roi = buffered_roi_utm.to_crs('epsg:4326')

UTM area of use: 
- name: Between 102°E and 108°E, northern hemisphere between equator and 84°N, onshore and offshore. Cambodia. China. Indonesia. Laos. Malaysia - West Malaysia. Mongolia. Russian Federation. Singapore. Thailand. Vietnam.
- bounds: (102.0, 0.0, 108.0, 84.0)


In [7]:
# SWOT orbit ground-track geometries, read from the repository-relative data folder.
gd_track_fn = Path('../data/swot_orbit/swot_orbit.geojson')
gd_track = gpd.read_file(filename=gd_track_fn)
gd_track

Unnamed: 0,ID_PASS,START_TIME,geometry
0,1,Day 01 00:00:00,"MULTIPOLYGON (((-75.4587 -77.45455, -75.73434 ..."
1,2,Day 01 00:51:30,"MULTIPOLYGON (((84.56768 77.75119, 84.6008 78...."
2,2,Day 01 00:51:30,"MULTIPOLYGON (((-180 -52.37417, -180 -50.92787..."
3,3,Day 01 01:42:50,"MULTIPOLYGON (((-110.19573 -77.57156, -110.161..."
4,4,Day 01 02:34:20,"MULTIPOLYGON (((57.70091 77.75254, 57.69643 78..."
...,...,...,...
845,582,Day 21 18:10:40,"MULTIPOLYGON (((136.37126 77.75111, 136.40526 ..."
846,582,Day 21 18:10:40,"MULTIPOLYGON (((-180 72.80747, -180 73.45033, ..."
847,583,Day 21 19:02:00,"MULTIPOLYGON (((-58.39258 -77.57164, -58.3595 ..."
848,584,Day 21 19:53:30,"MULTIPOLYGON (((109.50449 77.75255, 109.5009 7..."


In [8]:
# SWOT science-orbit nadir lines (one LINESTRING row per pass segment).
gd_nadir_fn = Path('/tiger1/pdas47/tmsosPP/data/swot_orbit/sph_science_nadir/swot_science_orbit_sept2015-v2_nadir.shp')
gd_nadir = gpd.read_file(filename=gd_nadir_fn)
gd_nadir.head(n=5)

Unnamed: 0,ID_PASS,START_TIME,geometry
0,1,Day 01 00:00:00,"LINESTRING (-75.40128 -77.54325, -67.74948 -77..."
1,2,Day 01 00:51:30,"LINESTRING (84.56135 77.66162, 92.5227 77.5155..."
2,2,Day 01 00:51:30,"LINESTRING (-180 -52.65544, -179.4133 -53.7198..."
3,3,Day 01 01:42:50,"LINESTRING (-110.20279 -77.66112, -107.53758 -..."
4,4,Day 01 02:34:20,"LINESTRING (57.70176 77.66297, 60.37721 77.650..."


In [9]:
# Wrap the buffered ROI series in a GeoDataFrame whose active geometry column is 'geometry'.
buffered_roi_gdf = (
    gpd.GeoDataFrame(buffered_roi)
    .rename({0: 'geometry'}, axis=1)  # .hvplot(alpha=0.2, lw=2)
    .set_geometry('geometry')
)

In [10]:
import matplotlib as mpl
from shapely.geometry import box


# Tracks whose footprint intersects the buffered ROI. `.copy()` gives an
# independent frame so that adding the colour column below does not write into
# a slice of `gd_track`.
gd_track_subset = gd_track[gd_track.intersects(buffered_roi.geometry.values[0])].copy()
gd_nadir_subset = gd_nadir[gd_nadir.ID_PASS.isin(gd_track_subset.ID_PASS)]

# Strip of 10 km either side of each nadir line (20 km wide), buffered in UTM
# so the distance is in metres, then converted back to lon/lat.
gd_nadir_20km = (
    gpd.GeoDataFrame(
        gd_nadir_subset.to_crs(utm_crs).buffer(10000, cap_style='flat').to_crs('epsg:4326')
    )
    .rename({0: 'geometry'}, axis=1)
    .set_geometry('geometry')
)

roi_gdf = gpd.GeoDataFrame(buffered_roi).rename({0: 'geometry'}, axis=1)
roi_gdf = roi_gdf.set_geometry('geometry')
roi_gdf['name'] = RESERVOIR_NAME
roi_hv = roi_gdf.hvplot(geo=True, color='gray')

# View window: bounding box of the ROI padded by 0.5 (degrees, since
# `buffered_roi` is in EPSG:4326).
minx, miny, maxx, maxy = buffered_roi.buffer(0.5).total_bounds

xlim=(minx, maxx)
ylim=(miny, maxy)
view_bounds = box(xlim[0], ylim[0], xlim[1], ylim[1])

# One colour per pass id, looked up per row. The original assigned a list with
# one entry per *unique* pass to the column, which raises a length-mismatch
# error as soon as a pass contributes more than one row to the subset.
pass_ids = gd_track_subset['ID_PASS'].unique()
cmap = mpl.colormaps['Set1'].resampled(len(pass_ids))
pass_colors = {
    pass_id: mpl.colors.rgb2hex(cmap(position))
    for pass_id, position in zip(pass_ids, np.linspace(0, 1, len(pass_ids)))
}
gd_track_subset['color'] = gd_track_subset['ID_PASS'].map(pass_colors)

# Take the colours from the clipped frame itself so they stay aligned with the
# rows actually plotted (clip() does not guarantee the original row order).
tracks_clipped = gd_track_subset.clip(view_bounds)
tracks_hv = tracks_clipped.hvplot(
    geo=True, tiles='OSM',
    color=tracks_clipped['color'],
    alpha=0.5, line_width=2,
    xlim=xlim,
    ylim=ylim,
    tools=['hover'], hover_cols=['ID_PASS']
)

nadir_hv = gd_nadir_20km.hvplot(
    geo=True, alpha=0.5, line_width=2, color='white',
    # xlim=(int(buffered_roi.total_bounds[0])-1, int(buffered_roi.total_bounds[2])+1),
    # ylim=(int(buffered_roi.total_bounds[1])-1, int(buffered_roi.total_bounds[3])+1),
    xlim = (minx, maxx),
    ylim = (miny, maxy)
)

(tracks_hv * nadir_hv * roi_hv).opts(
    title=f"SWOT Ground Tracks over {RESERVOIR_NAME}", width=400
)



In [11]:
# Ground tracks intersecting the buffered ROI, including the `color` column added in the plotting cell.
gd_track_subset

Unnamed: 0,ID_PASS,START_TIME,geometry,color
128,90,Day 04 04:18:50,"MULTIPOLYGON (((25.579 77.75051, 25.61941 78.1...",#e41a1c
432,299,Day 11 15:31:10,"MULTIPOLYGON (((19.12978 -77.57326, 19.12453 -...",#ff7f00
838,577,Day 21 13:53:20,"MULTIPOLYGON (((19.50651 -77.57235, 19.53153 -...",#999999


### Load SWOT data

In [12]:
# load
from pathlib import Path
import pandas as pd

swot_save_dir = Path('/tiger1/pdas47/tmsosPP/data/swot')
swot_save_dir.mkdir(exist_ok=True)

# `dsses` holds one entry per reservoir (a dataset, or None when the file is
# missing); the other lists are parallel columns of the availability table.
dsses, reservoirs, available, platforms, times = [], [], [], [], []

for reservoir_id in selected_reservoirs:
    fp = swot_save_dir / f'{reservoir_id}.nc'

    if fp.exists():
        ds = xr.open_dataset(fp, chunks='auto', engine='netcdf4', decode_coords='all')
        dsses.append(ds)
        # one table row per time step found in the file
        reservoirs += [reservoir_id] * len(ds.time)
        available += [True] * len(ds.time)
        platforms += ['swot'] * len(ds.time)
        times.extend(ds.time.values)
    else:
        # no file for this reservoir: a single placeholder row marked unavailable
        dsses.append(None)
        reservoirs.append(reservoir_id)
        available.append(False)
        platforms.append(None)
        times.append(None)

# ds = xr.concat([ds.assign_coords(coords={"pass_ids": (ds['pass_ids'] if 'pass_ids' in ds.variables else ds['pass_id'])}) for ds in filter(lambda ds: ds is not None, dsses)], dim='time').sortby('time')
# ds_dict[reservoir_id] = ds

df = pd.DataFrame(dict(
    reservoir=reservoirs,
    available=available,
    platform=platforms,
    time=times,
))

# reservoir id -> opened dataset (or None)
ds_dict = dict(zip(selected_reservoirs, dsses))
# attach each reservoir's polygon to its rows
df = df.merge(
    val_res_poly[['tmsos_id', 'geometry']],
    left_on='reservoir',
    right_on='tmsos_id',
    suffixes=(False, False),
)
# df = df.sort_values('time')
df.head()

Unnamed: 0,reservoir,available,platform,time,tmsos_id,geometry
0,214,True,swot,2023-08-04,214,"POLYGON ((-4.96212 39.42249, -4.96139 39.42138..."
1,214,True,swot,2023-09-05,214,"POLYGON ((-4.96212 39.42249, -4.96139 39.42138..."
2,214,True,swot,2023-09-15,214,"POLYGON ((-4.96212 39.42249, -4.96139 39.42138..."
3,214,True,swot,2023-10-06,214,"POLYGON ((-4.96212 39.42249, -4.96139 39.42138..."
4,214,True,swot,2023-10-17,214,"POLYGON ((-4.96212 39.42249, -4.96139 39.42138..."


In [13]:
# import numpy as np
# import matplotlib.pyplot as plt

# fig, ax = plt.subplots(figsize=(15, 4))

# yticks = []
# ytick_labels = []
# for i, reservoir_id in enumerate(df.groupby('reservoir').groups):
#     data = df.loc[df['reservoir'] == reservoir_id]['time'].reset_index(drop=True)
#     name = res_names[reservoir_id]
#     ax.eventplot(data, orientation='horizontal', linelengths=0.5, lineoffsets=i)
#     yticks.append(i)
#     ytick_labels.append(f"{reservoir_id}: {name}")

# ax.set_yticks(ticks=yticks, labels=ytick_labels)
# ax.set_ylabel('Reservoir ID')
# ax.set_xlabel('Time')

# ax.set_title('SWOT Data downloaded for selected reservoirs')

## Plot data for a single reservoir

In [14]:
# Dataset of the configured reservoir; the entry is None when no '<id>.nc' file
# was found by the loading cell.
ds = ds_dict[RESERVOIR]
ds

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 20.49 MiB 20.49 MiB Shape (52, 492, 210) (52, 492, 210) Dask graph 1 chunks in 2 graph layers Data type float32 numpy.ndarray",210  492  52,

Unnamed: 0,Array,Chunk
Bytes,20.49 MiB,20.49 MiB
Shape,"(52, 492, 210)","(52, 492, 210)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,624 B,624 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 624 B 624 B Shape (52,) (52,) Dask graph 1 chunks in 2 graph layers Data type",52  1,

Unnamed: 0,Array,Chunk
Bytes,624 B,624 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [15]:
# Pass ids stored in the dataset — presumably one per time step (the displayed
# array length matches the time dimension); verify against the data producer.
ds.pass_ids.values

array(['299', '299', '577', '90', '299', '577', '299', '577', '90', '299',
       '577', '90', '299', '577', '90', '299', '577', '90', '577', '90',
       '299', '577', '90', '299', '577', '90', '299', '577', '90', '299',
       '577', '90', '299', '577', '90', '299', '577', '299', '577', '90',
       '299', '577', '90', '299', '577', '90', '299', '90', '299', '577',
       '90', '299'], dtype='<U3')

In [16]:
# import holoviews as hv
# import hvplot.xarray

# ds['wse'].hvplot(
#     x='x', y='y', rasterize=True, aspect='equal', # , geo=True, # crs=ds.rio.crs
# ).opts(
#     title='Water Surface Elevation (m)'
# ) + ds['wse_qual'].hvplot(
#     x='x', y='y', rasterize=True, aspect='equal', cmap='RdYlBu_r' #  geo=True, crs=ds.rio.crs
# ).opts(
#     title='Water Surface Elevation Quality'
# )

In [17]:
# import holoviews as hv

# ds['water_frac'].hvplot(
#     x='x', y='y', rasterize=True, aspect='equal', clim=(0, 1) # , geo=True, # crs=ds.rio.crs
# ).opts(
#     title='Water Fraction'
# ) + ds['water_frac_uncert'].hvplot(
#     x='x', y='y', rasterize=True, clim=(0, 1), aspect='equal', cmap='RdYlBu_r' #  geo=True, crs=ds.rio.crs
# ).opts(
#     title='Water Fraction Uncertainty'
# )

In [18]:
# import holoviews as hv

# ds['water_area'].hvplot(
#     x='x', y='y', rasterize=True, aspect='equal', clim=(0, 1e4) # , geo=True, # crs=ds.rio.crs
# ).opts(
#     title='Water Area'
# ) + ds['water_area_qual'].hvplot(
#     x='x', y='y', rasterize=True, clim=(0, 3), aspect='equal', cmap='RdYlBu_r' #  geo=True, crs=ds.rio.crs
# ).opts(
#     title='Water Area Quality (0=nominal,1=suspect,2=degraded,3=bad)'
# )

## Classify water for single reservoir

In [19]:
import rioxarray  # registers the `.rio` accessor on xarray objects

# rioxarray's signature is set_spatial_dims(x_dim, y_dim). The original call
# passed ('y', 'x') positionally, which labels the axes the wrong way round;
# keywords make the mapping explicit and match the functions defined below.
ds = ds.rio.set_spatial_dims(x_dim='x', y_dim='y')
# NOTE(review): this stamps the UTM CRS estimated from the reservoir polygon onto
# the raster; the dataset also carries its own `spatial_ref.crs_wkt` — confirm
# that the two always agree.
ds = ds.rio.write_crs(utm_crs)
# Keep only the pixels inside the buffered ROI (reprojected to the raster CRS).
ds = ds.rio.clip(buffered_roi.to_crs(ds.rio.crs).geometry.values)
ds

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 17.27 MiB 17.27 MiB Shape (52, 468, 186) (52, 468, 186) Dask graph 1 chunks in 5 graph layers Data type float32 numpy.ndarray",186  468  52,

Unnamed: 0,Array,Chunk
Bytes,17.27 MiB,17.27 MiB
Shape,"(52, 468, 186)","(52, 468, 186)"
Dask graph,1 chunks in 5 graph layers,1 chunks in 5 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,624 B,624 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 624 B 624 B Shape (52,) (52,) Dask graph 1 chunks in 2 graph layers Data type",52  1,

Unnamed: 0,Array,Chunk
Bytes,624 B,624 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [22]:
# from rasterio.crs import CRS
# import rioxarray
# import dask
# import numpy as np
# from tmsswot.data import get_occurrence_like

# hv.extension('bokeh')

# pekel_dir = Path('../data/pekel/occurrence_nc')
# occurrence = get_occurrence_like(buffered_roi, reproject_match=ds['water_frac'], pekel_dir=pekel_dir, stretching_factor=1)
# occurrence

In [23]:
# occurrence.hvplot(
#     x='x', y='y', rasterize=True, aspect='equal', clim=(0, 100), geo=True, # crs=ds.rio.crs
# ).opts(
#     title='Occurrence (%)'
# )

In [24]:
# %load_ext autoreload
# %autoreload 2

In [25]:
def classify_swot_raster_area(
        swot_ds, 
        roi, 
        occurrence, 
        water_frac_threshold=0.7, 
        area_qual_threshold=2, 
        zg_omega=0.3
    ):  
    """Classify surface water in a SWOT raster stack and estimate its area per time step.

    A pixel is water when its ``water_frac`` exceeds ``water_frac_threshold`` and
    its ``water_area_qual`` is at most ``area_qual_threshold``. Pixels without a
    ``water_frac`` value are filled per time step from the ``occurrence``
    raster: an occurrence threshold is derived from the histogram of occurrence
    values under the observed water pixels, and missing pixels at or above that
    threshold are set to water. Time steps whose (normalised) missing fraction
    exceeds 0.95 are filled entirely with the nodata value 2.

    Parameters
    ----------
    swot_ds : xarray.Dataset
        Must provide ``water_frac``, ``water_area_qual`` and a ``spatial_ref``
        coordinate with a ``crs_wkt`` attribute, on dims ``time``, ``y``, ``x``.
        Its rioxarray spatial dims and CRS are set in place.
    roi : any
        Not used by this function; kept for interface compatibility.
    occurrence : xarray.DataArray
        Water-occurrence raster; presumably percent (the histogram below uses the
        range 1-100) and on the same grid as ``swot_ds`` — TODO confirm.
    water_frac_threshold : float, default 0.7
        Minimum water fraction for a pixel to count as water.
    area_qual_threshold : int, default 2
        Maximum accepted ``water_area_qual`` value.
    zg_omega : float, default 0.3
        Fraction of the mean histogram count used as the count threshold.

    Returns
    -------
    xarray.Dataset
        Variables ``surface_water`` (time, y, x), ``surface_water_area`` (time)
        and ``missing_fraction`` (time).
    """
    swot_ds.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
    # write_crs accepts a WKT string directly, so no `CRS` class (which this
    # notebook never imports) is required.
    swot_ds.rio.write_crs(swot_ds.spatial_ref.attrs['crs_wkt'], inplace=True)

    # water-area
    water_map = (swot_ds['water_frac'].where(swot_ds['water_area_qual'] <= area_qual_threshold) > water_frac_threshold) * 1
    water_map = water_map > water_frac_threshold
    # keep NaN where the input raster has no water fraction at all
    water_map = xr.where(~np.isnan(swot_ds['water_frac']).data, water_map, np.nan)
    water_map = water_map.rio.set_crs(swot_ds.rio.crs)
    water_map.attrs['standard_name'] = "surface_water"
    water_map.attrs['long_name'] = "Surface water of lake/reservoir observed by SWOT"
    water_map = water_map.rename('surface_water')

    # missing data
    missing_mask = xr.DataArray(data=np.isnan(water_map), dims=water_map.dims, coords=water_map.coords, name='missing_mask_area')
    
    # enhance using historical occurrence data
    def zg(water_map_block):
        # Fill the missing pixels of every time slice in the block (see the docstring).
        times = water_map_block.time
        res = xr.zeros_like(water_map_block)

        for i, time in enumerate(times):
            water_map_slice = water_map_block.sel(time=time)
            if missing_fraction.sel(time=time) <= 0.95:
                mask_slice = missing_mask.sel(time=time)
                
                occurrence_slice = xr.where(~mask_slice, occurrence, np.nan)
                occurrence_slice = xr.where(water_map_slice==1, occurrence_slice, 0)
                occurrence_counts, occurrence_values = np.histogram(occurrence_slice.values, bins=99, range=(1, 100))
                count_threshold = np.nanmean(occurrence_counts) * zg_omega
                # NOTE(review): np.where(cond) returns a tuple of index arrays, so
                # nanargmax yields a position *within* that index list rather than a
                # histogram bin — confirm the intended threshold before relying on it.
                occurrence_idx = np.nanargmax(np.where(occurrence_counts > count_threshold) if (occurrence_counts > count_threshold).sum() > 0 else [0])
                occurrence_idx = np.nan if occurrence_idx == 0 else occurrence_idx
                
                pekel_estimated_map = (occurrence >= occurrence_idx)
        
                # `missing_mask` is a DataArray: use the slice directly. The original
                # indexed it with ['missing_mask'], which is not a coordinate of it.
                corrected_map = xr.where(
                    mask_slice == 1, 
                    pekel_estimated_map,
                    water_map_slice
                )
                res.data[i] = corrected_map
            else:
                # too much missing data: mark the whole slice as nodata (2)
                res.data[i] = np.full_like(water_map_slice, 2)
        return res

    water_map = water_map.chunk({'time': 50, 'x': -1, 'y': -1})
    water_map = water_map.transpose('time', 'y', 'x')

    # NOTE(review): the count per time step is divided by the size of the whole
    # stack (time included) and then min-max normalised across time — confirm
    # that this is the intended definition of "fraction".
    missing_fraction = missing_mask.sum(dim=('x', 'y')).values / missing_mask.size
    missing_fraction = xr.DataArray(
        data=(missing_fraction - missing_fraction.min())/(missing_fraction.max() - missing_fraction.min()), 
        dims=['time'], coords={'time': water_map.time}, name='missing_fraction')
    missing_fraction = missing_fraction.rename('missing_fraction')
    missing_fraction.attrs['standard_name'] = "missing_fraction"
    missing_fraction.attrs['long_name'] = "Fraction of data missing over the lake/reservoir observed by SWOT"
    missing_fraction.attrs['unit'] = "1"

    water_map = water_map.map_blocks(
        zg, template=water_map
    ).rename("surface_water")
    water_map.rio.set_nodata(2, inplace=True)

    # assumes 100 m x 100 m pixels (1e4 m^2 each) — TODO confirm
    water_map_area = xr.where(water_map!=1, np.nan, swot_ds['water_frac']).sum(dim=('x', 'y')) * 1e-6 * 1e4 # km^2
    water_map_area.attrs['standard_name'] = "surface_water_area"
    water_map_area.attrs['long_name'] = "Surface water area of lake/reservoir observed by SWOT"
    water_map_area.attrs['unit'] = "km^2"
    water_map_area = water_map_area.rename('surface_water_area')

    # already named 'missing_fraction' and annotated above
    water_missing_fraction = missing_fraction

    swot_ds = xr.merge([
        water_map, 
        water_map_area, 
        water_missing_fraction, 
    ])
    
    # areas from time steps that are mostly missing are not trustworthy
    swot_ds['surface_water_area'] = xr.where(missing_fraction > 0.9, np.nan, swot_ds['surface_water_area'])
    
    return swot_ds


def classify_swot_raster_elevation(
        swot_ds, 
        roi, 
        wse_qual_threshold=1, 
    ):
    """Estimate one water-surface elevation per time step from a SWOT raster stack.

    Pixels whose ``wse`` is NaN or whose ``wse_qual`` exceeds
    ``wse_qual_threshold`` are discarded. For every time step the remaining
    pixels are restricted to the interquartile range (25th-75th percentile over
    x and y) and averaged.

    Parameters
    ----------
    swot_ds : xarray.Dataset
        Must provide ``wse``, ``wse_qual``, ``pass_ids`` and a ``spatial_ref``
        coordinate with a ``crs_wkt`` attribute. Its rioxarray spatial dims and
        CRS are set in place.
    roi : any
        Not used by this function; kept for interface compatibility.
    wse_qual_threshold : int, default 1
        Maximum accepted ``wse_qual`` value.

    Returns
    -------
    xarray.Dataset
        Variable ``elevation`` (one value per time step) plus ``pass_ids``
        copied from ``swot_ds``.
    """
    swot_ds.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
    # write_crs accepts a WKT string directly, so no `CRS` class (which this
    # notebook never imports) is required.
    swot_ds.rio.write_crs(swot_ds.spatial_ref.attrs['crs_wkt'], inplace=True)

    # True where a pixel must be ignored: no value, or quality flag above the threshold
    water_surface_elevation_mask = np.logical_or(
        np.isnan(swot_ds['wse']), swot_ds['wse_qual'] > wse_qual_threshold
    )
    water_surface_elevation_raster = swot_ds['wse'].where(~water_surface_elevation_mask)

    # quantile() needs each reduced dimension in a single chunk, hence chunk(-1)
    quantiles = water_surface_elevation_raster.chunk(-1).quantile([0.25, 0.75], dim=('x', 'y'))
    within_iqr = (
        (water_surface_elevation_raster <= quantiles.sel(quantile=0.75))
        & (water_surface_elevation_raster >= quantiles.sel(quantile=0.25))
    )
    iqr_filtered = water_surface_elevation_raster.where(within_iqr)
    elevation_reservoir = iqr_filtered.mean(dim=('x', 'y')).rename('elevation')
    # Describe the returned variable (the original only annotated intermediates
    # that were then discarded).
    elevation_reservoir.attrs['standard_name'] = "surface_water_elevation"
    elevation_reservoir.attrs['long_name'] = "Water Surface Elevation observed by SWOT"
    elevation_reservoir.attrs['unit'] = "m"
    elevation_reservoir.attrs['wse_qual_threshold'] = wse_qual_threshold
    
    swot_ds_classified = xr.merge([
        elevation_reservoir
    ])

    swot_ds_classified['pass_ids'] = swot_ds['pass_ids']

    return swot_ds_classified

In [26]:
# from tmsswot.classify import classify_swot

# Per-time-step elevation estimate for the clipped raster stack.
swot_elev_ds = classify_swot_raster_elevation(
    swot_ds=ds,
    roi=buffered_roi,
    wse_qual_threshold=1,
)

# Tabular view: time moved from the index into a column, incomplete rows dropped.
swot_elev_df = (
    swot_elev_ds
    .to_pandas()
    .loc[:, ['reservoir', 'elevation', 'pass_ids']]
    .reset_index()
    .dropna()
)

swot_elev_ds

Unnamed: 0,Array,Chunk
Bytes,208 B,208 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 30 graph layers,1 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 208 B 208 B Shape (52,) (52,) Dask graph 1 chunks in 30 graph layers Data type float32 numpy.ndarray",52  1,

Unnamed: 0,Array,Chunk
Bytes,208 B,208 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 30 graph layers,1 chunks in 30 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,624 B,624 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,
"Array Chunk Bytes 624 B 624 B Shape (52,) (52,) Dask graph 1 chunks in 2 graph layers Data type",52  1,

Unnamed: 0,Array,Chunk
Bytes,624 B,624 B
Shape,"(52,)","(52,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,,


In [27]:
# Materialise the elevation estimates as a NumPy array (computes the lazy dask graph).
swot_elev_ds['elevation'].values

array([147.49045, 147.2792 , 150.2148 , 141.16165, 147.38264,       nan,
       147.25995, 146.68028, 142.84464, 145.72514, 146.56715, 142.85492,
       145.41122, 145.45418, 142.63   , 145.75342, 146.95502, 142.0696 ,
       147.70158, 141.67102, 148.97493, 144.78452, 141.17975, 147.44362,
       147.06837, 140.89406, 147.25232, 146.28864, 140.44322,       nan,
             nan, 140.24529, 146.5364 , 143.3751 , 139.77434, 148.05023,
             nan, 149.93729, 151.25888, 138.9217 ,       nan, 154.03151,
       139.26329, 146.61719, 153.98448, 139.16046, 147.55945, 139.84789,
       146.67407, 145.97441,       nan, 147.01631], dtype=float32)

In [28]:
# hv.extension('matplotlib')

In [29]:
# Pass id of every row kept in the elevation table.
list(swot_elev_df['pass_ids'])

['299',
 '299',
 '577',
 '90',
 '299',
 '299',
 '577',
 '90',
 '299',
 '577',
 '90',
 '299',
 '577',
 '90',
 '299',
 '577',
 '90',
 '577',
 '90',
 '299',
 '577',
 '90',
 '299',
 '577',
 '90',
 '299',
 '577',
 '90',
 '90',
 '299',
 '577',
 '90',
 '299',
 '299',
 '577',
 '90',
 '577',
 '90',
 '299',
 '577',
 '90',
 '299',
 '90',
 '299',
 '577',
 '299']

In [31]:
# Static matplotlib rendering from here on.
hv.extension('matplotlib')

# Observation times, one row of markers per pass id.
swot_elev_df.hvplot.scatter(x='time', y='pass_ids', by='pass_ids')

In [32]:
# Keep a row only when both conditions hold:
#   - its pass id is not listed as bad for this reservoir
#   - it is later than the start of the science orbit (july 21, 2023)
swot_elev_df = swot_elev_df[
    ~swot_elev_df["pass_ids"].isin(bad_geom_passes.get(RESERVOIR, []))
    & (swot_elev_df['time'] > pd.to_datetime('2023-07-21'))
]

In [34]:
hv.extension('matplotlib')

# Filtered elevation time series, coloured by pass id.
swot_elev_df.hvplot.scatter(x='time', y='elevation', by='pass_ids', grid=True)

In [32]:
# Target file for the elevation scatter plot of this reservoir / algorithm version.
plot_fp = RESULTS_DIR / 'swot_karin' / f'{RESERVOIR}_{ALG_VERSION}_swot_karin_elevation.png'

# Honour the SAVE switch from the configuration cell (previously ignored).
if SAVE:
    plot_fp.parent.mkdir(exist_ok=True, parents=True)
    print(plot_fp)
    hvplot.save(swot_elev_df.hvplot(x='time',y='elevation', kind='scatter', by='pass_ids'), plot_fp)
    # hvplot.save(swot_elev_df.hvplot(x='time',y='elevation', kind='scatter', by='pass_ids'), plot_fp.with_suffix('.svg'))

../results/swot_karin/0931_v0.2_swot_karin_elevation.png


The geckodriver version (0.34.0) detected in PATH at /tiger1/pdas47/tmsosPP/.env/bin/geckodriver might not be compatible with the detected firefox version (126.0.1); currently, geckodriver 0.35.0 is recommended for firefox 126.*, so it is advised to delete the driver in PATH and retry


In [33]:
# Convert the filtered table back to an xarray Dataset indexed by time.
# NOTE: this rebinds `swot_elev_ds`, replacing the dataset returned by
# classify_swot_raster_elevation.
swot_elev_ds = swot_elev_df.set_index('time').to_xarray()


Save the filtered elevation time series as NetCDF and CSV.

In [34]:
# NetCDF output path; defined unconditionally because the CSV cell derives its path from it.
save_fp = DATA_DIR / 'swot' / 'output' / f'{RESERVOIR}_swot_{ALG_VERSION}.nc'

# Honour the SAVE switch from the configuration cell (previously ignored).
if SAVE:
    # to_netcdf does not create missing directories
    save_fp.parent.mkdir(exist_ok=True, parents=True)
    swot_elev_ds.to_netcdf(save_fp)
    print(f'saved at {save_fp}')

saved at ../data/swot/output/0931_swot_v0.2.nc


In [34]:
# Same location and stem as the NetCDF file, with a .csv extension.
csv_save_fp = save_fp.with_suffix('.csv')

# Honour the SAVE switch from the configuration cell (previously ignored).
if SAVE:
    # to_csv does not create missing directories
    csv_save_fp.parent.mkdir(exist_ok=True, parents=True)
    swot_elev_df.to_csv(csv_save_fp, index=False)
    print(f'saved at {csv_save_fp}')

saved at ../data/swot/output/0931_swot_v0.2.csv


##### end of notebook