Notebook for building the TMS-OS++ algorithm.

In [1]:
import geopandas as gpd
from pathlib import Path
import hvplot.pandas
import hvplot.xarray
import holoviews as hv
import pandas as pd
import geoviews as gv
import xarray as xr

hv.extension('bokeh')

# Reservoirs

In [2]:
# read the bounding box of the study area
### subset OF 11 reservoirs
# val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/mini-reservoirs-subset-validation-pts.geojson'))
# val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/mini-reservoirs-subset-validation.geojson'))
### all 100 reservoirs
val_pts = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/subset-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('/tiger1/pdas47/tmsosPP/data/validation-locations/subset-validation-reservoirs-grand.geojson'))

selected_reservoirs = [
    '0078','0079','0193','0197','0214','0217','0340','0365','0484','0486',
    '0498','0503','0505','0507','0508','0523',
    '0524', '0529','0532','0535',
    '0549','0552','0569','0787','0803','0807','0810','0816','0819','0824',
    '0828','0830','0833','0930','0931','0935','0936','1078','1097','1134',
    '1135','1162','1284','1320','1385','1388','1392','1398','1400','1426',
    '1498','0502'
]
res_names = val_pts[['tmsos_id', 'name']].set_index('tmsos_id').to_dict()['name']


val_res_pt = val_pts.loc[val_pts['tmsos_id'].isin(selected_reservoirs)]
val_res_poly = val_polys.loc[val_polys['tmsos_id'].isin(selected_reservoirs)]

global_map = (
    val_res_pt.hvplot(
        geo=True, tiles='OSM'
    )
).opts(
    title=f"Locations of validation reservoirs"
)

global_map

# Read in data

## SWOT

In [3]:
ALG_VERSION = 'v0.2'
RESERVOIR = '0930'

swot_dfs = []
for reservoir_id in selected_reservoirs:
    swot_fp = Path(f'../data/swot/output/{reservoir_id}_swot_{ALG_VERSION}.csv')
    print(f'reading from {swot_fp}')
    
    df_res = pd.read_csv(swot_fp, parse_dates=['time'], dtype={'reservoir': str, 'pass_ids': str})
    
    swot_dfs.append(df_res)
    
swot_df_all = pd.concat(swot_dfs)
swot_df_all.info()

reading from ../data/swot/output/0078_swot_v0.2.csv
reading from ../data/swot/output/0079_swot_v0.2.csv
reading from ../data/swot/output/0193_swot_v0.2.csv
reading from ../data/swot/output/0197_swot_v0.2.csv
reading from ../data/swot/output/0214_swot_v0.2.csv
reading from ../data/swot/output/0217_swot_v0.2.csv
reading from ../data/swot/output/0340_swot_v0.2.csv
reading from ../data/swot/output/0365_swot_v0.2.csv
reading from ../data/swot/output/0484_swot_v0.2.csv
reading from ../data/swot/output/0486_swot_v0.2.csv
reading from ../data/swot/output/0498_swot_v0.2.csv
reading from ../data/swot/output/0503_swot_v0.2.csv
reading from ../data/swot/output/0505_swot_v0.2.csv
reading from ../data/swot/output/0507_swot_v0.2.csv
reading from ../data/swot/output/0508_swot_v0.2.csv
reading from ../data/swot/output/0523_swot_v0.2.csv
reading from ../data/swot/output/0524_swot_v0.2.csv
reading from ../data/swot/output/0529_swot_v0.2.csv
reading from ../data/swot/output/0532_swot_v0.2.csv
reading from

In [4]:
from scipy.stats import zscore

# filter out based on z-score
def z_score_rolling(group):
    elev = group[['elevation', 'time']].set_index('time').sort_index()
    z = elev['elevation'].rolling(window=5, center=True).apply(lambda window: zscore(window, nan_policy='omit').iloc[2])
    return z

z_scores = []
for reservoir, group in swot_df_all.groupby('reservoir'):
    z_score = group['elevation'].rolling(window=3, center=True).apply(lambda w: zscore(w, nan_policy='omit').iloc[1])
    group['z_score'] = z_score
    
    z_scores.append(group)
    
swot_df_all = pd.concat(z_scores)
swot_df_all

Unnamed: 0,time,reservoir,elevation,pass_ids,z_score
0,2023-12-30,0078,1077.484000,447,
1,2024-01-20,0078,1079.801300,447,0.200373
2,2024-02-09,0078,1081.197900,447,-0.157414
3,2024-03-01,0078,1083.266800,447,-0.240118
4,2024-03-22,0078,1087.095700,447,-0.092862
...,...,...,...,...,...
32,2024-06-07,1498,52.345436,259272,1.329753
33,2024-06-17,1498,52.105057,537,-0.465878
34,2024-06-28,1498,52.045770,259272,0.492295
35,2024-07-08,1498,51.772766,537,0.668780


In [5]:
swot_df_all.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1683 entries, 0 to 36
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   time       1683 non-null   datetime64[ns]
 1   reservoir  1683 non-null   object        
 2   elevation  1625 non-null   float64       
 3   pass_ids   1683 non-null   object        
 4   z_score    1438 non-null   float64       
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 78.9+ KB


In [6]:
# swot_df_all.hvplot(x='time', y='elevation', kind='scatter', groupby='reservoir')
swot_df_all.hvplot(x='time', y='elevation', kind='scatter')

In [60]:
data = [(pd.to_datetime("2023-09-21T22:13:00"), pd.to_datetime("2023-09-27T10:18:53"), ),
(pd.to_datetime("2023-10-06T17:13:26"), pd.to_datetime("2023-10-06T17:50:30"), ),
(pd.to_datetime("2023-10-27T14:06:10"), pd.to_datetime("2023-10-27T14:43:12"), ),
(pd.to_datetime("2023-11-13T19:28:23"), pd.to_datetime("2023-11-13T19:50:14"), ),
(pd.to_datetime("2023-11-17T10:54:05"), pd.to_datetime("2023-11-17T11:31:07"), ),
(pd.to_datetime("2023-12-15T21:28:44"), pd.to_datetime("2023-12-15T22:05:47"), ),
(pd.to_datetime("2023-12-23T02:48:25"), pd.to_datetime("2023-12-23T03:25:39"), ),
(pd.to_datetime("2023-12-23T02:49:00"), pd.to_datetime("2023-12-28T15:02:00"), ),
(pd.to_datetime("2023-12-23T09:40:01"), pd.to_datetime("2023-12-23T10:17:15"), ),
(pd.to_datetime("2024-01-12T21:00:52"), pd.to_datetime("2024-01-12T21:37:55"), ),
(pd.to_datetime("2024-01-30T07:27:39"), pd.to_datetime("2024-01-30T07:50:26"), ),
(pd.to_datetime("2024-02-05T13:45:00"), pd.to_datetime("2024-02-05T15:47:00"), ),
(pd.to_datetime("2024-02-09T17:50:40"), pd.to_datetime("2024-02-09T18:27:43"), ),
(pd.to_datetime("2024-03-15T21:10:32"), pd.to_datetime("2024-03-15T21:47:35"), ),
(pd.to_datetime("2024-03-19T14:12:28"), pd.to_datetime("2024-03-19T14:12:28"), ),
(pd.to_datetime("2024-03-21T13:12:11"), pd.to_datetime("2024-03-21T13:12:11"), ),
(pd.to_datetime("2024-04-05T17:40:48"), pd.to_datetime("2024-04-05T18:17:51"), ),
(pd.to_datetime("2024-04-17T02:05:41"), pd.to_datetime("2024-04-17T02:27:40"), ),
(pd.to_datetime("2024-04-19T14:07:09"), pd.to_datetime("2024-04-19T14:07:09"), ),
(pd.to_datetime("2024-05-03T21:14:17"), pd.to_datetime("2024-05-03T21:51:21"), ),
(pd.to_datetime("2024-05-04T12:40:02"), pd.to_datetime("2024-05-04T21:43:01"), ),
(pd.to_datetime("2024-05-24T19:26:13"), pd.to_datetime("2024-05-24T20:03:17"), ),
(pd.to_datetime("2024-06-07T08:21:55"), pd.to_datetime("2024-06-07T08:59:17"), ),
(pd.to_datetime("2024-06-07T17:47:45"), pd.to_datetime("2024-06-07T18:25:06"), ),
(pd.to_datetime("2024-06-29T10:16:38"), pd.to_datetime("2024-06-29T10:38:58"), ),
(pd.to_datetime("2024-07-05T19:02:56"), pd.to_datetime("2024-07-05T19:40:00"), ),
(pd.to_datetime("2024-08-02T20:45:31"), pd.to_datetime("2024-08-02T21:22:34"), ),
(pd.to_datetime("2024-08-23T20:50:00"), pd.to_datetime("2024-08-23T21:27:04"), )]

swot_anomalies_time_df = pd.DataFrame(data, columns=['start', 'end'])
swot_anomalies_time_df['end'] = swot_anomalies_time_df['end'] + pd.Timedelta('24h')


data = [
    (3, 157),
    (417, 418),
    (417, 418),
    (316, 316),
    (418, 418),
    (46, 46),
    (248, 248),
    (248, 402),
    (256, 256),
    (245, 245),
    (149, 149),
    (324, 326),
    (441, 442),
    (256, 257),
    (360, 360),
    (415, 415),
    (256, 257),
    (574, 574),
    (60, 60),
    (460, 461),
    (478, 488),
    (462, 462),
    (257, 257),
    (268, 268),
    (291, 291),
    (469, 469),
    (86, 87),
    (90, 91)
]

swot_anomalies_pass_df = pd.DataFrame(data, columns=['start_pass', 'end_pass'])

swot_anomalies_df = swot_anomalies_time_df.merge(swot_anomalies_pass_df, left_index=True, right_index=True)
swot_anomalies_df

Unnamed: 0,start,end,start_pass,end_pass
0,2023-09-21 22:13:00,2023-09-28 10:18:53,3,157
1,2023-10-06 17:13:26,2023-10-07 17:50:30,417,418
2,2023-10-27 14:06:10,2023-10-28 14:43:12,417,418
3,2023-11-13 19:28:23,2023-11-14 19:50:14,316,316
4,2023-11-17 10:54:05,2023-11-18 11:31:07,418,418
5,2023-12-15 21:28:44,2023-12-16 22:05:47,46,46
6,2023-12-23 02:48:25,2023-12-24 03:25:39,248,248
7,2023-12-23 02:49:00,2023-12-29 15:02:00,248,402
8,2023-12-23 09:40:01,2023-12-24 10:17:15,256,256
9,2024-01-12 21:00:52,2024-01-13 21:37:55,245,245


In [61]:
swot_df_all.head()

Unnamed: 0,time,reservoir,elevation,pass_ids,z_score,sensor,day,month,year,anomaly
0,2023-12-30,78,1077.484,447,,swot_karin,30,12,2023,False
1,2024-01-20,78,1079.8013,447,0.200373,swot_karin,20,1,2024,False
2,2024-02-09,78,1081.1979,447,-0.157414,swot_karin,9,2,2024,False
3,2024-03-01,78,1083.2668,447,-0.240118,swot_karin,1,3,2024,False
4,2024-03-22,78,1087.0957,447,-0.092862,swot_karin,22,3,2024,False


In [83]:
anomalies = []
for idx, row in swot_df_all.iterrows():
    time = row['time']
    pass_ids = [int(pass_id) for pass_id in row['pass_ids'].split(',')]

    # check if the time is within the anomaly time range
    anomaly = swot_anomalies_df[
        (time >= swot_anomalies_df['start']) & (time <= swot_anomalies_df['end'])
    ]
    if not anomaly.empty:
        print(f"Anomaly detected for {row['reservoir']} at {time}")
        anomalies.append(True)
    else:
        anomalies.append(False)

Anomaly detected for 0078 at 2024-03-22 00:00:00
Anomaly detected for 0079 at 2023-12-28 00:00:00
Anomaly detected for 0079 at 2024-03-22 00:00:00
Anomaly detected for 0193 at 2023-12-28 00:00:00
Anomaly detected for 0193 at 2023-12-29 00:00:00
Anomaly detected for 0197 at 2024-04-20 00:00:00
Anomaly detected for 0217 at 2024-01-31 00:00:00
Anomaly detected for 0340 at 2024-05-05 00:00:00
Anomaly detected for 0340 at 2024-07-06 00:00:00
Anomaly detected for 0365 at 2024-02-10 00:00:00
Anomaly detected for 0365 at 2024-05-05 00:00:00
Anomaly detected for 0365 at 2024-07-06 00:00:00
Anomaly detected for 0484 at 2024-04-20 00:00:00
Anomaly detected for 0486 at 2024-03-20 00:00:00
Anomaly detected for 0486 at 2024-04-18 00:00:00
Anomaly detected for 0498 at 2023-11-14 00:00:00
Anomaly detected for 0498 at 2024-02-06 00:00:00
Anomaly detected for 0502 at 2024-02-06 00:00:00
Anomaly detected for 0502 at 2024-04-20 00:00:00
Anomaly detected for 0503 at 2024-03-20 00:00:00
Anomaly detected for

In [85]:
swot_df_all['anomaly'] = anomalies

In [86]:
# create a hv plot of the anomalies as shaded VSpans using the swot_anomalies_df dataframe

swot_issues = hv.VSpans(
    (swot_anomalies_df['start'], swot_anomalies_df['end'])
).opts(
    color='red', alpha=0.5
)
swot_issues

In [87]:
swot_df_all['sensor'] = 'swot_karin'
swot_df_all['day'] = swot_df_all['time'].dt.day
swot_df_all['month'] = swot_df_all['time'].dt.month
swot_df_all['year'] = swot_df_all['time'].dt.year

In [93]:
z_score_threshold = 1.4

swot_df_subset = swot_df_all[(swot_df_all['z_score'] <= z_score_threshold)&(swot_df_all['z_score']>=-z_score_threshold)]

# # filter out anomalies
# swot_df_subset = swot_df_subset[~swot_df_subset['anomaly']]

In [94]:
swot_df_all.hvplot(x='time', y='elevation', kind='scatter', color='z_score', by='reservoir', subplots=True, width=400, height=200, shared_axes=False, xaxis=True).opts(title='Unfiltered swot elevations colored by z-score').cols(3) * swot_issues

In [102]:
(swot_df_subset.hvplot(x='time', y='elevation', kind='scatter', color='pass_ids', by='reservoir', subplots=True, width=400, height=200, shared_axes=False, xaxis=False).opts(title='Unfiltered swot elevations colored by z-score') * swot_df_subset[swot_df_subset['anomaly']].hvplot.scatter(x='time', y='elevation', color='black', size=100, marker='x', by='reservoir', subplots=True, width=400, height=200, shared_axes=False, xaxis=False).opts(title='Anomalies')).cols(3)

In [92]:
swot_df_subset.hvplot(x='time', y='elevation', kind='scatter', color='pass_ids', by='reservoir', subplots=True, width=400, height=200, shared_axes=False, xaxis=False).opts(title='Unfiltered swot elevations colored by z-score').cols(3)

In [11]:
swot_df_all.count()

time         1683
reservoir    1683
elevation    1625
pass_ids     1683
z_score      1438
sensor       1683
day          1683
month        1683
year         1683
dtype: int64

In [12]:
swot_df_subset.count()

time         1317
reservoir    1317
elevation    1317
pass_ids     1317
z_score      1317
sensor       1317
day          1317
month        1317
year         1317
dtype: int64

## Target data

### Storage Change Calculation

In [13]:
aec_df = None
srtm_extrapolated_dir = Path('../data/aec/srtm_extrapolated')
poly_deg = [2, 3]

for poly_deg_candidate in poly_deg:
    aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg_candidate}.csv'
    if aec_fp.exists():
        break

aec_df = pd.read_csv(aec_fp)
aec_df.head(5)

Unnamed: 0,Elevation,CumArea,obs_or_extrapolated
0,685.49,0.0,extrapolated
1,686.51,4.63,extrapolated
2,687.52,20.33,extrapolated
3,688.54,35.88,extrapolated
4,689.56,51.25,extrapolated


In [14]:
aec_df.hvplot(x='Elevation', y='CumArea').opts(height=400, width=500)

In [15]:
import xarray as xr
import hvplot.xarray
import numpy as np

alg_type = 'swot_karin'
alg_version = 'v0.2'
elevation_dir = Path(f'/tiger1/pdas47/tmsosPP/data/swot/output')
elevation_fp = elevation_dir / f'{RESERVOIR}_swot_{alg_version}.csv'
srtm_extrapolated_dir = Path('../data/aec/srtm_extrapolated')

reservoir_dynamics_all = []

for reservoir_id in selected_reservoirs:
    capacity = (val_res_pt.loc[val_res_pt['tmsos_id']==reservoir_id, 'CAP_MCM'] * 1e6).values[0] # m^3
    RESERVOIR_NAME = res_names[reservoir_id]

    aec_df = None
    for poly_deg_candidate in [2, 3]:
        aec_fp = srtm_extrapolated_dir / f'{reservoir_id}_poly_{poly_deg_candidate}.csv'
        if aec_fp.exists():
            break
    
    aec_df = pd.read_csv(aec_fp)
    aec_df.head(5)

    reservoir_dynamics = None
    data = {}
    if alg_type == 'swot_karin':
        # swot_karin_df = pd.read_csv(elevation_fp, parse_dates=['time'])
        # data['elevation'] = swot_karin_df['elevation']
        # data['time'] = swot_karin_df['time']
        data['elevation'] = swot_df_subset.loc[swot_df_subset['reservoir']==reservoir_id, 'elevation']
        data['time'] = swot_df_subset.loc[swot_df_subset['reservoir']==reservoir_id, 'time']
        area = np.interp(data['elevation'], aec_df['Elevation'], aec_df['CumArea'])
        data['area'] = area
    
        reservoir_dynamics = pd.DataFrame(data).set_index('time').to_xarray()
    
    if 'storage_change' not in list(reservoir_dynamics.variables):
        del_h = reservoir_dynamics['elevation'].diff(dim='time')
        avg_A = (reservoir_dynamics['area'].isel(time=slice(0, -1)) + reservoir_dynamics['area'].isel(time=slice(1, None)))/2
        del_s = xr.DataArray(0.5 * avg_A * del_h * 1e6, name='storage_change')
        reservoir_dynamics = reservoir_dynamics.assign(storage_change=del_s)
    
    if 'storage_change_rate' not in list(reservoir_dynamics.variables):
        del_s = reservoir_dynamics['storage_change']
        del_t = reservoir_dynamics['time'].diff(dim='time').dt.days
        reservoir_dynamics = reservoir_dynamics.assign(del_t=del_s)
        reservoir_dynamics = reservoir_dynamics.assign(storage_change_rate=del_s)

    reservoir_dynamics_df = reservoir_dynamics.to_pandas()
    reservoir_dynamics_df['reservoir'] = reservoir_id
    reservoir_dynamics_all.append(reservoir_dynamics_df)

reservoir_dynamics_all = pd.concat(reservoir_dynamics_all)

In [16]:
print(capacity)

9621000000.0


In [17]:
print(capacity)
hv.HLine(0).opts(color='gray') * reservoir_dynamics_df[reservoir_dynamics_df['reservoir']==RESERVOIR].hvplot.scatter(x='time', y='storage_change').opts(
    title=f'{RESERVOIR}: {RESERVOIR_NAME}'
) * hv.HLine(y=capacity).opts(title=f'{capacity}')

9621000000.0


In [18]:
storage_change = reservoir_dynamics['storage_change']
hv.HLine(0).opts(color='gray') * (storage_change*100/capacity).hvplot(x='time', y='storage_change', kind='scatter').opts(ylim=(-10,10), ylabel='∆s/capacity (%)', title=f'{RESERVOIR_NAME} ({RESERVOIR}): Storage change as a percentage of Capacity')

In [19]:
reservoir_dynamics['storage_change_rate']

## add swot nadir data

In [20]:
# RESERVOIR = '1284'
ALG_VERSION = '0.1'

swot_nadir_dfs = []
for reservoir in selected_reservoirs:
    RESERVOIR_NAME = res_names[RESERVOIR]
    swot_fp = Path(f'../data/elevation/swot_nadir/v{ALG_VERSION}/{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_swot_nadir_elevation.csv')
    print(f'reading from {swot_fp}')
    
    if swot_fp.exists():
        swot_df = pd.read_csv(swot_fp, parse_dates=['time']).groupby('time').median().reset_index()
        swot_nadir_dfs.append(swot_df)
    else:
        print(f"{swot_fp} does not exist")
        swot_df = None
    
swot_df

reading from ../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv
../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv does not exist
reading from ../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv
../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv does not exist
reading from ../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv
../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv does not exist
reading from ../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv
../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv does not exist
reading from ../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv
../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv does not exist
reading from ../data/elevation/swot_nadir/v0.1/0930_Kinbasket_swot_nadir_elevation.csv
../data/elevation/swot_nadir/v0.1

In [21]:
# swot_df.hvplot(x='time', y='elevation', kind='scatter', by='pass_ids')

## add all columns to reservoir

Satellite, Area, Elevation, Storage Change

In [22]:
source = 'swot_nadir'
poly_deg = 2
storage_change_calc_method = 'area' 

import xarray as xr
import numpy as np

srtm_extrapolated_dir = Path('/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated')

aec_fp = srtm_extrapolated_dir / f'{RESERVOIR}_poly_{poly_deg}.csv'
print(aec_fp)

if aec_fp.exists():
    aec_df = pd.read_csv(aec_fp)

# determine storage change calculation method if not provided
if storage_change_calc_method is None:
    db = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['db'].values
    if db == 'deltares':
        storage_change_calc_method = 'area'
    elif db == 'resops':
        storage_change_calc_method = 'storage' # or 'area' 
    elif db == 'rid':
        storage_change_calc_method = 'storage'

db = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['db'].values
if storage_change_calc_method is None:
    db = val_res_poly[val_res_poly['tmsos_id'] == RESERVOIR]['db'].values
    if db == 'deltares':
        storage_change_calc_method = 'area'
    elif db == 'resops':
        storage_change_calc_method = 'storage' # or 'area' 
    elif db == 'rid':
        storage_change_calc_method = 'storage'

swot_ds_res = swot_df.set_index(['time']).to_xarray()
swot_ds_res = swot_ds_res.where(~np.isnan(swot_ds_res['elevation']), drop=True)
# # swot_ds_res = swot_ds_res

reservoir_dynamics_swot_nalt = swot_ds_res.rename({'time': 'date'})
reservoir_dynamics_swot_nalt['elevation'].attrs['source'] = 'swot nadir altimeter'
reservoir_dynamics_swot_nalt['elevation'].attrs['obs_or_calc'] = 'obs'
reservoir_dynamics_swot_nalt['elevation'].attrs['unit'] = 'm'

# def impute_missing_values(reservoir_dynamics):
#     # calculate elevation if it is not present
#     if np.isnan(reservoir_dynamics['elevation']).sum() > 0:
#         # elevation needs to be calculated
#         elevation = np.interp(reservoir_dynamics['area'], aec_df['CumArea'], aec_df['Elevation'])
#         elevation_da = xr.DataArray(data=elevation, coords=reservoir_dynamics.coords, name='elevation')
#         elevation_da = elevation_da.where(np.isnan(reservoir_dynamics['elevation']))
#         elevation_da.attrs['source'] = source
#         elevation_da.attrs['obs_or_calc'] = 'calc'
#         elevation_da.attrs['unit'] = 'm'
#         reservoir_dynamics = reservoir_dynamics.assign(elevation = elevation_da)
#     else: # elevation is present, just add metadata
#         reservoir_dynamics['elevation'].attrs['source'] = source
#         reservoir_dynamics['elevation'].attrs['obs_or_calc'] = 'obs'
#         reservoir_dynamics['elevation'].attrs['unit'] = 'm'

#     # calculate area if it is not present
#     if 'area' not in list(reservoir_dynamics.variables) or np.isnan(reservoir_dynamics['area']).sum() > 0:
#         # area needs to be calculated
#         area = np.interp(reservoir_dynamics['elevation'], aec_df['Elevation'], aec_df['CumArea'])
#         area_da = xr.DataArray(data=area, coords=reservoir_dynamics.coords, name='area')
#         area_da.attrs['source'] = source
#         area_da.attrs['obs_or_calc'] = 'calc'
#         area_da.attrs['unit'] = 'km^2'
#         reservoir_dynamics = reservoir_dynamics.assign(area = area_da)
#     else:
#         reservoir_dynamics['area'].attrs['source'] = source
#         reservoir_dynamics['area'].attrs['obs_or_calc'] = 'obs'
#         reservoir_dynamics['area'].attrs['unit'] = 'km^2'

#     # calculate storage change
#     if 'storage_change' not in list(reservoir_dynamics.variables):
#         if storage_change_calc_method == 'area':
#             A0 = reservoir_dynamics['area'].isel(date=slice(0, -1))
#             A1 = reservoir_dynamics['area'].isel(date=slice(1, None))

#             h0 = reservoir_dynamics['elevation'].isel(date=slice(0, -1))
#             h1 = reservoir_dynamics['elevation'].isel(date=slice(1, None))

#             # t0 = reservoir_dynamics['date'].isel(date=slice(0, -1))
#             # t1 = reservoir_dynamics['date'].isel(date=slice(1, None))

#             del_s_values = 1e6 * (h1.values - h0.values)*(A1.values + A0.values)/2
#             del_s_values = np.insert(del_s_values, 0, np.nan)
#             del_s = xr.DataArray(del_s_values, name='storage_change', coords=reservoir_dynamics.coords)
#             del_s.attrs['unit'] = 'm^3'
#             reservoir_dynamics = reservoir_dynamics.assign(storage_change=del_s)

#             del_t = reservoir_dynamics['date'].diff(dim='date').dt.days.astype(float)
#             del_s_daily = del_s / del_t
#             del_s_daily.attrs['unit'] = 'm^3/day'
#             reservoir_dynamics = reservoir_dynamics.assign(storage_change_daily=del_s_daily)
#             reservoir_dynamics = reservoir_dynamics.assign(delta_t=del_t)
#         elif storage_change_calc_method == 'storage':
#             S = reservoir_dynamics['storage'].diff(dim='date')
#             S = np.insert(S, 0, np.nan)
#             del_s = xr.DataArray(S, name='storage_change', coords=reservoir_dynamics.coords)
#             del_s.attrs['unit'] = 'm^3'
#             reservoir_dynamics = reservoir_dynamics.assign(storage_change=del_s)

#             del_t = reservoir_dynamics['date'].diff(dim='date').dt.days
#             del_s_daily = del_s / del_t
#             del_s_daily.attrs['unit'] = 'm^3/day'
#             reservoir_dynamics = reservoir_dynamics.assign(storage_change_daily=del_s_daily)
#             reservoir_dynamics = reservoir_dynamics.assign(delta_t=del_t)
#     else:
#         reservoir_dynamics['storage_change'].attrs['unit'] = 'm^3'
#         reservoir_dynamics['storage_change_daily'].attrs['unit'] = 'm^3/day'
#         reservoir_dynamics['delta_t'].attrs['unit'] = 'days'

#     return reservoir_dynamics

# reservoir_dynamics_swot_nalt = impute_missing_values(reservoir_dynamics_swot_nalt)
# reservoir_dynamics_swot_nalt = reservoir_dynamics_swot_nalt.assign(
#     source = xr.DataArray(
#         data=[source]*len(reservoir_dynamics_swot_nalt['date']),
#         dims=['date'],
#         coords={'date': reservoir_dynamics_swot_nalt['date']}
#     )
# )
# swot_df = reservoir_dynamics_swot_nalt.to_pandas()
# reservoir_dynamics_swot_nalt

# reservoir_dynamics_swot_nalt

/tiger1/pdas47/tmsosPP/data/aec/srtm_extrapolated/0930_poly_2.csv


AttributeError: 'NoneType' object has no attribute 'set_index'

In [None]:
(
    reservoir_dynamics_swot_nalt.hvplot(x='date', y='area', kind='scatter') \
    + reservoir_dynamics_swot_nalt.hvplot(x='date', y='elevation', kind='scatter') \
    # + reservoir_dynamics_swot_nalt.hvplot(x='date', y='storage', kind='scatter') \
    + reservoir_dynamics_swot_nalt.hvplot(x='date', y='storage_change', kind='scatter') \
    + reservoir_dynamics_swot_nalt.hvplot(x='date', y='storage_change', color='delta_t', kind='scatter', cmap='viridis')
).cols(1)

## add sentinel-6 data

In [None]:
# RESERVOIR = '1284'
RESERVOIR_NAME = res_names[RESERVOIR]
ALG_VERSION = '0.1'
s6a_fp = Path(f'../data/elevation/sentinel6a/v{ALG_VERSION}/{RESERVOIR}_{RESERVOIR_NAME.split(",")[0].replace(" ", "_")}_sentinel6a.csv')
print(f'reading from {s6a_fp}')

if s6a_fp.exists():
    s6a_df = pd.read_csv(s6a_fp, parse_dates=['date']).set_index('date')
    s6a_df['source'] = 'sentinel6a'
else:
    print(f"{s6a_fp} does not exist")
    s6a_df = None

s6a_df

In [None]:
s6a_df.hvplot(x='date', y='elevation', kind='scatter', by='pass_num')

## combined data

In [None]:
reservoir_dynamics_swot_nalt

In [None]:
reservoir_dynamics_swot

In [None]:
swot_df

In [None]:
merged = pd.concat([swot_df, swot_karin, s6a_df])
merged

In [None]:
merged.hvplot(x='date', y='elevation', by='source', kind='scatter') \
* merged[['elevation', 'uncertainty', 'source']].dropna().hvplot(y='elevation', yerr1='uncertainty', yerr2='uncertainty', by='source', kind='errorbars')

In [None]:
# target variable: elevation from altimeters

target = merged['elevation']
target.head()

## create input data
#### features: elevation (uncorrected) from Landsat-8, Sentinel-2 (HLS)

## read in TMS-OS data

In [None]:
RESERVOIR = '1284'
swot_fp = Path(f'../data/tmsos/sar/{RESERVOIR}_12d_sar.csv')
print(f'reading from {swot_fp}')

tmsos_df = pd.read_csv(swot_fp, parse_dates=['time'])
tmsos_df

## choose model

In [None]:
merged[['elevation']]