In [1]:
import pandas as pd
import hvplot.pandas
from pathlib import Path

In [2]:
reservoir = '0502'  # select a reservoir
selected_reservoirs = [
    # '0505', # dumboor. India
    '0810', # sirindhorn, Thailand.
    '0830', # Krasoew, Thailand.
    '0502', # Bhakra dam, India.
    # '0518', # Bhadra, India.
    # '0349', # vaaldam, South Africa.
    '0464', # Sterkspruit, South Africa.
    # '0214', # Cijara, Spain
    # '1498', # Toledo bend, US
    # '0936', # Arrow, Canada
]
res_names = {
    '0505': 'Dumboor',
    '0810': 'Sirindhorn',
    '0830': 'Krasoew',
    '0502': 'Bhakra',
    '0518': 'Bhadra',
    '0349': 'Vaaldam',
    '0464': 'Sterkspruit',
    '0214': 'Cijara',
    '1498': 'Toledo',
    '0936': 'Arrow',
}

In [3]:
version = '0.1'
area_dir = Path(f'../data/area/hls_corrected/v{version}/')
area_fns = list(area_dir.glob("*.csv"))
area_dfs = [
    pd.read_csv(fn) for fn in area_fns
]
area_dfs

[                        time  hls area [km2]     platform  reservoir
 0    2019-01-02 05:35:04.740       81.429300  Sentinel-2B        518
 1    2019-01-07 05:35:02.000       83.231100  Sentinel-2A        518
 2    2019-01-12 05:35:06.170       87.795906  Sentinel-2B        518
 3    2019-01-15 05:16:29.450       82.386000    Landsat-8        518
 4    2019-01-17 05:35:03.170       88.654495  Sentinel-2A        518
 ..                       ...             ...          ...        ...
 455  2023-12-17 05:35:06.658             NaN  Sentinel-2B        518
 456  2023-12-20 05:16:59.614             NaN    Landsat-9        518
 457  2023-12-22 05:35:03.249             NaN  Sentinel-2A        518
 458  2023-12-27 05:35:08.464      103.121100  Sentinel-2B        518
 459  2023-12-28 05:16:50.141             NaN    Landsat-8        518
 
 [460 rows x 4 columns],
                         time  hls area [km2]     platform  reservoir
 0    2019-01-01 03:33:56.660       196.66080  Sentinel-2A     

In [4]:
reservoir_ids = [area_fn.name.split('.')[0] for area_fn in area_fns]
reservoir_ids

['0518', '0810', '0505', '0464', '0502', '0830']

In [5]:
for reservoir_id in reservoir_ids:
    area_df = pd.read_csv(area_dir / f"{reservoir_id}.csv")
    area_df['date'] = pd.to_datetime(area_df['time'])
    area_df = area_df.set_index('date')

    


In [6]:
import geopandas as gpd
from pathlib import Path

# read the bounding box of the study area
val_pts = gpd.read_file(Path('../data/validation-locations/subset-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('../data/validation-locations/subset-validation-reservoirs-grand.geojson'))

idx = val_polys['tmsos_id'].isin(selected_reservoirs)
subset = val_polys[idx]
subset

Unnamed: 0,GRAND_ID_left,RES_NAME_left,DAM_NAME_left,ALT_NAME_left,RIVER_left,ALT_RIVER_left,MAIN_BASIN_left,SUB_BASIN_left,NEAR_CITY_left,ALT_CITY_left,...,db,name,rid_id,grand_id,rid_filepath,resops_id,rid_filename,tmsos_id,distance,geometry
52,4589,,Sterkspruit,,Crocodile,,Indian Ocean Coast,Incomati,Bosoord,,...,deltares,,,,,,,464,,"POLYGON ((30.38665 -25.32238, 30.38694 -25.324..."
60,4793,Govind,Bhakra Dam,Gobind Sagar,Sutluj,,Indus,,Nangal Township,,...,deltares,,,,,,,502,0.005405,"POLYGON ((76.39041 31.54315, 76.39202 31.54278..."
92,5158,,Krasoew,,Tha Chin,,,,,,...,rid,Krasoew,100303.0,5158.0,mekong_insitu/100303-Krasoew_Dam.csv,,100303-Krasoew_Dam.csv,830,,"POLYGON ((99.63094 14.92288, 99.63222 14.92227..."
97,5796,Noi,Sirindhorn,,Lam Dom Noi,,Mekong,,,,...,rid,Sirindhorn,200212.0,5796.0,mekong_insitu/200212-Sirindhorn_Dam.csv,,200212-Sirindhorn_Dam.csv,810,,"POLYGON ((105.37056 14.95278, 105.37065 14.952..."


In [7]:
subset.hvplot(geo=True, tiles='OSM').opts(width=600)



In [8]:
def get_insitu_df(tmsos_id):
    idx = val_polys['tmsos_id'].isin(selected_reservoirs)
    subset = val_polys[idx]

    row = subset[subset['tmsos_id']==tmsos_id]
    db = row['db'].values
    
    insitu_df = None

    if db == 'deltares':
        deltares_id = row['deltares_id'].values
        insitu_dir = Path('../data/insitu/deltares/')
        fn = insitu_dir / f'{int(deltares_id):07}.csv'
        print(fn, fn.exists())
        
        insitu_df = pd.read_csv(fn, parse_dates=['time']).rename({'area': 'area [km2]'}, axis=1)
        insitu_df.sort_values('time', inplace=True)
        insitu_df['area [km2]'] = insitu_df['area [km2]'] * 1e-6
    if db == 'rid':
        rid_id = row['rid_id'].values
        name = row['name'].values[0]
        insitu_dir = Path('../data/insitu/rid')
        fn = insitu_dir / f'{int(rid_id)}-{name}_Dam.csv'
        print(fn, fn.exists())
        insitu_df = pd.read_csv(fn, parse_dates=['date']).rename({'storage (mil. m3)': 'area [km2]'}, axis=1)
        insitu_df['area [km2]'] = insitu_df['area [km2]'] * 1e-6
        
    return insitu_df

insitu_dfs = []

perf_dfs = []


# for reservoir in reservoir_ids:
insitu_df = get_insitu_df(reservoir)
if 'time' in insitu_df.columns:
    insitu_df.rename({'time': 'date'}, axis=1, inplace=True)
insitu_df['date'] =  pd.to_datetime(insitu_df['date'].dt.date)
insitu_df.set_index('date', inplace=True)
insitu_dfs.append(insitu_df)

sat_fn = Path(f'{area_dir}/{reservoir}.csv')
sat_df = pd.read_csv(sat_fn, parse_dates=['time'], dtype={'reservoir': str})
sat_df['date'] = pd.to_datetime(sat_df['time'].dt.date)
sat_df.set_index('date', inplace=True)


test_df = sat_df.join(insitu_df, how='left', rsuffix='_insitu')
test_df.rename({
    'area [km2]': 'insitu area [km2]',
}, axis=1, inplace=True)
test_df.dropna(subset=['hls area [km2]'], inplace=True)
test_df.reset_index(inplace=True)

test_df
# import HydroErr as he

# metrics = [
#     'ME', 'MAE', 'NRMSE mean', 'NRMSE range', 'R^2', 'Pearson r', 'NSE', 'KGE 2012',
# ]

# metrics_fn = [
#     he.me, he.mae, he.nrmse_mean, he.nrmse_range, he.r_squared, he.pearson_r, he.nse, he.kge_2012, 
# ]

# metric_values = []

# for metric_name, metric_fn in zip(metrics, metrics_fn):
#     metric_value = metric_fn(test_df['hls area [km2]'], test_df['insitu area [km2]'])
#     metric_values.append(metric_value)

# perf_df = pd.DataFrame({metric_name: [metric_value] for metric_name, metric_value in zip(metrics, metric_values)})
# perf_df['reservoir'] = reservoir
# perf_df['senesor'] = 'hls'
# perf_df['algorithm'] = 'tms-swot-v0.1.0'
# # perf_gdf = gpd.GeoDataFrame(perf_df, geometry=val_polys[val_polys['tmsos_id']==reservoir].geometry)

# perf_dfs.append(perf_df)

# combined_perf_df = pd.concat(perf_dfs)
# combined_perf_df

../data/insitu/deltares/0089238.csv True


  fn = insitu_dir / f'{int(deltares_id):07}.csv'


Unnamed: 0,date,time,hls area [km2],platform,reservoir,insitu area [km2]
0,2019-01-03,2019-01-03 05:49:58.340,138.421800,Sentinel-2A,0502,124.808186
1,2019-01-08,2019-01-08 05:50:02.660,99.673200,Sentinel-2B,0502,
2,2019-01-13,2019-01-13 05:23:40.856,119.252700,Landsat-8,0502,122.483863
3,2019-01-13,2019-01-13 05:23:40.856,119.252700,Landsat-8,0502,121.820221
4,2019-01-13,2019-01-13 05:49:59.750,107.073906,Sentinel-2A,0502,122.483863
...,...,...,...,...,...,...
728,2024-03-22,2024-03-22 05:29:44.912,116.255700,Landsat-8,0502,
729,2024-03-22,2024-03-22 05:50:04.285,92.379600,Sentinel-2B,0502,
730,2024-03-23,2024-03-23 05:23:57.508,91.269905,Landsat-9,0502,
731,2024-03-27,2024-03-27 05:50:01.504,137.802600,Sentinel-2A,0502,


In [9]:
sat_df

Unnamed: 0_level_0,time,hls area [km2],platform,reservoir
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-03,2019-01-03 05:49:58.340,138.421800,Sentinel-2A,0502
2019-01-04,2019-01-04 05:29:53.138,,Landsat-8,0502
2019-01-05,2019-01-05 05:40:21.190,,Sentinel-2B,0502
2019-01-08,2019-01-08 05:50:02.660,99.673200,Sentinel-2B,0502
2019-01-13,2019-01-13 05:23:40.856,119.252700,Landsat-8,0502
...,...,...,...,...
2024-03-22,2024-03-22 05:50:04.285,92.379600,Sentinel-2B,0502
2024-03-23,2024-03-23 05:23:57.508,91.269905,Landsat-9,0502
2024-03-27,2024-03-27 05:50:01.504,137.802600,Sentinel-2A,0502
2024-03-29,2024-03-29 05:40:08.258,119.106000,Sentinel-2B,0502


In [10]:
import altair as alt

chart = alt.Chart(test_df).mark_bar().encode(
    y=r'hls area \[km2\]:Q',
    x='count()'
    # y='hls area [km2]',
    # y2='water_level (m)'
)

chart

In [11]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 733 entries, 0 to 732
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   date               733 non-null    datetime64[ns]
 1   time               733 non-null    datetime64[ns]
 2   hls area [km2]     733 non-null    float64       
 3   platform           733 non-null    object        
 4   reservoir          733 non-null    object        
 5   insitu area [km2]  236 non-null    float64       
dtypes: datetime64[ns](2), float64(2), object(2)
memory usage: 34.5+ KB


In [12]:
insitu_df

Unnamed: 0_level_0,area [km2]
date,Unnamed: 1_level_1
1989-08-06,117.247861
1989-12-12,118.320195
1989-12-28,117.301984
1990-01-13,112.828042
1990-02-14,104.032752
...,...
2021-09-14,115.937843
2021-09-15,114.781232
2021-09-19,119.612063
2021-09-24,118.840905


In [18]:
insitu_df['2019':'2024'].reset_index()

Unnamed: 0,date,area [km2],In-situ data
0,2019-01-03,124.808186,Bhakra
1,2019-01-13,122.483863,Bhakra
2,2019-01-13,121.820221,Bhakra
3,2019-01-15,142.096138,Bhakra
4,2019-01-23,121.384741,Bhakra
...,...,...,...
326,2021-09-14,115.937843,Bhakra
327,2021-09-15,114.781232,Bhakra
328,2021-09-19,119.612063,Bhakra
329,2021-09-24,118.840905,Bhakra


In [28]:
sat_df.rename({
    'platform': 'Platform'
}, axis=1, inplace=True)
sat_chart = alt.Chart(sat_df.reset_index()).mark_point(filled=True, size=50).encode(
    x=alt.X(r'date:T', axis=alt.Axis(format="%Y %B %d", labelAngle=15)),
    y=alt.Y(r'hls area \[km2\]:Q'),
    color=alt.Color(r'Platform:O').scale(scheme='category10')
).properties(
    width=600
)

# insitu_df.rename({
#     'water_level (m)': 'Water Level (m)'
# }, axis=1, inplace=True)
insitu_df['In-situ data'] = f'{res_names[reservoir]}'
insitu_chart = alt.Chart(insitu_df['2019':'2024'].reset_index()).mark_point(filled=True).encode(
    x=alt.X(r'date:T'), y=alt.Y(r'area \[km2\]:Q'), 
    shape=alt.Shape('In-situ data:N').scale(range=['diamond']), 
    color=alt.Color('In-situ data:N').scale(range=['black']), 
).properties(
    width=600,
    title=f'{res_names[reservoir]} HLS cloud corrected area vs. in-situ water level',
)
# insitu_df.rename({
#     'water_level (m)': 'Water Level (m)'
# }, axis=1, inplace=True)
# insitu_df['In-situ data'] = f'{res_names[reservoir]}'
# insitu_chart = alt.Chart(insitu_df['2019':'2024'].reset_index()).mark_point(filled=True).encode(
#     x=alt.X(r'date:T'), y=alt.Y('Water Level (m):Q').scale(domain=[70, 90], clamp=True), 
#     shape=alt.Shape('In-situ data:N').scale(range=['diamond']), 
#     color=alt.Color('In-situ data:N').scale(range=['black']), 
# ).properties(
#     width=600,
#     title=f'{res_names[reservoir]} HLS cloud corrected area vs. in-situ water level',
# )

alt.layer(insitu_chart, sat_chart).resolve_scale(y='independent', color='independent').resolve_legend(color='independent', shape='independent')

In [128]:
# alt.Chart(test_df).mark_point(clip=True).encode(
#     x=alt.X(r'hls area \[km2\]:Q'), 
#     y=alt.Y(r'water_level (m):Q')
# )

In [129]:
# from datetime import datetime

# result_dir = Path('../data/results')
# result_dir.mkdir(exist_ok=True)

# d = datetime.today().strftime('%Y%m%d_%H%M%S')
# save_dir = result_dir / f'{d}'
# save_dir.mkdir(exist_ok=False)

# combined_perf_df.to_csv(save_dir / 'performance.csv', index=False)

In [14]:
sat_df

Unnamed: 0_level_0,time,hls area [km2],Platform,reservoir
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-03,2019-01-03 05:49:58.340,138.421800,Sentinel-2A,0502
2019-01-04,2019-01-04 05:29:53.138,,Landsat-8,0502
2019-01-05,2019-01-05 05:40:21.190,,Sentinel-2B,0502
2019-01-08,2019-01-08 05:50:02.660,99.673200,Sentinel-2B,0502
2019-01-13,2019-01-13 05:23:40.856,119.252700,Landsat-8,0502
...,...,...,...,...
2024-03-22,2024-03-22 05:50:04.285,92.379600,Sentinel-2B,0502
2024-03-23,2024-03-23 05:23:57.508,91.269905,Landsat-9,0502
2024-03-27,2024-03-27 05:50:01.504,137.802600,Sentinel-2A,0502
2024-03-29,2024-03-29 05:40:08.258,119.106000,Sentinel-2B,0502


In [15]:
test_df = sat_df.join(insitu_df, how='left', rsuffix='_insitu')
test_df.rename({
    'area [km2]': 'insitu area [km2]',
}, axis=1, inplace=True)
test_df

Unnamed: 0_level_0,time,hls area [km2],Platform,reservoir,insitu area [km2],In-situ data
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-01-03,2019-01-03 05:49:58.340,138.421800,Sentinel-2A,0502,124.808186,Bhakra
2019-01-04,2019-01-04 05:29:53.138,,Landsat-8,0502,,
2019-01-05,2019-01-05 05:40:21.190,,Sentinel-2B,0502,,
2019-01-08,2019-01-08 05:50:02.660,99.673200,Sentinel-2B,0502,,
2019-01-13,2019-01-13 05:23:40.856,119.252700,Landsat-8,0502,122.483863,Bhakra
...,...,...,...,...,...,...
2024-03-22,2024-03-22 05:50:04.285,92.379600,Sentinel-2B,0502,,
2024-03-23,2024-03-23 05:23:57.508,91.269905,Landsat-9,0502,,
2024-03-27,2024-03-27 05:50:01.504,137.802600,Sentinel-2A,0502,,
2024-03-29,2024-03-29 05:40:08.258,119.106000,Sentinel-2B,0502,,


In [16]:
test_df[['platform', 'reservoir', 'hls area [km2]', 'insitu area [km2]']].hvplot.scatter(
    x='date', y=['hls area [km2]', 'insitu area [km2]'], width=800, height=400,
).opts(ylabel='Area (km2)', title='Area comparison between HLS and in-situ data')

KeyError: "['platform'] not in index"

In [133]:
import HydroErr as he


metrics = [
    'ME', 'MAE', 'NRMSE mean', 'NRMSE range', 'R^2', 'Pearson r', 'NSE', 'KGE 2012',
]

metrics_fn = [
    he.me, he.mae, he.nrmse_mean, he.nrmse_range, he.r_squared, he.pearson_r, he.nse, he.kge_2012, 
]

# results = pd.DataFrame({
#     ''
# })
metric_values = []

for metric_name, metric_fn in zip(metrics, metrics_fn):
    metric_value = metric_fn(test_df['hls area [km2]'], test_df['insitu area [km2]'])
    metric_values.append(metric_value)

perf_df = pd.DataFrame({metric_name: [metric_value] for metric_name, metric_value in zip(metrics, metric_values)})
perf_df['reservoir'] = reservoir
perf_df['senesor'] = 'hls'
perf_df['algorithm'] = 'tms-swot-v0.1.0'

perf_df

  109  178  210  221  230  312  313  314  321  331  340  341  346  347
  348  349  362  372  383  384  385  392  396  402  413  414  415  416
  431  432  436  445  446  447  451  457  458  462  463  471  546  547
  603  649  656  669  673  683  684  718  722  723  727  728  729  730
  731  732  736  737  740  741  742  752  753  754  755  756  757  758
  772  773  774  786  787  794  795  796  797  798  799  803  804  805
  806  835  836  848  861  875  910  917 1007 1068 1078 1079 1089 1093
 1096 1097 1101 1102 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128
 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142
 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156
 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198
 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
 1213 

Unnamed: 0,ME,MAE,NRMSE mean,NRMSE range,R^2,Pearson r,NSE,KGE 2012,reservoir,senesor,algorithm
0,0.292825,0.412752,0.121494,0.141262,0.797594,0.893081,0.781281,0.840897,464,hls,tms-swot-v0.1.0


In [134]:
from datetime import datetime

result_dir = Path('../data/results')
result_dir.mkdir(exist_ok=True)

d = datetime.today().strftime('%Y%m%d_%H%M%S')
save_dir = result_dir / f'{d}'
save_dir.mkdir(exist_ok=False)

# perf_df.to_csv(save_dir / 'performance.csv', index=False)

In [135]:
res_id = '0214'

import geopandas as gpd
from pathlib import Path

# read the bounding box of the study area
val_pts = gpd.read_file(Path('../data/validation-locations/subset-validation-reservoirs-grand-pts.geojson'))
val_polys = gpd.read_file(Path('../data/validation-locations/subset-validation-reservoirs-grand.geojson'))

dumboor_pt = val_pts.loc[val_pts['tmsos_id']==res_id]
dumboor_poly = val_polys.loc[val_polys['tmsos_id']==res_id]

dumboor_poly.hvplot(geo=True, tiles='OSM', alpha=0.5) * dumboor_pt.hvplot(geo=True, color='red', size=200, alpha=0.5)

In [136]:
sat_area_fp = Path(f'../data/area/hls_corrected/{res_id}.csv')
sat_area = pd.read_csv(sat_area_fp, dtype={'reservoir': str}, parse_dates=['time'])
sat_area.hvplot(
    x='time', y='hls area [km2]', kind='scatter', c='platform'
)

FileNotFoundError: [Errno 2] No such file or directory: '../data/area/hls_corrected/0214.csv'

In [None]:
dumboor_pt.columns

Index(['GRAND_ID', 'RES_NAME', 'DAM_NAME', 'ALT_NAME', 'RIVER', 'ALT_RIVER',
       'MAIN_BASIN', 'SUB_BASIN', 'NEAR_CITY', 'ALT_CITY', 'ADMIN_UNIT',
       'SEC_ADMIN', 'COUNTRY', 'SEC_CNTRY', 'YEAR', 'ALT_YEAR', 'REM_YEAR',
       'DAM_HGT_M', 'ALT_HGT_M', 'DAM_LEN_M', 'ALT_LEN_M', 'AREA_SKM',
       'AREA_POLY', 'AREA_REP', 'AREA_MAX', 'AREA_MIN', 'CAP_MCM', 'CAP_MAX',
       'CAP_REP', 'CAP_MIN', 'DEPTH_M', 'DIS_AVG_LS', 'DOR_PC', 'ELEV_MASL',
       'CATCH_SKM', 'CATCH_REP', 'DATA_INFO', 'USE_IRRI', 'USE_ELEC',
       'USE_SUPP', 'USE_FCON', 'USE_RECR', 'USE_NAVI', 'USE_FISH', 'USE_PCON',
       'USE_LIVE', 'USE_OTHR', 'MAIN_USE', 'LAKE_CTRL', 'MULTI_DAMS',
       'TIMELINE', 'COMMENTS', 'URL', 'QUALITY', 'EDITOR', 'LONG_DD', 'LAT_DD',
       'POLY_SRC', 'index_right', 'deltares_id', 'deltares_filename', 'db',
       'name', 'rid_id', 'grand_id', 'rid_filepath', 'resops_id',
       'rid_filename', 'tmsos_id', 'distance', 'geometry'],
      dtype='object')

In [24]:
deltares_id = int(dumboor_pt['deltares_id'].values[0])
deltares_name = f'{deltares_id:07}.csv'
deltares_fp = Path(f'../data/insitu/deltares/') / deltares_name
print(deltares_fp)

insitu_df = pd.read_csv(deltares_fp, parse_dates=['time'])
insitu_df['insitu area [km2]'] = insitu_df['area'] / 1e6
insitu_df

../data/insitu/deltares/0087711.csv


Unnamed: 0,time,area,insitu area [km2]
0,1988-01-11 03:47:00,3.800564e+07,38.005638
1,1988-02-28 03:48:00,3.396474e+07,33.964735
2,1988-03-31 03:48:00,3.047994e+07,30.479940
3,1988-09-23 03:49:00,4.290060e+07,42.900601
4,1988-10-09 03:49:00,4.314776e+07,43.147758
...,...,...,...
909,2021-09-23 04:41:00,3.909731e+07,39.097311
910,2021-09-23 04:42:00,3.902300e+07,39.023004
911,2021-09-25 04:32:00,3.852712e+07,38.527118
912,2021-09-25 04:32:00,3.837526e+07,38.375262


In [30]:
insitu_clean_df = insitu_df.groupby([insitu_df['time'].dt.date])[['area', 'insitu area [km2]']].mean().reset_index()
insitu_clean_df['time'] = pd.to_datetime(insitu_clean_df['time'])
insitu_clean_df

Unnamed: 0,time,area,insitu area [km2]
0,1988-01-11,3.800564e+07,38.005638
1,1988-02-28,3.396474e+07,33.964735
2,1988-03-31,3.047994e+07,30.479940
3,1988-09-23,4.290060e+07,42.900601
4,1988-10-09,4.314776e+07,43.147758
...,...,...,...
709,2021-08-31,3.731312e+07,37.313122
710,2021-09-05,3.836447e+07,38.364467
711,2021-09-23,3.906016e+07,39.060158
712,2021-09-25,3.845119e+07,38.451190


In [35]:
sat_area.hvplot(
    kind='scatter', x='time', y='s2 area [km2]', label='Sentinel-2 area [km2] (uncorrected for clouds)'
) * insitu_clean_df.loc[
    (insitu_clean_df['time'] >= sat_area['time'].min())&(insitu_clean_df['time'] <= sat_area['time'].max())
].hvplot(
    kind='scatter', x='time', y='insitu area [km2]', label='insitu area [km2]'
)

NameError: name 'sat_area' is not defined

## todo: error metrics