In [None]:
import datacube
import geopandas as gpd
import pandas as pd
import numpy as np
from deafrica_tools.classification import HiddenPrints
from deafrica_tools.datahandling import wofs_fuser
from deafrica_tools.spatial import xr_rasterize

In [None]:
# Relative path of the waterbodies polygon shapefile that is handed to
# extract_timeseries() and timeseries_value_comparison() further down.
waterbodies_vector_file = "testing_data/senegalbasinwaterbodies.shp"

In [None]:
def extract_timeseries(waterbodies_vector_file, waterbody_uid):
    """Load the timeseries CSV linked to a single waterbody polygon.

    Parameters
    ----------
    waterbodies_vector_file : str
        Path of the waterbodies polygon file, read with ``gpd.read_file``.
    waterbody_uid : str
        Value of the ``UID`` attribute identifying the waterbody.

    Returns
    -------
    pandas.DataFrame
        Contents of the CSV referenced by the polygon's ``timeseries``
        attribute, as parsed by ``pd.read_csv``.

    Raises
    ------
    ValueError
        If the file does not contain exactly one polygon with the given UID.
    """
    # read the polygon shapefile
    polygons_gdf = gpd.read_file(waterbodies_vector_file)

    # select a given waterbody using the UID
    waterbody_gdf = polygons_gdf.loc[polygons_gdf['UID'].isin([waterbody_uid])]

    # Exactly one polygon must match; otherwise there is no single timeseries
    # link to follow. Report which UID/file was at fault instead of failing
    # with an opaque "array of size 1" conversion error.
    match_count = len(waterbody_gdf)
    if match_count != 1:
        raise ValueError(
            f"Expected exactly one waterbody with UID {waterbody_uid!r} in "
            f"{waterbodies_vector_file!r}, found {match_count}"
        )

    # extract the timeseries link from the geodataframe
    timeseries_link = waterbody_gdf['timeseries'].iloc[0]

    # read the timeseries csv
    return pd.read_csv(timeseries_link)

In [None]:
def timeseries_value_comparison(last_obs, waterbodies_vector_file, waterbody_uid):
    """Compare the latest timeseries record with values recomputed from WOfS.

    The last row of ``last_obs`` is taken as the observation to validate. The
    ``wofs_ls`` product is loaded for that row's ``date`` over the bounding box
    of the waterbody polygon, masked to the polygon, and the wet / dry /
    invalid pixel counts and percentages are recomputed and set side by side
    with the values stored in the row.

    Parameters
    ----------
    last_obs : pandas.DataFrame
        Timeseries rows for the waterbody; only the final row is used. It must
        provide ``date``, ``pc_wet``, ``px_wet``, ``pc_dry``, ``px_dry``,
        ``pc_invalid`` and ``px_invalid``.
    waterbodies_vector_file : str
        Path of the waterbodies polygon file, read with ``gpd.read_file``.
    waterbody_uid : str
        Value of the ``UID`` attribute identifying the waterbody.

    Returns
    -------
    pandas.DataFrame
        One row per attribute with the columns ``Attributes``,
        ``CSV Results``, ``Validation Results`` and ``Difference``
        (CSV value minus recomputed value). Percentages are NaN when no
        pixel falls inside the waterbody polygon.

    Raises
    ------
    IndexError
        If ``last_obs`` contains no rows.
    """
    # Fail before touching the shapefile or the datacube when there is no
    # observation to validate; same exception type as an empty `.iloc[-1]`.
    if len(last_obs) == 0:
        raise IndexError(
            f"No observations supplied for waterbody {waterbody_uid!r}; "
            "cannot select the most recent one"
        )
    latest_obs = last_obs.iloc[-1]

    # read the polygon shapefile
    polygons_gdf = gpd.read_file(waterbodies_vector_file)

    # select a given waterbody using the UID
    waterbody_gdf = polygons_gdf.loc[polygons_gdf['UID'].isin([waterbody_uid])]

    with HiddenPrints():
        dc = datacube.Datacube(app="timeseries_test")
        # total_bounds is (minx, miny, maxx, maxy) of the waterbody
        min_x, min_y, max_x, max_y = waterbody_gdf.total_bounds
        query = {'x': (min_x, max_x),
                 'y': (min_y, max_y),
                 'time': latest_obs['date']}
        wofls = dc.load(product='wofs_ls',
                        group_by="solar_day",
                        fuse_func=wofs_fuser,
                        output_crs='EPSG:6933',
                        collection_category="T1",
                        resolution=(-30, 30),
                        **query)

    wofls_ds = wofls.water
    # Generate a polygon mask to keep only data within the waterbody polygon
    polygon_mask = xr_rasterize(waterbody_gdf, wofls_ds)
    # Mask dataset to set pixels outside of the polygon to 'NaN'
    wofls_masked = wofls_ds.where(polygon_mask)
    waterbody_mask = wofls_masked.notnull()

    def _count_within_waterbody(flags):
        # Pixels outside the polygon become NaN and are skipped by sum().
        return flags.where(waterbody_mask).sum().item()

    # masked waterbody pixel count
    pixel_count = waterbody_mask.sum().item()

    def _percentage(count):
        # No pixel inside the polygon: the percentage is undefined, not an error.
        if pixel_count == 0:
            return float('nan')
        return (count / pixel_count) * 100

    # Number of pixels observed to be valid (clear) and wet
    valid_and_wet_count = _count_within_waterbody(wofls_masked == 128)
    # Number of pixels observed to be valid (clear) and dry
    valid_and_dry_count = _count_within_waterbody(wofls_masked == 0)
    # Number of pixels observed to be invalid (anything other than 128 or 0)
    invalid_count = _count_within_waterbody(~wofls_masked.isin([128, 0]))

    validation = {
        'pc_wet': _percentage(valid_and_wet_count),
        'px_wet': valid_and_wet_count,
        'pc_dry': _percentage(valid_and_dry_count),
        'px_dry': valid_and_dry_count,
        'pc_invalid': _percentage(invalid_count),
        'px_invalid': invalid_count,
    }

    attributes = ['pc_wet', 'px_wet', 'pc_dry', 'px_dry', 'pc_invalid', 'px_invalid']
    csv_results = [latest_obs[name] for name in attributes]
    validation_results = [validation[name] for name in attributes]

    comparison_df = pd.DataFrame({
        'Attributes': attributes,
        'CSV Results': csv_results,
        'Validation Results': validation_results,
        'Difference': [csv_value - recomputed
                       for csv_value, recomputed in zip(csv_results, validation_results)],
    })
    return comparison_df

In [None]:
waterbody_uid = ['efc3cy8ek','een136fc6','ef3ubzgvj']


def _report_comparison(title, observations, uid):
    """Print the CSV-vs-recomputed comparison for the last row of `observations`.

    Returns the comparison DataFrame, or None when the comparison could not
    be produced (no rows selected, or the comparison itself failed).
    """
    # An empty selection has no "last observation" to validate.
    if observations.empty:
        print(f'{title}: Could not complete (no matching observations)')
        return None
    try:
        comparison_df = timeseries_value_comparison(observations, waterbodies_vector_file, uid)
    except Exception as err:
        # Best effort: keep going with the remaining comparisons, but say why.
        print(f'{title}: Could not complete ({type(err).__name__}: {err})')
        return None
    print(title)
    print(comparison_df)
    return comparison_df


for uid in waterbody_uid:
    waterbody_timeseries = extract_timeseries(waterbodies_vector_file, uid)

    # observations recorded as more than 10% wet
    wet_obs = waterbody_timeseries[waterbody_timeseries['pc_wet']>10]

    # wet observations without any invalid pixels (pc_invalid > 0 removed)
    csv_wet_dry = wet_obs.drop(wet_obs[wet_obs['pc_invalid']>0].index)

    # wet observations that also contain invalid pixels (pc_invalid == 0 removed)
    csv_wet_dry_invalid = wet_obs.drop(wet_obs[wet_obs['pc_invalid']==0].index)

    # observations recorded as more than 50% invalid
    invalid_obs = waterbody_timeseries[waterbody_timeseries['pc_invalid']>50]

    print(uid)

    csv_wet_dry_comparison = _report_comparison(
        'CSV Wet and Dry Observation Value Comparison', csv_wet_dry, uid)

    csv_wet_dry_invalid_comparison = _report_comparison(
        'CSV Wet, Dry and Invalid Observation Value Comparison', csv_wet_dry_invalid, uid)

    csv_invalid_comparison = _report_comparison(
        'CSV Invalid Observation Value Comparison', invalid_obs, uid)