## Using WOfS Bitmasking on Waterbodies

* **Product used:**
[wofs_ls](https://explorer.digitalearth.africa/wofs_ls)

## Description 
This notebook uses individual classified images from the Water Observation Feature Layers (WOFLs), which are used in the image masking process. 

The data within a WOFL is stored as a bit field, i.e. a binary number in which each bit is independently set (1) or not set (0) depending on the presence or absence of a particular attribute (water, cloud, cloud shadow, etc.). Each pixel therefore holds a single decimal value that encodes which features are present in that pixel. 

This notebook works through:

1. Loading WOFL data for a given waterbody and timeframe
2. Creating pixel masks with varying factors 

### Load packages and apps 

This notebook works using two functions, referred to as apps:
`load_waterbody_data` and `pixel_mask`. 

In [None]:
%matplotlib widget 

import ipywidgets as widgets

import geopandas as gpd
import matplotlib.pyplot as plt
import datacube
import sys

from datacube.storage import masking

from deafrica_tools.plotting import rgb
from deafrica_tools.datahandling import load_ard
from deafrica_tools.spatial import xr_rasterize
from deafrica_tools.datahandling import wofs_fuser, mostcommon_crs

### Load the waterbody data

The `load_waterbody_data` function performs several steps, including:

* identifies geojson file with the waterbody coordinates
* identifies a custom envelope boundary for the chosen waterbody
* loads WOFLs data 
* returns the data for analysis 

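The function takes five arguments: `waterbody_name`, `start_date`, `end_date`, `file_name` and `column`. `file_name` is the GeoJSON file holding the waterbody polygons and `column` is the attribute column containing their names; together with `waterbody_name` they determine the named polygon which the function loads. `start_date` and `end_date` set the timeframe in which the user seeks to find data.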

In [None]:
def load_waterbody_data(waterbody_name, start_date, end_date, file_name, column):
    """
    Load WOfS feature layers (WOFLs) covering one named waterbody.

    The waterbody is looked up by name in a vector file and its bounding
    box is used as the spatial extent of the datacube query.

    Parameters
    ----------
    waterbody_name : str
        Value to look for in ``column``. If several features match, the
        first one is used.
    start_date, end_date
        Limits of the time range, passed unchanged as the ``time`` entry
        of the datacube query.
    file_name : str
        Path of the vector file (e.g. GeoJSON) read with ``gpd.read_file``.
    column : str
        Name of the attribute column that holds the waterbody names.

    Returns
    -------
    xarray.Dataset
        The result of ``dc.load`` for the ``wofs_ls`` product, requested in
        EPSG:6933 with a resolution of ``(-30, 30)`` and grouped by solar
        day.

    Raises
    ------
    ValueError
        If no feature in ``file_name`` has ``waterbody_name`` in ``column``.
    """
    # Find the requested waterbody before opening a datacube connection so
    # that a wrong name fails fast and with a readable message.
    ponds_gdf = gpd.read_file(file_name)
    single_pond = ponds_gdf.loc[ponds_gdf[column] == waterbody_name]
    if single_pond.empty:
        raise ValueError(
            f"No waterbody named {waterbody_name!r} found in column "
            f"{column!r} of {file_name!r}"
        )

    # The bounds of a geometry are the same as the bounds of its envelope,
    # so read them directly rather than overwriting the geometry column of
    # a filtered frame (which triggers pandas' SettingWithCopyWarning).
    left, bottom, right, top = single_pond.bounds.values[0]

    desired_crs = 'EPSG:6933'
    res = 30

    # Construct the data cube query
    query = {
        'x': (left, right),
        'y': (top, bottom),
        'time': (start_date, end_date),
        'resolution': (-res, res),
    }

    # Load the data
    dc = datacube.Datacube(app='pond_analysis')
    wofls = dc.load(
        product='wofs_ls',
        group_by="solar_day",
        fuse_func=wofs_fuser,
        output_crs=desired_crs,
        **query,
    )

    return wofls


### Pixel Masking

This function converts the WOFL bit field into a binary array, containing True and False values. The `pixel_mask` function creates masks using flag labels such as "wet", "dry", "cloud", "cloud shadow" or "no data" to identify: 

* pixels with water present
* pixels without water present
* pixels which are cloud covered 
* pixels which hold no data

Then returns the data for analysis. 

In [None]:
def pixel_mask(data, file_name, waterbody_name=None, column=None):
    """
    Add boolean WOfS flag masks to ``data`` and clip it to the waterbodies.

    Parameters
    ----------
    data : xarray.Dataset
        Dataset with a ``water`` variable holding the WOfS bit flags and a
        ``time`` dimension. It is not modified.
    file_name : str
        Path of the vector file whose polygons are rasterised to build the
        spatial mask.
    waterbody_name : optional
        If given, only features whose ``column`` value equals this name are
        rasterised. By default every feature in the file is used, so other
        waterbodies that intersect the extent of ``data`` are included.
    column : str, optional
        Attribute column compared against ``waterbody_name``. Required when
        ``waterbody_name`` is given.

    Returns
    -------
    xarray.Dataset
        ``data`` with the additional variables ``wet``, ``dry``, ``cloud``,
        ``cloud_shadow`` and ``nodata``, masked with ``Dataset.where`` so
        that only pixels inside the rasterised polygons keep their values.

    Raises
    ------
    ValueError
        If ``waterbody_name`` is given without ``column``, or if it matches
        no feature in ``file_name``.
    """
    # Build every mask from the flag band itself. Masking the whole dataset
    # would also re-mask the boolean variables added below, only for the
    # result to be thrown away by the ``.water`` selection.
    water = data.water
    flag_masks = {
        'wet': masking.make_mask(water, wet=True),
        'dry': masking.make_mask(water, dry=True),
        'cloud': masking.make_mask(water, cloud=True),
        'cloud_shadow': masking.make_mask(water, cloud_shadow=True),
        'nodata': masking.make_mask(water, nodata=True),
    }

    # ``assign`` returns a new dataset, leaving the caller's ``data`` intact.
    flagged = data.assign(**flag_masks)

    ponds_gdf = gpd.read_file(file_name)
    if waterbody_name is not None:
        if column is None:
            raise ValueError("'column' is required when 'waterbody_name' is given")
        ponds_gdf = ponds_gdf.loc[ponds_gdf[column] == waterbody_name]
        if ponds_gdf.empty:
            raise ValueError(
                f"No waterbody named {waterbody_name!r} found in column "
                f"{column!r} of {file_name!r}"
            )

    # Rasterise the polygons onto the grid of a single time step.
    pond_shape_raster = xr_rasterize(
        ponds_gdf, flagged.wet.isel(time=0), crs='EPSG:6933'
    )

    pond_only_data = flagged.where(pond_shape_raster)

    return pond_only_data

### Determining Pixel Percent Coverage

The `percent_coverage` function calculates the total number of pixels within the waterbody, then computes the proportion of those pixels carrying each of the flag labels from the previous function. It then calculates the total proportion of unusable data, which combines the "cloud", "cloud_shadow" and "nodata" flags, so that observations with too much unusable data (5% or more) can be filtered out of the dataset. 


In [None]:
def percent_coverage(data, max_bad_data=0.05):
    """
    Compute per-time-step coverage of each flag and drop poor observations.

    Parameters
    ----------
    data : xarray.Dataset
        Dataset with a ``water`` variable and the ``wet``, ``dry``,
        ``cloud``, ``cloud_shadow`` and ``nodata`` variables, on the
        dimensions ``time``, ``x`` and ``y``. It is not modified.
    max_bad_data : float, optional
        Time steps are kept only when their ``bad_data_percent`` is strictly
        below this value. Defaults to 0.05.

    Returns
    -------
    xarray.Dataset
        The variables ``water_percent``, ``dry_percent`` and
        ``bad_data_percent`` for the retained time steps. Despite their
        names the values are fractions of the pixel count (flagged pixels
        divided by total pixels); they are not scaled to 0-100.
    """
    pixel_dims = ['x', 'y']

    # Denominator: the number of non-null ``water`` pixels in the first
    # time step.
    number_of_pixels = data.isel(time=0).water.count(dim=pixel_dims).values

    flag_outputs = {
        'wet': 'water_percent',
        'dry': 'dry_percent',
        'cloud': 'cloud_percent',
        'cloud_shadow': 'cloud_shadow_percent',
        'nodata': 'nodata_percent',
    }
    fractions = {
        output_name: data[flag].sum(dim=pixel_dims) / number_of_pixels
        for flag, output_name in flag_outputs.items()
    }

    # Everything that prevents a clear observation counts as bad data.
    fractions['bad_data_percent'] = (
        fractions['cloud_percent']
        + fractions['cloud_shadow_percent']
        + fractions['nodata_percent']
    )

    # ``assign`` returns a new dataset, leaving the caller's ``data`` intact.
    coverage = data.assign(**fractions)

    # Filter observations based on the share of unusable pixels.
    data_filtered = coverage[
        ['water_percent', 'dry_percent', 'bad_data_percent']
    ].where(coverage.bad_data_percent < max_bad_data, drop=True)

    return data_filtered

In [None]:
def make_box_layout():
    """Return a ``widgets.Layout`` with a thin black border, margin and padding."""
    box_style = {
        'border': 'solid 1px black',
        'margin': '0px 10px 10px 0px',
        'padding': '5px 5px 5px 5px',
    }
    return widgets.Layout(**box_style)

class waterbody_app(widgets.HBox):
    """
    Interactive widget that plots water coverage over time for a waterbody.

    The controls panel holds a start-date picker, an end-date picker, a
    dropdown of the waterbody names read from ``self.file_name``, a "Run"
    button and a status line. Pressing "Run" loads the data for the current
    selection, computes the coverage of each class and redraws the plot in
    the output panel.
    """

    def __init__(self):
        # Imported locally because the notebook's import cell does not
        # provide ``datetime``.
        from datetime import datetime

        super().__init__()
        output = widgets.Output()

        # Fixed settings and initial selection. Dates are kept as ISO
        # 'YYYY-MM-DD' strings (or None when a picker has been cleared).
        self.startdate = '2019-01-01'
        self.enddate = '2019-03-01'
        self.file_name = 'map (2).geojson'
        self.column = 'location'

        ponds_gdf = gpd.read_file(self.file_name)
        ponds_names = list(ponds_gdf[self.column].values)
        self.waterbody = ponds_names[0]

        # Create the figure inside the output widget so it is rendered there.
        with output:
            self.fig, self.ax = plt.subplots(constrained_layout=True, figsize=(5, 3.5))
        self.fig.canvas.toolbar_position = 'bottom'

        # The pickers start on the default dates so that what the user sees
        # is what "Run" will actually use.
        startdate_picker = widgets.DatePicker(
            description='Pick a start date',
            value=datetime.strptime(self.startdate, '%Y-%m-%d').date(),
            disabled=False,
        )
        enddate_picker = widgets.DatePicker(
            description='Pick an end date',
            value=datetime.strptime(self.enddate, '%Y-%m-%d').date(),
            disabled=False,
        )
        waterbody_name = widgets.Dropdown(
            options=ponds_names,
            value=ponds_names[0],
            description='Waterbody Name:',
            disabled=False,
        )
        run_button = widgets.Button(
            description='Run',
        )
        # Progress and error messages; exceptions raised inside widget
        # callbacks are otherwise easy to miss in the notebook.
        self.status_label = widgets.Label(value='')

        controls = widgets.VBox([
            startdate_picker,
            enddate_picker,
            waterbody_name,
            run_button,
            self.status_label,
        ])
        controls.layout = make_box_layout()
        output.layout = make_box_layout()

        # Keep the stored selection in sync with the widgets.
        startdate_picker.observe(self.update_startdate, 'value')
        enddate_picker.observe(self.update_enddate, 'value')
        waterbody_name.observe(self.update_waterbody, 'value')

        run_button.on_click(self.run_app)

        # add to children
        self.children = [controls, output]

    @staticmethod
    def _as_iso(value):
        """Return ``value.isoformat()`` if available, else ``value`` unchanged."""
        return value.isoformat() if hasattr(value, 'isoformat') else value

    def update_startdate(self, change):
        """Store the newly picked start date (None if the picker was cleared)."""
        self.startdate = self._as_iso(change.new)

    def update_enddate(self, change):
        """Store the newly picked end date (None if the picker was cleared)."""
        self.enddate = self._as_iso(change.new)

    def update_waterbody(self, change):
        """Store the newly selected waterbody name."""
        self.waterbody = change.new

    def run_app(self, change):
        """
        Load data for the current selection and redraw the plot.

        ``change`` is the argument supplied by ``Button.on_click`` and is
        not used. Problems are reported in the status line; unexpected
        errors are re-raised after being reported.
        """
        if not self.startdate or not self.enddate:
            self.status_label.value = 'Please pick both a start and an end date.'
            return
        # ISO 'YYYY-MM-DD' strings sort chronologically.
        if self.startdate > self.enddate:
            self.status_label.value = 'The start date must not be after the end date.'
            return

        self.status_label.value = 'Loading data...'
        try:
            data = load_waterbody_data(
                self.waterbody, self.startdate, self.enddate, self.file_name, self.column
            )
            if not data.data_vars:
                self.status_label.value = 'No observations found for this selection.'
                return
            mask_data = pixel_mask(data, self.file_name)
            percentage_data = percent_coverage(mask_data)
        except Exception as err:
            self.status_label.value = f'Could not load data: {err}'
            raise

        # remove any existing lines from the plot
        self.ax.clear()

        if percentage_data.sizes.get('time', 0) == 0:
            self.status_label.value = 'No sufficiently clear observations in this period.'
        else:
            # percent_coverage returns fractions; scale them so the values
            # match the 'Percent Coverage' axis label.
            (percentage_data.water_percent * 100).plot(ax=self.ax, label='Water')
            (percentage_data.dry_percent * 100).plot(ax=self.ax, label='Dry')
            (percentage_data.bad_data_percent * 100).plot(ax=self.ax, label='Unobserved')
            self.ax.legend(loc='upper right')
            self.status_label.value = ''

        self.ax.set_title(self.waterbody)
        self.ax.set_xlabel('Date')
        self.ax.set_ylabel('Percent Coverage')

        # Request a redraw explicitly; the plot is updated from a callback.
        self.fig.canvas.draw_idle()
        

In [None]:
# Create the app. NOTE(review): presumably displayed because it is the last
# expression of a notebook cell - confirm if this is run outside Jupyter.
waterbody_app()