# Mapping water extent and rainfall

* **Products used:** 
[wofs_ls](https://explorer.digitalearth.africa/products/wofs_ls),
[rainfall_chirps_monthly](https://explorer.digitalearth.africa/products/rainfall_chirps_monthly)

## Background


## Description

***

## Load packages
Import Python packages that are used for the analysis.

In [None]:
%matplotlib inline

import datacube
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import time
from datacube.utils import geometry
from datacube.utils import masking


from deafrica_tools.dask import create_local_dask_cluster
from deafrica_tools.datahandling import wofs_fuser
from deafrica_tools.spatial import xr_rasterize

## Set up a Dask cluster

Dask can be used to better manage memory use and conduct the analysis in parallel. 

In [None]:
# Start a local Dask cluster using the helper imported from deafrica_tools.dask
create_local_dask_cluster()

## Connect to Data Cube

In [None]:
# Connect to the datacube; `app` is a short name identifying this notebook's session
dc = datacube.Datacube(app='long_term_water_extent')

## Analysis parameters

The following cell sets the parameters, which define the area of interest and the length of time to conduct the analysis over.


In [None]:
# Vector file holding the outline of the waterbody to analyse
water_extent_vector_file = 'data/lake_baringo_extent.geojson'

# Vector file holding the outline of the waterbody's catchment
water_catchment_vector_file = 'data/lake_baringo_catchment.geojson'

# Time period to analyse; passed as the 'time' entry of the load queries
time_range = ('2019', '2020')

# Frequency string passed to xarray's resample(); alternatives tried: '3M', '1M'
resample_strategy = 'QS-DEC'

## Get waterbody and catchment geometries

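The waterbody and catchment outlines are read from the vector files above and converted into datacube geometries, which are then used to define the area over which data is loaded.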

In [None]:
def get_geometry(gdf):
    """Convert the first feature of a GeoDataFrame into a datacube Geometry.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Vector data with a 'geometry' column and a CRS. Only the first row
        is used; any further rows are ignored.

    Returns
    -------
    datacube.utils.geometry.Geometry
        The first feature's geometry, tagged with the GeoDataFrame's CRS.

    Raises
    ------
    ValueError
        If ``gdf`` has no rows or has no CRS assigned.
    """
    if gdf.empty:
        raise ValueError('Cannot build a geometry from an empty GeoDataFrame')
    if gdf.crs is None:
        raise ValueError('GeoDataFrame has no CRS; assign one before building a geometry')

    # to_epsg() returns None when the CRS has no matching EPSG code; fall back
    # to the WKT description rather than producing the invalid 'EPSG:None'.
    epsg_code = gdf.crs.to_epsg()
    gdf_crs = f'EPSG:{epsg_code}' if epsg_code is not None else gdf.crs.to_wkt()

    # Positional access: the first row does not necessarily carry index label 0
    # (e.g. after filtering or sorting the GeoDataFrame).
    gdf_geom = gdf['geometry'].iloc[0].__geo_interface__

    return geometry.Geometry(gdf_geom, gdf_crs)

In [None]:
# Read the waterbody outline from file
extent = gpd.read_file(water_extent_vector_file)

# Convert the outline into a datacube geometry (used as the WOfS load area)
extent_geometry = get_geometry(extent)

In [None]:
# Read the catchment outline from file
catchment = gpd.read_file(water_catchment_vector_file)

# Convert the outline into a datacube geometry
catchment_geometry = get_geometry(catchment)

## Load Water Observations from Space (WOfS) for the waterbody

In [None]:
# Query describing what to load: the analysis period, the output grid
# (30 m pixels in EPSG:6933) and the waterbody polygon as the area of interest.
extent_query = dict(
    time=time_range,
    resolution=(-30, 30),
    output_crs='EPSG:6933',
    geopolygon=extent_geometry,
    group_by='solar_day',
)

# Load WOfS; observations grouped onto the same solar day are combined
# with wofs_fuser.
wofs_ds = dc.load(product='wofs_ls', fuse_func=wofs_fuser, **extent_query)

In [None]:
# Turn the WOfS bit flags into a boolean mask that is True where the `wet` flag matches
water_ds = masking.make_mask(wofs_ds, wet=True)

In [None]:
# Get label
def get_resampled_labels(ds, freq, date_format='%b %y'):
    """Build a 'start - end' text label for every resampling bin of ``ds``.

    Parameters
    ----------
    ds : object with an xarray-style ``resample`` method
        Data with a ``time`` dimension.
    freq : str
        Resampling frequency passed to ``ds.resample(time=...)``.
    date_format : str, optional
        ``strftime`` pattern used to format both ends of each label.

    Returns
    -------
    list of str
        One label per bin, formatted as ``'<left label> - <right label>'``.
    """
    def _format_bin_labels(side):
        # The keys of ``groups`` are the bin labels for the requested side.
        bins = ds.resample(time=freq, label=side).groups
        return [pd.to_datetime(str(stamp)).strftime(date_format) for stamp in bins]

    starts = _format_bin_labels('left')
    ends = _format_bin_labels('right')

    return [f'{start} - {end}' for start, end in zip(starts, ends)]

In [None]:
# Collapse the observations into one layer per resampling period. Taking the
# maximum of the water mask flags a pixel as water if it was wet in any
# observation within the period.
resampled_water_ds = water_ds.resample(
    time=resample_strategy, 
    label='right'
).max()


# Get 'start - end' date labels for the plots, one per resampling period
resampled_labels = get_resampled_labels(water_ds, resample_strategy)

In [None]:
# Display the resampled dataset to inspect its dimensions and time steps
resampled_water_ds

In [None]:
# Side length of a pixel, taken from the second entry of the load resolution
# (30 in the query above); the area below assumes square pixels
pixel_length = extent_query["resolution"][1]  # in metres
m_per_km = 1000  # conversion from metres to kilometres
area_per_pixel = pixel_length**2 / m_per_km**2  # km^2 covered by one pixel

# Count the water pixels in each time step and convert the count to an area in km^2
resampled_water_area_ds = resampled_water_ds.sum(dim=['x', 'y']) * area_per_pixel

In [None]:
# Line chart of the observed water area for each resampling period
fig, ax = plt.subplots(figsize=(10, 4))

ax.plot(
    resampled_labels,
    resampled_water_area_ds.water.values,
    color='blue',
    linewidth=1,
    marker='^',
    markersize=4,
)

# One tick per period label, rotated so the date ranges do not overlap
plt.xticks(resampled_labels, rotation=65)

ax.set_title(f'Observed Area of Water from {time_range[0]} to {time_range[1]}')
ax.set_ylabel('Waterbody area (km$^2$)')
fig.tight_layout()

## Load CHIRPS monthly rainfall

In [None]:
# Query for the rainfall load. The area of interest is the catchment that
# drains into the waterbody, not the waterbody itself.
# NOTE(review): CHIRPS is a much coarser product than the 30 m grid requested
# here, so this load resamples it heavily -- confirm whether a coarser
# resolution would be more appropriate for the catchment.
catchment_query = {
    'time': time_range,
    'resolution': (-30, 30),
    'output_crs': 'EPSG:6933',
    'geopolygon': catchment_geometry,
    'group_by': 'solar_day'
}

rainfall_ds = dc.load(
    product='rainfall_chirps_monthly',
    **catchment_query)

# Rasterize the catchment polygon onto the grid of the loaded rainfall data
mask = xr_rasterize(catchment, rainfall_ds)

# Keep rainfall only where the mask is set; all other pixels become NaN
masked_rainfall_ds = rainfall_ds.where(mask)

## Resample to estimate rainfall for each time period

This is done by calculating the average rainfall over the extent of the catchment at each time step, then summing these averages over the resampling period to estimate the total rainfall for the catchment.

In [None]:
# Calculate the spatial average of the masked rainfall at each time step
# (pixels outside the mask are NaN after the `where` above)
catchment_rainfall_ds = masked_rainfall_ds.mean(dim=('x', 'y'))

# Sum the per-time-step averages within each resampling period; label='right'
# matches the labelling used for the resampled water extent
catchment_rainfall_resampled_ds = catchment_rainfall_ds.resample(
    time=resample_strategy, 
    label='right'
).sum(dim='time')

In [None]:
# Plot total rainfall per resampling period (bars, left axis) together with
# the waterbody area (line, right axis)
fig, ax1 = plt.subplots(figsize=(10, 6))

# Rotate the period labels so the date ranges do not overlap
plt.xticks(rotation=65)

# Bar chart of total rainfall in each resampling period
ax1.bar(
    resampled_labels,
    catchment_rainfall_resampled_ds.rainfall.values,
    color="lightblue",
    label='Total rainfall',
)

ax1_handles, ax1_labels = ax1.get_legend_handles_labels()

# Line chart of water area on a second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.plot(
    resampled_labels,
    resampled_water_area_ds.water.values,
    color='red',
    marker='^',
    markersize=4,
    linewidth=1,
    label='Waterbody Area'
)

ax2_handles, ax2_labels = ax2.get_legend_handles_labels()

# Format plot (note the trailing space so the two title fragments join correctly)
fig.suptitle('Evolution of Lake surface area, compared to catchment rainfall (CHIRPS) over ' +
             f'time from {time_range[0]} to {time_range[1]}')

# Merge the legend entries of both axes into a single legend
ax1_handles.extend(ax2_handles)
ax1_labels.extend(ax2_labels)
ax1.legend(ax1_handles, ax1_labels, loc='upper left')

# Label the rainfall axis with the units recorded on the loaded rainfall data
units = rainfall_ds.rainfall.attrs['units']
ax1.set_ylabel(f'Total Precipitation ({units})')

# Colour the right-hand axis to match the water-area line
ax2.set_ylabel('Waterbody area (km$^2$)')
ax2.tick_params(axis='y', colors='red')
ax2.yaxis.label.set_color('red')

fig.tight_layout()
plt.savefig('combine.pdf')

# Next steps:
* ~~Work out how to get average spatial rainfall, then sum over resampling time -- check that I get the same answer irrespective of the order in which the two steps are done~~
* ~~Combine rainfall and wofs data extent to create combined plot~~
* Add code to plot change in water extent between two times
* Update quantitative colours in water extent change plot