# WOfS anomalies averaged over polygons

## Load packages, connect to datacube

In [1]:
%matplotlib inline

import datacube
import gc
import matplotlib.pyplot as plt
import geopandas as gpd
from datacube.utils import geometry
import pandas as pd
import matplotlib as mpl
import xarray as xr
from odc.algo import xr_reproject
from datacube.utils.geometry import assign_crs

from deafrica_tools.spatial import xr_rasterize
from deafrica_tools.dask import create_local_dask_cluster

# Silence the deprecation warning raised when multi-part geometries are
# iterated over
import warnings

warnings.filterwarnings(
    "ignore",
    message="Iteration over multi-part geometries is deprecated and will be removed in ",
)

# Connect to the datacube; `dc` is used later to load the WOfS data
dc = datacube.Datacube(app="WOfS-figure")

In [2]:
# Start a local dask cluster (the cell output shows the client and dashboard
# details); the datacube loads below request dask-chunked arrays
create_local_dask_cluster()

0,1
Client  Scheduler: tcp://127.0.0.1:33559  Dashboard: /user/chad/proxy/8787/status,Cluster  Workers: 1  Cores: 31  Memory: 254.70 GB


## Analysis parameters

* `year`: The year the anomaly will be calculated for, i.e. if '2020' then the long-term mean will be subtracted from the WOfS area for 2020
* `vector_file`: The vector file (e.g. a shapefile or geojson) containing the polygons over which the anomalies are calculated. The final results are joined onto the geodataframe read from this file
* `attribute_col`: the column in the `vector_file` that is used to label the results, i.e. if this is a shapefile of African country boundaries then `attribute_col` should be the column containing the country names
* `output_suffix`: a filename suffix added to the exported  results geojson to identify what the results refer to. ie. if iterating over African countries then make this parameter something like 'countries'. 

In [3]:
# Set the year you want to compare (a string: it is used both to select the
# time step and to build column names and file names)
year = '2020'
# Vector file holding the polygons to iterate over
vector_file = "data/african_countries.shp"
# Column of the vector file used to label the results for each polygon
attribute_col = 'name'
# Suffix included in the name of the exported results geojson
output_suffix = 'countries'

## Open vector file and set-up ODC query

In [4]:
# Read the polygons into a geodataframe and preview the first rows
gdf = gpd.read_file(vector_file)
gdf.head()

Unnamed: 0,name,geometry
0,Sudan,"MULTIPOLYGON (((38.58148 18.02542, 38.58203 18..."
1,Angola,"MULTIPOLYGON (((11.79481 -16.81925, 11.79375 -..."
2,Benin,"MULTIPOLYGON (((1.86343 6.28872, 1.86292 6.288..."
3,Botswana,"POLYGON ((25.17447 -17.77881, 25.18476 -17.783..."
4,Burkina Faso,"POLYGON ((-0.45567 15.08082, -0.45411 15.07937..."


In [5]:
# Settings passed to the datacube query below
measurements = ['frequency']  # the only band loaded; thresholded later on
resolution = (-30,30)  # pixel size in units of `output_crs`
output_crs = 'EPSG:6933'

In [6]:
# Keyword arguments shared by every datacube load; the polygon to load is
# added under the 'geopolygon' key inside the processing loop
query = dict(
    measurements=measurements,
    resolution=resolution,
    output_crs=output_crs,
    dask_chunks={'x': 1000, 'y': 1000},
)

### Loop through polygons and process WOfS data



In [7]:
# Containers for the per-polygon results. Each dict is keyed by the polygon's
# label (the `attribute_col` value) and is filled in by the processing loop.

# Water area in the year of interest
year_area = dict()

# Summary statistics of the annual water-area time series
area_mean = dict()
area_std = dict()
area_min = dict()
area_max = dict()

# Years in which the minimum and maximum area occurred
area_idxmin = dict()
area_idxmax = dict()

# Comparison of the year of interest against the mean
diff = dict()
change_perc = dict()
anomaly = dict()

In [8]:
## Try to prevent garbage collection warning from dask by raising the
## collection thresholds. Note: re-running this cell multiplies the
## thresholds again.
g0, g1, g2 = gc.get_threshold()
gc.set_threshold(g0*3, g1*3, g2*3)

# Figures are only written to file, so turn off interactive plotting once
plt.ioff()

# Area of one pixel: squared pixel size divided by 1000**2
# (km^2, assuming the resolution is expressed in metres)
area_per_pixel = query["resolution"][1]**2 / 1000**2

# Placeholder stored when a ratio cannot be computed (zero mean or zero std)
nan = float('nan')

# Loop through polygons in geodataframe and extract satellite data.
# `i` is the positional row number: it drives the progress indicator and is
# used to select the current row with `.iloc`, so the loop does not rely on
# the index labels of `gdf` being 0..n-1.
for i, (_label, row) in enumerate(gdf.iterrows()):

    # A progress indicator
    print(" Feature {:02}/{:02}\r".format(i + 1, len(gdf)),
          end='')

    # Label used as the key of every results dict and in the figure name
    country = str(row[attribute_col])

    # Get the geometry
    geom = geometry.Geometry(row.geometry.__geo_interface__,
                             geometry.CRS(f'EPSG:{gdf.crs.to_epsg()}'))

    # Update dc query with geometry
    query.update({'geopolygon': geom})

    # Load wofs
    ds = dc.load(product='wofs_ls_summary_annual',
                 time=('2000', '2020'),
                 **query)

    # Generate a polygon mask to keep only data within the polygon
    mask = xr_rasterize(gdf.iloc[[i]], ds, verbose=False)
    ds = ds.where(mask)

    # Threshold: pixels with a frequency above 0.01 are counted as water
    water = ds.frequency > 0.01

    # Calculate the water area for each time step
    ds_area = (water.sum(dim=['x', 'y']) * area_per_pixel).compute()

    # Save timeseries of water area for reference
    fig, ax = plt.subplots(1, 1, figsize=(11, 5))
    ds_area.plot(ax=ax, marker='o', color='#9467bd')
    ax.set_title(f'Observed Annual Area of Water :{country}, 2000 to 2020')
    ax.set_xlabel('Year')
    ax.set_ylabel('Waterbody area (km$^2$)')
    fig.tight_layout()
    fig.savefig('results/pngs/wofs/wofs_area_'+country+'_2000_2020.png',
                bbox_inches='tight',
                dpi=200,
                facecolor="white")
    plt.close(fig)  # avoid accumulating open figures across iterations

    # Summary stats for area of water from 2000-2020
    ds_area_mean = ds_area.mean().values.item()
    ds_area_std = ds_area.std().values.item()
    ds_area_max = ds_area.max().values.item()
    ds_area_idxmax = int(ds_area.idxmax().dt.year.values)
    ds_area_min = ds_area.min().values.item()
    ds_area_idxmin = int(ds_area.idxmin().dt.year.values)

    # Area in year of interest
    ds_area_year = ds_area.sel(time=year).values.item()

    # Compare area. The statistics above are plain Python floats, so dividing
    # by a zero mean or std would raise ZeroDivisionError and abort the whole
    # loop; store NaN for that polygon instead.
    diff_area = ds_area_year - ds_area_mean
    diff_percent = diff_area / ds_area_mean * 100 if ds_area_mean else nan
    anomaly_year = diff_area / ds_area_std if ds_area_std else nan

    # Output results in dict
    year_area[country] = ds_area_year
    area_mean[country] = ds_area_mean
    area_std[country] = ds_area_std
    area_max[country] = ds_area_max
    area_min[country] = ds_area_min
    area_idxmin[country] = ds_area_idxmin
    area_idxmax[country] = ds_area_idxmax
    diff[country] = diff_area
    change_perc[country] = diff_percent
    anomaly[country] = anomaly_year

 Feature 01/55

CPLReleaseMutex: Error = 1 (Operation not permitted)


 Feature 55/55

## Concatenate results in a dataframe

In [9]:
# One dict per statistic, in the order of the column numbers renamed below
stat_dicts = [
    year_area,
    area_mean,
    area_std,
    area_max,
    area_min,
    area_idxmax,
    area_idxmin,
    diff,
    change_perc,
    anomaly,
]

# Descriptive name for each statistic, keyed by its position in `stat_dicts`
column_names = {
    0: f"Water Extent {year} (km2)",
    1: "Mean Water Extent 2000-2020 (km2)",
    2: "Std. Dev. Water Extent 2000-2020 (km2)",
    3: "Max Water Extent 2000-2020 (km2)",
    4: "Min Water Extent 2000-2020 (km2)",
    5: "Year of Max Water Extent 2000-2020",
    6: "Year of Min Water Extent 2000-2020",
    7: f"Difference in Water Extent: {year} - Mean (km2)",
    8: f"Percentage Difference in Water Extent: {year} - Mean",
    9: f"Standardised Anomaly in Water Extent: ({year} - Mean) / Std.dev.",
}

# Each dict becomes a row; transposing gives one row per polygon label and
# one column per statistic
df = pd.DataFrame(stat_dicts).T.rename(columns=column_names)

df.head()

Unnamed: 0,Water Extent 2020 (km2),Mean Water Extent 2000-2020 (km2),Std. Dev. Water Extent 2000-2020 (km2),Max Water Extent 2000-2020 (km2),Min Water Extent 2000-2020 (km2),Year of Max Water Extent 2000-2020,Year of Min Water Extent 2000-2020,Difference in Water Extent: 2020 - Mean (km2),Percentage Difference in Water Extent: 2020 - Mean,Standardised Anomaly in Water Extent: (2020 - Mean) / Std.dev.
Sudan,23565.3327,14108.894614,7233.773562,31836.186,6193.9989,2002.0,2004.0,9456.438086,67.024656,1.307262
Angola,27129.6927,15388.772829,4819.209387,27129.6927,6695.3349,2020.0,2000.0,11740.919871,76.295362,2.436275
Benin,883.6785,487.958186,211.861369,918.423,237.7044,2016.0,2006.0,395.720314,81.097177,1.867827
Botswana,8433.999,6576.190714,2148.866304,11355.6537,2203.0209,2017.0,2003.0,1857.808286,28.250523,0.864553
Burkina Faso,4194.495,2215.769486,985.298594,4194.495,1168.7139,2020.0,2004.0,1978.725514,89.301957,2.00825


## Join results onto geodataframe and export

In [10]:
# Attach the statistics to the geodataframe by matching the `attribute_col`
# values against the index of `df`. Statistics columns left over from an
# earlier run of this cell are dropped first, so the cell can be re-run
# without a "columns overlap" error.
gdf = gdf.drop(columns=df.columns, errors="ignore").join(df, on=attribute_col)
gdf.head()

Unnamed: 0,name,geometry,Water Extent 2020 (km2),Mean Water Extent 2000-2020 (km2),Std. Dev. Water Extent 2000-2020 (km2),Max Water Extent 2000-2020 (km2),Min Water Extent 2000-2020 (km2),Year of Max Water Extent 2000-2020,Year of Min Water Extent 2000-2020,Difference in Water Extent: 2020 - Mean (km2),Percentage Difference in Water Extent: 2020 - Mean,Standardised Anomaly in Water Extent: (2020 - Mean) / Std.dev.
0,Sudan,"MULTIPOLYGON (((38.58148 18.02542, 38.58203 18...",23565.3327,14108.894614,7233.773562,31836.186,6193.9989,2002.0,2004.0,9456.438086,67.024656,1.307262
1,Angola,"MULTIPOLYGON (((11.79481 -16.81925, 11.79375 -...",27129.6927,15388.772829,4819.209387,27129.6927,6695.3349,2020.0,2000.0,11740.919871,76.295362,2.436275
2,Benin,"MULTIPOLYGON (((1.86343 6.28872, 1.86292 6.288...",883.6785,487.958186,211.861369,918.423,237.7044,2016.0,2006.0,395.720314,81.097177,1.867827
3,Botswana,"POLYGON ((25.17447 -17.77881, 25.18476 -17.783...",8433.999,6576.190714,2148.866304,11355.6537,2203.0209,2017.0,2003.0,1857.808286,28.250523,0.864553
4,Burkina Faso,"POLYGON ((-0.45567 15.08082, -0.45411 15.07937...",4194.495,2215.769486,985.298594,4194.495,1168.7139,2020.0,2004.0,1978.725514,89.301957,2.00825


In [11]:
# Export the geodataframe, with the statistics joined on, to a geojson
gdf.to_file(f'results/geojsons/wofs_anomalies_africa_{output_suffix}_{year}.geojson')

## Explore results

If you are returning to this notebook after the results have been exported, you can read them back in from the geojson by running the cell directly below.

The geodataframe is 'simplified' to speed up processing.

Change the `col_to_plot` parameter to change which variable is plotted


In [None]:
# Read back the results exported earlier in this notebook; the file name must
# match the one used when exporting ('wofs_anomalies_africa_...')
gdf = gpd.read_file('results/geojsons/wofs_anomalies_africa_'+output_suffix+'_'+year+'.geojson')

In [None]:
# Simplify so plotting is fast: reproject to EPSG:6933, then simplify every
# geometry with a tolerance of 2500 (in the units of that CRS - presumably
# metres; confirm). `gdf` itself is left unchanged.
gdf_simple = gdf.to_crs('epsg:6933')
gdf_simple['geometry'] = gdf_simple['geometry'].simplify(2500)


In [None]:
# Column to plot. The name must match a results column exactly: the
# percentage-difference column has no "(km2)" suffix, and the year is taken
# from the `year` parameter rather than hard-coded.
col_to_plot = "Percentage Difference in Water Extent: " + year + " - Mean"

# Interactive map of the chosen column on a diverging scale from -100 to 100
gdf_simple.explore(
    column=col_to_plot,
    cmap="RdBu",
    vmin=-100,
    vmax=100,
    style_kwds={"fillOpacity": 1.0},
    tiles="CartoDB positron",
)

# ------FIGURES CODE ----------------------------------------------

### Plot WOfS with country boundaries
**Use this section to edit existing plot title, bounds, etc.**

If you have successfully exported the results geojson but started a new instance, there is no need to re-process the data. It can be read in from the geojson by running the code below. Be sure the file path points to the correct geojson.

In [None]:
# Read back the results exported earlier in this notebook; the file name must
# match the one used when exporting ('wofs_anomalies_africa_...')
gdf = gpd.read_file('results/geojsons/wofs_anomalies_africa_'+output_suffix+'_'+year+'.geojson')

In [None]:
# Column to plot. The name must match a results column exactly: the
# percentage-difference column has no "(km2)" suffix.
col_to_plot = "Percentage Difference in Water Extent: " + year + " - Mean"

### Customise the plot

In [None]:
# Define plot and colourbar axes
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
fig.subplots_adjust(bottom=0.2)
cax = fig.add_axes([0.16, 0.15, 0.70, 0.03])

# Define colour map: discrete classes, 10 wide, between -100 and 100
cmap = mpl.cm.RdBu
bounds = list(range(-100, 101, 10))
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)
cbar = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
                                 norm=norm,
                                 orientation='horizontal')

# Define colourbar labelling: a tick every 25, shown as a percentage
tick_values = list(range(-100, 101, 25))
cbar.set_ticks(tick_values)
cbar.set_ticklabels(['{:2}%'.format(value) for value in tick_values])
cbar.set_label("Surface water area % difference from mean (2000-2020)", fontsize='14')

# Turn off lon-lat ticks and labels (removing the ticks removes their labels)
ax.set_xticks([])
ax.set_yticks([])

# Remove frame
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

# Plot the chosen column and the polygon boundary lines. The map shares the
# colourbar's `norm`, so the colours drawn on the map are exactly the classes
# shown in the colourbar (a separate vmin/vmax would give the map a
# continuous scale that the discrete colourbar does not describe).
gdf.plot(col_to_plot, ax=ax, cmap=cmap, norm=norm)
gdf.geometry.plot(ax=ax, linewidth=0.8, edgecolor='black', facecolor="none")

# Export figure
fig.savefig('results/pngs/wofs/wofs_pc_change_'+year+'_from_2000_2020_mean.png',
            bbox_inches='tight',
            dpi=200,
            facecolor="white")

---

## Additional information

**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
Digital Earth Africa data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.

**Contact:** If you need assistance, please post a question on the [Open Data Cube Slack channel](http://slack.opendatacube.org/) or on the [GIS Stack Exchange](https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using the `open-data-cube` tag (you can view previously asked questions [here](https://gis.stackexchange.com/questions/tagged/open-data-cube)).

**Compatible datacube version:**

In [None]:
# Report the version of the datacube package in use
print(datacube.__version__)

**Last Tested:**

In [None]:
# Display today's date, formatted as YYYY-MM-DD
from datetime import datetime
datetime.today().strftime('%Y-%m-%d')