# Rainfall anomalies averaged over polygons

## Load packages, connect to datacube

In [1]:
%matplotlib inline

import datacube
import gc
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
from datacube.utils import geometry
import pandas as pd
import matplotlib as mpl
import xarray as xr
from odc.algo import xr_reproject
from datacube.utils.geometry import assign_crs

from deafrica_tools.spatial import xr_rasterize
from deafrica_tools.dask import create_local_dask_cluster

dc = datacube.Datacube(app="WOfS-figure")

import warnings
warnings.filterwarnings("ignore", message="Iteration over multi-part geometries is deprecated and will be removed in ")



In [2]:
create_local_dask_cluster()

Perhaps you already have a cluster running?
Hosting the HTTP server on port 34421 instead


0,1
Client  Scheduler: tcp://127.0.0.1:46361  Dashboard: /user/fang.yuan@digitalearthafrica.org/proxy/34421/status,Cluster  Workers: 1  Cores: 15  Memory: 104.37 GB


## Analysis parameters

* `year`: The year the anomaly will be calculated for ie, if '2020' then the rainfall for 2020 will be subtracted from the long-term mean
* `vector_file`: The shapefile (geojson) over which the anomalies are iterated. The final results are appended to this dataframe on this file
* `attribute_col`: the column in the "vector_file" that is used to label the results. ie if this is a shapefile with Africam country boundary then the attribute_col should be the column containg the country names
* `output_suffix`: a filename suffix added to the exported results geojson to identify what the results refer to. ie. if iterating over African countries then make this parameter something like 'countries'. 

In [3]:
year='2020'
clim_year='2000','2020'
vector_file = "data/african_countries.shp"
output_suffix = 'countries'

## Open vector file and set-up ODC query

In [4]:
gdf = gpd.read_file(vector_file)
gdf.head()

Unnamed: 0,name,geometry
0,Sudan,"MULTIPOLYGON (((38.58148 18.02542, 38.58203 18..."
1,Angola,"MULTIPOLYGON (((11.79481 -16.81925, 11.79375 -..."
2,Benin,"MULTIPOLYGON (((1.86343 6.28872, 1.86292 6.288..."
3,Botswana,"POLYGON ((25.17447 -17.77881, 25.18476 -17.783..."
4,Burkina Faso,"POLYGON ((-0.45567 15.08082, -0.45411 15.07937..."


In [5]:
attribute_col = 'name'

resolution = (-5000, 5000) # native CHIRPS resolution
output_crs = 'EPSG:6933'
measurements = ['rainfall']

query = {'measurements': measurements,
         'resolution': resolution,
         'output_crs': output_crs,
         'dask_chunks':dict(x=1000,y=1000)
         }

In [6]:
year_rain = {}
rain_mean = {}
rain_std = {}
rain_anomaly = {}
rain_min = {}
rain_idxmin = {}
rain_max = {}
rain_idxmax = {}

In [7]:
#try to remove garbage collection warnings
g0, g1, g2 = gc.get_threshold()
gc.set_threshold(g0*3, g1*3, g2*3)

i = 0

# Loop through polygons in geodataframe and extract satellite data
for index, row in gdf.iterrows():
    print(" Feature {:02}/{:02}\r".format(i + 1, len(gdf)),
                  end='')
    
    #skip come countries as no-data
    country=str(row[attribute_col])
    if (country == 'Cape Verde') | (country == 'Mauritius'):
        pass
    
    else:
        # Get the geometry
        geom = geometry.Geometry(row.geometry.__geo_interface__,
                                 geometry.CRS(gdf.crs))

        # Update dc query with geometry      
        query.update({'geopolygon': geom}) 

        ds_all = dc.load(product=['rainfall_chirps_monthly'],
                         time=(clim_year[0], '2020'),
                         **query).rainfall

        #select out difeent components of calcs
        ds = ds_all.sel(time=year)
        ds_clim=ds_all.sel(time=slice(clim_year[0], clim_year[1]))
        ds_match_wofs=ds_all.sel(time=slice('2000','2020'))

        # set -9999 no-data values to NaN
        ds = ds.where(ds !=-9999.)
        ds_clim = ds_clim.where(ds_clim !=-9999.)
        ds_match_wofs = ds_match_wofs.where(ds_match_wofs !=-9999.)

        # Generate a polygon mask to keep only data within the polygon
        mask = xr_rasterize(gdf.iloc[[index]], ds)
        ds = ds.where(mask)
        ds_clim = ds_clim.where(mask)
        ds_match_wofs = ds_match_wofs.where(mask)

        # find sum over 12 months (ie total for that year)
        ds = ds.sum(dim='time').mean(dim=['x','y']).compute()

        # Climatologies 
        ds_clim_mean_year = ds_clim.groupby('time.year').sum(dim=['time']).mean(dim=['x','y']).compute()
        ds_clim_std = ds_clim_mean_year.std('year')
        ds_clim_mean = ds_clim_mean_year.mean('year')

        #extra summary stats to match wofs summary plots
        ds_match_wofs_mean_year = ds_match_wofs.groupby('time.year').sum(dim=['time']).mean(dim=['x','y']).compute()
        ds_max = ds_match_wofs_mean_year.max('year')
        ds_idxmax = int(ds_match_wofs_mean_year.idxmax('year').values)
        ds_min = ds_match_wofs_mean_year.min('year')
        ds_idxmin = int(ds_match_wofs_mean_year.idxmin('year').values)
        
        #save annual rainfall time-series for reference
        plt.ioff()
        fig, ax = plt.subplots(1,1, figsize=(11, 5))
        ds_match_wofs_mean_year.plot(ax=ax, marker='o', color='#9467bd')
        plt.title(f'Total annual rainfall: '+country+', 2000 to 2020')
        plt.xlabel('Year')
        plt.ylabel('Total Rainfall (mm)')
        plt.tight_layout()
        fig.savefig('results/pngs/rainfall/rainfall_'+country+'_2000_2020.png', 
                    bbox_inches='tight',
                    dpi=200, 
                    facecolor="white")
        plt.close(fig)

        #anomalies
        anomalies = xr.apply_ufunc(
            lambda x, m, s: (x - m) / s,
            ds,
            ds_clim_mean,
            ds_clim_std,
            output_dtypes=[ds.dtype],
            dask="allowed"
        )

        #ad results to dict
        year_rain.update({country: ds.values.item()})
        rain_mean.update({country: ds_clim_mean.values.item()})
        rain_std.update({country: ds_clim_std.values.item()})
        rain_anomaly.update({country: anomalies.values.item()})
        rain_max.update({country: ds_max.item()})
        rain_min.update({country: ds_min.item()})
        rain_idxmin.update({country: ds_idxmin})
        rain_idxmax.update({country: ds_idxmax}) 

    # Update counter
    i += 1

 Feature 05/55

  _reproject(


 Feature 06/55



 Feature 08/55



 Feature 09/55



 Feature 10/55



 Feature 11/55



 Feature 12/55



 Feature 13/55



 Feature 14/55



 Feature 15/55



 Feature 16/55



 Feature 17/55



 Feature 18/55



 Feature 19/55



 Feature 20/55



 Feature 21/55



 Feature 22/55



 Feature 23/55



 Feature 24/55



 Feature 25/55



 Feature 26/55



 Feature 27/55



 Feature 28/55



 Feature 29/55



 Feature 30/55



 Feature 32/55



 Feature 33/55



 Feature 34/55



 Feature 35/55



 Feature 36/55



 Feature 37/55



 Feature 38/55



 Feature 39/55



 Feature 40/55



 Feature 41/55



 Feature 42/55



 Feature 43/55



 Feature 44/55



 Feature 45/55



 Feature 46/55



 Feature 47/55



 Feature 48/55



 Feature 49/55



 Feature 50/55



 Feature 51/55



 Feature 52/55



 Feature 53/55



 Feature 54/55



 Feature 55/55



## Join results onto geodataframe and export

In [8]:
df_rain = pd.DataFrame.from_dict(
    [
        year_rain,
        rain_mean,
        rain_std,
        rain_anomaly,
        rain_max,
        rain_min,
        rain_idxmin,
        rain_idxmax
    ]
).T.rename(
    {
        0: "Total Rainfall " + year + " (mm)",
        1: f"Mean Yearly Rainfall {clim_year[0]}-{clim_year[1]} (mm)",
        2: "Std. Dev. Yearly Rainfall {clim_year[0]}-{clim_year[1]} (mm)",
        3: "Standardised Yearly Rainfall Anomaly "+year,
        4: "Max Rainfall 2000-2020 (mm)",
        5: "Min Rainfall 2000-2020 (mm)",
        6: "Year of Min Rainfall 2000-2020",
        7: "Year of Max Rainfall 2000-2020",
    },
    axis=1,
)

df_rain.head()

Unnamed: 0,Total Rainfall 2020 (mm),Mean Yearly Rainfall 2000-2020 (mm),Std. Dev. Yearly Rainfall {clim_year[0]}-{clim_year[1]} (mm),Standardised Yearly Rainfall Anomaly 2020,Max Rainfall 2000-2020 (mm),Min Rainfall 2000-2020 (mm),Year of Min Rainfall 2000-2020,Year of Max Rainfall 2000-2020
Sudan,173.611008,164.285645,19.675518,0.473958,198.974548,125.073601,2004.0,2019.0
Angola,582.206299,604.268127,34.438721,-0.640611,684.010254,550.185547,2015.0,2011.0
Benin,557.501038,539.664124,46.343452,0.384885,615.347717,464.490143,2013.0,2003.0
Botswana,282.955322,240.49823,62.34663,0.680985,356.188141,143.706665,2015.0,2006.0
Burkina Faso,439.910217,394.761108,34.016331,1.327277,442.781311,343.814087,2004.0,2019.0


## Join results onto geodataframe and export

In [9]:
gdf = gdf.join(df_rain, attribute_col)


In [10]:
# Export results (overwritng WOfS file exported earlier with new rainfall data appended)
gdf.to_file('results/geojsons/rainfall_anomalies_africa_'+output_suffix+'_'+year+'.geojson')

## Explore results

You can read in the geojson if returning to this notebook by uncommenting the cell directly below.

The geodataframe is 'simplified' to speed up processing.

Change the `col_to_plot` parameter to change which variable is plotted


In [11]:
# gdf = gdf.read_file('results/geojsons/rainfall_anomalies_africa_'+output_suffix+'_'+year+'.geojson')

In [12]:
#simplify so plotting is fast
gdf_simple = gdf.to_crs('epsg:6933')
gdf_simple['geometry'] = gdf_simple['geometry'].simplify(2500)


In [13]:
gdf_simple.columns

Index(['name', 'geometry', 'Total Rainfall 2020 (mm)',
       'Mean Yearly Rainfall 2000-2020 (mm)',
       'Std. Dev. Yearly Rainfall {clim_year[0]}-{clim_year[1]} (mm)',
       'Standardised Yearly Rainfall Anomaly 2020',
       'Max Rainfall 2000-2020 (mm)', 'Min Rainfall 2000-2020 (mm)',
       'Year of Min Rainfall 2000-2020', 'Year of Max Rainfall 2000-2020'],
      dtype='object')

In [14]:
col_to_plot = "Standardised Yearly Rainfall Anomaly "+year

gdf_simple.explore(
    column=col_to_plot,
    cmap="RdBu",
    vmin=-2.5,
    vmax=2.5,
    style_kwds={"fillOpacity": 1.0},
    tiles="CartoDB positron",
)

# ------FIGURES CODE ----------------------------------------------

### Plot rainfall with country boundaries
**Use this section to edit existing plot title, bounds, etc.**

If you have successfully exported the previous shapefile but started a new instance, there is no need to re-process the data. It can be read in from the shapefile by uncommenting and running the code below. Be sure the vector file path is to the correct shapefile title.

In [15]:
gdf = gpd.read_file('results/geojsons/rainfall_anomalies_africa_'+output_suffix+'_'+year+'.geojson')

### Customise the plot

In [16]:
col_to_plot = "Standardised Yearly Rainfall Anomaly 2020"
vmin, vmax = -3,3.1
steps = 0.5
title = 'Standardised Rainfall Anomaly 2020'
export_path = 'results/pngs/rainfall/std_anomaly_rainfall.png'

In [17]:
# Define plot and colourbar axes
fig, ax = plt.subplots(1,1, figsize=(10,10))
fig.subplots_adjust(bottom=0.2)
cax = fig.add_axes([0.16, 0.15, 0.70, 0.03])


# Define colour map
cmap = mpl.cm.RdBu
bounds = list(np.arange(vmin,vmax,steps))
norm = mpl.colors.BoundaryNorm(bounds, cmap.N)#, extend='both')
cbar = mpl.colorbar.ColorbarBase(cax, cmap=cmap,
                                norm=norm,
                                orientation='horizontal')

# Define colourbar labelling
# cbar.set_ticks([])
cbar.set_ticks([i for i in np.arange(vmin,vmax,steps)])
cbar.set_ticklabels(list('{:2}'.format(i) for i in (list(np.arange(vmin, vmax, steps)))))
cbar.set_label(title, fontsize='14')

# Turn off lon-lat ticks and labels
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.set_xticks([])
ax.set_yticks([])

# # Remove frame
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)

# plot 'pc_change' and 'geometry' boundary lines
gdf.plot(col_to_plot, ax=ax, cmap=cmap, norm=norm)
gdf.geometry.plot(ax=ax, linewidth=0.8, edgecolor='black', facecolor="none")

# Export figure
fig.savefig(export_path, 
            bbox_inches='tight',
            dpi=200, 
            facecolor="white")

---

## Additional information

**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
Digital Earth Africa data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.

**Contact:** If you need assistance, please post a question on the [Open Data Cube Slack channel](http://slack.opendatacube.org/) or on the [GIS Stack Exchange](https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using the `open-data-cube` tag (you can view previously asked questions [here](https://gis.stackexchange.com/questions/tagged/open-data-cube)).

**Compatible datacube version:**

In [18]:
print(datacube.__version__)

1.8.8


**Last Tested:**

In [19]:
from datetime import datetime
datetime.today().strftime('%Y-%m-%d')

'2023-03-02'