This notebook is not used operationally or for validation; its only purpose is to provide a clear understanding of the core functions of the AA workflow. The outputs and dimensions of each main step can thus be inspected here.

**Import required libraries and functions**

In [2]:
# Move the working directory one level up (to the repository root, per the path echoed below).
# NOTE: this is relative to the current directory, so re-running the cell moves up one more level.
%cd ..

c:\Users\amine.barkaoui\OneDrive - World Food Programme\Documents\GitHub\anticipatory-action


In [4]:
import os
import datetime
import pandas as pd

from config.params import Params

from AA.helper_fns import (
    read_forecasts,
    read_observations,
    aggregate_by_district,
    merge_un_biased_probs,
    merge_probabilities_triggers_dashboard,
)

from hip.analysis.analyses.drought import (
    get_accumulation_periods,
    run_accumulation_index,
    run_gamma_standardization,
    run_bias_correction,
    compute_probabilities,
)

from hip.analysis.aoi.analysis_area import AnalysisArea

**Define parameters**

The `config/{country}_config.yaml` file gathers all the customizable parameters used in the operational script. For example, the *monitoring_year*, the list of districts and the intensity levels can be defined in that file.

In [35]:
# Build the parameter set for country code 'ZWE', issue month 10 and the SPI index
params = Params(iso='ZWE', issue=10, index='SPI')
# Override the monitoring year for this walkthrough
params.monitoring_year = 2023

**Read shapefile**

In [6]:
# Time range requested for the area of interest: from 1981 up to the end of June
# of the year following the monitoring year
analysis_period = f"1981-01-01/{params.monitoring_year + 1}-06-30"

# Area of interest used by hip-analysis to read the datasets (admin level 2)
area = AnalysisArea.from_admin_boundaries(
    iso3=params.iso.upper(),
    admin_level=2,
    resolution=0.25,
    datetime_range=analysis_period,
)

# Read the admin boundaries (shapefile) attached to the area and display them
gdf = area.get_dataset([area.BASE_AREA_DATASET])
gdf

Unnamed: 0_level_0,geometry,Code,Name,adm1_Code,adm0_Code
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bulawayo,"POLYGON ((28.6712 -20.0163, 28.6072 -19.9615, ...",1010745,Bulawayo,900969,271
Chitungwiza,"POLYGON ((31 -17.9994, 31.0626 -18.0506, 31.11...",1010758,Chitungwiza,900970,271
Epworth,"POLYGON ((31.2031 -17.8863, 31.1303 -17.8654, ...",1010760,Epworth,900970,271
Harare,"POLYGON ((31.2031 -17.8863, 31.2195 -17.8512, ...",1010771,Harare,900970,271
Harare Rural,"POLYGON ((31.1361 -17.9289, 31.125 -17.888, 31...",1010772,Harare Rural,900970,271
...,...,...,...,...,...
Redcliff,"POLYGON ((29.8419 -19.025, 29.8195 -19.0142, 2...",1010810,Redcliff,900978,271
Shurugwi,"POLYGON ((30.4711 -19.8304, 30.4674 -19.8254, ...",1010817,Shurugwi,900978,271
Shurugwi Town,"POLYGON ((30.0317 -19.6013, 29.9984 -19.607, 3...",1010818,Shurugwi Town,900978,271
Zvishavane,"POLYGON ((30.4609 -20.5564, 30.4438 -20.528, 3...",1010826,Zvishavane,900978,271


**Read forecasts**

In [46]:
# Location of the stored forecasts for the selected issue month (zero-padded to two digits).
# NOTE(review): the year folder is hard-coded to 2022, whereas the observations path uses
# `params.calibration_year` — confirm whether this path should follow the same parameter.
forecasts_path = f"{params.data_path}/data/{params.iso}/zarr/2022/{str(params.issue).zfill(2)}/forecasts.zarr"

# With update=False the downscaled dataset is read from a local folder or an s3 bucket;
# with update=True it is read directly from HDC.
forecasts = read_forecasts(area, params.issue, forecasts_path, update=False)
forecasts

Unnamed: 0,Array,Chunk
Bytes,1.60 GiB,32.17 MiB
Shape,"(9127, 51, 28, 33)","(9127, 1, 28, 33)"
Dask graph,51 chunks in 1 graph layer,51 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.60 GiB 32.17 MiB Shape (9127, 51, 28, 33) (9127, 1, 28, 33) Dask graph 51 chunks in 1 graph layer Data type float32 numpy.ndarray",9127  1  33  28  51,

Unnamed: 0,Array,Chunk
Bytes,1.60 GiB,32.17 MiB
Shape,"(9127, 51, 28, 33)","(9127, 1, 28, 33)"
Dask graph,51 chunks in 1 graph layer,51 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,249.57 kiB,249.57 kiB
Shape,"(9127,)","(9127,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,,
"Array Chunk Bytes 249.57 kiB 249.57 kiB Shape (9127,) (9127,) Dask graph 1 chunks in 1 graph layer Data type",9127  1,

Unnamed: 0,Array,Chunk
Bytes,249.57 kiB,249.57 kiB
Shape,"(9127,)","(9127,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,,


**Read observations**

In [47]:
# The observations are already stored: the dataset is the same as the one used
# in the pre-season/analytical script, under the calibration-year folder
observations_path = f"{params.data_path}/data/{params.iso}/zarr/{params.calibration_year}/obs/observations.zarr"

observations = read_observations(area, observations_path)
observations

Unnamed: 0,Array,Chunk
Bytes,106.84 MiB,7.22 kiB
Shape,"(15156, 28, 33)","(1, 28, 33)"
Dask graph,15156 chunks in 1 graph layer,15156 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 106.84 MiB 7.22 kiB Shape (15156, 28, 33) (1, 28, 33) Dask graph 15156 chunks in 1 graph layer Data type float64 numpy.ndarray",33  28  15156,

Unnamed: 0,Array,Chunk
Bytes,106.84 MiB,7.22 kiB
Shape,"(15156, 28, 33)","(1, 28, 33)"
Dask graph,15156 chunks in 1 graph layer,15156 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray


**Read pre-computed triggers**

Now that we have all the data we need, let's read the triggers file so that we can merge the probabilities into it once they have been computed.

In [27]:
# Two candidate trigger files: the probabilities/triggers file under `probs/`
# (used when it exists) and the triggers file under `triggers/` as a fallback
probs_triggers_path = f"{params.data_path}/data/{params.iso}/probs/aa_probabilities_triggers_pilots.csv"
fallback_triggers_path = f"{params.data_path}/data/{params.iso}/triggers/triggers.spi.dryspell.{params.calibration_year}.pilots.csv"

# Pick the first one if present on disk, otherwise the fallback, then read it
triggers_path = (
    probs_triggers_path if os.path.exists(probs_triggers_path) else fallback_triggers_path
)
triggers_df = pd.read_csv(triggers_path)
triggers_df

Unnamed: 0,district,index,category,window,issue_ready,issue_set,trigger_ready,trigger_set,vulnerability,prob_ready,prob_set
0,Beitbridge,DRYSPELL JF,Normal,Window 2,7.0,8.0,0.14,0.30,NRT,,
1,Beitbridge,SPI DJF,Normal,Window 2,7.0,8.0,0.38,0.00,NRT,,
2,Beitbridge,SPI FM,Normal,Window 2,9.0,10.0,0.31,0.35,NRT,,
3,Beitbridge,SPI JFM,Normal,Window 2,10.0,11.0,0.00,0.37,NRT,,
4,Beitbridge,SPI ND,Normal,Window 1,6.0,7.0,0.35,0.11,NRT,0.24,
...,...,...,...,...,...,...,...,...,...,...,...
73,Rushinga,SPI DJ,Normal,Window 2,9.0,10.0,0.23,0.37,NRT,,
74,Rushinga,SPI FM,Normal,Window 2,11.0,12.0,0.34,0.27,NRT,,
75,Rushinga,SPI JF,Normal,Window 2,8.0,9.0,0.08,0.34,NRT,,
76,Rushinga,SPI ND,Normal,Window 1,6.0,7.0,0.30,0.26,NRT,0.29,


**Get accumulation periods covered by the forecasts of the defined issue month**

In [15]:
# Get accumulation periods (DJ, JF, FM, DJF, JFM...)
# The result maps each period name to the tuple of month numbers it covers.
# The season bounds and the min/max period lengths come from the country configuration.
accumulation_periods = get_accumulation_periods(
    forecasts,
    params.start_season,
    params.end_season,
    params.min_index_period,
    params.max_index_period,
)
accumulation_periods

{'JF': (1, 2),
 'FM': (2, 3),
 'MA': (3, 4),
 'ON': (10, 11),
 'ND': (11, 12),
 'DJ': (12, 1),
 'JFM': (1, 2, 3),
 'FMA': (2, 3, 4),
 'OND': (10, 11, 12),
 'NDJ': (11, 12, 1),
 'DJF': (12, 1, 2)}

Here we focus on the pipeline for one indicator (one period) so we select a single element from the above dictionary (November-December using October forecasts).

In [16]:
# Get single use case: the November-December (ND) accumulation period.
# The period is looked up by name rather than by position, so the selection does not
# silently change if the content or ordering of `accumulation_periods` changes.
period_name = "ND"
period_months = accumulation_periods[period_name]
period_name, period_months

('ND', (11, 12))

**Run accumulation (sum for SPI)**

In [17]:
# Harmonize observations between the different indexes by dropping the 1980 season:
# only time steps from 1 October 1981 onwards are kept. This is applied only when the
# issue month is at or after the start of monitoring.
first_kept_day = datetime.date(1981, 10, 1)
if params.start_monitoring <= int(params.issue):
    kept_time_steps = observations.time.dt.date >= first_kept_day
    observations = observations.where(kept_time_steps, drop=True)

In [23]:
# Accumulate forecasts and observations over the months of the selected period.
# Both inputs are first rechunked so that the whole time dimension sits in one chunk (-1).
single_time_chunk = {"time": -1}

accumulation_fc = run_accumulation_index(
    forecasts.chunk(single_time_chunk),
    params.aggregate,
    period_months,
    forecasts=True,
)
accumulation_obs = run_accumulation_index(
    observations.chunk(single_time_chunk),
    params.aggregate,
    period_months,
)

In [33]:
# Inspect the accumulated forecasts
accumulation_fc

In [25]:
# Inspect the accumulated observations
accumulation_obs

Unnamed: 0,Array,Chunk
Bytes,295.97 kiB,7.22 kiB
Shape,"(41, 28, 33)","(1, 28, 33)"
Dask graph,41 chunks in 174 graph layers,41 chunks in 174 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 295.97 kiB 7.22 kiB Shape (41, 28, 33) (1, 28, 33) Dask graph 41 chunks in 174 graph layers Data type float64 numpy.ndarray",33  28  41,

Unnamed: 0,Array,Chunk
Bytes,295.97 kiB,7.22 kiB
Shape,"(41, 28, 33)","(1, 28, 33)"
Dask graph,41 chunks in 174 graph layers,41 chunks in 174 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


**Run standardization (SPI)**

In [19]:
# Remove inconsistent observations: keep only the time steps between 1 January 1979
# and 31 December of the year preceding the monitoring year
obs_window_start = datetime.date(1979, 1, 1)
obs_window_end = datetime.date(params.monitoring_year - 1, 12, 31)
accumulation_obs = accumulation_obs.sel(time=slice(obs_window_start, obs_window_end))

In [28]:
# Anomaly: standardize the accumulated values (gamma standardization) for both the
# forecasts and the observations, using the same historical reference period
# (params.hist_anomaly_start / params.hist_anomaly_stop).
# `.load()` brings the accumulated data into memory before standardization.
anomaly_fc = run_gamma_standardization(
    accumulation_fc.load(),
    params.hist_anomaly_start,
    params.hist_anomaly_stop,
    members=True,  # forecasts only; presumably signals an ensemble-member dimension — TODO confirm
)
anomaly_obs = run_gamma_standardization(
    accumulation_obs.load(),
    params.hist_anomaly_start,
    params.hist_anomaly_stop,
)

  rfh["time"] = [


In [31]:
# Inspect the standardized forecasts
anomaly_fc

In [32]:
# Inspect the standardized observations
anomaly_obs

**Run bias correction**

In [36]:
# Bias correction: correct the standardized forecasts against the standardized observations
# for the monitoring year and the selected issue month
index_bc = run_bias_correction(
    anomaly_fc,
    anomaly_obs,
    params.end_season,
    params.monitoring_year,
    int(params.issue),
    nearest_neighbours=8,  # NOTE(review): hard-coded here; meaning is defined by hip-analysis — confirm
    enso=True,  # NOTE(review): hard-coded here; presumably enables ENSO-based handling — confirm
)
display(index_bc)

**Run probabilities**

In [48]:
# Change dryspell sign as we compare values to a negative threshold to get probabilities.
# The index name is compared case-insensitively because `params` is built with an
# upper-case index name in this notebook (e.g. index='SPI').
# NOTE: the flip is applied in place, so this cell must be run exactly once per kernel
# session; re-running it flips the sign back.
if str(params.index).lower() == "dryspell":
    anomaly_fc *= -1
    index_bc *= -1
    anomaly_obs *= -1

In [38]:
# Probabilities without Bias Correction:
# keep only the forecast time steps that fall in the monitoring year, then compute the
# probabilities for the configured intensity thresholds, rounded to two decimals
in_monitoring_year = anomaly_fc.time.dt.year == params.monitoring_year
monitored_anomaly_fc = anomaly_fc.where(in_monitoring_year, drop=True)

probabilities = compute_probabilities(
    monitored_anomaly_fc, levels=params.intensity_thresholds
).round(2)
display(probabilities)

In [39]:
# Probabilities after Bias Correction: same computation as for the raw forecasts,
# applied to the bias-corrected index and rounded to two decimals
probabilities_bc = compute_probabilities(
    index_bc, levels=params.intensity_thresholds
).round(2)
display(probabilities_bc)

**Admin-2 level aggregation**

In [40]:
# Aggregate by district: both probability sets (raw and bias-corrected) are aggregated
# using the admin boundaries read earlier (`gdf`)
probs_district = aggregate_by_district(probabilities, gdf, params)
probs_bc_district = aggregate_by_district(probabilities_bc, gdf, params)

# Build single xarray with merged unbiased/biased probabilities for the selected period
probs_by_district = merge_un_biased_probs(
    probs_district, probs_bc_district, params, period_name
)
display(probs_by_district)

**Dataframe formatting**

In [41]:
# Merge probabilities with triggers.
# Two dataframes are returned: the district probabilities on their own (`probs_df`)
# and the triggers table merged with the probabilities (`merged_df`).
probs_df, merged_df = merge_probabilities_triggers_dashboard(
    probs_by_district, triggers_df, params, period_name
)

In [42]:
# Inspect the probabilities per district and category
probs_df

Unnamed: 0,district,category,issue,index,prob,aggregation
0,Beitbridge,Moderate,10,SPI ND,0.30,SPI 2
1,Beitbridge,Normal,10,SPI ND,0.41,SPI 2
2,Bikita,Moderate,10,SPI ND,0.31,SPI 2
3,Bikita,Normal,10,SPI ND,0.43,SPI 2
4,Bindura,Moderate,10,SPI ND,0.33,SPI 2
...,...,...,...,...,...,...
125,Zaka,Normal,10,SPI ND,0.42,SPI 2
126,Zvimba,Moderate,10,SPI ND,0.31,SPI 2
127,Zvimba,Normal,10,SPI ND,0.42,SPI 2
128,Zvishavane,Moderate,10,SPI ND,0.32,SPI 2


In [45]:
# Inspect the triggers table merged with the probabilities
merged_df

Unnamed: 0,district,index,category,window,issue_ready,issue_set,trigger_ready,trigger_set,vulnerability,prob_ready,prob_set
0,Beitbridge,DRYSPELL JF,Normal,Window 2,7.0,8.0,0.14,0.30,NRT,,
1,Beitbridge,SPI DJF,Normal,Window 2,7.0,8.0,0.38,0.00,NRT,,
2,Beitbridge,SPI FM,Normal,Window 2,9.0,10.0,0.31,0.35,NRT,,
3,Beitbridge,SPI JFM,Normal,Window 2,10.0,11.0,0.00,0.37,NRT,,
4,Beitbridge,SPI ND,Normal,Window 1,6.0,7.0,0.35,0.11,NRT,0.24,
...,...,...,...,...,...,...,...,...,...,...,...
73,Rushinga,SPI DJ,Normal,Window 2,9.0,10.0,0.23,0.37,NRT,,
74,Rushinga,SPI FM,Normal,Window 2,11.0,12.0,0.34,0.27,NRT,,
75,Rushinga,SPI JF,Normal,Window 2,8.0,9.0,0.08,0.34,NRT,,
76,Rushinga,SPI ND,Normal,Window 1,6.0,7.0,0.30,0.26,NRT,0.29,
