This notebook is not used operationally or for validation; its only purpose is to give a clear understanding of the core functions of the AA workflow. The outputs and dimensions of each main step can thus be inspected here.

**Import required libraries and functions**

In [1]:
# Move the working directory up one level (presumably to the repository root, so that
# the `config` and `AA` packages imported below can be resolved — confirm).
# NOTE: this is a relative move, so re-running the cell climbs one more level each time.
%cd ..

/home/u/amine.barkaoui/anticipatory-action


In [3]:
import os
import datetime
import pandas as pd

from config.params import Params

from AA.helper_fns import (
    read_forecasts,
    read_observations,
    compute_district_average,
    merge_un_biased_probs,
    merge_probabilities_triggers_dashboard,
)

from hip.analysis.analyses.drought import (
    get_accumulation_periods,
    run_accumulation_index,
    run_gamma_standardization,
    run_bias_correction,
    compute_probabilities,
)

from hip.analysis.aoi.analysis_area import AnalysisArea



**Define parameters**

The `config/{country}_config.yaml` file gathers all the customizable parameters used in the operational script. For example, the *monitoring_year*, the list of districts or the intensity levels can be defined in that file.

In [4]:
# Build the run configuration for Mozambique (MOZ), issue month 5 and the SPI index.
# Every later cell reads its settings (paths, season bounds, thresholds...) from `params`.
params = Params(iso="MOZ", issue=5, index="SPI")

**Read shapefile**

In [5]:
# Area of interest used by hip-analysis to read every dataset below:
# admin-2 boundaries on a 0.25 degree grid, from 1981 up to the end of June of the
# year following the monitoring year.
aoi_datetime_range = f"1981-01-01/{params.monitoring_year + 1}-06-30"
area = AnalysisArea.from_admin_boundaries(
    iso3=params.iso.upper(),
    admin_level=2,
    resolution=0.25,
    datetime_range=aoi_datetime_range,
)

# Load the admin boundaries (shapefile) attached to the area and display them
gdf = area.get_dataset([area.BASE_AREA_DATASET])
gdf

Unnamed: 0_level_0,geometry,Code,Name,adm1_Code,adm0_Code
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ancuabe,"POLYGON ((40.02750 -12.83890, 40.02570 -12.817...",1010505,Ancuabe,900948,170
Balama,"POLYGON ((38.70730 -13.48280, 38.67420 -13.461...",1010508,Balama,900948,170
Chiure,"POLYGON ((39.32400 -13.89250, 39.33350 -13.893...",1010525,Chiure,900948,170
Cidade_De_Pemba,"POLYGON ((40.47160 -13.10860, 40.49990 -13.085...",1010536,Cidade_De_Pemba,900948,170
Ibo,"MULTIPOLYGON (((40.61640 -12.42130, 40.59600 -...",1010554,Ibo,900948,170
...,...,...,...,...,...
Mulevala,"POLYGON ((37.98600 -16.48200, 37.99310 -16.474...",1010632,Mulevala,900958,170
Namacurra,"POLYGON ((37.35910 -17.64970, 37.36290 -17.636...",1010639,Namacurra,900958,170
Namarroi,"POLYGON ((37.12030 -15.92340, 37.10890 -15.914...",1010640,Namarroi,900958,170
Nicoadala,"POLYGON ((36.97460 -17.63430, 36.97180 -17.620...",1010645,Nicoadala,900958,170


**Read forecasts**

In [6]:
# Location of the downscaled forecasts for this issue month (two-digit, zero-padded).
# NOTE(review): the "2022" folder is hard-coded here whereas the observations path
# uses params.calibration_year — confirm this is intentional.
issue_folder = str(params.issue).zfill(2)
forecasts_zarr_path = (
    f"{params.data_path}/data/{params.iso}/zarr/2022/{issue_folder}/forecasts.zarr"
)

# With update=False the downscaled dataset is read from a local folder or a s3 bucket;
# with update=True it is read directly from HDC.
forecasts = read_forecasts(area, params.issue, forecasts_zarr_path, update=False)
forecasts

[########################################] | 100% Completed | 27.20 s


Unnamed: 0,Array,Chunk
Bytes,5.27 GiB,105.89 MiB
Shape,"(9416, 51, 67, 44)","(9416, 1, 67, 44)"
Dask graph,51 chunks in 1 graph layer,51 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 5.27 GiB 105.89 MiB Shape (9416, 51, 67, 44) (9416, 1, 67, 44) Dask graph 51 chunks in 1 graph layer Data type float32 numpy.ndarray",9416  1  44  67  51,

Unnamed: 0,Array,Chunk
Bytes,5.27 GiB,105.89 MiB
Shape,"(9416, 51, 67, 44)","(9416, 1, 67, 44)"
Dask graph,51 chunks in 1 graph layer,51 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,257.47 kiB,257.47 kiB
Shape,"(9416,)","(9416,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,,
"Array Chunk Bytes 257.47 kiB 257.47 kiB Shape (9416,) (9416,) Dask graph 1 chunks in 1 graph layer Data type",9416  1,

Unnamed: 0,Array,Chunk
Bytes,257.47 kiB,257.47 kiB
Shape,"(9416,)","(9416,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,,


**Read observations**

In [7]:
# The observations are already stored: the dataset is the same as the one used in the
# pre-season/analytical script, so it is read from the calibration-year folder.
observations_zarr_path = (
    f"{params.data_path}/data/{params.iso}/zarr/{params.calibration_year}/obs/observations.zarr"
)
observations = read_observations(area, observations_zarr_path)
observations

[########################################] | 100% Completed | 487.22 s


Unnamed: 0,Array,Chunk
Bytes,340.88 MiB,23.03 kiB
Shape,"(15156, 67, 44)","(1, 67, 44)"
Dask graph,15156 chunks in 1 graph layer,15156 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 340.88 MiB 23.03 kiB Shape (15156, 67, 44) (1, 67, 44) Dask graph 15156 chunks in 1 graph layer Data type float64 numpy.ndarray",44  67  15156,

Unnamed: 0,Array,Chunk
Bytes,340.88 MiB,23.03 kiB
Shape,"(15156, 67, 44)","(1, 67, 44)"
Dask graph,15156 chunks in 1 graph layer,15156 chunks in 1 graph layer
Data type,float64 numpy.ndarray,float64 numpy.ndarray


**Read pre-computed triggers**

Now that we have all the data we need, let's read the triggers file so that we can merge the probabilities with it once they are computed.

In [8]:
# Read the triggers table.
# Prefer the already-merged probabilities/triggers file when it exists; otherwise fall
# back to the raw triggers file of the calibration year.
triggers_csv_path = (
    f"{params.data_path}/data/{params.iso}/probs/aa_probabilities_triggers_pilots.csv"
)
if not os.path.exists(triggers_csv_path):
    triggers_csv_path = f"{params.data_path}/data/{params.iso}/triggers/triggers.spi.dryspell.{params.calibration_year}.pilots.csv"

triggers_df = pd.read_csv(triggers_csv_path)
triggers_df

Unnamed: 0,district,index,category,window,issue_ready,issue_set,trigger_ready,trigger_set,vulnerability,prob_ready,prob_set,season,date_ready,date_set
0,Cahora_Bassa,DRYSPELL AM,Severe,Window 2,11.0,12.0,0.15,0.31,NRT,0.13,0.25,2024-25,2024-11-01,2024-12-01
1,Cahora_Bassa,DRYSPELL FM,Mild,Window 2,9.0,10.0,0.02,0.28,NRT,0.18,0.18,2024-25,2024-09-01,2024-10-01
2,Cahora_Bassa,DRYSPELL FM,Severe,Window 2,9.0,10.0,0.10,0.30,NRT,0.05,0.08,2024-25,2024-09-01,2024-10-01
3,Cahora_Bassa,DRYSPELL JF,Mild,Window 1,8.0,9.0,0.05,0.06,NRT,0.22,0.30,2024-25,2024-08-01,2024-09-01
4,Cahora_Bassa,DRYSPELL JF,Moderate,Window 1,8.0,9.0,0.14,0.05,NRT,0.18,0.28,2024-25,2024-08-01,2024-09-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417,Moamba,SPI NDJ,Moderate,Window 1,7.0,8.0,0.00,0.31,NRT,0.09,0.31,2024-25,2024-07-01,2024-08-01
418,Moamba,SPI NDJ,Severe,Window 1,7.0,8.0,0.19,0.18,NRT,0.09,0.22,2024-25,2024-07-01,2024-08-01
419,Moamba,SPI ON,Mild,Window 1,5.0,6.0,0.27,0.00,NRT,0.28,0.40,2024-25,2024-05-01,2024-06-01
420,Moamba,SPI ON,Moderate,Window 1,5.0,6.0,0.15,0.32,NRT,0.25,0.07,2024-25,2024-05-01,2024-06-01


**Get accumulation periods covered by the forecasts of the defined issue month**

In [9]:
# Accumulation periods (DJ, JF, FM, DJF, JFM...) covered by the forecasts, bounded by
# the season limits and the min/max index period lengths from the configuration.
season_and_period_bounds = (
    params.start_season,
    params.end_season,
    params.min_index_period,
    params.max_index_period,
)
accumulation_periods = get_accumulation_periods(forecasts, *season_and_period_bounds)
accumulation_periods

{'MJ': (5, 6), 'ON': (10, 11)}

Here we focus on the pipeline for one indicator (one period), so we select a single element from the above dictionary (October-November, `ON`, using the forecasts of issue month 5).

In [10]:
# Single use case: take the second entry of the accumulation periods dictionary.
# The position relies on the dictionary's insertion order; change the index to walk
# through another period.
available_periods = list(accumulation_periods.items())
period_name, period_months = available_periods[1]
period_name, period_months

('ON', (10, 11))

**Run accumulation (sum for SPI)**

In [11]:
# Remove the 1980 season to harmonize datasets between different indexes:
# only time steps on or after the first day of the 1981 season start month are kept.
first_kept_date = datetime.date(1981, params.start_season, 1)

forecasts = forecasts.where(forecasts.time.dt.date >= first_kept_date, drop=True)
observations = observations.where(
    observations.time.dt.date >= first_kept_date, drop=True
)

In [12]:
# Accumulate forecasts and observations over the selected period.
# Both inputs are rechunked so that the whole time axis sits in a single chunk.
season_bounds = (params.start_season, params.end_season)

accumulation_fc = run_accumulation_index(
    forecasts.chunk({"time": -1}),
    params.aggregate,
    period_months,
    season_bounds,
    forecasts=True,
)
accumulation_obs = run_accumulation_index(
    observations.chunk({"time": -1}),
    params.aggregate,
    period_months,
    season_bounds,
)

**Run standardization (SPI)**

In [13]:
# Standardized anomalies, computed against the historical reference window defined in
# the configuration. The accumulations are loaded into memory first.
accumulation_fc_loaded = accumulation_fc.load()
anomaly_fc = run_gamma_standardization(
    accumulation_fc_loaded,
    params.hist_anomaly_start,
    params.hist_anomaly_stop,
    members=True,  # the forecasts carry ensemble members
)

accumulation_obs_loaded = accumulation_obs.load()
anomaly_obs = run_gamma_standardization(
    accumulation_obs_loaded,
    params.hist_anomaly_start,
    params.hist_anomaly_stop,
)

  rfh["time"] = [
  dask_gufunc_kwargs={"meta": self._obj.data.astype(dtype)},


**Run bias correction**

In [14]:
# Bias-correct the forecast anomalies against the observed anomalies
# for the monitoring year and the current issue month.
bias_correction_options = dict(
    start_monitoring=params.start_monitoring,
    year=params.monitoring_year,
    issue=int(params.issue),
    nearest_neighbours=8,
    enso=True,
)
index_bc = run_bias_correction(anomaly_fc, anomaly_obs, **bias_correction_options)

**Run probabilities**

In [15]:
# Change dryspell sign as we compare values to a negative threshold to get probabilities
# NOTE(review): `params` is created above with index="SPI" (upper case) while this test
# compares against lower-case "dryspell" — confirm that Params normalises the case,
# otherwise an upper-case "DRYSPELL" would not match here.
# NOTE: the arrays are negated in place, so this cell is not idempotent: re-running it
# for a dryspell index flips the sign back.
if params.index == "dryspell":
    anomaly_fc *= -1
    index_bc *= -1
    anomaly_obs *= -1

In [16]:
# Probabilities without Bias Correction:
# keep only the forecast anomalies of the monitoring year, then derive the
# probabilities for each configured intensity threshold (rounded to 2 decimals).
is_monitoring_year = anomaly_fc.time.dt.year == params.monitoring_year
anomaly_fc_monitoring = anomaly_fc.where(is_monitoring_year, drop=True)

probabilities = compute_probabilities(
    anomaly_fc_monitoring, levels=params.intensity_thresholds
).round(2)
display(probabilities)

In [17]:
# Probabilities after Bias Correction:
# same thresholds as above, applied to the bias-corrected index (rounded to 2 decimals).
probabilities_bc_raw = compute_probabilities(
    index_bc, levels=params.intensity_thresholds
)
probabilities_bc = probabilities_bc_raw.round(2)
display(probabilities_bc)

**Admin-2 level aggregation**

In [18]:
# Aggregate the probabilities without bias correction to district (admin-2) level
probs_district = compute_district_average(probabilities, area)

In [19]:
# Aggregate the bias-corrected probabilities to district (admin-2) level
probs_bc_district = compute_district_average(probabilities_bc, area)

In [20]:
# Build a single xarray object merging the probabilities with and without bias
# correction. The "time" dimension is squeezed out of both inputs first.
probs_district_squeezed = probs_district.squeeze("time")
probs_bc_district_squeezed = probs_bc_district.squeeze("time")

probs_by_district = merge_un_biased_probs(
    probs_district_squeezed,
    probs_bc_district_squeezed,
    params,
    period_name,
)

**Dataframe formatting**

In [21]:
# Merge the district probabilities with the triggers table.
# The "time" variable is dropped from the probabilities before merging.
probs_without_time = probs_by_district.drop_vars("time")
probs_df, merged_df = merge_probabilities_triggers_dashboard(
    probs_without_time,
    triggers_df,
    params,
    period_name,
)

In [22]:
# Display the probabilities dataframe returned by merge_probabilities_triggers_dashboard
probs_df

Unnamed: 0,district,category,issue,index,prob,aggregation
0,Alto_Molocue,Mild,5,SPI ON,0.22,SPI 2
1,Alto_Molocue,Moderate,5,SPI ON,0.20,SPI 2
2,Alto_Molocue,Severe,5,SPI ON,0.16,SPI 2
3,Ancuabe,Mild,5,SPI ON,0.32,SPI 2
4,Ancuabe,Moderate,5,SPI ON,0.27,SPI 2
...,...,...,...,...,...,...
433,Zavala,Moderate,5,SPI ON,0.03,SPI 2
434,Zavala,Severe,5,SPI ON,0.00,SPI 2
435,Zumbu,Mild,5,SPI ON,0.29,SPI 2
436,Zumbu,Moderate,5,SPI ON,0.23,SPI 2


In [23]:
# Display the merged probabilities/triggers dataframe returned by merge_probabilities_triggers_dashboard
merged_df

Unnamed: 0,district,index,category,window,issue_ready,issue_set,trigger_ready,trigger_set,vulnerability,prob_ready,prob_set,season,date_ready,date_set
0,Cahora_Bassa,DRYSPELL AM,Severe,Window 2,11.0,12.0,0.15,0.31,NRT,0.13,0.25,2024-25,2024-11-01,2024-12-01
1,Cahora_Bassa,DRYSPELL FM,Mild,Window 2,9.0,10.0,0.02,0.28,NRT,0.18,0.18,2024-25,2024-09-01,2024-10-01
2,Cahora_Bassa,DRYSPELL FM,Severe,Window 2,9.0,10.0,0.10,0.30,NRT,0.05,0.08,2024-25,2024-09-01,2024-10-01
3,Cahora_Bassa,DRYSPELL JF,Mild,Window 1,8.0,9.0,0.05,0.06,NRT,0.22,0.30,2024-25,2024-08-01,2024-09-01
4,Cahora_Bassa,DRYSPELL JF,Moderate,Window 1,8.0,9.0,0.14,0.05,NRT,0.18,0.28,2024-25,2024-08-01,2024-09-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417,Moamba,SPI NDJ,Moderate,Window 1,7.0,8.0,0.00,0.31,NRT,0.09,0.31,2024-25,2024-07-01,2024-08-01
418,Moamba,SPI NDJ,Severe,Window 1,7.0,8.0,0.19,0.18,NRT,0.09,0.22,2024-25,2024-07-01,2024-08-01
419,Moamba,SPI ON,Mild,Window 1,5.0,6.0,0.27,0.00,NRT,0.28,0.40,2024-25,2024-05-01,2024-06-01
420,Moamba,SPI ON,Moderate,Window 1,5.0,6.0,0.15,0.32,NRT,0.25,0.07,2024-25,2024-05-01,2024-06-01
