### Comparison of final outcomes 

This allows to get an idea of the concrete consequences of a system transition from R to Python. These have been calculated for the year 2023, using the results available at the time of the review, i.e. forecasts issued between May and October 2023. 

In [3]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

%cd ../

c:\Users\amine.barkaoui\OneDrive - World Food Programme\Documents\GitHub\anticipatory-action


In [4]:
df_merged = pd.read_csv(f"data/MOZ/outputs/Fbf_Pilot_MockUp/Python_probabilities_season_triggers.csv").dropna()

In [5]:
# Keep SPI by default and DRYSPELL when not available
df_merged = pd.concat([
    wcd.sort_values('index', ascending=False).head(4)
    for (w, c, d), wcd in df_merged.groupby(['district', 'category', 'Window'])
])

### Python outcomes

In [7]:
df_merged.loc[df_merged.trigger == 'trigger2', 'issue'] = df_merged.loc[df_merged.trigger == 'trigger2'].issue.values - 1

In [8]:
for (d, c, w, ind, iss), pair in df_merged.groupby(['district', 'category', 'Window', 'index', 'issue']):
    activations = pair.sort_values('trigger').prob > pair.sort_values('trigger').trigger_value
    if len(activations) == 1:
        state = 'Ready' * int(activations.iloc[0]) + 'NA' * (1 - int(activations.iloc[0]))
    else:
        state = 'Set' if activations.all() else 'NA'
    df_merged.loc[
        (df_merged.district == d) & (df_merged.Window == w) & (df_merged['index'] == ind) & (df_merged.issue == iss), 'state'
    ] = state

In [9]:
outcomes = pd.DataFrame(columns=["W1-Leve", "W1-Moderado", "W1-Severo", "W2-Leve", "W2-Moderado", "W2-Severo"], index = df_merged['district'].sort_values().unique())
for d, r in outcomes.iterrows():
    val = []
    for w in df_merged['Window'].unique():
        for c in df_merged['category'].unique():
                val.append(df_merged[(df_merged['Window']==w) & (df_merged['category']==c) & (df_merged['district']==d)].state.max())   
    outcomes.loc[d] = val

In [10]:
outcomes.style.format(na_rep="missing so far")

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,,,,Ready,missing so far,missing so far
Changara,Set,Set,,,,
Chemba,,,,Ready,Ready,missing so far
Chibuto,Set,Set,missing so far,Set,Ready,Set
Chicualacuala,Set,,,,missing so far,
Chiure,,,,,missing so far,
Guija,,,,Set,,missing so far
Mabalane,Set,,,Set,Ready,Set
Mapai,Set,Set,Set,Ready,Ready,
Marara,,Set,Set,,missing so far,missing so far


### R outcomes

In [12]:
ref = pd.read_csv(f"AA/data/MOZ/outputs/Fbf_Pilot_MockUp/R_probabilities_season_triggers.csv").dropna()
ref.loc[ref.Trigger_nb == 'trigger2', 'Month'] = ref.loc[ref.Trigger_nb == 'trigger2'].Month.values - 1

In [13]:
for (d, c, w, ind, iss), pair in ref.groupby(['District', 'Category', 'Windows', 'Index', 'Month']):
    activations = pair.sort_values('Trigger_nb').Probability > pair.sort_values('Trigger_nb').Trigger
    if len(activations) == 1:
        state = 'Ready' * int(activations.iloc[0]) + 'NA' * (1 - int(activations.iloc[0]))
    else:
        state = 'Set' if activations.all() else 'NA'
    ref.loc[
        (ref.District == d) & (ref.Category == c) & (ref.Windows == w) & (ref['Index'] == ind) & (ref.Month == iss), 'state'
    ] = state

  ref.loc[


In [14]:
outcomes_R = pd.DataFrame(columns=["W1-Leve", "W1-Moderado", "W1-Severo", "W2-Leve", "W2-Moderado", "W2-Severo"], index = ref['District'].sort_values().unique())
for d, r in outcomes_R.iterrows():
    val = []
    for w in ref['Windows'].unique():
        for c in ref['Category'].unique():
                val.append(ref[(ref['Windows']==w) & (ref['Category']==c) & (ref['District']==d)].state.max())   
    outcomes_R.loc[d] = val

In [39]:
outcomes_R.style.format(na_rep="missing so far")

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,,Set,Set,Ready,,Ready
Changara,,,,missing so far,missing so far,missing so far
Chemba,,,,Ready,,Ready
Chibuto,,Set,,Set,,
Chicualacuala,,Set,,,Set,Set
Chiure,,,,,,missing so far
Guija,,,,Ready,Ready,Set
Mabalane,,,,,Set,Ready
Magude,,,,,Set,
Mapai,Set,Set,Set,,Set,


**Summary**

R: 

- 8 ready states (W2)
- 9 activations for W1 (Caia, Chibuto, Chicualacuala, Mapai, Marara, Massingir)
- 7 activations for W2 so far (Chibuto, Chicualacuala, Guija, Mabalane, Mapai, Massingir)

Python:

- 7 ready states (W2)
- 12 activations for W1 (Changara, Chibuto, Chicualacuala, Mabalane, Mapai, Marara, Massingir)
- 7 activations for W2 so far (Chibuto, Guija, Mabalane, Massingir)

### More detailed comparison

In [220]:
def compare_R_Python_outcomes(df_r, df_python):
    comp = pd.DataFrame(columns=["W1-Leve", "W1-Moderado", "W1-Severo", "W2-Leve", "W2-Moderado", "W2-Severo"], index = df_python.index)
    for d, row in comp.iterrows():
        for col in comp.columns:
            if df_python.loc[d, col] == 'Set' and df_r.loc[d, col] == 'Set':
                comp.loc[d, col] = 'both set'
            elif df_python.loc[d, col] == 'NA' and df_r.loc[d, col] == 'NA':
                comp.loc[d, col] = 'both NA'
            elif df_python.loc[d, col] == 'Ready' and df_r.loc[d, col] == 'Ready':
                comp.loc[d, col] = 'both ready'
            elif df_python.loc[d, col] == 'Set' and df_r.loc[d, col] == 'NA':
                comp.loc[d, col] = 'P set not R'
            elif df_python.loc[d, col] == 'NA' and df_r.loc[d, col] == 'Set':
                comp.loc[d, col] = 'R set not P'
            elif type(df_python.loc[d, col]) is not str or type(df_r.loc[d, col]) is not str:
                comp.loc[d, col] = 'missing so far'
            else: 
                comp.loc[d, col] = 'not complete'

    colors = {'P set not R': 'darkgreen', 'both set': 'mediumseagreen', 'both NA': 'mediumseagreen', 'both ready': 'mediumseagreen', 'R set not P': 'coral', 'missing so far': 'burlywood', 'not complete': 'burlywood'}
    return comp.style.map(lambda val: 'background-color: {}'.format(colors.get(val,'')))

In [221]:
compare_R_Python_outcomes(outcomes_R, outcomes)

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,both NA,R set not P,R set not P,both ready,missing so far,missing so far
Changara,P set not R,P set not R,both NA,missing so far,missing so far,missing so far
Chemba,both NA,both NA,both NA,both ready,not complete,missing so far
Chibuto,P set not R,both set,missing so far,both set,not complete,P set not R
Chicualacuala,P set not R,R set not P,both NA,both NA,missing so far,R set not P
Chiure,both NA,both NA,both NA,both NA,missing so far,missing so far
Guija,both NA,both NA,both NA,not complete,not complete,missing so far
Mabalane,P set not R,both NA,both NA,P set not R,not complete,not complete
Mapai,both set,both set,both set,not complete,not complete,both NA
Marara,both NA,P set not R,both set,missing so far,missing so far,missing so far


*Annex: comparison of selected indexes*

In [225]:
df_merged.type.value_counts()

type
SPI         162
DRYSPELL     31
Name: count, dtype: int64

In [227]:
ref.Type.value_counts()

Type
SPI          165
DRY SPELL     41
Name: count, dtype: int64

In [232]:
ref['Index'] = [f"DRYSPELL {i[3:]}" if i[:4] == 'DRY ' else i for i in ref['Index'].values]

In [240]:
def jaccard_set(list1, list2):
    """Define Jaccard Similarity function for two sets"""
    intersection = len(list(set(list1).intersection(list2)))
    union = (len(list1) + len(list2)) - intersection
    try:
        return float(intersection) / union
    except:
        return 0


Similarity between Index / Period combinations

In [252]:
indexes_comp = pd.DataFrame(columns=["W1-Leve", "W1-Moderado", "W1-Severo", "W2-Leve", "W2-Moderado", "W2-Severo"], index = df_merged['district'].sort_values().unique())
for d, r in indexes_comp.iterrows():
    val = []
    for w in df_merged['Window'].unique():
        for c in df_merged['category'].unique():
            python = df_merged[(df_merged['Window']==w) & (df_merged['category']==c) & (df_merged['district']==d)]['index'].unique()
            r = ref[(ref['Windows']==w) & (ref['Category']==c) & (ref['District']==d)]['Index'].unique()
            if len(python) == 0 or len(r) == 0:
                val.append(-1)
            else:
                val.append(round(jaccard_set(python, r), 2))   
    indexes_comp.loc[d] = val

In [260]:
colors2 = {1: 'darkgreen', 0.5: 'seagreen', 0.33: 'steelblue', 0.25: 'slategrey', 0: 'dimgray', -1: 'burlywood'}
indexes_comp.style.map(lambda val: 'background-color: {}'.format(colors2.get(val,'')))

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,0.33,0.33,0.0,0.0,-1.0,-1.0
Changara,0.33,0.0,0.5,-1.0,-1.0,-1.0
Chemba,1.0,0.33,1.0,0.5,1.0,-1.0
Chibuto,1.0,0.33,-1.0,0.0,0.0,0.5
Chicualacuala,1.0,0.5,0.5,0.5,-1.0,1.0
Chiure,0.33,0.33,0.33,0.0,-1.0,1.0
Guija,0.33,0.33,0.0,0.0,0.0,-1.0
Mabalane,0.5,0.5,0.5,0.25,0.25,0.0
Mapai,1.0,0.5,1.0,0.33,0.33,0.5
Marara,0.33,0.33,0.33,-1.0,-1.0,-1.0


In [243]:
indexes

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,"[SPI NDJ, SPI DJF]","[SPI JF, SPI DJF]","[SPI NDJ, SPI DJ]",[SPI MA],[],[]
Changara,"[SPI NDJ, SPI DJ]","[SPI JF, SPI DJ]","[SPI NDJ, SPI DJF]",[DRYSPELL MA],[DRYSPELL JFM],"[DRYSPELL JFM, DRYSPELL FM]"
Chemba,"[SPI JF, SPI DJ]","[SPI JF, SPI DJF]","[SPI NDJ, SPI DJF]","[SPI MA, DRYSPELL JFM]",[SPI MA],[]
Chibuto,"[SPI OND, SPI NDJ]","[SPI ON, SPI DJ]",[],"[SPI JFM, SPI DJF]","[SPI JFM, DRYSPELL JFM]","[SPI JF, SPI DJF]"
Chicualacuala,"[SPI NDJ, SPI DJ]","[SPI ND, DRYSPELL NDJ]","[SPI DJ, DRYSPELL ON]",[SPI DJF],[],[SPI DJF]
Chiure,"[SPI JF, SPI DJF]","[SPI JF, SPI DJF]","[SPI JF, SPI DJ]","[SPI FMA, DRYSPELL MA, DRYSPELL FMA]",[],[SPI FMA]
Guija,"[SPI OND, SPI NDJ]","[SPI OND, SPI NDJ]","[SPI ON, DRYSPELL NDJ]","[SPI JF, SPI DJF]",[DRYSPELL JF],[]
Mabalane,"[SPI OND, SPI DJ]","[SPI OND, SPI NDJ]","[SPI ON, SPI NDJ]","[SPI JFM, SPI DJF, DRYSPELL DJF]","[SPI JF, DRYSPELL MA, DRYSPELL JF]","[SPI DJF, DRYSPELL MA]"
Mapai,"[SPI NDJ, SPI DJ]","[SPI OND, SPI DJ]","[SPI OND, SPI ND]","[SPI JFM, SPI DJF, DRYSPELL JF]","[SPI DJF, DRYSPELL JF]",[SPI DJF]
Marara,"[SPI NDJ, SPI DJF]","[SPI NDJ, SPI DJ]","[SPI NDJ, SPI DJ]",[DRYSPELL FMA],[],[]


In [123]:
indexes_R

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,[DRY DJ],"[DRY DJ, SPI DJF]","[DRY DJ, SPI DJF]",[SPI MA],[DRY FM],[SPI MA]
Changara,[SPI DJF],"[DRY DJ, SPI NDJ]",[SPI DJF],[],[],[]
Chemba,"[SPI DJF, SPI NDJ]","[SPI DJ, SPI JF]","[DRY DJ, SPI JF]",[SPI MA],[SPI MA],[SPI MA]
Chibuto,[SPI ON],"[SPI NDJ, SPI OND]","[SPI DJ, SPI ND]",[SPI DJF],[DRY DJ],[DRY DJ]
Chicualacuala,[SPI DJ],"[SPI DJ, SPI NDJ]",[SPI ND],[SPI DJF],"[SPI DJF, SPI JF]",[SPI DJF]
Chiure,"[DRY DJ, SPI DJ]","[SPI DJ, SPI DJF]","[DRY DJ, SPI DJF]",[SPI FMA],[DRY MA],[]
Guija,[SPI OND],"[SPI ON, SPI OND]","[SPI NDJ, SPI ON]","[DRY FM, SPI JF]","[DRY MA, SPI MA]",[SPI JFM]
Mabalane,[SPI NDJ],[SPI OND],[SPI OND],[SPI JF],"[DRY DJ, SPI DJF]","[DRY DJ, SPI JF]"
Magude,"[SPI DJ, SPI ON]",[SPI ON],[SPI ON],"[SPI DJF, SPI JF]",[SPI JFM],[DRY DJ]
Mapai,"[SPI ND, SPI OND]","[SPI DJ, SPI NDJ]",[SPI DJ],"[DRY DJ, SPI DJF]",[SPI DJF],"[DRY DJ, SPI DJF]"


Similarity between Index / Period / Issue combinations

In [267]:
indexes_comp2 = pd.DataFrame(columns=["W1-Leve", "W1-Moderado", "W1-Severo", "W2-Leve", "W2-Moderado", "W2-Severo"], index = df_merged['district'].sort_values().unique())
for d, r in indexes_comp2.iterrows():
    val = []
    for w in df_merged['Window'].unique():
        for c in df_merged['category'].unique():
            python2 = df_merged[(df_merged['Window']==w) & (df_merged['category']==c) & (df_merged['district']==d)].sort_values('index')
            r2 = ref[(ref['Windows']==w) & (ref['Category']==c) & (ref['District']==d)].sort_values('Index')
            if len(python2) == 0 or len(r) == 0:
                val.append(-1)
            else:
                val.append(round(jaccard_set(list(zip(python2['index'].unique(), python2['issue'].unique())), list(zip(r2['Index'].unique(), r2['Month'].unique()))), 2))   
    indexes_comp2.loc[d] = val

In [269]:
indexes_comp2.style.map(lambda val: 'background-color: {}'.format(colors2.get(val,'')))

Unnamed: 0,W1-Leve,W1-Moderado,W1-Severo,W2-Leve,W2-Moderado,W2-Severo
Caia,0.33,0.0,0.0,0.0,-1.0,-1.0
Changara,0.0,0.0,0.0,0.0,0.0,0.0
Chemba,0.0,0.0,0.0,0.0,1.0,-1.0
Chibuto,0.0,0.0,-1.0,0.0,0.0,1.0
Chicualacuala,0.0,0.5,0.5,0.0,-1.0,0.0
Chiure,0.5,0.0,0.5,0.0,-1.0,1.0
Guija,0.0,0.0,0.0,0.0,0.0,-1.0
Mabalane,0.5,0.5,0.5,0.0,0.0,0.0
Mapai,1.0,0.5,1.0,0.0,0.33,0.5
Marara,0.5,0.33,0.33,0.0,-1.0,-1.0
