# Summarize results
***

**Author**: Chus Casado Rodríguez<br>
**Date**: 20-02-2024<br>

**Introduction**:<br>
This notebook creates a table comparing the optimised criteria and the associated skill for every `leadtime` and `model` defined in the configuration file. If several f-scores were tested, it will load the results of all f-scores and compare them too.

The inputs are the points selected in [notebook 5](5_select_points.ipynb), the confusion matrices calculated in [notebook 4](4_confusion_matrix.ipynb), and the notification criteria optimised in [notebook 6](6_skill.ipynb).

The output of this notebook is a comparative table exported as a CSV file.

In [1]:
import os
path_root = os.getcwd()
import glob
import numpy as np
import pandas as pd
import xarray as xr
# from datetime import datetime, timedelta
from tqdm import tqdm_notebook
import pickle
import yaml
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

# Temporarily switch the working directory to '../py/' to import the project
# modules `config` and `compute`, then go back to the directory saved in `path_root`.
# NOTE(review): presumably this relies on the working directory being on the import path -- confirm.
os.chdir('../py/')
from config import Config
from compute import hits2skill, limit_leadtime, compute_skill
os.chdir(path_root)

## 1 Configuration

In [2]:
# folder that contains the configuration files
config_path = Path('../conf')

# read the configuration of the experiment to be summarised
config_file = config_path / 'config_COMB_leadtime_ranges.yml'
config = Config.load_from_yaml(config_file)

### 1.1 Reporting points

In [3]:
# catchment area threshold; it is part of the name of the file with the selected points
AREA_THRESHOLD = config.reporting_points['area']

# folder and file name of the tables of selected reporting points
PATH_STATIONS = config.reporting_points['output']
FILE_STATIONS = f'reporting_points_selected_{AREA_THRESHOLD}km2.parquet'

# catchments to be analysed, always as a list:
#   * a single name is wrapped in a list
#   * if none is specified, every subfolder in PATH_STATIONS is taken as a catchment
CATCHMENTS = config.reporting_points['catchments']
if isinstance(CATCHMENTS, str):
    CATCHMENTS = [CATCHMENTS]
elif CATCHMENTS is None:
    CATCHMENTS = [entry for entry in os.listdir(PATH_STATIONS) if (PATH_STATIONS / entry).is_dir()]

# KGE value from the configuration (None means that no KGE filter is used in the folder names)
MIN_KGE = config.reporting_points['KGE']

### 1.2 Hits

In [4]:
# type of experiment: individual models (NWP) or combined (COMB)
EXP = config.confusion_matrix['experiment']

# return period that defines the discharge threshold
RP = config.discharge['return_period']['threshold']

# lead time ranges (None if lead times were not split)
LEADTIME = config.confusion_matrix['leadtime']

# rolling window used to compute hits
WINDOW = config.confusion_matrix['window']

# label of the temporal aggregation; it is one of the levels of the folder structure
if LEADTIME is None:
    time_agg = 'all_leadtimes'
else:
    n_leadtimes = len(LEADTIME)
    named_aggregations = {10: 'daily', 20: '12h'}
    if n_leadtimes in named_aggregations:
        time_agg = named_aggregations[n_leadtimes]
    else:
        # any other set of lead times is labelled with its values shifted by 12
        time_agg = '_'.join(str(lt + 12) for lt in LEADTIME)

# folder that contains the NetCDF files with hits, misses and false alarms
PATH_IN = config.confusion_matrix['output'] / f'{RP}/{EXP}/{time_agg}/window_{WINDOW}'

### 1.3 Skill

In [5]:
# current operational criteria
CURRENT_CRITERIA = config.skill['current_criteria']

# fixed notification criteria
MIN_LEADTIME = config.skill['leadtime']
MIN_AREA = config.skill['area']

# folder where the results of the optimisation were saved
kge = 'no_kge' if MIN_KGE is None else f'kge_{MIN_KGE}'
PATH_OUT_ROOT = config.skill['output'] / f'{RP}/{EXP}/{time_agg}/window_{WINDOW}/{kge}'

# coefficients of the fbeta-score: one per subfolder of PATH_OUT_ROOT.
# The folder name is the coefficient preceded by one character, which is skipped.
betas = []
for folder in os.listdir(PATH_OUT_ROOT):
    if not (PATH_OUT_ROOT / folder).is_dir():
        continue
    value = float(folder[1:])
    # keep 1 as an integer, so that the metric is named 'f1' instead of 'f1.0'
    betas.append(1 if value == 1.0 else value)

## 2 Data

### 2.1 Reporting points

I load all the stations that were selected in a previous [notebook](3_0_select_stations.ipynb).

In [6]:
# load the selected reporting points of every catchment.
# The tables are collected in a list and concatenated once, so the cell gives the same
# result every time it is run and does not need to delete a previous `stations`.
frames = []
for catchment in tqdm_notebook(CATCHMENTS):
    file = PATH_STATIONS / catchment / FILE_STATIONS
    # catchments without a file of selected points are skipped
    if file.is_file():
        frames.append(pd.read_parquet(file))
if not frames:
    raise FileNotFoundError(f'No "{FILE_STATIONS}" file was found in the catchment folders of {PATH_STATIONS}')
stations = pd.concat(frames, axis=0)

# mask stations with events
col_events = f'obs_events_{RP}'
stations_w_events = (stations[col_events] > 0)

print('All points')
print('----------')
print(f'no. reporting points:\t\t{stations.shape[0]}')
print('no. stations with events:\t{0}'.format(stations_w_events.sum()))
print('no. observed events:\t\t{0}'.format(stations[col_events].sum()))

# select stations according to catchment area; the filter is only needed if the
# area required for the optimization is larger than the one used to select points
if MIN_AREA > AREA_THRESHOLD:
    stations_optimize = stations.loc[stations.area >= MIN_AREA].index
else:
    stations_optimize = stations.index

print('\nPoints selected for optimization')
print('-------------------------------')
print(f'no. reporting points:\t\t{len(stations_optimize)}')
print('no. stations with events:\t{0}'.format((stations.loc[stations_optimize, col_events] > 0).sum()))
print('no. observed events:\t\t{0}'.format(stations.loc[stations_optimize, col_events].sum()))

# suffix of the files with the optimised criteria
suffix = f'{MIN_AREA}km2_{len(stations_optimize)}points'

  0%|          | 0/327 [00:00<?, ?it/s]

All points
----------
no. reporting points:		1979
no. stations with events:	871
no. observed events:		1406

Points selected for optimization
-------------------------------
no. reporting points:		1239
no. stations with events:	489
no. observed events:		719


### 2.2 Hits, misses and false alarms

In [7]:
# import hits for each station: one NetCDF file per station, concatenated along 'id'
hits_stn = xr.open_mfdataset(f'{PATH_IN}/*.nc', combine='nested', concat_dim='id')

# keep only the stations that have hits.
# A boolean mask is used instead of a set: sets are not accepted as indexers by
# recent versions of pandas, and the mask keeps the original order of the rows.
stations = stations.loc[stations.index.isin(hits_stn.id.data)]
hits_stn = hits_stn.sel(id=stations.index.to_list()).compute()

# convert to NaN lead times that can't be reached due to model limitations or persistence
hits_stn = limit_leadtime(hits_stn, exp=EXP)

# subset of the 'hits' dataset with the stations selected for the optimization,
# aggregated over stations (skipna=False: NaN in any station gives NaN in the total)
stations_optimize = pd.Index(stations_optimize).intersection(pd.Index(hits_stn.id.data), sort=False).to_list()
hits_opt = hits_stn.sel(id=stations_optimize).sum('id', skipna=False)

### 2.3 Optimised criteria

In [9]:
# load the optimised criteria of every f-score into a single dictionary
criteria = {}
for beta in betas:
    metric = f'f{beta}'

    # file with the criteria optimised for this metric and this set of points
    files = glob.glob(f'{PATH_OUT_ROOT}/{metric}/*{suffix}.pkl')
    if not files:
        raise FileNotFoundError(f'No pickle file ending in "{suffix}.pkl" was found in {PATH_OUT_ROOT}/{metric}')
    # NOTE: pickle can run arbitrary code while loading; only open files created by this project
    with open(files[0], 'rb') as f:
        opt_crit = pickle.load(f)

    # lead times defined in the configuration: one set of criteria per lead time
    if LEADTIME is not None:
        for lt, crit in opt_crit.items():
            # remove the entry named after the metric, if present
            crit.pop(metric, None)
            criteria[f'{metric}_{lt}'] = crit
            # add the current criteria as one more entry to compare with
            if EXP == 'COMB':
                criteria[f'{metric}_{lt}']['current'] = CURRENT_CRITERIA

    # no lead times in the configuration: the criteria are nested one level deeper
    else:
        for lt, crit1 in opt_crit.items():
            if EXP == 'COMB':
                # work on a copy, so that the criteria read from the configuration are not modified
                current = dict(CURRENT_CRITERIA)
                current['leadtime'] = lt
                criteria[f'current_{lt}'] = current
            for key2, crit2 in crit1.items():
                # remove the entry named after the metric, if present
                crit2.pop(metric, None)
                crit2['leadtime'] = lt
                criteria[f'{metric}_{lt}_{key2}'] = crit2

## 3 Analysis

In this section I will compute the skill of the EFAS predictions in different ways. In all the following sections I will work with three metrics: $recall$, $precision$ and the $f_{beta}$ score. The three metrics are based on the contingency table of hits ($TP$ for true positives), false alarms ($FP$ for false positives) and misses ($FN$ for false negatives).

$$recall = \frac{TP}{TP + FN}$$
$$precision = \frac{TP}{TP + FP}$$
$$f_{beta} = \frac{(1 + \beta^2) \cdot TP}{(1 + \beta^2) \cdot TP + \beta^2 \cdot FN + FP}$$


### 3.1 Criteria

In [62]:
# stack the criteria in a single table: one block of rows per entry in `criteria`,
# with the key of the entry repeated as index of all its rows
summary = pd.DataFrame(dtype=float)
for label, crtr in criteria.items():
    block = pd.DataFrame(crtr).T
    block.index = [label] * len(block)
    summary = pd.concat((summary, block), axis=0)

# settings that are common to all rows
summary['window'] = WINDOW
summary['KGE'] = MIN_KGE

# the index is made of the objective function and the lead time joined by '_'
index_parts = []
for label in summary.index:
    tokens = label.split('_')
    index_parts.append(tokens if tokens[0] != 'current' else '')
summary[['OF', 'leadtime']] = index_parts
summary['leadtime'] = summary['leadtime'].astype(int)

# keep and sort the columns of interest; lead time is dropped if it wasn't configured
columns = ['model', 'window', 'KGE', 'OF', 'leadtime', 'probability', 'persistence']
if LEADTIME is None:
    columns.remove('leadtime')
summary = summary[columns].reset_index(drop=True)

### 3.2 Hits, misses and false alarms

In [63]:
# add to every row the hits (TP), misses (FN) and false alarms (FP) of its criteria
cols = ['TP', 'FN', 'FP']
summary[cols] = np.nan
selectors = ['model', 'leadtime', 'probability', 'persistence']
for row in range(len(summary)):
    # the criteria of the row select a single combination in the dataset of hits
    crtr = summary.loc[row, selectors].to_dict()
    counts = hits_opt.sel(crtr).to_pandas().astype(int)
    summary.loc[row, cols] = counts

# observed events are those detected plus those missed
summary['no_events'] = summary.TP + summary.FN

### 3.3 Skill

In [64]:
# compute recall, precision and the f-score of every row, once per coefficient beta;
# recall and precision are rewritten in every iteration, each f-score has its own column
tp, fn, fp = summary.TP, summary.FN, summary.FP
for beta in betas:
    recall, precision, fscore = compute_skill(tp, fn, fp, beta=beta)
    skill = pd.concat((recall, precision, fscore), axis=1)
    summary[['recall', 'precision', f'f{beta}']] = skill

### 3.4 Export results

In [65]:
def _acronym(name):
    """Build an acronym with the upper-case initial of every '_'-separated part of `name`."""
    return ''.join(part[0].upper() for part in name.split('_'))


if EXP == 'COMB':
    # simplify model names
    summary['model'] = [_acronym(name) for name in summary['model']]
    # rows with the same model, probability and persistence as the current criteria
    is_current = (
        (summary['model'] == _acronym(CURRENT_CRITERIA['model']))
        & (summary['probability'] == CURRENT_CRITERIA['probability'])
        & (summary['persistence'] == CURRENT_CRITERIA['persistence'])
    )
    summary.loc[is_current, 'model'] = 'current'

# sort rows; lead time is the first key only if it was defined in the configuration
sort_keys = ['model', 'OF'] if LEADTIME is None else ['leadtime', 'model', 'OF']
summary.sort_values(sort_keys, inplace=True)

# export the table as CSV, with floats rounded to 3 decimals
file_out = PATH_OUT_ROOT / 'optimal_skill_and_criteria_by_fscore.csv'
summary.to_csv(file_out, float_format='%.3f', index=False)