# Summarize skill results
***

**Author**: Chus Casado Rodríguez<br>
**Date**: 28-06-2023<br>


**Introduction**:<br>
This notebook creates a table that compares the skill of the different notification criteria, each of them optimized for a different $f_{\beta}$ score.

In [1]:
import os
path_root = os.getcwd()
import glob
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
import pickle
import yaml

import warnings
# silence every warning raised from this point on
warnings.filterwarnings("ignore")

# `compute` is imported from '../py/': the working directory is moved there for the
# import and restored afterwards to `path_root` (the directory the notebook started in)
os.chdir('../py/')
from compute import hits2skill, limit_leadtime
os.chdir(path_root)

## 1 Configuration

In [2]:
# read the YAML configuration file into the dictionary `cfg`
with open('../conf/config.yml', mode='r', encoding='utf8') as config_file:
    cfg = yaml.load(config_file, Loader=yaml.FullLoader)

### 1.1 Reporting points

In [3]:
# section of the configuration devoted to the reporting points (empty if missing)
cfg_points = cfg.get('reporting_points', {})

# catchment area threshold (km2) that identifies the table of reporting points
area_threshold = cfg_points.get('area', 500)

# file with the table of reporting points
path_stations = cfg_points.get('output', '../results/reporting_points/')
file_stations = f'{path_stations}reporting_points_over_{area_threshold}km2.parquet'

# catchments to be analysed (None means no filter by catchment)
catchments = cfg_points.get('catchments', None)

# minimum performance (KGE) required from the reporting points (None means no filter by KGE)
min_kge = cfg_points.get('KGE', None)

### 1.2 Hits

In [4]:
# section of the configuration devoted to the computation of hits (empty if missing)
cfg_hits = cfg.get('hits', {})

# lead time ranges
leadtime = cfg_hits.get('leadtime', None)

# width of the rolling window used to compute hits
window = cfg_hits.get('window', 1)

# disaggregate the analysis by seasons?
seasonality = cfg_hits.get('seasonality', False)

# folder that contains the NetCDF files with hits, misses and false alarms;
# there is one subfolder for each rolling window
path_in = '{0}window_{1}/'.format(cfg_hits.get('output', '../results/hits/'), window)

### 1.3 Skill

In [5]:
# section of the configuration devoted to the skill assessment (empty if missing)
cfg_skill = cfg.get('skill', {})

# current operational criteria
current_criteria = cfg_skill.get('current_criteria', None)

# fixed notification criteria: lead time and minimum catchment area (km2)
min_leadtime = cfg_skill.get('leadtime', 60)
min_area = cfg_skill.get('area', 2000)

# path where results will be saved: one subfolder for each rolling window and KGE threshold
path_out = cfg_skill.get('output', '../results/skill/')
kge_folder = f'kge_{min_kge}' if min_kge is not None else 'no_kge'
path_out = f'{path_out}window_{window}/{kge_folder}/'

# coefficients of the fbeta-score: one for each 'f<beta>' subfolder found in `path_out`
betas = []
for item in os.listdir(path_out):
    if not (item.startswith('f') and os.path.isdir(f'{path_out}{item}')):
        continue
    try:
        beta = float(item[1:])
    except ValueError:
        # folder that starts with 'f' but is not named 'f<beta>' (e.g. 'figures')
        continue
    # whole coefficients are kept as integers so that f'f{beta}' rebuilds the
    # name of the folder ('f1', 'f2'...) instead of 'f1.0', 'f2.0'...
    betas.append(int(beta) if beta.is_integer() else beta)
# os.listdir returns the folders in arbitrary order; sort for reproducible tables
betas.sort()

## 2 Data

### 2.1 Reporting points

I load all the stations that were selected in a previous [notebook](3_0_select_stations.ipynb).

In [6]:
# load the table of fixed reporting points; coordinates and area are stored as integers
int_cols = ['X', 'Y', 'area']
stations = pd.read_parquet(file_stations)
stations[int_cols] = stations[int_cols].astype(int)

# keep only the stations that belong to the selected catchments
if catchments is not None:
    if not isinstance(catchments, list):
        # a single catchment was given: wrap it in a list so that `isin` can be used
        catchments = [catchments]
    stations = stations.loc[stations.catchment.isin(catchments), :]

if min_kge is not None:
    # remove points with a performance (KGE) lower than or equal to the threshold;
    # the negated comparison keeps the points whose KGE is missing
    mask_kge = ~(stations.KGE <= min_kge)
    stations = stations.loc[mask_kge]
else:
    # without KGE filter, points with 6 or more observed events are discarded
    # as erroneous behaviour
    stations = stations.loc[~(stations.n_events_obs >= 6)]

# boolean mask of the stations with at least one observed event
stations_w_events = stations.n_events_obs > 0

print('All points')
print('----------')
print(f'no. reporting points:\t\t{len(stations)}')
print(f'no. stations with events:\t{stations_w_events.sum()}')
print(f'no. observed events:\t\t{stations.n_events_obs.sum()}')

# stations used in the optimization: those with a catchment area of at least `min_area`
# (all of them if `min_area` does not exceed the area threshold of the table)
stations_optimize = stations.index
if min_area > area_threshold:
    stations_optimize = stations.loc[stations.area >= min_area].index

# observed events in the stations selected for the optimization
n_events_opt = stations.loc[stations_optimize, 'n_events_obs']

print('\nPoints selected for otimization')
print('-------------------------------')
print(f'no. reporting points:\t\t{len(stations_optimize)}')
print(f'no. stations with events:\t{(n_events_opt > 0).sum()}')
print(f'no. observed events:\t\t{n_events_opt.sum()}')

# suffix that will be used when saving plots
suffix = f'{min_area}km2_{len(stations_optimize)}points'

All points
----------
no. reporting points:		1979
no. stations with events:	831
no. observed events:		1264

Points selected for otimization
-------------------------------
no. reporting points:		1239
no. stations with events:	480
no. observed events:		678


### 2.2 Hits, misses and false alarms

In [16]:
# import hits for each station: one NetCDF file per station, concatenated along 'id'
hits_stn = xr.open_mfdataset(f'{path_in}*.nc', combine='nested', concat_dim='id')

# extract the selected stations and load them in memory
hits_stn = hits_stn.sel(id=stations.index.to_list()).compute()

# convert to NaN lead times that can't be reached due to model limitations or persistence
hits_stn = limit_leadtime(hits_stn)

# subset of the 'hits' dataset with the stations selected for the optimization,
# limited to the fixed lead time if there is one, and aggregated over stations;
# skipna=False propagates the NaN to the total
selection = {'id': stations_optimize}
if min_leadtime is not None:
    selection['leadtime'] = min_leadtime
hits_opt = hits_stn.sel(selection).sum('id', skipna=False)

# dimension of the dataset that identifies the alternatives to be compared
dim = next((name for name in ('approach', 'model') if name in hits_stn.dims), None)
if dim is None:
    # fail here instead of with a NameError in a later cell
    raise ValueError(f"The hits dataset has neither an 'approach' nor a 'model' dimension: {list(hits_stn.dims)}")

## 3 Analysis

In this section I compute the skill of the EFAS predictions in different ways. All the following sections use three metrics: $recall$, $precision$ and the $f_{\beta}$ score. The three metrics are based on the contingency table of hits ($TP$, true positives), false alarms ($FP$, false positives) and misses ($FN$, false negatives).

$$recall = \frac{TP}{TP + FN}$$
$$precision = \frac{TP}{TP + FP}$$
$$f_{\beta} = \frac{(1 + \beta^2) \cdot TP}{(1 + \beta^2) \cdot TP + \beta^2 \cdot FN + FP}$$


### 3.2 Compare approaches
#### 3.2.1 Import optimized criteria

In [17]:
# notification criteria to be compared; the key of each entry becomes its label in the summary
criteria = {}

# values of the dimension `dim`, used to find out how the optimized criteria are organised
dim_values = set(hits_stn[dim].values.tolist())

for beta in betas:
    metric = f'f{beta}'

    # file with the criteria optimized for this metric
    files = sorted(glob.glob(f'{path_out}{metric}/*{suffix}.pkl'))
    if not files:
        raise FileNotFoundError(f'No file "*{suffix}.pkl" was found in {path_out}{metric}/')
    # NOTE: unpickling can run arbitrary code; only load files created by this project
    with open(files[0], 'rb') as pkl:
        opt_crit = pickle.load(pkl)

    # if criteria was fitted for a single lead time value, the keys are values of `dim`.
    # Sets are compared because comparing a list with a DataArray raises an
    # error when their lengths differ.
    if set(opt_crit).issubset(dim_values):
        if dim == 'approach' and current_criteria is not None:
            criteria['current'] = dict(current_criteria)
        for key, crit in opt_crit.items():
            # the optimized value of the metric is not a selection criterion
            if metric in crit:
                del crit[metric]
            criteria[f'{metric}_{key}'] = crit

    # if criteria was fitted for several lead time ranges, the keys are lead times
    else:
        for lt, crit1 in opt_crit.items():
            if dim == 'approach' and current_criteria is not None:
                # new dictionary, so the configured criteria are not modified
                criteria[f'current_{lt}'] = {**current_criteria, 'leadtime': lt}
            for key2, crit2 in crit1.items():
                if metric in crit2:
                    del crit2[metric]
                crit2['leadtime'] = lt
                criteria[f'{metric}_{lt}_{key2}'] = crit2

#### 3.2.2 Compare approaches

In [19]:
# table of criteria: one row for each entry in `criteria`
summary_criteria = pd.concat(
    [pd.DataFrame(crtr, index=[name]) for name, crtr in criteria.items()],
    axis=0,
)
if dim == 'approach':
    # abbreviate the approach to the uppercase initials of its words,
    # e.g. 'brier_weighted' becomes 'BW'
    summary_criteria[dim] = [
        ''.join(word[0].upper() for word in app.split('_'))
        for app in summary_criteria[dim]
    ]
summary_criteria['window'] = window
summary_criteria['KGE'] = min_kge
# objective function: the prefix of the row label, left empty for the current criteria
prefixes = [name.split('_')[0] for name in summary_criteria.index]
summary_criteria['OF'] = ['' if prefix == 'current' else prefix for prefix in prefixes]
# the lead time is only reported when lead time ranges are defined in the configuration
id_cols = [dim, 'window', 'KGE', 'OF']
if leadtime is not None:
    id_cols.append('leadtime')
summary_criteria = summary_criteria[id_cols + ['probability', 'persistence']]

# hits, misses and false alarms obtained with each criteria
summary_hits = pd.DataFrame(
    {name: hits_opt.sel(crtr).to_pandas() for name, crtr in criteria.items()}
).T.astype(int)
tp, fn, fp = (summary_hits[col] for col in ('TP', 'FN', 'FP'))
summary_hits['no_events'] = tp + fn

# skill: recall, precision and one fbeta-score for each coefficient in `betas`
summary_hits['recall'] = tp / (tp + fn)
summary_hits['precision'] = tp / (tp + fp)
for beta in betas:
    weighted_tp = (1 + beta**2) * tp
    summary_hits[f'f{beta}'] = weighted_tp / (weighted_tp + beta**2 * fn + fp)

# join criteria and skill in a single table and sort it
summary = pd.concat((summary_criteria, summary_hits), axis=1)
sort_cols = [dim, 'OF'] if leadtime is None else [dim, 'leadtime', 'OF']
summary = summary.sort_values(sort_cols)

summary

Unnamed: 0,approach,window,KGE,OF,probability,persistence,TP,FN,FP,no_events,recall,precision,f0.8,f1,f1.2,f1.25
current,1D+1P,1,0.5,,0.3,3/3,260,418,182,678,0.383481,0.588235,0.486802,0.464286,0.44729,0.44376
f0.8_1_deterministic_+_1_probabilistic,1D+1P,1,0.5,f0.8,0.65,1/1,275,403,166,678,0.405605,0.623583,0.515476,0.49151,0.473429,0.469674
f1_1_deterministic_+_1_probabilistic,1D+1P,1,0.5,f1,0.425,1/1,333,345,299,678,0.49115,0.526899,0.512346,0.508397,0.505198,0.504508
f1.2_1_deterministic_+_1_probabilistic,1D+1P,1,0.5,f1.2,0.4,1/1,347,331,342,678,0.511799,0.503628,0.506786,0.507681,0.508419,0.508579
f1.25_1_deterministic_+_1_probabilistic,1D+1P,1,0.5,f1.25,0.4,1/1,347,331,342,678,0.511799,0.503628,0.506786,0.507681,0.508419,0.508579
f0.8_brier_weighted,BW,1,0.5,f0.8,0.65,1/1,257,421,155,678,0.379056,0.623786,0.49825,0.47156,0.451683,0.447583
f1_brier_weighted,BW,1,0.5,f1,0.225,3/3,316,362,333,678,0.466077,0.486903,0.478558,0.476262,0.474393,0.473988
f1.2_brier_weighted,BW,1,0.5,f1.2,0.225,3/3,316,362,333,678,0.466077,0.486903,0.478558,0.476262,0.474393,0.473988
f1.25_brier_weighted,BW,1,0.5,f1.25,0.225,3/3,316,362,333,678,0.466077,0.486903,0.478558,0.476262,0.474393,0.473988
f0.8_model_mean,MM,1,0.5,f0.8,0.375,3/3,259,419,221,678,0.382006,0.539583,0.464767,0.447323,0.433943,0.431141


In [20]:
# export the summary as CSV, without the row labels and with 3 decimals
file_summary = f'{path_out}skill_by_criteria.csv'
summary.to_csv(file_summary, float_format='%.3f', index=False)

***

In [None]:
# lead time and objective function of the criteria to be inspected
lt = 60
OF = 'f0.8'

# rows of the summary optimized for the objective function `OF`
mask = summary.OF == OF
# `summary` only has a 'leadtime' column when lead time ranges are set in the configuration
if 'leadtime' in summary.columns:
    mask = mask & (summary.leadtime == lt)

# index by the compared dimension (`dim` is either 'approach' or 'model')
df = summary.loc[mask].set_index(dim)

df

In [None]:
# probability threshold and skill of the criteria optimized for `OF`;
# the plotted f-score follows `OF` instead of being hard-coded
ax = df[['probability', OF, 'recall', 'precision']].plot()
ax.set_title(f'Criteria optimized for {OF}');

***

In [None]:
# synthetic trade-off: recall grows from 0.1 to 0.9 while precision falls from 0.9 to 0.1.
# np.linspace gives exactly 9 values; np.arange with a float step needed a padded endpoint
recall = np.linspace(.1, .9, 9)
df = pd.DataFrame({'recall': recall, 'precision': recall[::-1]}, dtype=float)

# fbeta-score expressed in terms of precision and recall
for b in [.8, 1, 1.25]:
    df[f'f{b}'] = (1 + b**2) * df.precision * df.recall / (b**2 * df.precision + df.recall)

# f-scores alone, and then together with recall and precision
ax = df.drop(['recall', 'precision'], axis=1).plot()
ax.set_title('f-scores along the recall-precision trade-off')
ax = df.plot()
ax.set_title('Recall, precision and f-scores')

df

***

```Python
# scratch analysis (not executed): compare the hits of two approaches, station by station
hits_stn

# hits of the approach '1_deterministic_+_1_probabilistic' for a fixed criteria
hits_1D1P = hits_stn.sel(approach='1_deterministic_+_1_probabilistic',
                         probability=0.375,
                         persistence='1/1',
                         leadtime=min_leadtime)

# hits of the approach 'brier_weighted' for the same criteria
hits_BW = hits_stn.sel(approach='brier_weighted',
                       probability=0.375,
                       persistence='1/1',
                         leadtime=min_leadtime)

# difference between approaches: positive values mean larger counts in 'brier_weighted'
hits_diff = hits_BW - hits_1D1P

# difference in true positives as a pandas object
tp = hits_diff['TP'].to_pandas()

# entries where both approaches have the same number of true positives
tp[tp == 0]

# entries where 'brier_weighted' has fewer true positives
tp[tp < 0]

# rivers of the stations where 'brier_weighted' has more true positives
stations[tp > 0].value_counts('river')

# catchments of the stations where 'brier_weighted' has more true positives
stations[tp > 0].value_counts('catchment')

# those same stations, limited to the catchment 'Rhine'
stations[tp > 0][stations.catchment == 'Rhine'].sort_values(['catchment', 'subcatchment', 'river'])

```