# Summarize skill results
***

**Author**: Chus Casado Rodríguez<br>
**Date**: 12-06-2023<br>


**Introduction**:<br>
This notebook creates a table comparing the skill of the diverse notification criteria optimized for different f-scores.

In [1]:
import os
path_root = os.getcwd()
import glob
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
import pickle
import yaml

import warnings
warnings.filterwarnings("ignore")

os.chdir('../py/')
from compute import hits2skill
os.chdir(path_root)

## 1 Configuration

In [2]:
with open("../conf/config.yml", "r", encoding='utf8') as ymlfile:
    cfg = yaml.load(ymlfile, Loader=yaml.FullLoader)

### 1.1 Reporting points

In [3]:
# area threshold
area_threshold = cfg.get('reporting_points', {}).get('area', 500)

# reporting points
path_stations = cfg.get('reporting_points', {}).get('output', '../results/reporting_points/')
file_stations = f'{path_stations}reporting_points_over_{area_threshold}km2.parquet'

# catchments
catchments = cfg.get('reporting_points', {}).get('catchments', None)

### 1.2 Hits

In [4]:
# dissagregate the analysis by seasons?
seasonality = cfg.get('hits', {}).get('seasonality', False)

# path that contains the NetCDFs with hit, misses and false alarms pro
path_in = cfg.get('hits', {}).get('output', f'../results/hits/')

### 1.3 Skill

In [6]:
# current operationa criteria
current_criteria = cfg.get('skill', {}).get('current_criteria', None)

# fixed notification criteria
min_leadtime = cfg.get('skill', {}).get('leadtime', 60) 
min_area = cfg.get('skill', {}).get('area', 2000) 

# coefficient of the fbeta-score
betas = [0.8, 1, 1.2]

# optimization parameters
kfold = cfg.get('skill', {}).get('optimization', {}).get('kfold', None)
train_size = cfg.get('skill', {}).get('optimization', {}).get('train_size', .8)
tolerance = cfg.get('skill', {}).get('optimization', {}).get('tolerance', 1e-2)
min_spread = cfg.get('skill', {}).get('optimization', {}).get('minimize_spread', True)

# path where results will be saved
path_out = cfg.get('skill', {}).get('output', f'../results/skill/')

## 2 Data

### 2.1 Reporting points

I load all the stations that where selected in a previous [notebook](3_0_select_stations.ipynb).

In [7]:
# load table of fixed reporting points
stations = pd.read_parquet(file_stations)
stations[['X', 'Y', 'area']] = stations[['X', 'Y', 'area']].astype(int)

# select stations that belong to the selected catchments
if catchments is not None:
    if isinstance(catchments, list) is False:
        catchments = [catchments]
    stations = stations.loc[stations.catchment.isin(catchments),:]

# remove station with erroneous behaviour
stations = stations.loc[~(stations.n_events_obs >= 6)]

# mask stations with events
stations_w_events = (stations.n_events_obs > 0)

print('All points')
print('----------')
print(f'no. reporting points:\t\t{stations.shape[0]}')
print('no. stations with events:\t{0}'.format(stations_w_events.sum()))
print('no. observed events:\t\t{0}'.format(stations.n_events_obs.sum()))

# select stations according to catchment area
if min_area > area_threshold:
    stations_optimize = stations.loc[stations.area >= min_area].index
else:
    stations_optimize = stations.index

print('\nPoints selected for otimization')
print('-------------------------------')
print(f'no. reporting points:\t\t{len(stations_optimize)}')
print('no. stations with events:\t{0}'.format((stations.loc[stations_optimize, 'n_events_obs'] > 0).sum()))
print('no. observed events:\t\t{0}'.format(stations.loc[stations_optimize, 'n_events_obs'].sum()))

# suffix that will be used when saving plots
suffix = f'{min_area}km2_{len(stations_optimize)}points'

All points
----------
no. reporting points:		2357
no. stations with events:	980
no. observed events:		1469

Points selected for otimization
-------------------------------
no. reporting points:		1424
no. stations with events:	538
no. observed events:		748


### 2.2 Hits, misses and false alarms

In [8]:
# import hits for each station
hits_stn = xr.open_mfdataset(f'{path_in}*.nc', combine='nested', concat_dim='id')
# reorder persistences
hits_stn = hits_stn.sel(persistence=['1/1', '2/4', '2/3', '2/2', '3/4', '3/3'])
# extract selected stations
hits_stn = hits_stn.sel(id=stations.index.to_list()).compute()

# convert to NaN values at long leadtimes for which the persistence criteria is impossible to meet
hits_stn = hits_stn.astype(float)
for persistence in hits_stn.persistence.data:
    last_leadtime = int(persistence.split('/')[0]) - 1
    if last_leadtime > 0:
        hits_stn.sel(persistence=persistence)[dict(leadtime=slice(-last_leadtime, None))] = np.nan

# subset of the 'hits' dataset with the stations selected for the optimization
hits_opt = hits_stn.sel(id=stations_optimize).sum('id', skipna=False)

## 3 Analysis

In this section I will compute the skill of the EFAS predictions in different ways. In all the following sections I will work with three metrics: $recall$, $precision$ and the $f_{beta}$ score. The three metrics are based in the contingency table of hits ($TP$ for true positives), false alarms ($FP$ for false positives) and misses ($FN$ for false negatives).

$$recall = \frac{TP}{TP + FN}$$
$$precision = \frac{TP}{TP + FP}$$
$$f_{beta} = \frac{(1 + \beta^2) \cdot TP}{(1 + \beta^2) \cdot TP + \beta^2 \cdot FN + FP}$$


### 3.2 Compare approaches
#### 3.2.1 Import optimize criteria

In [10]:
criteria = {'current': {'approach': '1_deterministic_+_1_probabilistic',
                   'probability': 0.3,
                   'persistence': '3/3'}}
for beta in betas:
    metric = f'f{beta}'
    file = glob.glob(f'{path_out}{metric}/*{suffix}.pkl')[0]
    opt_crit = pickle.load(open(file, 'rb'))
    for app, crit in opt_crit.items():
        if metric in crit:
            del crit[metric]
        criteria[f'{metric}_{app}'] = crit

#### 3.2.2 Compare approaches

In [12]:
# transform criteria into a DataFrame
summary_criteria = pd.concat([pd.DataFrame(crtr, index=[i]) for i, crtr in criteria.items()], axis=0)
summary_criteria.approach = [''.join([x[0].upper() for x in app.split('_')]) for app in summary_criteria.approach]
summary_criteria['OF'] = [x.split('_')[0] if x != 'current' else '' for x in summary_criteria.index]
summary_criteria = summary_criteria[['approach', 'OF', 'probability', 'persistence']]

In [13]:
# compute hits, misses and false alarms
summary_hits = pd.DataFrame({i: hits_opt.sel(leadtime=min_leadtime).sel(crtr).to_pandas() for i, crtr in criteria.items()}).transpose()
summary_hits = summary_hits.astype(int)
summary_hits['no_events'] = summary_hits.TP + summary_hits.FN

In [14]:
# compute skill
summary_hits['recall'] = summary_hits.TP / (summary_hits.TP + summary_hits.FN)
summary_hits['precision'] = summary_hits.TP / (summary_hits.TP + summary_hits.FP)
for beta in [0.8, 1, 1.2]:
    summary_hits[f'f{beta}'] = (1 + beta**2) * summary_hits.TP / ((1 + beta**2) * summary_hits.TP + summary_hits.FN + summary_hits.FP)

In [15]:
# concat criteria, hits and summary data frames
summary = pd.concat((summary_criteria, summary_hits), axis=1)
summary.sort_values('approach', inplace=True)

summary

Unnamed: 0,approach,OF,probability,persistence,TP,FN,FP,no_events,recall,precision,f0.8,f1,f1.2
current,1D+1P,,0.3,3/3,280,468,182,748,0.374332,0.606061,0.413992,0.46281,0.512451
f0.8_1_deterministic_+_1_probabilistic,1D+1P,f0.8,0.425,1/1,353,395,291,748,0.471925,0.548137,0.457673,0.507184,0.556653
f1_1_deterministic_+_1_probabilistic,1D+1P,f1,0.4,1/1,368,380,332,748,0.491979,0.525714,0.458769,0.508287,0.557742
f1.2_1_deterministic_+_1_probabilistic,1D+1P,f1.2,0.375,1/1,368,380,347,748,0.491979,0.514685,0.453597,0.503076,0.552593
f0.8_brier_weighted,BW,f0.8,0.525,1/1,330,418,277,748,0.441176,0.543657,0.437793,0.487085,0.536728
f1_brier_weighted,BW,f1,0.4,1/1,384,364,472,748,0.513369,0.448598,0.429647,0.478803,0.528472
f1.2_brier_weighted,BW,f1.2,0.35,1/1,415,333,602,748,0.554813,0.408063,0.421268,0.470255,0.519922
f0.8_model_mean,MM,f0.8,0.35,3/4,305,443,272,748,0.407754,0.528596,0.411619,0.460377,0.510005
f1_model_mean,MM,f1,0.1,3/3,397,351,529,748,0.530749,0.428726,0.425242,0.474313,0.523985
f1.2_model_mean,MM,f1.2,0.1,3/3,397,351,529,748,0.530749,0.428726,0.425242,0.474313,0.523985


In [16]:
# export
summary.to_csv(f'{path_out}skill_by_criteria.csv', float_format='%.3f', index=False)