# Summarize skill results
***

**Author**: Chus Casado Rodríguez<br>
**Date**: 28-06-2023<br>


**Introduction**:<br>
This notebook creates a table that compares the skill of the different notification criteria, each of them optimized for a different f-score.

In [1]:
import os
path_root = os.getcwd()
import glob
import numpy as np
import pandas as pd
import xarray as xr
from datetime import datetime, timedelta
import pickle
import yaml

import warnings
warnings.filterwarnings("ignore")

os.chdir('../py/')
from compute import hits2skill, limit_leadtime
os.chdir(path_root)

## 1 Configuration

In [2]:
# read the configuration file
with open("../conf/config.yml", "r", encoding='utf8') as ymlfile:
    # `safe_load` only builds plain Python objects (dict, list, str, numbers...), which is
    # all that the `cfg.get(...)` calls in the following cells need. An empty file is
    # parsed as None, so fall back to an empty dictionary and let every option take
    # its default value
    cfg = yaml.safe_load(ymlfile) or {}

### 1.1 Reporting points

In [3]:
# options in the 'reporting_points' section of the configuration
cfg_points = cfg.get('reporting_points', {})

# area threshold that identifies the file of reporting points
area_threshold = cfg_points.get('area', 500)

# file with the table of reporting points
path_stations = cfg_points.get('output', '../results/reporting_points/')
file_stations = f'{path_stations}reporting_points_over_{area_threshold}km2.parquet'

# catchments to be analysed (None if not set in the configuration)
catchments = cfg_points.get('catchments', None)

# minimum performance (KGE) required from the reporting points (None if not set)
min_kge = cfg_points.get('KGE', None)

### 1.2 Hits

In [4]:
# options in the 'hits' section of the configuration
cfg_hits = cfg.get('hits', {})

# lead time ranges
leadtime = cfg_hits.get('leadtime', None)

# rolling window used to compute hits
window = cfg_hits.get('window', 1)

# disaggregate the analysis by seasons?
seasonality = cfg_hits.get('seasonality', False)

# folder that contains the NetCDF files with hits, misses and false alarms for that window
path_hits = cfg_hits.get('output', '../results/hits/')
path_in = f'{path_hits}window_{window}/'

### 1.3 Skill

In [5]:
# options in the 'skill' section of the configuration
cfg_skill = cfg.get('skill', {})

# current operational criteria
current_criteria = cfg_skill.get('current_criteria', None)

# fixed notification criteria
min_leadtime = cfg_skill.get('leadtime', 60)
min_area = cfg_skill.get('area', 2000)

# path where results will be saved; it depends on the rolling window and the KGE threshold
path_out = cfg_skill.get('output', '../results/skill/')
if min_kge is not None:
    path_out = f'{path_out}window_{window}/kge_{min_kge}/'
else:
    path_out = f'{path_out}window_{window}/no_kge/'

# coefficients of the fbeta-score, read from the names of the subfolders 'f<beta>' in `path_out`
betas = []
for item in os.listdir(path_out):
    if not (item.startswith('f') and os.path.isdir(f'{path_out}{item}')):
        continue
    try:
        beta = float(item[1:])
    except ValueError:
        # a folder that starts with 'f' but is not named after a beta value
        continue
    # whole-number betas are kept as integers so that the metric name built from them
    # (f'f{beta}') matches the folder name, e.g. 'f1' instead of 'f1.0'
    betas.append(int(beta) if beta.is_integer() else beta)
# `os.listdir` returns entries in arbitrary order; sort to get reproducible results
betas.sort()

## 2 Data

### 2.1 Reporting points

I load all the stations that were selected in a previous [notebook](3_0_select_stations.ipynb).

In [6]:
# table of fixed reporting points, with coordinates and area converted to integers
stations = pd.read_parquet(file_stations)
int_cols = ['X', 'Y', 'area']
stations[int_cols] = stations[int_cols].astype(int)

# keep only the stations in the selected catchments (a single catchment is wrapped in a list)
if catchments is not None:
    if not isinstance(catchments, list):
        catchments = [catchments]
    in_catchments = stations.catchment.isin(catchments)
    stations = stations.loc[in_catchments, :]

if min_kge is not None:
    # discard the points with a performance (KGE) lower than or equal to the threshold;
    # the comparison is negated, so points without a KGE value (NaN) are kept
    mask_kge = ~(stations.KGE <= min_kge)
    stations = stations.loc[mask_kge]
else:
    # discard the stations with erroneous behaviour
    stations = stations.loc[~(stations.n_events_obs >= 6)]

# mask of the stations with at least one observed event
stations_w_events = (stations.n_events_obs > 0)

# summary of the complete set of points
n_points = stations.shape[0]
n_points_w_events = stations_w_events.sum()
n_events = stations.n_events_obs.sum()
print('All points')
print('----------')
print(f'no. reporting points:\t\t{n_points}')
print('no. stations with events:\t{0}'.format(n_points_w_events))
print('no. observed events:\t\t{0}'.format(n_events))

# points used in the optimization: filter by catchment area only if the minimum area
# is stricter than the threshold already applied to the table of reporting points
stations_optimize = stations.loc[stations.area >= min_area].index if min_area > area_threshold else stations.index

# summary of the points selected for the optimization
events_optimize = stations.loc[stations_optimize, 'n_events_obs']
print('\nPoints selected for otimization')
print('-------------------------------')
print(f'no. reporting points:\t\t{len(stations_optimize)}')
print('no. stations with events:\t{0}'.format((events_optimize > 0).sum()))
print('no. observed events:\t\t{0}'.format(events_optimize.sum()))

# suffix that will be used when saving plots
suffix = f'{min_area}km2_{len(stations_optimize)}points'

All points
----------
no. reporting points:		1979
no. stations with events:	831
no. observed events:		1264

Points selected for otimization
-------------------------------
no. reporting points:		1239
no. stations with events:	480
no. observed events:		678


### 2.2 Hits, misses and false alarms

In [7]:
# import the hits of every station and load into memory only the selected stations;
# the context manager closes the NetCDF files once the data is loaded
with xr.open_mfdataset(f'{path_in}*.nc', combine='nested', concat_dim='id') as hits_all:
    hits_stn = hits_all.sel(id=stations.index.to_list()).compute()

# convert to NaN lead times that can't be reached due to model limitations or persistence
hits_stn = limit_leadtime(hits_stn)

# subset of the 'hits' dataset with the stations selected for the optimization, summed
# over stations; `skipna=False` keeps as NaN any sum that includes a NaN value
if min_leadtime is None:
    hits_opt = hits_stn.sel(id=stations_optimize).sum('id', skipna=False)
else:
    hits_opt = hits_stn.sel(id=stations_optimize, leadtime=min_leadtime).sum('id', skipna=False)

# name of the dimension that identifies the forecast: 'approach' takes precedence over 'model'.
# Fail here with a clear message instead of leaving `dim` undefined for the following cells
dims_found = [name for name in ('approach', 'model') if name in hits_stn.dims]
if not dims_found:
    raise ValueError(f"The hits dataset must have either an 'approach' or a 'model' dimension; found: {list(hits_stn.dims)}")
dim = dims_found[0]

## 3 Analysis

In this section I will compute the skill of the EFAS predictions in different ways. In all the following sections I will work with three metrics: $recall$, $precision$ and the $f_\beta$ score. The three metrics are based on the contingency table of hits ($TP$ for true positives), false alarms ($FP$ for false positives) and misses ($FN$ for false negatives).

$$recall = \frac{TP}{TP + FN}$$
$$precision = \frac{TP}{TP + FP}$$
$$f_\beta = \frac{(1 + \beta^2) \cdot TP}{(1 + \beta^2) \cdot TP + \beta^2 \cdot FN + FP}$$


### 3.2 Compare approaches
#### 3.2.1 Import optimized criteria

In [8]:
# dictionary with one set of notification criteria per entry: the criteria optimized for
# each f-score and, if the dataset is organised by 'approach', the current criteria
criteria = {}
for beta in betas:
    metric = f'f{beta}'

    # file with the criteria optimized for this metric
    pattern = f'{path_out}{metric}/*{suffix}.pkl'
    files = glob.glob(pattern)
    if not files:
        raise FileNotFoundError(f'No file of optimized criteria matches "{pattern}"')
    # NOTE: unpickling can execute arbitrary code; only load files created by this project
    with open(files[0], 'rb') as pkl:
        opt_crit = pickle.load(pkl)

    # if criteria was fitted for a single lead time value
    if min_leadtime is not None:
        if dim == 'approach':
            criteria['current'] = current_criteria
        for key, crit in opt_crit.items():
            # keep the criteria without the entry named after the metric, if present
            criteria[f'{metric}_{key}'] = {k: v for k, v in crit.items() if k != metric}

    # if criteria was fitted for several lead time ranges
    else:
        for lt, crit1 in opt_crit.items():
            if dim == 'approach':
                # build a new dictionary so that the criteria read from the configuration
                # is not modified on every iteration
                criteria[f'current_{lt}'] = {**current_criteria, 'leadtime': lt}
            for key2, crit2 in crit1.items():
                # drop the entry named after the metric, if present, and add the lead time
                crit_lt = {k: v for k, v in crit2.items() if k != metric}
                crit_lt['leadtime'] = lt
                criteria[f'{metric}_{lt}_{key2}'] = crit_lt

In [20]:
# display the dictionary of notification criteria built in the previous cell
criteria

{'f0.8_12_COS': {'probability': array(0.875),
  'persistence': array('1/1', dtype='<U3'),
  'model': 'COS',
  'leadtime': 12},
 'f0.8_12_DWD': {'probability': array(0.05),
  'persistence': array('2/2', dtype='<U3'),
  'model': 'DWD',
  'leadtime': 12},
 'f0.8_12_EUD': {'probability': array(0.05),
  'persistence': array('2/2', dtype='<U3'),
  'model': 'EUD',
  'leadtime': 12},
 'f0.8_12_EUE': {'probability': array(0.8),
  'persistence': array('1/1', dtype='<U3'),
  'model': 'EUE',
  'leadtime': 12},
 'f0.8_60_COS': {'probability': array(0.525),
  'persistence': array('1/1', dtype='<U3'),
  'model': 'COS',
  'leadtime': 60},
 'f0.8_60_DWD': {'probability': array(0.05),
  'persistence': array('3/3', dtype='<U3'),
  'model': 'DWD',
  'leadtime': 60},
 'f0.8_60_EUD': {'probability': array(0.05),
  'persistence': array('3/3', dtype='<U3'),
  'model': 'EUD',
  'leadtime': 60},
 'f0.8_60_EUE': {'probability': array(0.5),
  'persistence': array('1/1', dtype='<U3'),
  'model': 'EUE',
  'leadtime

#### 3.2.2 Compare approaches

In [9]:
# transform criteria into a DataFrame
summary_criteria = pd.concat([pd.DataFrame(crtr, index=[i]) for i, crtr in criteria.items()], axis=0)
if dim == 'approach':
    summary_criteria[dim] = [''.join([x[0].upper() for x in app.split('_')]) for app in summary_criteria[dim]]
summary_criteria['window'] = window
summary_criteria['KGE'] = min_kge
summary_criteria['OF'] = [x.split('_')[0] if x.split('_')[0] != 'current' else '' for x in summary_criteria.index]
if leadtime is None:
    summary_criteria = summary_criteria[[dim, 'window', 'KGE', 'OF', 'probability', 'persistence']]
else:
    summary_criteria = summary_criteria[[dim, 'window', 'KGE', 'OF', 'leadtime', 'probability', 'persistence']]

# compute hits, misses and false alarms
if leadtime is None:
    summary_hits = pd.DataFrame({i: hits_opt.sel(leadtime=min_leadtime).sel(crtr).to_pandas() for i, crtr in criteria.items()}).transpose()
else:
    summary_hits = pd.DataFrame({i: hits_opt.sel(crtr).to_pandas() for i, crtr in criteria.items()}).transpose()
summary_hits = summary_hits.astype(int)
summary_hits['no_events'] = summary_hits.TP + summary_hits.FN

# compute skill
summary_hits['recall'] = summary_hits.TP / (summary_hits.TP + summary_hits.FN)
summary_hits['precision'] = summary_hits.TP / (summary_hits.TP + summary_hits.FP)
for beta in betas:
    summary_hits[f'f{beta}'] = (1 + beta**2) * summary_hits.TP / ((1 + beta**2) * summary_hits.TP + beta**2 * summary_hits.FN + summary_hits.FP)

# concat criteria, hits and summary data frames
summary = pd.concat((summary_criteria, summary_hits), axis=1)
if leadtime is None:
    summary.sort_values([dim, 'OF'], inplace=True)
else:
    summary.sort_values([dim, 'leadtime', 'OF'], inplace=True)

summary.head()

Unnamed: 0,model,window,KGE,OF,leadtime,probability,persistence,TP,FN,FP,no_events,recall,precision,f0.8,f1,f1.25
f0.8_12_COS,COS,1,0.5,f0.8,12,0.875,1/1,437,241,184,678,0.644543,0.703704,0.679369,0.672825,0.666406
f1_12_COS,COS,1,0.5,f1,12,0.875,1/1,437,241,184,678,0.644543,0.703704,0.679369,0.672825,0.666406
f1.25_12_COS,COS,1,0.5,f1.25,12,0.625,1/1,450,228,217,678,0.663717,0.674663,0.670348,0.669145,0.667946
f0.8_60_COS,COS,1,0.5,f0.8,60,0.525,1/1,273,405,200,678,0.402655,0.577167,0.493671,0.47437,0.456522
f1_60_COS,COS,1,0.5,f1,60,0.525,1/1,273,405,200,678,0.402655,0.577167,0.493671,0.47437,0.456522
f1.25_60_COS,COS,1,0.5,f1.25,60,0.375,1/1,324,354,428,678,0.477876,0.430851,0.448057,0.453147,0.458353
f0.8_12_DWD,DWD,1,0.5,f0.8,12,0.05,2/2,375,303,299,678,0.553097,0.55638,0.555094,0.554734,0.554374
f1_12_DWD,DWD,1,0.5,f1,12,0.05,1/1,473,205,494,678,0.69764,0.489142,0.553722,0.575076,0.598143
f1.25_12_DWD,DWD,1,0.5,f1.25,12,0.05,1/1,473,205,494,678,0.69764,0.489142,0.553722,0.575076,0.598143
f0.8_60_DWD,DWD,1,0.5,f0.8,60,0.05,3/3,266,412,265,678,0.39233,0.500942,0.4521,0.440033,0.428594


In [10]:
# export the summary table as CSV (3 decimals, without the index of labels)
file_summary = f'{path_out}skill_by_criteria.csv'
summary.to_csv(file_summary, float_format='%.3f', index=False)

***

In [None]:
# contingency table for one specific combination of model, lead time, probability and persistence
sel_kwargs = dict(model='EUE', leadtime=60, probability=0.65, persistence='1/1')
hits = hits_opt.sel(**sel_kwargs).to_pandas()

# skill derived from that contingency table
hits['recall'] = hits['TP'] / hits[['TP', 'FN']].sum()
hits['precision'] = hits['TP'] / hits[['TP', 'FP']].sum()
for beta in betas:
    numerator = (1 + beta**2) * hits['TP']
    hits[f'f{beta}'] = numerator / ((1 + beta**2) * hits['TP'] + beta**2 * hits['FN'] + hits['FP'])
hits.round(3)

TP           252.000
FN           426.000
FP           124.000
recall         0.372
precision      0.670
f0.8           0.510
f1             0.478
f1.25          0.450
dtype: float64

In [None]:
# contingency table for a fixed model, lead time and persistence, for every probability
hits = hits_opt.sel(model='EUE', leadtime=60, persistence='1/1').to_pandas()

# f-scores for every probability
tp, fn, fp = hits['TP'], hits['FN'], hits['FP']
for beta in betas:
    hits[f'f{beta}'] = (1 + beta**2) * tp / ((1 + beta**2) * tp + beta**2 * fn + fp)

hits['f0.8']#.plot()

probability
0.050    0.174714
0.075    0.239346
0.100    0.321323
0.125    0.347942
0.150    0.374743
0.175    0.401941
0.200    0.441654
0.225    0.450873
0.250    0.456803
0.275    0.454121
0.300    0.458833
0.325    0.468605
0.350    0.475049
0.375    0.485376
0.400    0.497605
0.425    0.499835
0.450    0.503444
0.475    0.503956
0.500    0.506955
0.525    0.508751
0.550    0.505861
0.575    0.504211
0.600    0.501437
0.625    0.505750
0.650    0.510273
0.675    0.509087
0.700    0.507523
0.725    0.504507
0.750    0.510944
0.775    0.502635
0.800    0.502235
0.825    0.499086
0.850    0.487623
0.875    0.492293
0.900    0.488010
0.925    0.459356
0.950    0.449855
Name: f0.8, dtype: float64

***

In [35]:
# hits of each station read from the 'leadtime_ranges' folder
hits1 = xr.open_mfdataset(f'../results/hits/NWP/leadtime_ranges/window_1/*.nc', combine='nested', concat_dim='id')

# load the selected stations and convert to NaN the lead times that can't be reached
# due to model limitations or persistence
hits1 = limit_leadtime(hits1.sel(id=stations.index.to_list()).compute())

# keep the stations selected for the optimization (and the fixed lead time, if defined);
# values are kept per station, i.e. they are not summed over 'id'
indexers = {'id': stations_optimize}
if min_leadtime is not None:
    indexers['leadtime'] = min_leadtime
hits1 = hits1.sel(indexers)

# dimension that identifies the forecast: 'approach' takes precedence over 'model'
for name in ('approach', 'model'):
    if name in hits1.dims:
        dim = name
        break

In [36]:
# hits of each station read from the 'leadtime_ranges2' folder
hits2 = xr.open_mfdataset(f'../results/hits/NWP/leadtime_ranges2/window_1/*.nc', combine='nested', concat_dim='id')

# load the selected stations and convert to NaN the lead times that can't be reached
# due to model limitations or persistence
hits2 = limit_leadtime(hits2.sel(id=stations.index.to_list()).compute())

# keep the stations selected for the optimization (and the fixed lead time, if defined);
# values are kept per station, i.e. they are not summed over 'id'
indexers = {'id': stations_optimize}
if min_leadtime is not None:
    indexers['leadtime'] = min_leadtime
hits2 = hits2.sel(indexers)

# dimension that identifies the forecast: 'approach' takes precedence over 'model'
for name in ('approach', 'model'):
    if name in hits2.dims:
        dim = name
        break

In [32]:
# notification criteria used to compare the two datasets of hits
crit = {
    'model': 'EUD',
    'leadtime': 60,
    'probability': 0.05,
    'persistence': '3/3',
}

In [44]:
# ids of the stations whose hits (TP) differ between the two datasets for the criteria in `crit`
tp_differs = hits1.sel(crit)['TP'] != hits2.sel(crit)['TP']
stns = hits1.where(tp_differs, drop=True).id.data

In [45]:
# values in `hits1` at the stations in `stns` for the criteria in `crit`
hits1.sel(id=stns).sel(crit).to_pandas()

Unnamed: 0_level_0,TP,FN,FP,model,probability,leadtime,persistence
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
695,3.0,0.0,0.0,EUD,0.05,60,3/3
778,3.0,0.0,0.0,EUD,0.05,60,3/3


In [46]:
# values in `hits2` at the stations in `stns` for the criteria in `crit`
hits2.sel(id=stns).sel(crit).to_pandas()

Unnamed: 0_level_0,TP,FN,FP,model,probability,leadtime,persistence
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
695,2.0,1.0,0.0,EUD,0.05,60,3/3
778,2.0,1.0,0.0,EUD,0.05,60,3/3


In [42]:
# attributes of the stations whose hits differ between the two datasets; `stns` is used
# instead of hard-coded ids so that the table follows the result of the comparison
stations.loc[stns]

Unnamed: 0_level_0,name,X,Y,area,subcatchment,river,catchment,country,KGE,correlation,...,rl1.5,rl2,rl5,rl10,rl20,rl50,rl100,rl200,rl500,n_events_obs
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
695,Sarkad,5187500,2687500,3650,Tisa,Crisul Negru,Danube,HU,,,...,83.4,103.3,152.1,184.5,215.5,255.6,285.7,315.7,355.2,3
778,Zerind,5192500,2687500,3625,Tisa,Crisul Negru,Danube,RO,0.885,0.905,...,83.3,103.1,151.9,184.2,215.2,255.3,285.3,315.3,354.8,3


In [34]:
# subset of `hits2` for the criteria defined in `crit`
hits2.sel(crit)

***

In [None]:
# lead time and objective function to be inspected
lt = 60
OF = 'f0.8'

# rows of the summary for that lead time and objective function. The table is indexed by
# the column named after `dim` ('approach' or 'model'), which is how the summary table
# names it; a fixed 'approach' fails when the dataset is organised by 'model'
df = summary.loc[(summary.leadtime == lt) & (summary.OF == OF)].set_index(dim)

df

In [None]:
# plot the probability criteria and the skill; the f-score column is taken from `OF`
# so that it always matches the objective function used to filter `df`
df[['probability', OF, 'recall', 'precision']].plot()

***

In [None]:
# toy example: recall increases from 0.1 to 0.9 while precision decreases from 0.9 to 0.1
steps = np.arange(.1, .91, .1)
df = pd.DataFrame(dtype=float)
df['recall'] = steps
df['precision'] = steps[::-1]

# f-score for several beta values, written in terms of precision and recall
for b in (.8, 1, 1.25):
    numerator = (1 + b**2) * df['precision'] * df['recall']
    df[f'f{b}'] = numerator / (b**2 * df['precision'] + df['recall'])

# first only the f-scores, then all the columns together
df.drop(columns=['recall', 'precision']).plot()
df.plot()

df

***

```Python
hits_stn

hits_1D1P = hits_stn.sel(approach='1_deterministic_+_1_probabilistic',
                         probability=0.375,
                         persistence='1/1',
                         leadtime=min_leadtime)

hits_BW = hits_stn.sel(approach='brier_weighted',
                       probability=0.375,
                       persistence='1/1',
                         leadtime=min_leadtime)

hits_diff = hits_BW - hits_1D1P

tp = hits_diff['TP'].to_pandas()

tp[tp == 0]

tp[tp < 0]

stations[tp > 0].value_counts('river')

stations[tp > 0].value_counts('catchment')

stations[tp > 0][stations.catchment == 'Rhine'].sort_values(['catchment', 'subcatchment', 'river'])

```