# Summarize skill results
***

**Author**: Chus Casado Rodríguez<br>
**Date**: 082-08-2023<br>


**Introduction**:<br>
This notebook creates a table comparing the skill of the diverse notification criteria optimized for different f-scores.

In [1]:
import os
path_root = os.getcwd()
import glob
import numpy as np
import pandas as pd
import xarray as xr
# from datetime import datetime, timedelta
import pickle
import yaml
import warnings
warnings.filterwarnings("ignore")

# Import the project helpers that live in '../py/'. The working directory is
# switched only for the duration of the import (presumably because the kernel
# resolves local modules from the current directory -- confirm) and it is
# restored in `finally`: if the import fails, the notebook must not be left
# running from '../py/', or every relative path below would break.
os.chdir('../py/')
try:
    from compute import hits2skill, limit_leadtime
finally:
    os.chdir(path_root)

## 1 Configuration

In [2]:
# configuration file that drives the notebook (leave exactly one line uncommented)
config_file = '../conf/config_COMB_all_leadtimes.yml'
# config_file = '../conf/config_COMB_leadtime_ranges.yml'
# config_file = '../conf/config_COMB_daily.yml'
# config_file = '../conf/config_NWP_all_leadtimes.yml'
# config_file = '../conf/config_NWP_leadtime_ranges.yml'
# config_file = '../conf/config_NWP_daily.yml'

# parse the YAML file into `cfg`, which is queried with `.get()` in the cells below
# NOTE(review): the file is parsed with `yaml.FullLoader`; if a configuration could
# ever come from an untrusted source, consider `yaml.safe_load` instead
with open(config_file, encoding='utf8') as stream:
    cfg = yaml.load(stream, Loader=yaml.FullLoader)

### 1.1 Reporting points

In [3]:
# settings of the 'reporting_points' section (an empty dict if the section is missing)
cfg_points = cfg.get('reporting_points', {})

# area threshold that is part of the name of the reporting points file
area_threshold = cfg_points.get('area', 500)

# folder and file with the table of reporting points
path_stations = cfg_points.get('output', '../results/reporting_points/')
file_stations = f'{path_stations}reporting_points_over_{area_threshold}km2.parquet'

# catchments to be analysed (None means no filter by catchment)
catchments = cfg_points.get('catchments')

# minimum performance (KGE) required from the reporting points (None means no KGE filter)
min_kge = cfg_points.get('KGE')

### 1.2 Hits

In [4]:
# settings of the 'hits' section (an empty dict if the section is missing)
cfg_hits = cfg.get('hits', {})

# lead time ranges (None means that lead times are not split into ranges)
leadtime = cfg_hits.get('leadtime', None)

# parameters of the rolling window used to compute hits
window = cfg_hits.get('window', 1)

# disaggregate the analysis by seasons?
seasonality = cfg_hits.get('seasonality', False)

# NOTE: `path_in` (folder with the NetCDF files of hits, misses and false alarms)
# is deliberately not defined here: the next cell builds it from scratch, so any
# value assigned in this cell would be overwritten before being used.

In [5]:
# root folder with the NetCDF files of hits, misses and false alarms
path_in = cfg.get('hits', {}).get('output', '../results/hits/')

# subfolder named after the lead time configuration
if leadtime is None:
    folder = 'all_leadtimes'
else:
    named_folders = {10: 'daily', 20: '12h'}
    n_leadtimes = len(leadtime)
    if n_leadtimes in named_folders:
        folder = named_folders[n_leadtimes]
    else:
        # NOTE(review): the reason for the +12 offset is not visible in this notebook -- confirm
        folder = '_'.join(str(lt + 12) for lt in leadtime)

# complete the path according to the kind of configuration ('COMB' is checked first);
# `path_in` keeps the root folder if the file name contains neither tag
for tag, subfolder in (('COMB', 'combination'), ('NWP', 'NWP')):
    if tag in config_file:
        path_in = f'{path_in}{subfolder}/{folder}/window_{window}/'
        break

### 1.3 Skill

In [6]:
# settings of the 'skill' section (an empty dict if the section is missing)
cfg_skill = cfg.get('skill', {})

# current operational criteria
current_criteria = cfg_skill.get('current_criteria', None)

# fixed notification criteria
min_leadtime = cfg_skill.get('leadtime', 60)
min_area = cfg_skill.get('area', 2000)

# path where results will be saved
path_out_root = cfg_skill.get('output', '../results/skill/')
if 'COMB' in config_file:
    path_out = f'{path_out_root}combination/{folder}/'
elif 'NWP' in config_file:
    path_out = f'{path_out_root}NWP/{folder}/'
else:
    # fail here with a clear message instead of leaving `path_out` undefined
    # (or holding a stale value from a previous run of this cell)
    raise ValueError(f'The name of the configuration file must contain "COMB" or "NWP": {config_file}')

# results are further split by rolling window and by the KGE filter applied to the stations
if min_kge is not None:
    path_out = f'{path_out}window_{window}/kge_{min_kge}/'
else:
    path_out = f'{path_out}window_{window}/no_kge/'
    
def _parse_beta(name):
    """Return the beta encoded in a folder name such as 'f1' or 'f0.8', or None.

    Integers are tried before floats so that `f'f{beta}'` reproduces the folder
    name exactly ('f2' -> 2 -> 'f2', 'f0.8' -> 0.8 -> 'f0.8'). Names that do not
    start with 'f', or whose remainder is not a number, yield None.
    """
    if not name.startswith('f'):
        return None
    for cast in (int, float):
        try:
            return cast(name[1:])
        except ValueError:
            continue
    return None


# coefficients of the fbeta-score: one per 'f<beta>' subfolder found in `path_out`.
# Folders that only happen to start with 'f' are skipped instead of raising, and the
# list is sorted because `os.listdir` returns the entries in arbitrary order.
betas = sorted(
    beta
    for beta in (
        _parse_beta(item)
        for item in os.listdir(path_out)
        if os.path.isdir(f'{path_out}{item}')
    )
    if beta is not None
)

## 2 Data

### 2.1 Reporting points

I load all the stations that were selected in a previous [notebook](3_0_select_stations.ipynb).

In [7]:
# read the table of fixed reporting points and cast coordinates and area to integers
stations = pd.read_parquet(file_stations)
int_columns = ['X', 'Y', 'area']
stations[int_columns] = stations[int_columns].astype(int)

# keep only the stations in the selected catchments (a single value is wrapped in a list)
if catchments is not None:
    if not isinstance(catchments, list):
        catchments = [catchments]
    in_catchments = stations.catchment.isin(catchments)
    stations = stations.loc[in_catchments, :]

if min_kge is None:
    # no KGE filter: remove stations with erroneous behaviour (6 or more observed events)
    stations = stations.loc[~(stations.n_events_obs >= 6)]
else:
    # remove points whose performance (KGE) does not exceed the established threshold;
    # the negated comparison keeps the points without a KGE value
    mask_kge = ~(stations.KGE <= min_kge)
    stations = stations.loc[mask_kge]

# boolean mask of the stations with at least one observed event
stations_w_events = stations.n_events_obs.gt(0)

# overview of the complete set of reporting points
print('All points')
print('----------')
print(f'no. reporting points:\t\t{stations.shape[0]}')
print(f'no. stations with events:\t{stations_w_events.sum()}')
print(f'no. observed events:\t\t{stations.n_events_obs.sum()}')

# stations used in the optimization: filter by catchment area only when `min_area`
# is larger than the area threshold of the reporting points table
stations_optimize = (
    stations.loc[stations.area >= min_area].index
    if min_area > area_threshold
    else stations.index
)

# observed events at the stations selected for the optimization
events_optimize = stations.loc[stations_optimize, 'n_events_obs']

print('\nPoints selected for otimization')
print('-------------------------------')
print(f'no. reporting points:\t\t{len(stations_optimize)}')
print(f'no. stations with events:\t{(events_optimize > 0).sum()}')
print(f'no. observed events:\t\t{events_optimize.sum()}')

# suffix (minimum area and number of points) used in the names of the result files
suffix = f'{min_area}km2_{len(stations_optimize)}points'

All points
----------
no. reporting points:		1979
no. stations with events:	966
no. observed events:		1683

Points selected for otimization
-------------------------------
no. reporting points:		1239
no. stations with events:	562
no. observed events:		874


### 2.2 Hits, misses and false alarms

In [8]:
# open the NetCDF files in `path_in` as a single dataset concatenated along 'id'
nc_files = f'{path_in}*.nc'
hits_stn = xr.open_mfdataset(nc_files, combine='nested', concat_dim='id')

# keep only the selected stations and compute the result
selected_ids = stations.index.to_list()
hits_stn = hits_stn.sel(id=selected_ids).compute()

# convert to NaN lead times that can't be reached due to model limitations or persistence
hits_stn = limit_leadtime(hits_stn)

# sum over the stations selected for the optimization; the lead time is also fixed
# when a single minimum lead time is configured
selection = {'id': stations_optimize}
if min_leadtime is not None:
    selection['leadtime'] = min_leadtime
hits_opt = hits_stn.sel(**selection).sum('id', skipna=False)

# name of the dimension that holds the alternatives to be compared:
# 'approach' takes precedence over 'model' when both are present
if 'approach' in hits_stn.dims:
    dim = 'approach'
elif 'model' in hits_stn.dims:
    dim = 'model'
else:
    # the following cells need `dim`; fail here rather than leaving it undefined
    # (or holding a stale value from a previous run of this cell)
    raise ValueError(f'The hits dataset has neither an "approach" nor a "model" dimension: {list(hits_stn.dims)}')

## 3 Analysis

In this section I will compute the skill of the EFAS predictions in different ways. In all the following sections I will work with three metrics: $recall$, $precision$ and the $f_{\beta}$ score. The three metrics are based on the contingency table of hits ($TP$ for true positives), false alarms ($FP$ for false positives) and misses ($FN$ for false negatives).

$$recall = \frac{TP}{TP + FN}$$
$$precision = \frac{TP}{TP + FP}$$
$$f_{\beta} = \frac{(1 + \beta^2) \cdot TP}{(1 + \beta^2) \cdot TP + \beta^2 \cdot FN + FP}$$


### 3.2 Compare approaches
#### 3.2.1 Import optimized criteria

In [9]:
# collect every set of criteria to be compared: {name: {coordinate: value}}
criteria = {}
for beta in betas:
    metric = f'f{beta}'

    # file with the criteria optimized for this metric and this selection of stations
    pattern = f'{path_out}{metric}/*{suffix}.pkl'
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f'No file with optimized criteria matches "{pattern}"')
    file = matches[0]

    # NOTE(review): pickle can execute arbitrary code when loading; only load files
    # produced by this project
    with open(file, 'rb') as pkl:
        opt_crit = pickle.load(pkl)

    # if criteria was fitted for a single lead time value
    if min_leadtime is not None:
        if dim == 'approach':
            criteria['current'] = current_criteria
        for key, crit in opt_crit.items():
            # drop the score stored under the metric name, so only the criteria remain
            crit.pop(metric, None)
            criteria[f'{metric}_{key}'] = crit

    # if criteria was fitted for several lead time ranges
    else:
        for lt, crit1 in opt_crit.items():
            if dim == 'approach':
                # build a new dict per lead time instead of modifying `current_criteria`
                criteria[f'current_{lt}'] = {**current_criteria, 'leadtime': lt}
            for key2, crit2 in crit1.items():
                crit2.pop(metric, None)
                crit2['leadtime'] = lt
                criteria[f'{metric}_{lt}_{key2}'] = crit2

In [10]:
#del criteria['current_60']
#del criteria['current_144']

#### 3.2.2 Compare approaches

In [11]:
# one row per set of criteria, indexed by the name of the set
criteria_rows = [pd.DataFrame(crtr, index=[name]) for name, crtr in criteria.items()]
summary_criteria = pd.concat(criteria_rows, axis=0)

# abbreviate the approaches to the upper-case initials of their '_'-separated parts
if dim == 'approach':
    summary_criteria[dim] = [
        ''.join(part[0].upper() for part in app.split('_'))
        for app in summary_criteria[dim]
    ]

# settings shared by all the rows
summary_criteria['window'] = window
summary_criteria['KGE'] = min_kge

# 'OF' (presumably the objective function): prefix of the row name, empty for the current criteria
prefixes = [name.split('_')[0] for name in summary_criteria.index]
summary_criteria['OF'] = ['' if prefix == 'current' else prefix for prefix in prefixes]

# order the columns; 'leadtime' is only included when lead time ranges are configured
columns = [dim, 'window', 'KGE', 'OF']
if leadtime is not None:
    columns.append('leadtime')
columns += ['probability', 'persistence']
summary_criteria = summary_criteria[columns]

# extract hits, misses and false alarms for every set of criteria (one row per set);
# the selection is identical with and without lead time ranges, so no branching is needed
summary_hits = pd.DataFrame(
    {name: hits_opt.sel(crtr).to_pandas() for name, crtr in criteria.items()}
).transpose()

# remove the sets of criteria without any value and store the counts as integers
summary_hits = summary_hits.dropna(axis=0, how='all').astype(int)

# number of observed events: hits plus misses
summary_hits['no_events'] = summary_hits.TP + summary_hits.FN

# skill metrics derived from the contingency table
tp, fn, fp = summary_hits.TP, summary_hits.FN, summary_hits.FP
summary_hits['recall'] = tp / (tp + fn)
summary_hits['precision'] = tp / (tp + fp)

# one column per fbeta-score
for beta in betas:
    beta2 = beta**2
    summary_hits[f'f{beta}'] = (1 + beta2) * tp / ((1 + beta2) * tp + beta2 * fn + fp)

# join the criteria with their hits and skill (rows are aligned on the criteria names)
summary = pd.concat((summary_criteria, summary_hits), axis=1)

# sort the rows; the lead time is a sorting key only when lead time ranges are configured
sort_columns = [dim, 'OF'] if leadtime is None else [dim, 'leadtime', 'OF']
summary = summary.sort_values(sort_columns)

summary.head()

Unnamed: 0,approach,window,KGE,OF,probability,persistence,TP,FN,FP,no_events,recall,precision,f0.8
current,1D+1P,1,0.5,,0.3,3/3,361,513,230,874,0.413043,0.610829,0.514656
f0.8_1_deterministic_+_1_probabilistic,1D+1P,1,0.5,f0.8,0.65,1/1,388,486,206,874,0.443936,0.653199,0.55171
f0.8_brier_weighted,BW,1,0.5,f0.8,0.525,1/1,388,486,196,874,0.443936,0.664384,0.556535
f0.8_model_mean,MM,1,0.5,f0.8,0.825,1/1,319,555,148,874,0.364989,0.683084,0.509724
f0.8_member_weighted,MW,1,0.5,f0.8,0.5,1/1,389,485,200,874,0.44508,0.660441,0.55554


In [12]:
# export the summary table as CSV (floats with 3 decimals, index not written)
csv_file = f'{path_out}skill_by_criteria.csv'
summary.to_csv(csv_file, index=False, float_format='%.3f')