# 'D-Score Suite (v1)' Benchmark

These are custom-defined Python functions to calculate metrics against time-series data. 

These statistics are adapted from the originals in <https://github.com/thodson-usgs/dscore> 

See: <https://doi.org/10.1029/2021MS002681>


## The Metrics:
This suite of metrics describes the content of the benchmark:
| Metric          | Reference                                                           |
| ----- | ----- |
| mse             | Mean Squared Error |
| bias            |  |
| distribution    |  |
| sequence        |  |
| Seasonal-winter |  |
| Seasonal-spring |  |
| Seasonal-summer |  |
| Seasonal-fall   |  |
| mse_Q0025  ; mse_Qlow        |  |
| mse_Q2550  ; mse_Qbelowavg   |  |
| mse_Q5075  ; mse_Qaboveavg   |  |
| mse_Q75100 ; mse_Qhigh       |  |


This notebook will briefly describe each of the above metrics, and show some results using sample data. 
The specific code to implement each metric is included.  This notebook can be sourced into analysis notebooks
to get access to these functions natively. 

In [None]:

import logging
import numpy as np
import pandas as pd

## Mean Squared Error

In [None]:
def mse(obs, sim) -> float:
    r"""
    Mean Squared Error -- compute the MSE over all paired observed (x) and
    simulated/modeled (x_hat) values.

        .. math::
            \frac{1}{n}\sum_{i=1}^{n}(x_i - \hat{x}_i)^2

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns
    -------
    float
        Mean square error
    """
    # NOTE: the docstring is a raw string so the LaTeX backslashes are not
    # treated as (invalid) string escape sequences.
    e = obs - sim
    return np.mean(e**2)

## Percent Bias

In [None]:

def pbias(obs, sim) -> float:
    """
    Percent bias -- the summed error (sim - obs) expressed as a percentage
    of the summed observations.

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns:
        float: percent bias, in percent units (i.e. 90 rather than 0.90).
        For a positive observed total, the value is positive when the
        simulated total exceeds the observed total and negative otherwise.
    """
    total_error = np.sum(sim - obs)
    total_observed = np.sum(obs)
    return 100 * total_error / total_observed
    

## Bias

In [None]:

def bias(obs, sim) -> float:
    """
    Bias component of the error: the square of the mean error.

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns:
        float: (mean of obs - sim) squared; always non-negative, so the
        sign of the mean error is not retained.
    """
    mean_error = np.mean(obs - sim)
    return mean_error ** 2


## Error Variance

In [None]:


def e_variance(obs, sim) -> float:
    """
    Sample variance of the error (sim - obs).

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns:
        float: variance of the error, computed with one delta degree of
        freedom (ddof=1).
    """
    return (sim - obs).var(ddof=1)

## Sequence

In [None]:

def sequence(obs, sim) -> float:
    """
    Sequence component of the error variance.

    Computed as the variance of the paired error (sim - obs) minus the
    variance of the difference between the independently sorted sim and
    obs values; i.e. the part of the error variance that remains after the
    sorted-value (distribution) difference is removed.

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns:
        float: error variance minus sorted-difference variance (both with
        ddof=1)
    """
    sorted_diff = np.sort(sim) - np.sort(obs)
    paired_diff = sim - obs
    return paired_diff.var(ddof=1) - sorted_diff.var(ddof=1)



## Distribution

In [None]:

def distribution(obs, sim) -> float:
    """
    Distribution component of the error variance.

    Both inputs are sorted independently, so the pairing in time is
    discarded; the result is the sample variance of the difference between
    the sorted simulated and sorted observed values.

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns:
        float: variance (ddof=1) of sorted(sim) - sorted(obs)
    """
    return (np.sort(sim) - np.sort(obs)).var(ddof=1)

## Seasonal MSE

In [None]:


def seasonal_mse(obs, sim):
    """
    Compute the MSE separately for each of the four seasons.

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    The month of each sample is read from ``obs.index``, so obs must be
    time-indexed; the resulting boolean mask is applied to both obs and sim.

    Returns:
        pd.Series : mse for each season, indexed by
        'winter', 'spring', 'summer', 'fall'

    NOTE: 'season' is viewed from a northern-hemisphere perspective
    (winter = Dec-Feb, spring = Mar-May, summer = Jun-Aug, fall = Sep-Nov).
    """
    months = obs.index.month
    season_months = {
        'winter': (12, 1, 2),
        'spring': (3, 4, 5),
        'summer': (6, 7, 8),
        'fall': (9, 10, 11),
    }

    scores = []
    for wanted in season_months.values():
        # Boolean mask selecting the samples that fall in this season.
        mask = np.isin(months, wanted)
        scores.append(mse(obs[mask], sim[mask]))

    return pd.Series(scores, index=list(season_months))

# def seasons(x, x_h):
#     """Decompose error by season.
#     Parameters
#     ----------
#     x : array_like
#     x_h : array_like
#     """
#     def season(e, index):
#         return ((e*index)**2).mean()

#     names = ['winter', 'spring', 'summer', 'fall']
#     e = x_h - x
    
#     winter = season(e, (e.index.month == 12) | (e.index.month <= 2))
#     spring = season(e, (e.index.month > 2) & (e.index.month <= 5))
#     summer = season(e, (e.index.month > 5) & (e.index.month <= 8))
#     fall = season(e, (e.index.month > 8) & (e.index.month <= 11))

#     return pd.Series([winter, spring, summer, fall], index=names)



In [None]:

def quantile_mse(obs, sim):
    """
    Decompose the squared error by quartile of the observed values.

    Observations are ranked (ties broken by order of appearance) and the
    ranks are cut into four equal-probability bins. For each bin, errors
    belonging to other bins are zeroed and the mean of the squared result
    is taken over the full series, so the four scores add up to the overall
    MSE when every observation is assigned to a bin.

    Args:
        obs (pd.Series - like): data representing observed values
        sim (pd.Series - like): data representing simulated/modeled values

    Returns:
        pd.Series : one score per bin, indexed by
        'low', 'below_avg', 'above_avg', 'high'
    """
    labels = ['low', 'below_avg', 'above_avg', 'high']
    breaks = [0, 0.25, 0.5, 0.75, 1]

    err = sim - obs
    bins = pd.qcut(obs.rank(method='first'), q=breaks)
    categories = bins.cat.categories

    scores = []
    for k, _label in enumerate(labels):
        in_bin = bins == categories[k]  # True where the sample is in bin k
        masked_err = err * in_bin
        scores.append((masked_err ** 2).mean())
    return pd.Series(scores, index=labels)

In [None]:

def score(e, a=1.0):
    """
    Convert an error into a score.

    Applies the exponential scoring function ``exp(-a * e)``, which maps a
    non-negative error such as an MSE onto the interval (0, 1]; an error of
    zero scores 1.

    Parameters
    ----------
    e : float or array_like
        Error value(s) to score.
    a : float
        Positive tuning parameter.

    Raises
    ------
    ValueError
        If ``a`` is not strictly positive.

    References
    ----------
    .. [1] Collier et al., 2018, The International Land Model Benchmarking
    (ILAMB) system: Design, theory, and implementation. Journal of Advances
    in Modeling Earth Systems, 10(11), http://dx.doi.org/10.1029/2018ms001354
    """
    if a <= 0.0:
        raise ValueError("Tuning parameter must be a positive float")
    exponent = -a * e
    return np.exp(exponent)

In [None]:
# statsmodels is treated as an optional dependency: record whether the STL
# class could be imported so that stl() can check the flag before using it.
try:
    from statsmodels.tsa.seasonal import STL
    _SEASONAL = True
except ImportError:
    # Only logged at debug level here; stl() logs a warning when it is called
    # while the flag is False.
    logging.debug("STL library not available.")
    _SEASONAL = False

def stl(obs, sim, period=365, seasonal=9):
    """
    Decompose error using STL.

    Seasonal and trend decomposition using Loess (STL) is applied to the
    error series (sim - obs); the mean of the squared trend, seasonal, and
    residual components is returned.
    Note that STL is not perfectly orthogonal.

    Parameters
    ----------
    obs : pd.Series - like
        Data representing observed values.
    sim : pd.Series - like
        Data representing simulated/modeled values.
    period : int, optional
        Periodicity passed to STL. Defaults to 365 (presumably an annual
        cycle in daily data -- confirm against the sampling interval of
        the series being analyzed).
    seasonal : int, optional
        Length of the seasonal smoother passed to STL. Defaults to 9.

    Returns
    -------
    pd.Series or None
        Mean squared value of each component, indexed by 'trend',
        'seasonality', and 'residual'; None when statsmodels could not
        be imported.

    References
    ----------
    .. [1] Cleveland et al., 1990, STL: A seasonal-trend decomposition
    procedure based on loess. Journal of Official Statistics, 6(1), 3-73.
    """
    if not _SEASONAL:
        logging.warning("STL statistics not available.")
        return None
    e = sim - obs
    res = STL(e, period=period, seasonal=seasonal).fit()
    E = pd.DataFrame(
        {
            'trend': res.trend,
            'seasonality': res.seasonal,
            'residual': res.resid,
        }
    )
    return (E**2).mean()