In [37]:
import warnings

import numpy as np

from scipy.stats import entropy
from scipy.special import rel_entr, kl_div, entr

# Computing information gain and its components 
### John R. Lawson, October 2022

This shows the code and mathematics of evaluating a time series of paired forecast/observation probabilities.
The code assumes two observation categories: 0 (non-event) and 1 (event).
The code can be developed further for >2 classes, continuous ranked (like CRPS), and observational uncertainty (Cross-Entropy Score).

When evaluating event-by-event (e.g., cell-by-cell, as in Lawson et al. 2021), do not decompose the score; use the total divergence only.

In [38]:
# The divergence code below deliberately evaluates expressions such as
# 0 * log2(0), which NumPy reports as "divide by zero" / "invalid value"
# floating-point events before the NaNs are mapped to 0 with nan_to_num.
# Silence exactly those events instead of every warning in the session, so
# that unrelated warnings (deprecations, user warnings, ...) stay visible.
np.seterr(divide='ignore', invalid='ignore')
warnings.filterwarnings('ignore', category=RuntimeWarning)

class DKL:
    """Kullback-Leibler divergence score for paired binary obs/forecasts.

    The score (in bits) can be evaluated directly, or rebuilt from its
    reliability (REL), discrimination (DSC) and uncertainty (UNC) components
    as ``DKL = REL - DSC + UNC``.

    Parameters
    ----------
    o : array-like
        Observations, one per forecast (the code is written for values
        in {0, 1}).
    f : array-like
        Forecast probabilities, same shape as ``o``, strictly inside (0, 1).
    verbose : bool, optional
        When True (the default, kept for backward compatibility) the
        intermediate values are printed by ``compute_dkl`` and
        ``compute_dsc``. Pass False to silence them.

    Attributes
    ----------
    k : ndarray
        Sorted unique forecast probabilities (the forecast "bins").
    c : ndarray
        Sorted unique observation values.
    o_bar : float
        Mean of the observations (observed base rate).

    Raises
    ------
    ValueError
        If ``o`` and ``f`` differ in shape or are empty.
    AssertionError
        If any forecast probability lies outside the open interval (0, 1).
    """

    def __init__(self, o, f, verbose=True):
        # Coerce to arrays so boolean masking (self.f == k) works for lists too.
        self.o = np.asarray(o)
        self.f = np.asarray(f)
        self.verbose = verbose

        if self.o.shape != self.f.shape:
            raise ValueError(
                f"o and f must have the same shape, got {self.o.shape} "
                f"and {self.f.shape}")
        if self.f.size == 0:
            raise ValueError("o and f must not be empty")

        # f in {0, 1} would give infinite divergence, so it is rejected here.
        assert np.max(self.f) < 1.0, "forecast probabilities must be < 1"
        assert np.min(self.f) > 0.0, "forecast probabilities must be > 0"

        # Unique forecast probabilities (one bin per distinct value)
        self.k = np.unique(self.f)

        # Unique observed classes
        self.c = np.unique(self.o)

        # Frequency of obs
        self.o_bar = np.mean(self.o)

    @staticmethod
    def __compute_dkl(o, f):
        """Binary KL divergence (bits) of ``o`` from ``f``, element-wise.

        Returns a tuple ``(all_dkl, mean_dkl)``: the element-wise divergences
        and their mean. Terms of the form 0*log2(0) evaluate to NaN and are
        replaced by 0 (their mathematical limit).
        """
        # Local errstate keeps this independent of global NumPy settings.
        with np.errstate(divide='ignore', invalid='ignore'):
            raw_term1 = (1 - o) * np.log2((1 - o) / (1 - f))
            raw_term2 = o * np.log2(o / f)

        term1 = np.nan_to_num(raw_term1)
        term2 = np.nan_to_num(raw_term2)

        all_dkl = np.nan_to_num(term1 + term2)
        return all_dkl, np.mean(all_dkl)

    def _bin_summary(self):
        """Yield (index, forecast value, count, obs frequency) per bin."""
        for ik, k in enumerate(self.k):
            in_bin = self.f == k
            yield ik, k, np.count_nonzero(in_bin), np.mean(self.o[in_bin])

    def compute_dkl(self, from_components=False):
        """Return the mean divergence score in bits.

        With ``from_components=True`` the score is rebuilt as
        ``REL - DSC + UNC``; otherwise it is the mean of the per-sample
        divergences between observations and forecasts.
        """
        if from_components:
            U = self.compute_unc()
            R = self.compute_rel()
            D = self.compute_dsc()
            return R - D + U
        all_dkl, raw_dkl = self.__compute_dkl(self.o, self.f)
        dkl = np.nan_to_num(raw_dkl)
        if self.verbose:
            print(f"{all_dkl=}, {raw_dkl=}, {dkl=}")
        return dkl

    def compute_dsc(self):
        """Discrimination: (1/N) * sum_k n_k * D(ok_bar || o_bar).

        N is the total number of forecasts, n_k the number of forecasts in
        bin k and ok_bar the observed frequency within that bin.
        """
        N = self.o.size
        dsc_1d = np.zeros([len(self.k)])
        for ik, k, nk, ok_bar in self._bin_summary():
            dkl_all, dkl_term = self.__compute_dkl(ok_bar, self.o_bar)
            dsc_1d[ik] = nk * dkl_term
        if self.verbose:
            # Shows the last bin's divergence and the weighted per-bin terms.
            print(f"{dkl_all=}, {dkl_term=}, {dsc_1d=}")
        dsc_1d = np.nan_to_num(dsc_1d)
        # Terms are already weighted by n_k, so normalise by N (not by the
        # number of bins) to obtain the sample-weighted average.
        return np.sum(dsc_1d) / N

    def compute_unc(self):
        """Uncertainty: entropy (bits) of the observed base rate ``o_bar``.

        Returns 0 when only one outcome occurs (o_bar equal to 0 or 1).
        """
        with np.errstate(divide='ignore', invalid='ignore'):
            term1 = (1 - self.o_bar) * np.log2(1 - self.o_bar)
            term2 = self.o_bar * np.log2(self.o_bar)

        # 0*log2(0) evaluates to NaN; its limit is 0 (a certain outcome
        # carries no uncertainty).
        term1 = np.nan_to_num(term1)
        term2 = np.nan_to_num(term2)

        unc = -term1 - term2
        # Adding 0.0 turns a possible -0.0 into 0.0 without changing values.
        return unc + 0.0

    def compute_rel(self):
        """Reliability: (1/N) * sum_k n_k * D(ok_bar || f_k).

        N is the total number of forecasts, n_k the number of forecasts in
        bin k, ok_bar the observed frequency within that bin and f_k the
        forecast probability that defines the bin.
        """
        N = self.o.size
        rel_1d = np.zeros([len(self.k)])
        for ik, k, nk, ok_bar in self._bin_summary():
            _, dkl = self.__compute_dkl(ok_bar, k)
            rel_1d[ik] = nk * dkl
        rel_1d = np.nan_to_num(rel_1d)
        # Terms are already weighted by n_k, so normalise by N (not by the
        # number of bins) to obtain the sample-weighted average.
        return np.sum(rel_1d) / N
           
        
    

In [39]:
# Worked example: three paired binary observations and forecast probabilities.
# A longer alternative sample that can be swapped in:
#   o = np.array([1,0,0,0,1,0])
#   f = np.array([0.9,0.1,0.3,0.4,0.6,0.6])
o, f = np.array([1, 0, 0]), np.array([0.6, 0.4, 0.1])

# Scorer instance reused by the component cells that follow.
dkl = DKL(o, f)


### Uncertainty component
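Uncertainty is non-zero only when both outcomes occur in the sample; if only one outcome ever occurs, the outcome is certain (entropy = 0).
Otherwise it is the prior entropy of the observed base rate $\bar{o}$ (1 bit for a fair coin):
$U = -\bar{o}\log_2\bar{o} - (1-\bar{o})\log_2(1-\bar{o})$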

In [40]:
# Uncertainty component (bits) of the example sample, from its base rate o_bar.
print(dkl.compute_unc())

0.9182958340544896


### Reliability component
The information lost via "probabilistic false alarm" or incorrect categorisation of forecast probability
Zero reliability error (information loss) occurs when, e.g., an average 20% risk is issued for an event that occurs 1 in 5 times

In [41]:
# Reliability component (bits) of the example sample.
print(dkl.compute_rel())

0.5419780939258209


### Discrimination component
We might want to return an array of dsc values per probability threshold
dsc_1d shows this currently - can output dkl.k attribute for 1-D array of forecast-probability bins
(Need a visualisation bit later in notebook)

In [42]:
# Discrimination component (bits); compute_dsc() also prints its per-bin terms.
print(dkl.compute_dsc())


dkl_all=1.584962500721156, dkl_term=1.584962500721156, dsc_1d=array([0.5849625, 0.5849625, 1.5849625])
0.9182958340544894


In [43]:
# Total score evaluated directly from the paired observations and forecasts.
print(dkl.compute_dkl(from_components=False))


all_dkl=array([0.73696559, 0.73696559, 0.15200309]), raw_dkl=0.5419780939258209, dkl=0.5419780939258209
0.5419780939258209


In [44]:
# Total score rebuilt as REL - DSC + UNC; compare with the direct value.
print(dkl.compute_dkl(from_components=True))

dkl_all=1.584962500721156, dkl_term=1.584962500721156, dsc_1d=array([0.5849625, 0.5849625, 1.5849625])
0.541978093925821


### Skill scores
You can represent these components normalised by uncertainty
Positive indicates a gain over this measure

We would rather look at information gained over a baseline; the naive baseline is the entropy (uncertainty) of the observations themselves.
We can also compare two models (with different rho, for instance) by replacing UNC with the DKL of one model, i.e. the uncertainty remaining after its forecast.

Information gained between forecasts issued by an old and new model f1 and f2, respectively, is:

In [44]:
def compute_info_gain(o,f1,f2):
    """Information gained (bits) by forecasts ``f2`` over forecasts ``f1``.

    Parameters
    ----------
    o : array-like
        Observations shared by both forecast sets.
    f1 : array-like
        Forecast probabilities from the old (baseline) model.
    f2 : array-like
        Forecast probabilities from the new model.

    Returns
    -------
    float
        ``DKL(o, f1) - DKL(o, f2)``; positive when ``f2`` has the lower
        divergence score.
    """
    # Build a fresh scorer per forecast set: the module-level name `dkl` is a
    # DKL *instance*, so calling it (as the original code did) raises TypeError.
    DKL1 = DKL(o, f1).compute_dkl()
    DKL2 = DKL(o, f2).compute_dkl()
    return DKL1 - DKL2