In [1]:
%cd ../
import numpy as np 
import pandas as pd
import xarray as xr

from scipy.optimize import brute
from sklearn.metrics import confusion_matrix
from triggers import TOLERANCE, GENERAL_T, NON_REGRET_T

c:\Users\amine.barkaoui\OneDrive - World Food Programme\Documents\GitHub\anticipatory-action


INFO:root:Set disk cache path.


### Original method / function

In [1]:
def objective(
    t,
    obs_val,
    obs_bool,
    prob_issue0,
    prob_issue1,
    leadtime,
    issue,
    category,
    vulnerability,
    end_season=5,
    penalty=1e6,
    alpha=10e-3,
    sorting=False,
):
    if leadtime <= end_season:
        obs_val = obs_val[1:]
        obs_bool = obs_bool[1:]
        prob_issue0 = prob_issue0[:-1]
        prob_issue1 = prob_issue1[:-1]

    prediction = np.logical_and(prob_issue0 > t[0], prob_issue1 > t[1])

    cm = confusion_matrix(obs_bool, prediction, labels=[0, 1])
    _, false, fn, hits = cm.ravel()

    number_actions = np.sum(prediction)

    far = false / (false + hits)
    false_tol = np.sum(prediction & (obs_val > TOLERANCE[category]))
    hit_rate = hits / (hits + fn)
    success_rate = hits + false - false_tol
    failure_rate = false_tol

    freq = number_actions / len(obs_val)
    return_period = np.round(1 / freq if freq != 0 else 0, 0)

    requirements = GENERAL_T if vulnerability == "GT" else NON_REGRET_T

    constraints = [
        hit_rate >= requirements["HR"],
        success_rate >= (requirements["SR"] * number_actions),
        failure_rate <= (requirements["FR"] * number_actions),
        return_period >= requirements["RP"][category],
        (leadtime - (issue + 1)) % 12 > 1,
    ]

    if not sorting:
        if not (all(constraints)):
            return penalty
        else:
            return -hit_rate + alpha * far
    else:
        return -hit_rate, failure_rate / number_actions

In [4]:
def find_optimal_triggers(
    observations_bool,
    observations_val,
    prob_issue0,
    prob_issue1,
    lead_time,
    issue,
    category,
    vulnerability,
):
    # Define grid
    threshold_range = (0.0, 1.0)
    grid = (
        slice(threshold_range[0], threshold_range[1], 0.01),
        slice(threshold_range[0], threshold_range[1], 0.01),
    )

    # Launch research
    best_triggers, best_score, _, _ = brute(
        objective,
        grid,
        args=(
            observations_val,
            observations_bool,
            prob_issue0,
            prob_issue1,
            lead_time,
            issue,
            category,
            vulnerability,
        ),
        full_output=True,
        finish=None,
    )

    return best_triggers, best_score


In [5]:
%%time
find_optimal_triggers(
    np.array([0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0]),
    np.array([0,-0.42752841,-1.14785480,1.00835359,1.01152670,0.08140295,1.07925391,2.89334059,0.49650693,-2.10596442,-0.11560103,1.13604414,-0.61310524,0.99173242,-0.80315828,-1.33247614,-0.39426482,-0.69886303,0.70436287,-0.68371397,1.43038058,0.55627447,-0.60290152,-0.89140522,1.09524286,0.35090649,0.65150774,0.17972234,1.73618770,-0.32053682]),   
    np.array([0.19403316,0.40413737,0.03950670,0.19867207,0.36617446,0.12769049,0.09033364,0.08100221,0.28882408,0.27224869,0.30656403,0.19762351,0.14381145,0.25782165,0.13781390,0.07984945,0.29496825,0.10694549,0.18244502,0.19856039,0.22373931,0.27777267,0.51857853,0.08000000,0.19938798,0.26805690,0.36049867,0.27718082,0.19917504,0]), 
    np.array([0.14940780,0.32687554,0.10491829,0.17928207,0.28445852,0.01592254,0.10268304,0.23625106,0.21073855,0.38182610,0.11036947,0.18485942,0.07852152,0.30479109,0.14028412,0.27518070,0.33471456,0.07077074,0.31999999,0.0986679,0.16447723,0.27975520,0.15368882,0.15867205,0.22455618,0.37894413,0.37922379,0.17322889,0.15639387,0]), 
    1,
    10, 
    'Moderado', 
    'NRT',
)

CPU times: total: 5.75 s
Wall time: 5.83 s


(array([0.28, 0.12]), -0.745)

### Numba-optimized way

In [2]:
from numba.core import types
from numba.typed import Dict

# Define some constants
# The Dict.empty() constructs a typed dictionary.
TOLERANCE2 = Dict.empty(
    key_type=types.unicode_type,
    value_type=types.f8,
)
TOLERANCE2['Leve'] = 0; TOLERANCE2['Moderado'] = -0.44; TOLERANCE2['Severo'] = -0.68

GENERAL_T2 = Dict.empty(
    key_type=types.unicode_type,
    value_type=types.f8,
)
GENERAL_T2['HR'] = 0.5; GENERAL_T2['SR'] = 0.65; GENERAL_T2['FR'] = 0.35; GENERAL_T2['RP'] = 4.

NON_REGRET_T2 = Dict.empty(
    key_type=types.unicode_type,
    value_type=types.f8,
)
NON_REGRET_T2['HR']=0.65; NON_REGRET_T2['SR']=0.55; NON_REGRET_T2['FR']=0.45; NON_REGRET_T2['RP'] = 3.

In [1]:
from numba import jit, float64, float32, int16, boolean
from numba.experimental import jitclass

@jit(nopython=True, cache=True)
def _compute_confusion_matrix(true, pred):
  '''
  Computes a confusion matrix using numpy for two np.arrays
  true and pred.

  Results are identical (and similar in computation time) to: 
    "from sklearn.metrics import confusion_matrix"

  However, this function avoids the dependency on sklearn and 
  allows to use numba in nopython mode.
  '''

  K = len(np.unique(true)) # Number of classes 
  result = np.zeros((K, K))

  for i in range(len(true)):
    result[true[i]][pred[i]] += 1

  return result

@jit(
#    float64(float32[:], float32[:], int16[:], float32[:], float32[:], int16, int16, types.unicode_type, types.unicode_type, int16, float64, float64),
    nopython=True, 
    cache=True,
)
def objective_numba(
    t,
    obs_val,
    obs_bool,
    prob_issue0,
    prob_issue1,
    leadtime,
    issue,
    category,
    vulnerability,
    tolerance,
    general_req,
    non_regret_req,
    end_season=5,
    penalty=1e6,
    alpha=10e-3,
    sorting=False,
):
    if leadtime <= end_season:
        obs_val = obs_val[1:]
        obs_bool = obs_bool[1:]
        prob_issue0 = prob_issue0[:-1]
        prob_issue1 = prob_issue1[:-1]
    
    prediction = np.logical_and(prob_issue0 > t[0], prob_issue1 > t[1]).astype(np.int16)

    cm = _compute_confusion_matrix(obs_bool, prediction)
    _, false, fn, hits = cm.ravel()

    number_actions = np.sum(prediction)

    far = false / (false + hits + alpha)
    false_tol = np.sum(prediction & (obs_val > tolerance[category]))
    hit_rate = hits / (hits + fn)
    success_rate = hits + false - false_tol
    failure_rate = false_tol
    
    freq = number_actions / len(obs_val)
    return_period = np.round(1 / freq if freq != 0 else 0, 0)
    
    requirements = general_req if vulnerability == "GT" else non_regret_req
    req_RP = requirements['RP'] + 1 * (category[0]=='M') + 2 * (category[0]=='S')
    
    constraints = np.array([
        hit_rate >= requirements["HR"],
        success_rate >= (requirements["SR"] * number_actions),
        failure_rate <= (requirements["FR"] * number_actions),
        return_period >= req_RP,
        (leadtime - (issue + 1)) % 12 > 1,
    ]).astype(np.int16)
    
    if sorting:
        return [-hit_rate, failure_rate / number_actions]
    else:
      if np.all(constraints):
          return [-hit_rate + alpha * far]
      else:
          return [penalty]



In [6]:
@jit(nopython=True)
def make_grid(arraylist):
    n = len(arraylist)
    k = arraylist[0].shape[0]
    a2d = np.zeros((n, k, k))
    for i in range(n):
        a2d[i] = arraylist[i]
    return(a2d)

@jit(nopython=True)
def meshxy(x, y):
    xx = np.empty(shape=(x.size, y.size), dtype=x.dtype)
    yy = np.empty(shape=(x.size, y.size), dtype=y.dtype)
    for i in range(y.size):
        for j in range(x.size):
            xx[i,j] = x[j]  # change to x[j] if indexing xy
            yy[i,j] = y[i]  # change to y[i] if indexing xy
    return xx, yy

#@jit(nopython=True)
def brute_numba(func, lrange, args=()):
    assert len(lrange) == 2
    
    x, y = meshxy(*lrange)
    grid = make_grid([x, y])

    # obtain an array of parameters that is iterable by a map-like callable
    inpt_shape = np.array(grid.shape)
    grid = np.reshape(grid, (inpt_shape[0], np.prod(inpt_shape[1:]))).T
    
    # iterate over input arrays
    Jout = np.array([
        func(np.asarray(candidate).flatten(), *args)
        for candidate in grid
    ])
    
    Jout = np.reshape(Jout, (inpt_shape[1], inpt_shape[2]))
    grid = np.reshape(grid.T, (inpt_shape[0], inpt_shape[1], inpt_shape[2]))
   
    Nshape = np.shape(Jout) 
    indx = np.argmin(Jout.ravel())
    Nindx = np.empty(2, dtype=np.uint8)    
    Nindx[1] = indx % Nshape[1]
    indx = indx // Nshape[1]
    Nindx[0] = indx % Nshape[0]

    xmin = np.array([grid[k][Nindx[0], Nindx[1]] for k in range(2)])

    Jmin = Jout[Nindx[0], Nindx[1]]

    return xmin, Jmin

In [2]:
def find_optimal_triggers_numba(
    observations_bool,
    observations_val,
    prob_issue0,
    prob_issue1,
    lead_time,
    issue,
    category,
    vulnerability,
):
    # Define grid
    threshold_range = (0.0, 1.0)
    grid = (
        np.arange(threshold_range[0], threshold_range[1], step=0.01),
        np.arange(threshold_range[0], threshold_range[1], step=0.01),
    )
    
    # Launch research
    best_triggers, best_score = brute_numba(
        objective_numba,
        grid,
        args=(
            observations_val,
            observations_bool,
            prob_issue0,
            prob_issue1,
            lead_time,
            issue,
            category,
            vulnerability,
            TOLERANCE2,
            GENERAL_T2,
            NON_REGRET_T2,
        ),
    )

    return best_triggers, best_score

In [7]:
%%time
find_optimal_triggers_numba(
    np.array([0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0]),
    np.array([0,-0.42752841,-1.14785480,1.00835359,1.01152670,0.08140295,1.07925391,2.89334059,0.49650693,-2.10596442,-0.11560103,1.13604414,-0.61310524,0.99173242,-0.80315828,-1.33247614,-0.39426482,-0.69886303,0.70436287,-0.68371397,1.43038058,0.55627447,-0.60290152,-0.89140522,1.09524286,0.35090649,0.65150774,0.17972234,1.73618770,-0.32053682]),   
    np.array([0.19403316,0.40413737,0.03950670,0.19867207,0.36617446,0.12769049,0.09033364,0.08100221,0.28882408,0.27224869,0.30656403,0.19762351,0.14381145,0.25782165,0.13781390,0.07984945,0.29496825,0.10694549,0.18244502,0.19856039,0.22373931,0.27777267,0.51857853,0.08000000,0.19938798,0.26805690,0.36049867,0.27718082,0.19917504,0]), 
    np.array([0.14940780,0.32687554,0.10491829,0.17928207,0.28445852,0.01592254,0.10268304,0.23625106,0.21073855,0.38182610,0.11036947,0.18485942,0.07852152,0.30479109,0.14028412,0.27518070,0.33471456,0.07077074,0.31999999,0.0986679,0.16447723,0.27975520,0.15368882,0.15867205,0.22455618,0.37894413,0.37922379,0.17322889,0.15639387,0]), 
    1,
    10, 
    'Moderado', 
    'NRT',
)

CPU times: total: 3.84 s
Wall time: 3.9 s


(array([0.28, 0.12]), -0.7450083194675541)

### Test

In [4]:
def test_find_optimal_triggers_guija():
    result, _ = find_optimal_triggers(
        np.array([0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0]),
        np.array([0,-0.42752841,-1.14785480,1.00835359,1.01152670,0.08140295,1.07925391,2.89334059,0.49650693,-2.10596442,-0.11560103,1.13604414,-0.61310524,0.99173242,-0.80315828,-1.33247614,-0.39426482,-0.69886303,0.70436287,-0.68371397,1.43038058,0.55627447,-0.60290152,-0.89140522,1.09524286,0.35090649,0.65150774,0.17972234,1.73618770,-0.32053682]),   
        np.array([0.19403316,0.40413737,0.03950670,0.19867207,0.36617446,0.12769049,0.09033364,0.08100221,0.28882408,0.27224869,0.30656403,0.19762351,0.14381145,0.25782165,0.13781390,0.07984945,0.29496825,0.10694549,0.18244502,0.19856039,0.22373931,0.27777267,0.51857853,0.08000000,0.19938798,0.26805690,0.36049867,0.27718082,0.19917504,0]), 
        np.array([0.14940780,0.32687554,0.10491829,0.17928207,0.28445852,0.01592254,0.10268304,0.23625106,0.21073855,0.38182610,0.11036947,0.18485942,0.07852152,0.30479109,0.14028412,0.27518070,0.33471456,0.07077074,0.31999999,0.0986679,0.16447723,0.27975520,0.15368882,0.15867205,0.22455618,0.37894413,0.37922379,0.17322889,0.15639387,0]), 
        1,
        10, 
        'Moderado', 
        'NRT',
    )
    return np.testing.assert_equal(result, np.array([0.28, 0.12]))

test_find_optimal_triggers_guija()