In [1]:
import numpy as np
import pandas as pd
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.forecasting.theta import ThetaModel
from scipy.optimize import fsolve
from scipy.special import rel_entr, kl_div
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_density_forecast(ts, horizon, base_alg, base_params={}, bins='auto', omega=None, fittedvalues=False):
    """
    Returns a list of density dictionaries {'bins': np.array, 'probs': np.array, 'dotted_forecast': float}.

    The density for every step is the histogram of the in-sample residuals of the
    fitted base algorithm, shifted by the point forecast for that step.

    Parameters
    ----------
    ts : array_like
        The time series to model.
    horizon : int
        The horizon to forecast.
    base_alg : {ExponentialSmoothing, SimpleExpSmoothing, Holt}
        The base algorithm class; it is called as ``base_alg(ts, **base_params).fit()``
        and the result must expose ``predict(start=, end=)`` and ``resid``.
    base_params : dict
        A dictionary with base algorithm parameters (only read, never modified).
    bins : int or sequence of scalars or str, optional
        Define how to calculate bins (forwarded to ``np.histogram``).
    omega : sequence of scalars, optional
        Explicit bin edges; when given they take precedence over ``bins``.
    fittedvalues : bool
        Include fitted values in density dictionaries or not. If True the list
        starts at the first observation and has ``len(ts) + horizon`` entries,
        otherwise it has ``horizon`` entries.

    Returns
    -------
    list of dict
        One density dictionary per predicted step.
    """
    # Keep the binning rule in its own name: it must stay untouched across steps so
    # that every step gets its own edges when a rule ('auto', an int, ...) is used.
    bin_spec = bins if omega is None else omega

    alg = base_alg(ts, **base_params).fit()

    n_obs = len(ts)
    first_step = 0 if fittedvalues else n_obs
    alg_preds = np.asarray(alg.predict(start=first_step, end=n_obs + horizon - 1), dtype=float)
    residuals = np.asarray(alg.resid, dtype=float)

    density_dicts = []
    for point_forecast in alg_preds:
        counts, edges = np.histogram(residuals + point_forecast, bins=bin_spec)
        total = counts.sum()
        # If no shifted residual falls inside a fixed grid, report zero mass
        # instead of the NaNs produced by 0 / 0.
        probs = counts / total if total > 0 else np.zeros(counts.size)
        density_dicts.append({'bins': edges, 'probs': probs, 'dotted_forecast': point_forecast})

    return density_dicts

In [3]:
def plot_density_forecast(ts, delay, base_alg, ax=None, **kwargs):
    """
    Draws the density forecast of a base algorithm for a single step as a bar chart.

    The bin that contains the point forecast is drawn in 'coral', all other bins
    in 'royalblue'.

    Parameters
    ----------
    ts : array_like
        The time series to model.
    delay : int
        The step to plot; the density at index ``delay - 1`` of the list returned
        by ``get_density_forecast(ts, delay, base_alg, **kwargs)`` is used.
    base_alg : class
        The base algorithm class; its name is used in the title.
    ax : matplotlib axes, optional
        Axes to draw on. If None, the current pyplot axes are used.
    **kwargs
        Forwarded to ``get_density_forecast``.
    """
    density_dict = get_density_forecast(ts, delay, base_alg, **kwargs)[delay - 1]

    edges = np.asarray(density_dict['bins'], dtype=float)
    probs = np.asarray(density_dict['probs'], dtype=float)
    forecast = density_dict['dotted_forecast']

    left_edges = edges[:-1]
    widths = np.diff(edges)

    # One colour per bar. Bins are half-open [left, right) except the last one,
    # which also contains its right edge (the np.histogram convention).
    hit = (left_edges <= forecast) & (forecast < edges[1:])
    if hit.size and forecast == edges[-1]:
        hit[-1] = True
    colors = ['coral' if inside else 'royalblue' for inside in hit]

    # The class name is the readable label; fall back to str() for objects without one.
    alg_name = getattr(base_alg, '__name__', str(base_alg))

    target = ax if ax is not None else plt.gca()
    target.bar(left_edges, probs, align='edge', width=0.9 * widths, color=colors)
    target.set_title(f'{alg_name}: density forecast with delay={delay}')

In [4]:
def get_omega(ts, mode, bins=None, quantile=0.1):
    """
    Returns np.array of bin edges (the outcome grid) built from the time series.

    Parameters
    ----------
    ts : array_like
        The time series.
    mode : {"basic", "quantile", "mean"}
        "basic" : edges from ``np.histogram_bin_edges(ts, bins)``.
        "quantile" : integer-width grid whose width is the ``quantile``-quantile
        of the absolute one-step differences of ``ts``.
        "mean" : integer-width grid whose width is the mean absolute one-step
        difference of ``ts``.
    bins : int or sequence of scalars or str, optional
        Only used in "basic" mode. None means 10 bins.
    quantile : float
        Only used in "quantile" mode.

    Returns
    -------
    np.array
        Bin edges. In "quantile"/"mean" mode the grid starts at floor(min(ts)),
        has an integer step of at least 1 and its last edge is >= ceil(max(ts)).

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """
    values = np.asarray(ts, dtype=float)

    if mode == "basic":
        return np.histogram_bin_edges(values, 10 if bins is None else bins)

    if mode not in ("quantile", "mean"):
        raise ValueError(f'Unknown mode {mode!r}; expected "basic", "quantile" or "mean"')

    steps = np.abs(np.diff(values))
    if steps.size == 0:
        # A single observation has no differences; fall back to the minimum width.
        bin_width = 1
    elif mode == "quantile":
        bin_width = int(np.quantile(steps, quantile))
    else:
        bin_width = int(np.mean(steps))

    # The width is truncated to an integer and never allowed below 1.
    bin_width = max(bin_width, 1)

    min_o = int(np.floor(values.min()))
    max_o = int(np.ceil(values.max()))

    # Integer grid from min_o in steps of bin_width up to the first edge >= max_o.
    return np.arange(min_o, max_o + bin_width, bin_width)

In [5]:
def brier_loss(y_true, density_dict, p=2):
    """
    Returns the Brier-type loss (a scalar) of a histogram density for one outcome.

    The outcome is turned into an indicator vector over the bins and the loss is
    ``sum(|probs - indicator| ** p)``. If the outcome lies outside all bins the
    indicator is all zeros and a penalty of 1 is added.

    Parameters
    ----------
    y_true : float
        A true value.
    density_dict : dict
        Dict with bins and probabilities information: 'bins' holds the
        ``len(probs) + 1`` increasing bin edges, 'probs' the bin probabilities.
    p : float
        The power applied to the absolute differences (2 gives the Brier score).
    """
    probs = np.asarray(density_dict['probs'], dtype=float)
    edges = np.asarray(density_dict['bins'], dtype=float)

    indicator = np.zeros(probs.size)
    if probs.size and edges[0] <= y_true <= edges[-1]:
        # Same convention as np.histogram: bins are half-open [left, right),
        # except the last bin, which also contains its right edge.
        position = np.searchsorted(edges, y_true, side='right') - 1
        indicator[min(position, probs.size - 1)] = 1

    loss = np.sum(np.abs(probs - indicator) ** p)

    # The outcome was not covered by any bin.
    if not indicator.any():
        loss += 1

    return loss

In [6]:
def get_generalized_loss(y_true, density_dicts, loss_function, p):
    """
    Returns np.array with the loss of every density for the same outcome.

    Parameters
    ----------
    y_true : float
        A true value.
    density_dicts : dict or array-like of dicts
        Dicts with bins and probabilities information. A single dict (or dict
        subclass) is treated as a one-element collection.
    loss_function : function
        Called as ``loss_function(y_true, density_dict, p)`` for every density.
    p : float
        Passed through to ``loss_function``.
    """
    # isinstance also recognises dict subclasses, which would otherwise be
    # iterated over their keys.
    if isinstance(density_dicts, dict):
        density_dicts = [density_dicts]

    return np.array([loss_function(y_true, density_dict, p) for density_dict in density_dicts])

In [7]:
def avoid_overflowing(base, power_array):
    """
    Returns base ** power_array, evaluated after subtracting one common offset from all exponents.

    On the normal path the result equals the true powers multiplied by a single
    factor shared by every element, so it is only defined up to that factor.
    If the spread of the exponents is too large, a warning is printed and the
    unshifted powers are returned instead.

    Parameters
    ----------
    base : float
        The base of the power.
    power_array : np.array
        The exponents (must support np.max/np.min and array arithmetic).
    """
    maximum = np.max(power_array)
    minimum = np.min(power_array)
    
    # -log2(base ** x): the base-2 exponent of each extreme result, with the sign flipped.
    pmax = -np.log(base)/np.log(2) * maximum
    pmin = -np.log(base)/np.log(2) * minimum
    
    # NOTE(review): 2097 is presumably 1023 + 1074, the span of binary exponents
    # representable in float64 - confirm.
    if np.abs(pmax-pmin) > 2097:
        print('Overflow is imminent. Further calculations are not advised')
        return base ** power_array
    # Offset in base-2 exponent units.
    # NOTE(review): the meaning of the constants 51 and 1023 is not stated in this file.
    power_shift = abs((51+pmin+pmax)/2)
    power_shift = power_shift + min(0, pmin - power_shift + 1023)
    
    # Convert the offset from base-2 units to units of `base`; because of np.abs the
    # amount subtracted from the exponents is never negative.
    power_array = power_array - np.abs(power_shift * np.log(2) / np.log(base))
    
    return base ** power_array

In [8]:
def get_generalized_prediction(ts, preds, omega, weights, loss_function, p, eta):
    """
    Returns np.array with the generalized prediction for every bin of omega.

    For the midpoint ``w`` of each pair of neighbouring edges of ``omega`` the value is
    ``-(1 / eta) * log(sum_k weights[k] * exp(-eta * loss_k(w)))``,
    where ``loss_k(w)`` is the loss of the k-th density in ``preds`` for outcome ``w``.

    Parameters
    ----------
    ts : array_like
        Not used by this function; kept for interface compatibility.
    preds : array-like of dicts
        Density dictionaries of the base algorithms for one time step.
    omega : array_like
        Bin edges of the outcome grid.
    weights : array_like
        Current weights of the base algorithms.
    loss_function : function
        Called as ``loss_function(outcome, density_dict, p)``.
    p : float
        Passed through to ``loss_function``.
    eta : float
        The learning rate.
    """
    weights = np.asarray(weights, dtype=float)
    generalized_predictions = []

    for left, right in zip(omega, omega[1:]):
        losses = get_generalized_loss((left + right) / 2, preds, loss_function, p)
        exponents = -eta * np.asarray(losses, dtype=float)

        # Log-sum-exp with the largest exponent factored out: the largest term is
        # exp(0) = 1, so nothing overflows, and the factored-out part is added back
        # exactly, so the values for different outcomes stay on the same scale.
        peak = np.max(exponents)
        mixture = np.sum(weights * np.exp(exponents - peak))
        generalized_predictions.append(-(peak + np.log(mixture)) / eta)

    return np.array(generalized_predictions)

In [9]:
from scipy.optimize import fsolve

def s_equation(s, generalized_predictions, m=2):
    """
    Returns the residual ``sum(max(s - g, 0)) - m`` over all generalized predictions g.

    Parameters
    ----------
    s : float or np.array
        The candidate value of s.
    generalized_predictions : np.array
        The generalized predictions.
    m : float
        The value the clipped sum has to reach for the residual to be zero.
    """
    positive_parts = np.maximum(s - generalized_predictions, 0)
    return np.sum(positive_parts) - m

In [10]:
def substitution_function(generalized_predictions, s, m=2):
    """
    Returns np.array with ``max(s - g, 0) / m`` for every generalized prediction g.

    Parameters
    ----------
    generalized_predictions : np.array
        The generalized predictions.
    s : float or np.array
        The value of s.
    m : float
        The divisor applied to the clipped differences.
    """
    clipped = np.maximum(s - generalized_predictions, 0)
    return np.asarray(clipped / m)

In [11]:
# def get_AA_density(omega_and_predictions, bins):
#     probs = [0] * (len(bins) - 1)
#     for i in range(len(bins) - 1):
#         for omega, pred in omega_and_predictions:
#             if bins[i] <= omega < bins[i + 1]:
#                 probs[i] += pred
#     return np.array(probs)

In [12]:
def update_weights(weights, losses, eta=1):
    exp_losses = avoid_overflowing(np.e, -eta * np.array(losses))
    new_weights = weights * exp_losses
    return new_weights / (np.sum(new_weights))

In [13]:
def aggregating_algorithm(ts, horizon, base_alg_dict, bins=10, omega_mode="basic",
                          loss_function=brier_loss, p=2, weights=None, eta=1):
    """
    Returns a list of ``len(ts) + horizon`` density dictionaries {'bins': np.array, 'probs': np.array}.

    The first ``len(ts)`` entries are the aggregated densities for the observed
    steps (weights are updated after each of them with the realised value), the
    last ``horizon`` entries are the aggregated forecasts made with the final weights.

    Parameters
    ----------
    ts : pandas.Series or array_like
        The time series to model.
    horizon : int
        The horizon to forecast.
    base_alg_dict : dict
        The dictionary with the base algoritms and their params:
        key : base_alg {ExponentialSmoothing, SimpleExpSmoothing, Holt} - base algorithm.
        value : dict - a dictionary of base algorithm's parameters.
    bins : int or sequence of scalars or str
        Passed to ``get_omega`` (used in "basic" mode).
    omega_mode : {"basic", "quantile", "mean"}
        How the common outcome grid is built, see ``get_omega``.
    loss_function : function
        The loss function of aggregating algorithm,
        called as ``loss_function(y_true, density_dict, p)``.
    p : float
        Passed through to ``loss_function``.
    weights : array_like, optional
        Initial weights of the base algorithms. None means uniform weights.
    eta : float
        The learning rate.

    Raises
    ------
    ValueError
        If ``base_alg_dict`` is empty.
    """
    T = len(ts)
    K = len(base_alg_dict)
    if K == 0:
        raise ValueError('base_alg_dict must contain at least one base algorithm')

    values = np.asarray(ts)

    # `weights is None` instead of truthiness: an ndarray of weights has no truth value.
    if weights is None:
        weights = np.full(K, 1 / K)
    else:
        weights = np.asarray(weights, dtype=float)

    # Forecasts are not taken into account when building Omega, only the observed series.
    omega = get_omega(ts, mode=omega_mode, bins=bins)

    # BA_preds[k, t] is the density of the k-th base algorithm for step t.
    BA_preds = np.empty((K, T + horizon), dtype=object)
    for k, (base_alg, base_alg_params) in enumerate(base_alg_dict.items()):
        BA_preds[k] = get_density_forecast(ts, horizon, base_alg, base_params=base_alg_params,
                                           omega=omega, fittedvalues=True)

    AA_preds = []

    for t in tqdm(range(T + horizon)):
        preds = BA_preds[:, t]

        generalized_predictions = get_generalized_prediction(values[:t], preds, omega,
                                                             weights, loss_function, p, eta)

        # solving the equation to find s
        s_init = np.max(generalized_predictions)
        s = fsolve(s_equation, s_init, args=(generalized_predictions,))

        # get real prediction with substitution function
        AA_preds.append({
            'bins': preds[0]['bins'],
            'probs': substitution_function(generalized_predictions, s),
        })

        # update weights; only possible while the true value is known
        if t < T:
            losses = [loss_function(values[t], pred, p) for pred in preds]
            weights = update_weights(weights, losses, eta)

    return AA_preds