## Functions

### Functions to evaluate performance

In [0]:
import pandas as pd
from typing import Literal, Optional
import plotly.graph_objects as go
from datetime import datetime


In [0]:
def calculate_errors(df_eval:pd.DataFrame, actuals_col:str,fc_col:str, error_to_calculate:Literal["Bias", "AbsError", "SqError","RelAbsError"], error_colname_suffix:str, outside_range_threshold:float=0.1)->pd.DataFrame:
    """
    Calculates a per-row error metric between a forecast column and an actuals column.

    Bear in mind that this function appends the resulting column(s) to the
    existing dataframe (`df_eval` is modified in place and also returned).

    Args:
        df_eval (pd.DataFrame): The DataFrame containing the evaluation data.
        actuals_col (str): The name of the column in `df_eval` that represents the actual values.
        fc_col (str): The name of the column in `df_eval` that represents the forecasted values.
        error_to_calculate (Literal["Bias", "AbsError", "SqError", "RelAbsError"]): The type of error to calculate.
            Possible values:
                - "Bias": forecast - actual.
                - "AbsError": |forecast - actual|.
                - "SqError": (forecast - actual) ** 2.
                - "RelAbsError": |forecast - actual| / actual. Also adds a boolean column
                  'Outside_range_<suffix>' that is True where the relative error exceeds
                  `outside_range_threshold`. Rows where the actual value is 0 are not
                  handled specially.
        error_colname_suffix (str): The suffix of the new column, which is named
            '<error_to_calculate>_<error_colname_suffix>'.
        outside_range_threshold (float): Threshold for the permissible range of the relative
            error. Only used for "RelAbsError". Default 0.1

    Returns:
        pd.DataFrame: The DataFrame `df_eval` with the calculated error column added.

    Raises:
        ValueError: If the `error_to_calculate` argument is not one of the valid error types.
            The check happens before `df_eval` is touched, so no column is added in that case.

    Examples:
        >>> data = pd.DataFrame({'actuals': [1, 2, 3], 'forecast': [1.2, 2.5, 2.8]})
        >>> calculate_errors(data, 'actuals', 'forecast', 'AbsError', 'error')
           actuals  forecast  AbsError_error
        0        1       1.2             0.2
        1        2       2.5             0.5
        2        3       2.8             0.2
    """
    valid_errors = ("Bias", "AbsError", "SqError", "RelAbsError")
    # Validate first so an unknown metric never leaves a mislabelled column behind.
    if error_to_calculate not in valid_errors:
        raise ValueError(
            f"error_to_calculate must be one of {valid_errors}, got {error_to_calculate!r}"
        )

    error_colname = error_to_calculate + '_' + error_colname_suffix
    # Every metric is derived from the signed difference (the bias).
    bias = df_eval[fc_col] - df_eval[actuals_col]

    if error_to_calculate == 'Bias':
        df_eval[error_colname] = bias
    elif error_to_calculate == 'AbsError':
        df_eval[error_colname] = bias.abs()
    elif error_to_calculate == 'SqError':
        df_eval[error_colname] = bias.pow(2)
    else:  # 'RelAbsError'
        relative_error = bias.abs().div(df_eval[actuals_col])
        df_eval[error_colname] = relative_error
        df_eval['Outside_range_' + error_colname_suffix] = relative_error > outside_range_threshold

    return df_eval


In [0]:
def plot_inbound_fc(inbound_df:pd.DataFrame, actuals_col:str, fc_col:str, show_out_of_range:bool, date_col:str, outside_range_col:Optional[str]=None):
    """
    Plots the forecasted and actual values from the provided DataFrame `inbound_df`,
    optionally marking the dates flagged as outside range.

    Args:
        inbound_df (pd.DataFrame): The DataFrame containing the inbound data.
        actuals_col (str): The name of the column in `inbound_df` that represents the actual values.
        fc_col (str): The name of the column in `inbound_df` that represents the forecasted values.
        show_out_of_range (bool): Plots a vertical dashed line on the days where the forecast was out of range.
        date_col (str): The name of the column in `inbound_df` that represents the dates (used as x axis).
        outside_range_col (str): The name of the column in `inbound_df` that represents whether a data point
            is outside the range (defined with a bool flag). Required when `show_out_of_range` is True.

    Returns:
        None. The figure is displayed with `fig.show()`.

    Raises:
        ValueError: If `show_out_of_range` is True but `outside_range_col` is not given.

    Examples:
        >>> data = pd.DataFrame({'Dates': ['2023-06-01', '2023-06-02', '2023-06-03'],
                                'fc_col': [1.2, 2.5, 2.8],
                                'actuals_col': [1, 2, 3],
                                'outside_range_col': [False, False, True]})
        >>> plot_inbound_fc(data, 'actuals_col', 'fc_col',  True, 'Dates', 'outside_range_col')
        (Plot of forecasted and actual values with outside range data points displayed)
    """
    # Fail early with a clear message instead of a late `KeyError: None`.
    if show_out_of_range and outside_range_col is None:
        raise ValueError("outside_range_col must be provided when show_out_of_range is True")

    # Use the caller-supplied date column rather than a hard-coded 'Dates' attribute.
    dates = inbound_df[date_col]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[fc_col], name=fc_col, line=dict(color='#0000FF'), showlegend=True))
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[actuals_col], name=actuals_col, line=dict(color='#00FF00'), showlegend=True))

    if show_out_of_range:
        ## Add outside of range traces (this implementation was preferred over the vline method to have the legend included)
        # `== True` is deliberate: only rows whose flag is exactly True are selected.
        flagged_dates = inbound_df.loc[inbound_df[outside_range_col] == True, date_col]
        # Vertical markers span the min..max of the actuals.
        y_span = [inbound_df[actuals_col].min(), inbound_df[actuals_col].max()]
        for position, flagged_date in enumerate(flagged_dates):
            fig.add_trace(go.Scatter(
                x=[flagged_date, flagged_date],
                y=y_span,
                mode='lines',
                line=dict(color='#FF0000', width=0.5, dash='dash'),
                name=outside_range_col,
                # Only the first marker gets a legend entry to avoid duplicates.
                showlegend=(position == 0),
            ))
    fig.show()

In [0]:
def plot_two_inbound_fc(inbound_df:pd.DataFrame, actuals_col:str, fc_col_1:str, fc_col_2:str, show_out_of_range:bool, date_col:str, outside_range_col_1:Optional[str]=None, outside_range_col_2:Optional[str]=None):
    """
    Plots two inbound forecasts along with actuals for a given DataFrame.

    Parameters:
        inbound_df (pd.DataFrame): The DataFrame containing the data to be plotted.
        actuals_col (str): The column name in 'inbound_df' containing the actual data points.
        fc_col_1 (str): The column name in 'inbound_df' containing the first forecast data points.
        fc_col_2 (str): The column name in 'inbound_df' containing the second forecast data points.
        show_out_of_range (bool): If True, outside-of-range data will be plotted as dashed lines.
        date_col (str): The column name in 'inbound_df' containing the dates corresponding to the data points (used as x axis).
        outside_range_col_1 (Optional[str]): The column name in 'inbound_df' indicating the outside-of-range points for the first forecast. Default is None.
        outside_range_col_2 (Optional[str]): The column name in 'inbound_df' indicating the outside-of-range points for the second forecast. Default is None.

    Returns:
        None

    Raises:
        ValueError: If 'show_out_of_range' is True but either outside-of-range column is missing.

    This function creates a line plot comparing two inbound forecasts (fc_col_1 and fc_col_2) with the actual data (actuals_col).
    The plot is generated using Plotly's go.Figure() and is displayed using the 'fig.show()' method.

    If 'show_out_of_range' is True, vertical dashed lines are added on the dates flagged by
    'outside_range_col_1' and 'outside_range_col_2'. Each line spans the min..max of the actuals.

    Note:
    - The 'inbound_df' DataFrame must contain columns for 'actuals_col', 'fc_col_1', and 'fc_col_2' with numeric data.
    - If 'show_out_of_range' is True, 'outside_range_col_1' and 'outside_range_col_2' must be specified as well.
    - The 'date_col' column should contain date or time information corresponding to the data points.

    Example:
    inbound_df = pd.DataFrame({
        'Dates': ['2023-07-01', '2023-07-02', '2023-07-03', '2023-07-04', '2023-07-05'],
        'Actuals': [100, 120, 110, 105, 125],
        'Forecast_1': [90, 100, 95, 105, 110],
        'Forecast_2': [95, 105, 100, 110, 115],
        'Out_of_Range_1': [False, True, False, True, False],
        'Out_of_Range_2': [False, False, True, False, True]
    })

    plot_two_inbound_fc(inbound_df, 'Actuals', 'Forecast_1', 'Forecast_2', True, 'Dates', 'Out_of_Range_1', 'Out_of_Range_2')
    """
    # Fail early with a clear message instead of a late `KeyError: None`.
    if show_out_of_range and (outside_range_col_1 is None or outside_range_col_2 is None):
        raise ValueError(
            "outside_range_col_1 and outside_range_col_2 must be provided when show_out_of_range is True"
        )

    # Use the caller-supplied date column rather than a hard-coded 'Dates' attribute.
    dates = inbound_df[date_col]

    fig = go.Figure()
    # Trace order (second forecast, first forecast, actuals) is kept from the original.
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[fc_col_2], name=fc_col_2, line=dict(color='#FF0000'), showlegend=True))
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[fc_col_1], name=fc_col_1, line=dict(color='#0000FF'), showlegend=True))
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[actuals_col], name=actuals_col, line=dict(color='#00FF00'), showlegend=True))

    if show_out_of_range:
        ## Add outside of range traces (this implementation was preferred over the vline method to have the legend included)
        y_span = [inbound_df[actuals_col].min(), inbound_df[actuals_col].max()]
        marker_specs = (
            (outside_range_col_1, '#000099'),
            (outside_range_col_2, '#990000'),
        )
        for flag_col, colour in marker_specs:
            # `== True` is deliberate: only rows whose flag is exactly True are selected.
            flagged_dates = inbound_df.loc[inbound_df[flag_col] == True, date_col]
            for position, flagged_date in enumerate(flagged_dates):
                fig.add_trace(go.Scatter(
                    x=[flagged_date, flagged_date],
                    y=y_span,
                    mode='lines',
                    line=dict(color=colour, width=0.5, dash='dash'),
                    name=flag_col,
                    # Only the first marker of each set gets a legend entry.
                    showlegend=(position == 0),
                ))
    fig.show()

In [0]:
def plot_inbound_error(inbound_df:pd.DataFrame, error_col:str,  show_out_of_range:bool, date_col:str, outside_range_col:Optional[str]=None):
    """
    Plots the forecasting error from the provided DataFrame `inbound_df`,
    optionally marking the dates flagged as outside range.

    Args:
        inbound_df (pd.DataFrame): The DataFrame containing the inbound data.
        error_col (str): The name of the column in `inbound_df` that represents the error values.
        show_out_of_range (bool): Plots a vertical dashed line on the days where the forecast was out of range.
        date_col (str): The name of the column in `inbound_df` that represents the dates (used as x axis).
        outside_range_col (str): The name of the column in `inbound_df` that represents whether a data point
            is outside the range (defined with a bool flag). Required when `show_out_of_range` is True.

    Returns:
        None. The figure is displayed with `fig.show()`.

    Raises:
        ValueError: If `show_out_of_range` is True but `outside_range_col` is not given.

    Examples:
        >>> data = pd.DataFrame({'Dates': ['2023-06-01', '2023-06-02', '2023-06-03'],
                                'error_col': [1.2, 2.5, 2.8],
                                'outside_range_col': [False, False, True]})
        >>> plot_inbound_error(data, 'error_col', True, 'Dates', 'outside_range_col')
        (Plot of error_col values with outside range data points displayed)
    """
    # Fail early with a clear message instead of a late `KeyError: None`.
    if show_out_of_range and outside_range_col is None:
        raise ValueError("outside_range_col must be provided when show_out_of_range is True")

    fig = go.Figure()
    # Use the caller-supplied date column rather than a hard-coded 'Dates' attribute.
    fig.add_trace(go.Scatter(x=inbound_df[date_col], y=inbound_df[error_col], name=error_col, line=dict(color='#0000FF'), showlegend=True))

    if show_out_of_range:
        ## Add outside of range traces (this implementation was preferred over the vline method to have the legend included)
        # `== True` is deliberate: only rows whose flag is exactly True are selected.
        flagged_dates = inbound_df.loc[inbound_df[outside_range_col] == True, date_col]
        # Vertical markers span the min..max of the plotted error.
        y_span = [inbound_df[error_col].min(), inbound_df[error_col].max()]
        for position, flagged_date in enumerate(flagged_dates):
            fig.add_trace(go.Scatter(
                x=[flagged_date, flagged_date],
                y=y_span,
                mode='lines',
                line=dict(color='#FF0000', width=0.5, dash='dash'),
                name=outside_range_col,
                # Only the first marker gets a legend entry to avoid duplicates.
                showlegend=(position == 0),
            ))
    fig.show()

In [0]:
def plot_inbound_two_errors(inbound_df:pd.DataFrame, error_col_1:str, error_col_2:str, show_out_of_range:bool, date_col:str, outside_range_col_1:Optional[str]=None, outside_range_col_2:Optional[str]=None):
    """
    Plots two sets of inbound errors for a given DataFrame.

    Parameters:
        inbound_df (pd.DataFrame): The DataFrame containing the data to be plotted.
        error_col_1 (str): The column name in 'inbound_df' containing the first set of error data points.
        error_col_2 (str): The column name in 'inbound_df' containing the second set of error data points.
        show_out_of_range (bool): If True, outside-of-range data will be plotted as dashed lines.
        date_col (str): The column name in 'inbound_df' containing the dates corresponding to the data points (used as x axis).
        outside_range_col_1 (Optional[str]): The column name in 'inbound_df' indicating the outside-of-range points for the first set of errors. Default is None.
        outside_range_col_2 (Optional[str]): The column name in 'inbound_df' indicating the outside-of-range points for the second set of errors. Default is None.

    Returns:
        None

    Raises:
        ValueError: If 'show_out_of_range' is True but either outside-of-range column is missing.

    This function creates a line plot comparing two sets of inbound errors (error_col_1 and error_col_2).
    The plot is generated using Plotly's go.Figure() and is displayed using the 'fig.show()' method.

    If 'show_out_of_range' is True, vertical dashed lines are added on the dates flagged by
    'outside_range_col_1' and 'outside_range_col_2'. The lines of each set span the min..max
    of the matching error column (error_col_1 for the first set, error_col_2 for the second).

    Note:
    - The 'inbound_df' DataFrame must contain columns for 'error_col_1' and 'error_col_2' with numeric data.
    - If 'show_out_of_range' is True, 'outside_range_col_1' and 'outside_range_col_2' must be specified as well.
    - The 'date_col' column should contain date or time information corresponding to the data points.

    Example:
    inbound_df = pd.DataFrame({
        'Dates': ['2023-07-01', '2023-07-02', '2023-07-03', '2023-07-04', '2023-07-05'],
        'Error_Set_1': [5, 7, 8, 6, 4],
        'Error_Set_2': [3, 6, 4, 5, 2],
        'Out_of_Range_1': [False, True, False, True, False],
        'Out_of_Range_2': [False, False, True, False, True]
    })

    plot_inbound_two_errors(inbound_df, 'Error_Set_1', 'Error_Set_2', True, 'Dates', 'Out_of_Range_1', 'Out_of_Range_2')
    """
    # Fail early with a clear message instead of a late `KeyError: None`.
    if show_out_of_range and (outside_range_col_1 is None or outside_range_col_2 is None):
        raise ValueError(
            "outside_range_col_1 and outside_range_col_2 must be provided when show_out_of_range is True"
        )

    # Use the caller-supplied date column rather than a hard-coded 'Dates' attribute.
    dates = inbound_df[date_col]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[error_col_1], name=error_col_1, line=dict(color='#0000FF'), showlegend=True))
    fig.add_trace(go.Scatter(x=dates, y=inbound_df[error_col_2], name=error_col_2, line=dict(color='#FF0000'), showlegend=True))

    if show_out_of_range:
        ## Add outside of range traces (this implementation was preferred over the vline method to have the legend included)
        marker_specs = (
            (error_col_1, outside_range_col_1, '#000099'),
            (error_col_2, outside_range_col_2, '#990000'),
        )
        for error_col, flag_col, colour in marker_specs:
            # Each marker set spans the range of its own error series.
            y_span = [inbound_df[error_col].min(), inbound_df[error_col].max()]
            # `== True` is deliberate: only rows whose flag is exactly True are selected.
            flagged_dates = inbound_df.loc[inbound_df[flag_col] == True, date_col]
            for position, flagged_date in enumerate(flagged_dates):
                fig.add_trace(go.Scatter(
                    x=[flagged_date, flagged_date],
                    y=y_span,
                    mode='lines',
                    line=dict(color=colour, width=0.5, dash='dash'),
                    name=flag_col,
                    # Only the first marker of each set gets a legend entry.
                    showlegend=(position == 0),
                ))
    fig.show()

In [0]:
def describe_inbound_fc(inbound_fc:pd.DataFrame, cols_to_keep:list)->pd.DataFrame:
    """
    Generates descriptive statistics for the selected columns of the provided DataFrame `inbound_fc`.

    The columns are selected *before* calling `describe`, so the set of reported
    metrics depends only on `cols_to_keep`: non-numeric columns elsewhere in
    `inbound_fc` no longer inject all-NaN 'unique'/'top'/'freq' rows.

    Args:
        inbound_fc (pd.DataFrame): The DataFrame containing the inbound forecast data.
        cols_to_keep (list): A list of column names to include in the descriptive statistics.

    Returns:
        pd.DataFrame: A new DataFrame with a 'Metrics' column (the statistic name) followed by
        one column per entry of `cols_to_keep`.

    Examples:
        >>> data = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': ['A', 'B', 'C']})
        >>> describe_inbound_fc(data, ['col1', 'col2'])
          Metrics  col1  col2
        0   count   3.0   3.0
        1    mean   2.0   5.0
        2     std   1.0   1.0
        3     min   1.0   4.0
        4     25%   1.5   4.5
        5     50%   2.0   5.0
        6     75%   2.5   5.5
        7     max   3.0   6.0
    """
    # include='all' is kept so that non-numeric columns explicitly requested in
    # `cols_to_keep` are still summarised.
    selected_stats = inbound_fc[cols_to_keep].describe(include='all')
    # Move the statistic names from the index into a regular 'Metrics' column.
    return selected_stats.rename_axis('Metrics').reset_index(drop=False)