# Taguchi loss function

This module contains two functions:
1) taguchi_loss_function - Generates a parabolic loss function using the USL and LSL; the target is taken as their midpoint.
2) one_sided_loss_function - Generates a one sided loss function using the USL and target.

In [1]:
def taguchi_loss_function(data,
                          USL,
                          LSL,
                          k=1,
                          kde_plot_label='kde plot',
                          show_kde=True,
                          show_mean=True,
                          show_annotations=True,
                          show_loss_annotation=True,
                          flip_loss_annotation=False,
                          show_spec_limit_xticks=True,
                          show_yticks=False,
                          kde_bandwidth=1,
                          x_label='Quality Characteristic',
                          ax2_ylabel='',
                          round_value=1,
                          spec_buffer=2):
    """
    Plot the two-sided Taguchi loss function for a dataset.

    The loss curve is a parabola centred on the target (the midpoint of
    LSL and USL) that is held constant outside the specification limits.
    The figure optionally overlays a KDE of the data on a secondary y-axis
    and marks the process mean. The figure is displayed with ``plt.show()``
    and summary statistics are returned.

    Parameters:
    ----------
    data : pandas.Series or numpy.ndarray
        Observations of the quality characteristic. The object must expose
        ``.mean()`` and ``.std()``; those methods are called as-is, so the
        degrees of freedom used for ``.std()`` are whatever the object's own
        default is.
    USL : float
        Upper Specification Limit.
    LSL : float
        Lower Specification Limit.
    k : float, optional (default=1)
        Loss coefficient in L(x) = k * (x - T) ** 2.
    kde_plot_label : str, optional (default='kde plot')
        Label passed to the KDE plot.
    show_kde : bool, optional (default=True)
        Whether to overlay a kernel density estimate (KDE) of the data.
    show_mean : bool, optional (default=True)
        Whether to draw a marker for the process mean.
    show_annotations : bool, optional (default=True)
        Whether to annotate the target, USL, LSL, and L(x).
    show_loss_annotation : bool, optional (default=True)
        Whether to label the mean marker with 'Mean: <rounded mean>'.
    flip_loss_annotation : bool, optional (default=False)
        Currently unused; accepted for backward compatibility.
    show_spec_limit_xticks : bool, optional (default=True)
        Whether to restrict the x-axis ticks to the target and spec limits.
    show_yticks : bool, optional (default=False)
        Whether to show y-axis ticks on the loss axis.
    kde_bandwidth : float, optional (default=1)
        Value passed to the KDE plot as ``bw_adjust``.
    x_label : str, optional (default='Quality Characteristic')
        Label for the x-axis.
    ax2_ylabel : str, optional (default='')
        Label for the secondary (KDE) y-axis.
    round_value : int, optional (default=1)
        Number of decimals used when rounding the reported statistics.
        Rounding is applied to reported values only, never to intermediate
        calculations.
    spec_buffer : float, optional (default=2)
        Extra x-range plotted beyond each specification limit.

    Returns:
    -------
    output_dict : dict
        Dictionary containing:
        - 'Results': DataFrame with rows 'Mean', 's', 'Ave. Loss' and
          'Slope at Ave.'. Despite its label, 'Slope at Ave.' holds the
          height of the loss curve at the mean (k * (mean - T) ** 2) when
          the mean lies strictly inside the limits, and 0 otherwise.
        - 'Loss Curve': DataFrame with the 500 'x-values' and 'y-values'
          of the plotted loss curve.

    Raises:
    ------
    ValueError
        If USL is smaller than LSL.

    Notes:
    -----
    - Loss curve: L(x) = k * (x - T) ** 2 for LSL <= x <= USL, and the
      constant k * (USL - T) ** 2 outside the limits.
    - Average loss: k * (s ** 2 + (mean - T) ** 2).
    """
    if USL < LSL:
        raise ValueError(f'USL ({USL}) must not be smaller than LSL ({LSL}).')

    # The target is the exact midpoint. It is deliberately NOT rounded:
    # rounding can push the target onto a spec limit when the tolerance is
    # narrow relative to round_value, which distorts the whole curve.
    target = LSL + (USL - LSL) / 2

    # Clipping x to [LSL, USL] before squaring yields the flat plateaus
    # outside the specification limits in a single vectorised step.
    x_values = np.linspace(LSL - spec_buffer, USL + spec_buffer, 500)
    y_values = k * (np.clip(x_values, LSL, USL) - target) ** 2
    loss_curve_df = pd.DataFrame({'x-values': x_values,
                                  'y-values': y_values})

    # Loss at the specification limits (the plateau height), computed
    # directly instead of being looked up at a hard-coded x position.
    y_at_spec_limits = k * (USL - target) ** 2

    # Unrounded statistics drive every calculation; rounded copies are used
    # only for the report and the annotation text.
    raw_mean = data.mean()
    raw_std = data.std()
    mean = round(raw_mean, round_value)
    std = round(raw_std, round_value)

    # Height of the marker that represents the process mean.
    # NOTE(review): outside the spec limits this is 0 (so the marker sits on
    # the x-axis rather than on the plateau); kept as in the original
    # implementation - confirm whether the plateau height is wanted instead.
    if LSL < raw_mean < USL:
        mean_height = k * (raw_mean - target) ** 2
    else:
        mean_height = 0
    slope = round(mean_height, round_value)

    # Average (expected) loss: k * (variance + squared offset from target)
    loss = round(k * (raw_std ** 2 + (raw_mean - target) ** 2), round_value)

    # Plot the loss curve
    _fig, ax1 = plt.subplots(figsize=(15, 5), dpi=500)
    sns.lineplot(data=loss_curve_df, x='x-values', y='y-values',
                 c='black', lw=3, ax=ax1)

    # Conditionally show location of the mean
    if show_mean:
        sns.scatterplot(x=[raw_mean],
                        y=[mean_height],
                        color='tab:blue',
                        edgecolor='black',
                        s=400,
                        zorder=10,
                        ax=ax1)

    # Secondary y-axis (sharing the x-axis) for the density estimate
    ax2 = ax1.twinx()

    if show_kde:
        sns.kdeplot(data, fill=True,
                    bw_adjust=kde_bandwidth,
                    label=kde_plot_label,
                    ax=ax2)

    # Dashed guides at the target and spec limits. vlines works in data
    # coordinates; axvline's ymin/ymax are axes fractions (0-1), so it
    # cannot be used to stop a line at a given loss value.
    ax1.vlines([target, USL, LSL], 0, y_at_spec_limits,
               colors='black', linestyles='--', linewidth=3, zorder=5)

    if show_annotations:
        boxed = dict(facecolor='white', edgecolor='black', boxstyle='round')
        for text, x_pos in (('Target', target), ('USL', USL), ('LSL', LSL)):
            ax1.annotate(text, xy=(x_pos, y_at_spec_limits),
                         ha='center', va='center', fontsize=14, zorder=14,
                         bbox=boxed)

        ax1.annotate('L(x)', xy=(USL + spec_buffer, y_at_spec_limits),
                     zorder=14,
                     ha='center', va='center', fontsize=14,
                     bbox=dict(facecolor='white', edgecolor='white',
                               boxstyle='round'))

    # Offset for the mean label: 5% of the current y-range of the loss axis
    y_low, y_high = ax1.get_ylim()
    loss_annotation_y_position = 0.05 * (y_high - y_low)

    # Show annotation at mean
    if show_loss_annotation:
        # Label goes above the marker inside the limits, below it otherwise
        if LSL <= raw_mean <= USL:
            offset = loss_annotation_y_position
        else:
            offset = -loss_annotation_y_position

        ax1.annotate(
            f'Mean: {mean}',
            xy=(raw_mean, mean_height),
            xytext=(raw_mean, mean_height + offset),
            textcoords='data',
            zorder=14,
            color='black',
            ha='center',
            va='bottom',
            fontsize=14,
            bbox=dict(facecolor='white', edgecolor='white', alpha=0.9,
                      boxstyle='round')
        )

    # Show xticks at the target and spec limits only
    if show_spec_limit_xticks:
        ax1.set_xticks([target, USL, LSL])
        ax1.tick_params(axis='x', labelsize=14)

    # Conditionally hide y-axis ticks
    if not show_yticks:
        ax1.set_yticks([])

    # Set ax1 parameters
    ax1.set_xlim(LSL - spec_buffer, USL + spec_buffer)
    ax1.set_ylabel('Loss', fontsize=14)
    ax1.set_xlabel(x_label, fontsize=14)

    # Set ax2 ytick and ylabel
    ax2.set_yticks([])
    ax2.set_ylabel(ax2_ylabel)

    sns.despine()
    plt.show()

    # Create dataframe of results
    output_df = pd.DataFrame()
    output_df['Statistics'] = pd.Series(['Mean', 's', 'Ave. Loss', 'Slope at Ave.'])
    output_df['Values'] = pd.Series([mean, std, loss, slope])

    output_dict = {'Results': output_df,
                   'Loss Curve': loss_curve_df}

    return output_dict

In [9]:
def one_sided_loss_function(data, target, spec_limit,
                            k=1,
                            kde_plot_label='kde plot',
                            show_mean=True,
                            show_annotations=True,
                            show_loss_annotation=True,
                            loss_annotation_y_position=10,
                            show_xticks=True,
                            kde_bandwidth=1,
                            x_label='Quality Characteristic',
                            ax2_ylabel='',
                            round_value=1,
                            below_target=5,
                            past_spec=5):
    """
    Plot a one-sided loss function with a KDE of the data.

    The loss is zero at or below the target, grows quadratically between
    the target and the specification limit, and is held constant beyond the
    specification limit. The figure is displayed with ``plt.show()`` and
    summary statistics are returned.

    Parameters:
    ----------
    data : pandas.Series or numpy.ndarray
        Observations of the quality characteristic. The object must expose
        ``.mean()`` and ``.std()``; those methods are called as-is, so the
        degrees of freedom used for ``.std()`` are whatever the object's own
        default is.
    target : float
        Target value; the loss is zero at or below it.
    spec_limit : float
        Specification limit above the target; the loss is capped beyond it.
    k : float, optional
        Loss coefficient in L(x) = k * (x - target) ** 2 (default is 1).
    kde_plot_label : str, optional
        Label passed to the KDE plot (default is 'kde plot').
    show_mean : bool, optional
        Whether to draw a marker for the process mean (default is True).
    show_annotations : bool, optional
        Whether to annotate the target, spec limit, and L(x) (default is True).
    show_loss_annotation : bool, optional
        Whether to label the mean with 'Mean: <rounded mean>' (default is True).
    loss_annotation_y_position : float, optional
        Vertical offset, in loss-axis data units, of the mean label above
        the mean marker (default is 10).
    show_xticks : bool, optional
        Whether to restrict the x-axis ticks to the target and spec limit
        (default is True).
    kde_bandwidth : float, optional
        Value passed to the KDE plot as ``bw_adjust`` (default is 1).
    x_label : str, optional
        Label for the x-axis (default is 'Quality Characteristic').
    ax2_ylabel : str, optional
        Label for the secondary (KDE) y-axis (default is an empty string).
    round_value : int, optional
        Number of decimals used when rounding the reported statistics
        (default is 1). Rounding is applied to reported values only.
    below_target : float, optional
        x-range plotted below the target (default is 5).
    past_spec : float, optional
        x-range plotted past the spec limit (default is 5).

    Returns:
    -------
    dict
        A dictionary containing:
        - 'Results': DataFrame with rows 'Mean', 's', 'Ave. Loss' and
          'Slope at Ave.'. Despite its label, 'Slope at Ave.' holds the
          height of the loss curve at the mean when
          target < mean <= spec_limit, and 0 otherwise.
        - 'Loss Curve': DataFrame with the 500 'x-values' and 'y-values'
          of the plotted loss curve.

    Raises:
    ------
    ValueError
        If spec_limit is smaller than target.

    Notes:
    -----
    - Loss curve: 0 for x <= target, k * (x - target) ** 2 between the
      target and the spec limit, and k * (spec_limit - target) ** 2 beyond.
    - Average loss: 0 when the mean is below the target, otherwise
      k * (s ** 2 + (mean - target) ** 2).
    """
    if spec_limit < target:
        raise ValueError(
            f'spec_limit ({spec_limit}) must not be smaller than target ({target}).'
        )

    # Clipping x to [target, spec_limit] before squaring gives zero loss
    # below the target and the flat cap beyond the spec limit in one step.
    x_values = np.linspace(target - below_target, spec_limit + past_spec, 500)
    y_values = k * (np.clip(x_values, target, spec_limit) - target) ** 2
    loss_curve_df = pd.DataFrame({'x-values': x_values,
                                  'y-values': y_values})

    # Loss at the specification limit (the cap), computed directly instead
    # of being looked up at a hard-coded x position.
    y_at_spec_limit = k * (spec_limit - target) ** 2

    # Unrounded statistics drive every calculation; rounded copies are used
    # only for the report and the annotation text.
    raw_mean = data.mean()
    raw_std = data.std()
    mean = round(raw_mean, round_value)
    std = round(raw_std, round_value)

    # Height of the marker that represents the process mean.
    # NOTE(review): beyond the spec limit this is 0 (marker on the x-axis,
    # not on the cap); kept as in the original implementation - confirm.
    if target < raw_mean <= spec_limit:
        mean_height = k * (raw_mean - target) ** 2
    else:
        mean_height = 0
    slope = round(mean_height, round_value)

    # Average (expected) loss; defined as zero when the mean is below target
    if raw_mean < target:
        loss = 0
    else:
        loss = round(k * (raw_std ** 2 + (raw_mean - target) ** 2), round_value)

    # Plot the one sided loss function
    _fig, ax1 = plt.subplots(figsize=(15, 5), dpi=500)
    sns.lineplot(data=loss_curve_df, x='x-values', y='y-values',
                 c='black', lw=3, ax=ax1)

    if show_mean:
        sns.scatterplot(x=[raw_mean],
                        y=[mean_height],
                        color='tab:blue',
                        edgecolor='black',
                        s=400, zorder=10, ax=ax1)

    # Secondary y-axis (sharing the x-axis) for the density estimate
    ax2 = ax1.twinx()

    sns.kdeplot(data, fill=True,
                bw_adjust=kde_bandwidth,
                label=kde_plot_label,
                ax=ax2)

    # Dashed guides at the target and spec limit. vlines works in data
    # coordinates; axvline's ymin/ymax are axes fractions (0-1), so it
    # cannot be used to stop a line at a given loss value.
    ax1.vlines([target, spec_limit], 0, y_at_spec_limit,
               colors='black', linestyles='--', linewidth=3, zorder=5)

    if show_annotations:
        boxed = dict(facecolor='white', edgecolor='black', boxstyle='round')
        for text, x_pos in (('Target', target), ('Spec Limit', spec_limit)):
            ax1.annotate(text, xy=(x_pos, y_at_spec_limit),
                         ha='center', va='center', fontsize=14, zorder=14,
                         bbox=boxed)

        ax1.annotate('L(x)', xy=(spec_limit + past_spec, y_at_spec_limit),
                     zorder=14,
                     ha='center', va='center', fontsize=14,
                     bbox=dict(facecolor='white', edgecolor='white',
                               boxstyle='round'))

    # Show annotation at mean
    if show_loss_annotation:
        ax1.annotate(
            f'Mean: {mean}',
            xy=(raw_mean, mean_height + loss_annotation_y_position),
            zorder=14,
            color='black',
            ha='center',
            va='center',
            fontsize=14,
            bbox=dict(facecolor='white', edgecolor='white', alpha=0.9,
                      boxstyle='round'))

    # Show xticks at the target and spec limit only
    if show_xticks:
        ax1.set_xticks([target, spec_limit])
        ax1.tick_params(axis='x', labelsize=14)

    # Set ax1 parameters
    ax1.set_xlim(target - below_target, spec_limit + past_spec)
    ax1.set_yticks([])
    ax1.set_ylabel('Loss', fontsize=14)
    ax1.set_xlabel(x_label, fontsize=14)

    # Set ax2 ytick and ylabel
    ax2.set_yticks([])
    ax2.set_ylabel(ax2_ylabel)

    sns.despine()
    plt.show()

    # Create dataframe of results
    output_df = pd.DataFrame()
    output_df['Statistics'] = pd.Series(['Mean', 's', 'Ave. Loss', 'Slope at Ave.'])
    output_df['Values'] = pd.Series([mean, std, loss, slope])

    output_dict = {'Results': output_df,
                   'Loss Curve': loss_curve_df}

    return output_dict