# Plot the Chosen Runtime Statistic with Runtime Standard Deviations

## Default Values for Papermill Parameters

In [None]:
# CSV file with the runtime statistics; it is passed through
# util.prepend_experiment_output_path before being read with pandas.
PARAM_RUNTIME_STATISTICS_PATH = "../outputs/statistics_merged_result_set.csv"
# Suffix of every result PDF; files are written as "<qf_name>_<PARAM_OUT_FILENAME>".
PARAM_OUT_FILENAME = "runtime_barplot_confidence.pdf"
# Name of the statistics column that is plotted as the runtime bar height.
PARAM_RUNTIME_STATISTIC = "time_mean"

## Import and Set Parameters

In [None]:
from subroc import util

import pandas as pd
import os

# import and configure matplotlib
import matplotlib.pyplot as plt
# typeset all figure text with LaTeX and let matplotlib fit the layout automatically
plt.rcParams["text.usetex"] = True
plt.rcParams["figure.autolayout"] = True

# resolve the statistics path through the project helper
PARAM_RUNTIME_STATISTICS_PATH = util.prepend_experiment_output_path(
    PARAM_RUNTIME_STATISTICS_PATH
)

# directory the result PDFs go to; falls back to "../outputs" when the variable is unset
STAGE_OUTPUT_PATH = os.environ.get("STAGE_OUTPUT_PATH", "../outputs")

## Read the Runtime Statistics

In [None]:
# Load the runtime statistics. The plotting cell below reads the columns
# "qf_name", "optimistic_estimate", "depth", "time_std",
# "num_visited_subgroups" and the column named by PARAM_RUNTIME_STATISTIC.
statistics_df = pd.read_csv(PARAM_RUNTIME_STATISTICS_PATH)

## Define the Plots

In [None]:


def label_bars(bars, ax, logaxis):
    """Annotate every bar with its height, formatted to two decimals.

    The text is anchored at the horizontal centre of the bar and at the bar's
    top. The label ends in two newlines and is vertically centred, so the
    visible number ends up above the bar rather than on its edge.

    Args:
        bars: iterable of bar objects offering get_height/get_x/get_width.
        ax: axes object whose ``text`` method receives the labels.
        logaxis: accepted for signature compatibility; not used here.
    """
    for rect in bars:
        value = rect.get_height()
        x_center = rect.get_x() + rect.get_width()/2
        caption = f"{value:.2f}\n\n"
        ax.text(x_center, value, caption, ha="center", va="center")


def create_fig(x_no_oe, y_no_oe, yerr_no_oe, x_with_oe, y_with_oe, yerr_with_oe, xlabel, ylabel, logaxis, barwidth=0.3):
    """Build a grouped bar chart comparing runs without and with pruning.

    For each x position two bars are drawn side by side: the "no pruning"
    series is shifted to the left, the "pruning" series to the right.

    Args:
        x_no_oe: x positions of the "no pruning" bars (iterable of numbers).
        y_no_oe: heights of the "no pruning" bars.
        yerr_no_oe: error-bar sizes for the "no pruning" bars, or None.
        x_with_oe: x positions of the "pruning" bars (iterable of numbers).
        y_with_oe: heights of the "pruning" bars.
        yerr_with_oe: error-bar sizes for the "pruning" bars, or None.
        xlabel: label of the x axis.
        ylabel: label of the y axis.
        logaxis: if true, use a logarithmic y axis with its lower limit at 1.
        barwidth: width of every bar; also determines the horizontal shift
            between the two series.

    Returns:
        The newly created matplotlib figure.
    """
    # get the figure and axis
    fig = plt.figure(figsize=(3.3, 3.3/1.5))
    ax = fig.gca()

    # set axis labels
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    # create the grid (drawn behind the bars)
    ax.set_axisbelow(True)
    ax.grid(visible=True, which="major", axis="y", linestyle="dotted")

    # create the bar plots; the bars use the same barwidth the shift is
    # derived from, so the two series keep a constant gap for any barwidth
    barshift = barwidth/2 + 0.05
    bars_no_oe = ax.bar([depth - barshift for depth in x_no_oe], y_no_oe,
        width=barwidth,
        log=logaxis,
        yerr=yerr_no_oe,
        capsize=10,
        label="no pruning")
    bars_with_oe = ax.bar([depth + barshift for depth in x_with_oe], y_with_oe,
        width=barwidth,
        log=logaxis,
        yerr=yerr_with_oe,
        capsize=10,
        label="pruning")

    # add legend
    ax.legend(loc="upper left")

    # add bar labels
    label_bars(bars_no_oe, ax, logaxis)
    label_bars(bars_with_oe, ax, logaxis)

    # layout configuration: cover the x positions of both series so that no
    # bar ends up outside the visible range when the series differ
    x_ticks = sorted(set(x_no_oe) | set(x_with_oe))
    ax.set_xlim(0, max(x_ticks)+1)
    ax.set_xticks(x_ticks)

    # leave headroom above the bars for the labels
    ax.margins(y=0.2)
    if logaxis:
        ax.set_ylim(1)

    return fig



## Create the Plots for Each Interestingness Measure

In [None]:
# group the statistics by interestingness measure (column "qf_name")
df_groupby_qfs = statistics_df.groupby("qf_name", as_index=False)

# maps each qf_name to its figures in the order:
# runtimes (linear, log), evaluated-subgroup counts (linear, log)
plots_per_qf = {}

for qf_name, qf_df in df_groupby_qfs:
    # split the rows of this measure by the pruning flag; taking the rows
    # straight from the groupby avoids feeding index labels to positional
    # indexing, which is only correct for a default RangeIndex
    df_groupby_oe = qf_df.groupby("optimistic_estimate", as_index=False)
    no_oe_df = df_groupby_oe.get_group(False)
    with_oe_df = df_groupby_oe.get_group(True)

    depth_no_oe = no_oe_df["depth"]
    stat_no_oe = no_oe_df[PARAM_RUNTIME_STATISTIC]
    std_no_oe = no_oe_df["time_std"]
    num_visited_subgroups_no_oe = no_oe_df["num_visited_subgroups"]

    depth_with_oe = with_oe_df["depth"]
    stat_with_oe = with_oe_df[PARAM_RUNTIME_STATISTIC]
    std_with_oe = with_oe_df["time_std"]
    num_visited_subgroups_with_oe = with_oe_df["num_visited_subgroups"]

    # runtime plots with standard deviations as error bars
    # NOTE(review): the y label is fixed to "Average"; confirm it still fits
    # when PARAM_RUNTIME_STATISTIC is not a mean.
    fig_runtimes_linear = create_fig(depth_no_oe, stat_no_oe, std_no_oe, depth_with_oe, stat_with_oe, std_with_oe, xlabel="Maximum Pattern Length", ylabel="Average Runtime in s", logaxis=False)
    fig_runtimes_log = create_fig(depth_no_oe, stat_no_oe, std_no_oe, depth_with_oe, stat_with_oe, std_with_oe, xlabel="Maximum Pattern Length", ylabel="Average Runtime in s", logaxis=True)

    # subgroup-count plots without error bars; the label is a raw string so
    # the LaTeX-escaped "#" is not an invalid Python escape sequence
    fig_counts_linear = create_fig(depth_no_oe, num_visited_subgroups_no_oe, None, depth_with_oe, num_visited_subgroups_with_oe, None, xlabel="Maximum Pattern Length", ylabel=r"\#Evaluated Subgroups", logaxis=False)
    fig_counts_log = create_fig(depth_no_oe, num_visited_subgroups_no_oe, None, depth_with_oe, num_visited_subgroups_with_oe, None, xlabel="Maximum Pattern Length", ylabel=r"\#Evaluated Subgroups", logaxis=True)

    plots_per_qf[qf_name] = [fig_runtimes_linear, fig_runtimes_log, fig_counts_linear, fig_counts_log]

## Write the Results

In [None]:
import matplotlib.backends.backend_pdf as backend_pdf

# write one multi-page PDF per interestingness measure, one page per figure
for qf_name, figs in plots_per_qf.items():
    # the context manager finalizes the PDF even if saving a figure raises
    with backend_pdf.PdfPages(f"{STAGE_OUTPUT_PATH}/{qf_name}_{PARAM_OUT_FILENAME}") as pdf:
        for fig in figs:
            pdf.savefig(fig)