In [None]:
from collections import defaultdict
import pickle
import re

from covvvr.functions import *
import numpy as np

from scripts.constants import DATA_DIR


In [None]:
# Integrands covered by this notebook, in the order they are listed:
# the dimension-parameterised families are expanded from their dimension
# tuples, the remaining ones are constructed with their default arguments.
functions = [
    *(NGauss(dim) for dim in (2, 4, 8, 16)),
    *(NCamel(dim) for dim in (2, 4, 8, 16)),
    EntangledCircles(),
    AnnulusWCuts(),
    ScalarTopLoop(),
    *(NPolynomial(dim) for dim in (18, 54, 96)),
]


---
# Load in Data & Format

Creates a table of the VRP and the run time for plain Vegas and for Vegas with 1 control variate (CV), 2 CVs, and all CVs.

In [None]:
# Run configuration; these three numbers select which pickle file is read.
evts = 5000
iters = 50
N = 100

fname = "compare_{}x{}_avg{}.pkl".format(iters, evts, N)
# NOTE: pickle.load can execute arbitrary code, so only read files you produced yourself.
with open(DATA_DIR / fname, "rb") as f:
    datas = pickle.load(f)

# Collapse every stored sequence of per-run values into its average, in place.
for kind, per_func in datas.items():
    if kind == "metadata":
        # Keep the metadata around under its own name; it is not averaged.
        metadata = per_func
        continue

    # kind is one of: variances, vprs, times, means
    for per_cv in per_func.values():
        # per_cv maps a CV label to the values collected over the N runs
        per_cv.update({cv_key: np.mean(runs) for cv_key, runs in per_cv.items()})


# Make Table

In [None]:
# Number of table rows spanned by the \multirow label of each
# multi-dimensional function family (looked up by its generic name).
row_copies = dict(Gaussian=4, Camel=4, Polynomial=3)


def make_table(
    place: str = "h!",
    title_bg: str = "black",
    title_txt: str = "white",
    sub_bg: str = "white",
    sub_txt: str = "black",
    err: str = "red",
    err_alph1: float = 90,
    err_alph2: float = 60,
    err_cut1: float = 0.10,
    err_cut2: float = 0.05,
) -> str:
    """
    Build the LaTeX table of VRP and run time used in the paper.

    The table content is read from the notebook-level globals `datas` (averaged
    results), `metadata`, `row_copies`, `iters` and `evts`; those cells must have
    been run before calling this function.

    Parameters:
    place (default "h!") - The LaTeX placement of the table, e.g. h, h!, t, b, c and so on
    title_bg (default "black") - The background color of the titles
    title_txt (default "white") - The text color of the titles
    sub_bg (default "white") - The background color of the subtitles
    sub_txt (default "black") - The text color of the subtitles
    err (default "red") - The color for the error cells, where the difference between the
        found mean and the true value (normalized by the magnitude of the true value) is
        above `err_cut2`
    err_alph1 (default 90) - The alpha value (0 - 100) applied to an error cell when the error
        is above `err_cut1`
    err_alph2 (default 60) - The alpha value (0 - 100) applied to the error cell when the error
        is above `err_cut2` but not above `err_cut1`
    err_cut1 (default 0.10) - The larger cutoff; errors above it get the `err_alph1` shade
    err_cut2 (default 0.05) - The smaller cutoff; errors at or below it are not colored

    Returns:
    The complete LaTeX table environment as a single string.

    Raises:
    ValueError if a function name in the metadata contains a digit outside of a leading
    dimension prefix such as "2D ", because its generic name cannot be extracted.
    """

    # Formatting for the first row of the table
    def title(label: str, struct: str, ncols: int) -> str:
        return f"\\multicolumn{{{ncols}}}{{{struct}}}{{\\textcolor{{{title_txt}}}{{{label}}}}}"

    # Formatting for the second row of the table
    def subtitle(label: str, struct: str, ncols: int) -> str:
        return f"\\multicolumn{{{ncols}}}{{{struct}}}{{\\textcolor{{{sub_txt}}}{{{label}}}}}"

    # Formatting for whether a cell should be colored as an error cell
    def cell_color(calc: float, true: float) -> str:
        # Normalize by the magnitude of the true value: dividing by a negative true value
        # would give a negative "error" that never exceeds the cutoffs. When the true
        # value is exactly zero a relative error is undefined, so use the absolute one.
        scale = abs(true)
        err_val = abs(calc - true) / scale if scale else abs(calc - true)
        if err_val > err_cut2:
            return (
                f"\\cellcolor{{{err}!{err_alph1 if err_val > err_cut1 else err_alph2}}}"
            )
        return ""

    # The three cells (VRP, time, time relative to Vegas) of one control-variate column group
    def cv_entry(color: str, vpr: float, time: float, vegas_time: float) -> str:
        return (
            f"{color}{100 * vpr:.2f}\\% & {color}{time:.2f} & "
            f"{color}({time / vegas_time:.1f})"
        )

    # Environment initialization and first two layers
    sub_col = f"{subtitle('VRP', 'c|', 1)} & {subtitle('Time (s)', 'c||', 2)}"
    title_row = " & ".join(
        [
            title("", "|c||", 2),
            title("Vegas", "|c||", 1),
            title("1 CV", "c||", 3),
            title("2 CVs", "c||", 3),
            title("All CVs", "c|", 3),
        ]
    )
    subtitle_row = " & ".join(
        [
            subtitle("Function", "||c|", 1),
            subtitle("Dim", "c||", 1),
            subtitle("Time (s)", "c||", 1),
            sub_col,
            sub_col,
            sub_col,
        ]
    )
    init_table = (
        f"\\begin{{table}}[{place}]\\centering\n"
        "\t\\resizebox{\\columnwidth}{!}{\n"
        "\t\t\\begin{tabular}{|| l | r || r || r | r r || r | r r || r | r r ||}\n"
        f"\t\t\t\\rowcolor{{{title_bg}}} {title_row}\\\\\n"
        f"\t\t\t\\rowcolor{{{sub_bg}}} {subtitle_row}\\\\\n"
        "\t\t\t\\hline\\hline\n"
    )

    # prev_name decides whether to add an hline and whether to start a new multirow
    mid_table, prev_name = "", ""
    # Go through each function
    for idx, (func, true_value, dim) in enumerate(
        zip(metadata["functions"], metadata["true_values"], metadata["dims"])
    ):
        # Split an optional dimension prefix (e.g. "2D ") from the generic name
        parsed = re.search(r"^(\d+D\W)?(\D*)$", func)
        if parsed is None:
            raise ValueError(f"Cannot extract a generic name from function {func!r}")
        is_multidim = parsed.group(1) is not None
        gen_name = parsed.group(2)

        # Every row but the first closes the previous one; a change of function
        # family additionally gets a horizontal rule
        if idx > 0:
            mid_table += " \\\\\n"
            if prev_name != gen_name:
                mid_table += "\t\t\t\\hline\n"

        # Gather all relevant data; key 0 is plain Vegas, then 1 CV, 2 CVs and all CVs
        times = datas["times"][func]
        vprs = datas["vprs"][func]
        means = datas["means"][func]
        vegas_time = times[0]

        # Entries to put into the row, colored when the mean is off from the true value
        vegas_entry = f"{cell_color(means[0], true_value)}{vegas_time:.2f}"
        cv_entries = [
            cv_entry(cell_color(means[key], true_value), vprs[key], times[key], vegas_time)
            for key in (1, 2, "all")
        ]

        if not is_multidim:
            # Function isn't redone with multiple dimensions: label the row with its name
            label = func
        elif gen_name != prev_name:
            # First row of a family (e.g. 2D Gaussian): start the multirow label
            label = f"\\multirow{{{row_copies[gen_name]}}}{{6em}}{{{gen_name}}}"
        else:
            # Later row of the same family (e.g. 4D Gaussian): the label cell stays empty
            label = ""
        mid_table += f"\t\t\t{label} & {dim} & " + " & ".join([vegas_entry, *cv_entries])
        prev_name = gen_name

    # Get all relevant metadata for the caption
    caption = (
        f"Results for {metadata['iters']} iterations and {metadata['evts']} events per iteration averaged over "
        f"{metadata['N']} runs. The lighter (darker) colored cells are for runs that are more than {err_cut2 * 100:.1f}\\% "
        f"({err_cut1 * 100:.1f}\\%) off from the true value. The values in the parentheses in the time column are how much "
        "longer that instance took to run compared to the corresponding Vegas instance."
    )
    # NOTE(review): the label is built from the notebook globals `iters`/`evts`, while the
    # caption uses `metadata`; confirm the two always agree.
    end_table = (
        "\\\\\n"
        "\t\t\t\\hline\n"
        "\t\t\\end{tabular}\n"
        "\t}\n"
        f"\t\\caption{{{caption}}}\n"
        f"\t\\label{{tab:{iters}x{evts}_compare}}\n"
        "\\end{table}"
    )

    return init_table + mid_table + end_table


In [None]:
# Print the LaTeX source returned by make_table() with its default styling.
print(make_table())


---

# Load in Data

Creates a table showing the mean values for Vegas and for 1 CV compared to the true value, along with their RMS deviations normalized by the true value.

In [None]:
# Run configuration; these three numbers select which pickle file is read.
evts = 5000
iters = 50
N = 10

fname = "means_{}x{}_avg{}.pkl".format(iters, evts, N)
# NOTE: pickle.load can execute arbitrary code, so only read files you produced yourself.
with open(DATA_DIR / fname, "rb") as f:
    data = pickle.load(f)

# Split the loaded dictionary: keep the metadata under its own name and
# narrow `data` down to the "means" entry.
metadata = data["metadata"]
data = data["means"]


In [None]:
# Number of table rows spanned by the \multirow label of each
# multi-dimensional function family (looked up by its generic name).
row_copies = dict(Gaussian=4, Camel=4, Polynomial=3)


def make_table(
    place: str = "h!",
    title_bg: str = "black",
    title_txt: str = "white",
    sub_bg: str = "white",
    sub_txt: str = "black",
    err: str = "red",
    err_alph1: float = 90,
    err_alph2: float = 60,
    err_cut1: float = 0.10,
    err_cut2: float = 0.05,
) -> str:
    """
    Build the LaTeX table of means and normalized RMS values used in the paper.

    The table content is read from the notebook-level globals `data` (per-function
    results for "vegas" and "cv"), `metadata`, `row_copies`, `iters` and `evts`;
    those cells must have been run before calling this function.

    Parameters:
    place (default "h!") - The LaTeX placement of the table, e.g. h, h!, t, b, c and so on
    title_bg (default "black") - The background color of the titles
    title_txt (default "white") - The text color of the titles
    sub_bg (default "white") - The background color of the subtitles
    sub_txt (default "black") - The text color of the subtitles
    err, err_alph1, err_alph2, err_cut1, err_cut2 - Accepted but not used: this table has
        no colored error cells. They are kept so existing calls keep working.

    Returns:
    The complete LaTeX table environment as a single string.

    Raises:
    ValueError if a function name in the metadata contains a digit outside of a leading
    dimension prefix such as "2D ", because its generic name cannot be extracted.
    """

    # Formatting for the first row of the table
    def title(label: str, struct: str, ncols: int) -> str:
        return f"\\multicolumn{{{ncols}}}{{{struct}}}{{\\textcolor{{{title_txt}}}{{{label}}}}}"

    # Formatting for the second row of the table
    def subtitle(label: str, struct: str, ncols: int) -> str:
        return f"\\multicolumn{{{ncols}}}{{{struct}}}{{\\textcolor{{{sub_txt}}}{{{label}}}}}"

    # Scientific notation for values of tiny magnitude, fixed-point otherwise. The
    # magnitude is compared so that negative values are not all forced into scientific
    # notation.
    def is_tiny(value: float) -> bool:
        return abs(value) < 1e-4

    # Format a Vegas/CVInt pair with one shared notation
    def pair_entry(vegas_value: float, cv_value: float) -> str:
        spec = ".4e" if is_tiny(vegas_value) or is_tiny(cv_value) else ".6f"
        return f"{vegas_value:{spec}} & {cv_value:{spec}}"

    # Root of the summed squared deviations from the true value, divided by the true
    # value, i.e. the formula quoted in the caption.
    # NOTE(review): there is no 1/N inside the root and the divisor keeps its sign, so
    # this is not a mean square and is negative for a negative true value. The caption
    # states the same formula, so the numbers are left unchanged; confirm this is intended.
    def normalized_rms(runs, true_value: float) -> float:
        # asarray lets plain lists work as well as numpy arrays
        deviations = np.asarray(runs) - true_value
        return np.sqrt(np.sum(deviations**2)) / true_value

    # Environment initialization and first two layers
    sub_col = f"{subtitle('Vegas', 'c|', 1)} & {subtitle('CVInt', 'c||', 1)}"
    title_row = " & ".join(
        [
            title("", "||c||", 3),
            title("Mean", "|c||", 2),
            title("Normalized RMS", "c||", 2),
        ]
    )
    subtitle_row = " & ".join(
        [
            subtitle("Function", "||c|", 1),
            subtitle("Dim", "c|", 1),
            subtitle("True Value", "c||", 1),
            sub_col,
            sub_col,
        ]
    )
    init_table = (
        f"\\begin{{table}}[{place}]\\centering\n"
        "\t\t\\begin{tabular}{|| l | r | r || r | r || r | r ||}\n"
        f"\t\t\t\\rowcolor{{{title_bg}}} {title_row}\\\\\n"
        f"\t\t\t\\rowcolor{{{sub_bg}}} {subtitle_row}\\\\\n"
        "\t\t\t\\hline\\hline\n"
    )

    # prev_name decides whether to add an hline and whether to start a new multirow
    mid_table, prev_name = "", ""
    # Go through each function
    for idx, (func, true_value, dim) in enumerate(
        zip(metadata["functions"], metadata["true_values"], metadata["dims"])
    ):
        # Split an optional dimension prefix (e.g. "2D ") from the generic name
        parsed = re.search(r"^(\d+D\W)?(\D*)$", func)
        if parsed is None:
            raise ValueError(f"Cannot extract a generic name from function {func!r}")
        is_multidim = parsed.group(1) is not None
        gen_name = parsed.group(2)

        # Every row but the first closes the previous one; a change of function
        # family additionally gets a horizontal rule
        if idx > 0:
            mid_table += " \\\\\n"
            if prev_name != gen_name:
                mid_table += "\t\t\t\\hline\n"

        # Gather all relevant data
        vegas_runs = data[func]["vegas"]
        cv_runs = data[func]["cv"]

        # Entries to put into the row
        mean_entry = pair_entry(np.mean(vegas_runs), np.mean(cv_runs))
        rms_entry = pair_entry(
            normalized_rms(vegas_runs, true_value), normalized_rms(cv_runs, true_value)
        )
        tv_format = "4e" if is_tiny(true_value) else "6f"

        if not is_multidim:
            # Function isn't redone with multiple dimensions: label the row with its name
            label = func
        elif gen_name != prev_name:
            # First row of a family (e.g. 2D Gaussian): start the multirow label
            label = f"\\multirow{{{row_copies[gen_name]}}}{{6em}}{{{gen_name}}}"
        else:
            # Later row of the same family (e.g. 4D Gaussian): the label cell stays empty
            label = ""
        mid_table += (
            f"\t\t\t{label} & {dim} & {true_value:.{tv_format}} & {mean_entry} & {rms_entry}"
        )
        prev_name = gen_name

    # Get all relevant metadata for the caption
    caption = (
        f"Results for {metadata['iters']} iterations and {metadata['evts']} events per iteration averaged over "
        f"{metadata['N']} runs. The data collected for the CVInt column were done with the one control variate "
        "that gives maximum variance reduction. The normalized RMS is given by $\\sqrt{\\sum_{i=1}^N(\\hat{I}_i-I)^2}/I$."
    )
    # NOTE(review): this label ends in "_compare" and is built from the notebook globals
    # `iters`/`evts`; a table with the same label pattern is produced earlier in this
    # notebook, so check for duplicate labels if both go into one document.
    end_table = (
        "\\\\\n"
        "\t\t\t\\hline\n"
        "\t\t\\end{tabular}\n"
        f"\t\\caption{{{caption}}}\n"
        f"\t\\label{{tab:{iters}x{evts}_compare}}\n"
        "\\end{table}"
    )

    return init_table + mid_table + end_table


In [None]:
# Print the LaTeX source returned by the make_table() defined in the cell above,
# which replaces the earlier definition of the same name.
print(make_table())
