# Notebook for generating plots from the simulation results

In [1]:
from math import log2, ceil
import os
from typing import Optional

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mp

### Util functions for outputting plots

In [2]:
def save_file(ax, name: str, bbox_inches: Optional[str] = 'tight', format="pdf", **kwargs):
    """Save the figure that owns ``ax`` as ``./<name>.<format>``.

    Args:
        ax: Object exposing a ``figure`` attribute with a ``savefig`` method
            (in this notebook, the axes returned by ``plot``).
        name: Output path without extension, relative to the current directory.
        bbox_inches: Forwarded unchanged to ``savefig``.
        format: Used both as the file extension and as ``savefig``'s ``format``.
        **kwargs: Any further keyword arguments are forwarded to ``savefig``.
    """
    figure = ax.figure
    target = f"./{name}.{format}"
    figure.savefig(target, bbox_inches=bbox_inches, format=format, **kwargs)

def get_outputs_path(rand: int, adversary: bool):
    """Return the file-name prefix for one configuration's plots, creating its directory.

    The directory ``results/init_<rand>[_adversary]`` is created relative to
    the current working directory if it does not exist yet.

    Args:
        rand: Value embedded in the directory and file names.
        adversary: When true, ``"_adversary"`` is appended to both names.

    Returns:
        ``results/init_<rand><adv>/max_init <rand><adv>`` (no extension).

    Raises:
        OSError: If the directory cannot be created (for example a permission
            problem, or ``results`` exists but is not a directory).
    """
    adv = "_adversary" if adversary else ""
    plot_path = f"results/init_{rand}{adv}"
    # makedirs also creates the intermediate "results" directory. exist_ok
    # makes re-running a no-op without hiding genuine failures, which a
    # blanket `except Exception: pass` would defer to a confusing savefig error.
    os.makedirs(plot_path, exist_ok=True)
    # NOTE(review): the prefix contains a space ("max_init <rand>"); kept as-is
    # because existing output files use this name.
    return os.path.join(plot_path, f"max_init {rand}{adv}")


### Functions to parse the simulation results

In [3]:
def find_median(row: pd.Series, percentile: float = 0.5):
    """Return the first label of ``row`` where the running sum reaches a share of the total.

    The values of ``row`` are accumulated in order; the label of the first
    entry at which the accumulated sum is at least ``percentile * row.sum()``
    is returned. With the default ``percentile`` of 0.5 this is the weighted
    median position when the values are treated as weights.

    Returns:
        The matching index label, or ``None`` if the running sum never reaches
        the threshold (for example when ``row`` is empty).
    """
    threshold = percentile * row.sum()
    running_total = 0
    for label, weight in zip(row.index, row):
        running_total = running_total + weight
        if running_total >= threshold:
            return label
    return None


def read_result(file_name: str, col_name: str):
    """Parse one simulation result CSV into per-row summary statistics.

    The file is read with ``;`` as delimiter, using only the columns
    ``"time"``, ``col_name`` and ``"n"``; the first of the used columns becomes
    the index. Every cell of ``col_name`` is a comma-separated list of
    integers, which is expanded into one row of a numeric table.

    Args:
        file_name: Path of the CSV file to read.
        col_name: Name of the column holding the comma-separated integer lists.

    Returns:
        A DataFrame indexed like the CSV with the columns ``"Maximum"``,
        ``"Median"``, ``"Minimum"`` and ``"Population size ($\\log n$)"``
        (``log2`` of the ``"n"`` column).
    """
    population_size_col = "n"
    all_cols = ["time", col_name, population_size_col]

    all_data = pd.read_csv(file_name, delimiter=";", header=0, index_col=0, usecols=all_cols, engine="c")
    population_size = np.log2(all_data[population_size_col])
    # cell[:-1] drops the last character before splitting on commas --
    # presumably a trailing comma written by the simulator; confirm.
    parsed_col = all_data[col_name].apply(lambda cell: pd.Series(cell[:-1].split(",")).astype(int))
    # idxmax returns the position of the first occurrence of the row maximum;
    # on the column-reversed frame it returns the last occurrence.
    # NOTE(review): if the cells are histograms of counts, these are the first
    # and last *mode*, not the smallest and largest occupied position (which
    # would need `parsed_col > 0`) -- confirm the intended meaning.
    min_col = parsed_col.idxmax(axis=1)
    med_col = parsed_col.apply(find_median, axis=1)
    max_col = parsed_col.iloc[:, ::-1].idxmax(axis=1)
    # Raw string: "\l" is not a valid escape sequence in a normal literal and
    # triggers a warning on recent Python versions; the value is unchanged.
    return pd.DataFrame({"Maximum": max_col, "Median": med_col, "Minimum": min_col,
                         r"Population size ($\log n$)": population_size}, index=all_data.index)

def read_results(path: str, col_name: str, random_max: int = 10, n: int = 100000, adversary: bool = True, files: int = 5):
    """Aggregate the summaries of several result files that share the same parameters.

    Reads ``random_max=<random_max>_n=<n>_<index>[_adversary].csv`` from
    ``path`` for ``index`` in ``0 .. files - 1`` via ``read_result`` and
    combines them row by row.

    Args:
        path: Directory containing the CSV files.
        col_name: Column to parse, forwarded to ``read_result``.
        random_max: Value used in the file name.
        n: Population size used in the file name.
        adversary: When true, the ``_adversary`` file-name suffix is used.
        files: Number of files to read; must be at least 1.

    Returns:
        A DataFrame with ``"Maximum"`` (largest maximum over all files),
        ``"Median"`` (median of the per-file medians), ``"Minimum"`` (smallest
        minimum over all files) and ``"Population size ($\\log n$)"`` (taken
        from the last file read).

    Raises:
        ValueError: If ``files`` is smaller than 1.
    """
    if files < 1:
        # Previously this surfaced as an opaque IndexError further down.
        raise ValueError(f"files must be at least 1, got {files}")

    # Raw string: "\l" is an invalid escape sequence in a normal literal.
    population_col = r"Population size ($\log n$)"
    adv = "_adversary" if adversary else ""
    min_cols, med_cols, max_cols = [], [], []
    df = None
    for index in range(files):
        file_path = os.path.join(path, f"random_max={random_max}_n={n}_{index}{adv}.csv")
        df = read_result(file_path, col_name)
        # Name each series after its file index so it becomes a distinct column.
        min_cols.append(df["Minimum"].rename(index))
        med_cols.append(df["Median"].rename(index))
        max_cols.append(df["Maximum"].rename(index))
    # One column per file; rows are re-labelled with the first file's index.
    min_df = pd.DataFrame(min_cols).T.set_index(min_cols[0].index)
    med_df = pd.DataFrame(med_cols).T.set_index(med_cols[0].index)
    max_df = pd.DataFrame(max_cols).T.set_index(max_cols[0].index)
    results = pd.DataFrame(
        {
            "Maximum": max_df.max(axis=1),
            "Median": med_df.median(axis=1),
            "Minimum": min_df.min(axis=1),
            population_col: df[population_col],
        },
        index=min_df.index,
    )
    return results


### Functions for plotting the parsed results

In [21]:
def plot(df: pd.DataFrame, y_axis_label: str, x_axis_label: str = "Parallel Time", ax: Optional[plt.Axes] = None, **kwargs):
    """Plot every column of ``df`` as a line, with the population size drawn dotted.

    All columns except ``"Population size ($\\log n$)"`` are plotted with
    ``DataFrame.plot``; if that column is present it is added afterwards with a
    dotted line style. The function also sets matplotlib rc parameters and the
    ``"tableau-colorblind10"`` style through ``mp.rc`` / ``mp.style.use``.

    Args:
        df: Data to plot; the index supplies the x values.
        y_axis_label: Label of the y axis.
        x_axis_label: Label of the x axis.
        ax: Axes to draw on; a new figure and axes are created when ``None``.
        **kwargs: Forwarded to ``DataFrame.plot`` for the main columns.

    Returns:
        The axes that were drawn on.
    """
    figsize = (7.5, 4)
    # Raw string: "\l" is an invalid escape sequence in a normal literal.
    population_col = r"Population size ($\log n$)"

    mp.rc('font', size=15)
    mp.rc('axes', labelsize=15, titlesize=16, titleweight="normal")
    mp.style.use("tableau-colorblind10")
    data = df.loc[:, df.columns != population_col]

    if ax is None:
        _, ax = plt.subplots()
    data.plot(ax=ax, figsize=figsize, **kwargs)
    # Give the first three children of the axes descending z-order --
    # presumably the three lines just plotted, so that the first column is
    # drawn on top. Assumes the axes held no artists before this call.
    for artist, zorder in zip(ax.get_children(), (3, 2, 1)):
        artist.set_zorder(zorder)

    if population_col in df.columns:
        df[population_col].plot(ax=ax, linestyle=":", sharex=True)
    ax.set(xlabel=x_axis_label, ylabel=y_axis_label)
    ax.legend()
    return ax

def plot_examples(path: str, exp_start: int, exp_end: int, rand: int, adversary: bool, examples: int = 1):
    """Plot individual simulation runs, one figure per run, population size and column.

    For every run index below ``examples`` and every exponent in
    ``exp_start .. exp_end`` (inclusive), the file
    ``random_max=<rand>_n=<10**exp>_<i>[_adversary].csv`` in ``path`` is parsed
    once per column and the resulting plot is saved under the prefix returned
    by ``get_outputs_path``.

    Args:
        path: Directory containing the simulation CSV files.
        exp_start: Smallest base-10 exponent of the population size.
        exp_end: Largest base-10 exponent of the population size (inclusive).
        rand: Value used in the input and output file names.
        adversary: Selects the ``_adversary`` variants of the files.
        examples: Number of runs (file indices starting at 0) to plot.
    """
    adv = "_adversary" if adversary else ""
    plot_filename = get_outputs_path(rand, adversary)
    # (CSV column, y-axis label) pairs. The raw string avoids the invalid
    # "\l" escape sequence.
    columns = [
        ("estimate", r"Estimate of $\log n$"),
        ("max", "Maximum GRV"),
        ("interactions", "Last wrap-around"),
        ("timer", "Time"),
    ]
    for i in range(examples):
        for exp in range(exp_start, exp_end + 1):
            # The path does not depend on the column, so build it once per file.
            file_name = os.path.join(path, f"random_max={rand}_n={10 ** exp}_{i}{adv}.csv")
            for col_name, y_axis_name in columns:
                df = read_result(file_name=file_name, col_name=col_name)
                # Use the prepared label; it was previously defined but never
                # used (the title-cased column name was shown instead).
                ax = plot(df, y_axis_label=y_axis_name)
                save_file(ax, f"{plot_filename}_example_{col_name}__n={exp}_{i}")
                # Close the figure so repeated plotting does not accumulate open figures.
                plt.close(ax.figure)

def plot_aggregates(path: str, exp_start: int, exp_end: int, rand: int, adversary: bool, files: int):
    """Plot aggregated estimates per population size and their relative deviation.

    Aggregates the results of multiple simulations with the same parameters,
    only varying the population size. For every exponent in
    ``exp_start .. exp_end`` (inclusive) the ``"estimate"`` column of ``files``
    runs is combined via ``read_results`` and plotted. Afterwards one summary
    plot shows, per exponent, the extreme and median values relative to
    ``log2(10**exp)``.

    Args:
        path: Directory containing the simulation CSV files.
        exp_start: Smallest base-10 exponent of the population size.
        exp_end: Largest base-10 exponent of the population size (inclusive).
        rand: Value used in the input and output file names.
        adversary: Selects the ``_adversary`` variants of the files.
        files: Number of runs aggregated per population size.
    """
    # No upper y-limit for rand == 60, otherwise the axis is capped at 50.
    upper_ylim = None if rand == 60 else 50
    plot_filename = get_outputs_path(rand, adversary)
    min_list, med_list, max_list, idx = [], [], [], []
    for exp in range(exp_start, exp_end + 1):
        df = read_results(path=path, col_name="estimate", random_max=rand, n=10 ** exp, adversary=adversary, files=files)
        # Raw strings below avoid the invalid "\l" escape sequence.
        ax = plot(df, y_axis_label=r"Estimate of $\log n$", ylim=[-0.6, upper_ylim])
        save_file(ax, f"{plot_filename}_aggregate_estimate_{exp}")
        plt.close(ax.figure)
        # Values relative to log2 of the population size n = 10**exp. The
        # first 500 rows are skipped -- presumably the start-up phase before
        # the estimate settles; confirm the cut-off.
        dev = (df / log2(10 ** exp)).iloc[500:]
        min_list.append(dev["Minimum"].min())
        med_list.append(dev["Median"].median())
        max_list.append(dev["Maximum"].max())
        idx.append(exp)
    # "Median" (previously misspelled "Medium") matches the label used in all other plots.
    deviation = pd.DataFrame({"Maximum": max_list, "Median": med_list, "Minimum": min_list}, index=idx)
    ax = plot(deviation, r"Relative deviation from $\log n$", r"Population size ($\log_{10}$)")
    save_file(ax, f"{plot_filename}_deviation")
    plt.close(ax.figure)

def plot_all(exp_start: int, exp_end: int, rand: int, adversary: bool, files: int, path: str = "../outputs/"):
    """Produce the example plots and the aggregate plots for one configuration.

    Args:
        exp_start: Smallest base-10 exponent of the population size.
        exp_end: Largest base-10 exponent of the population size (inclusive).
        rand: Value used in the input and output file names.
        adversary: Selects the ``_adversary`` variants of the files.
        files: Number of runs aggregated per population size.
        path: Directory containing the simulation CSV files; defaults to the
            location that was previously hard-coded.
    """
    # A single example run (index 0) is plotted per population size.
    plot_examples(path, exp_start=exp_start, exp_end=exp_end, rand=rand, adversary=adversary, examples=1)
    plot_aggregates(path, exp_start=exp_start, exp_end=exp_end, rand=rand, adversary=adversary, files=files)

In [22]:
# Number of result files aggregated per configuration.
files = 96
# One entry per plotted configuration, processed in this order; every
# configuration starts at exp_start=1.
run_configs = [
    {"exp_end": 5, "rand": 1, "adversary": False},
    {"exp_end": 6, "rand": 10, "adversary": False},
    {"exp_end": 6, "rand": 60, "adversary": False},
    {"exp_end": 6, "rand": 1, "adversary": True},
]
for run_config in run_configs:
    plot_all(exp_start=1, files=files, **run_config)