# Computer search for codes

## General (TODO: merge with the "General" helper cell of the Set-Sync section)

In [None]:
import itertools
import numpy as np
import pandas as pd
from pathlib import Path
%matplotlib inline
import matplotlib.pyplot as plt
from math import ceil
import matplotlib.ticker as mtick
from ipywidgets import interact, HBox, VBox
import ipywidgets as widgets
from cycler import cycler


# Relative path of the results CSV that this section loads with load_df().
results_path = Path('zfd_pd_comp.csv')

def drop_redundant_columns(df):
    """Return ``df`` restricted to the columns holding more than one distinct value.

    Missing values count as a value of their own, so a column mixing NaN with
    a single other value is kept.
    """
    informative = [
        name for name in df.columns
        if df[name].nunique(dropna=False) > 1
    ]
    return df[informative]

def load_df(fname):
    """Read the CSV at ``fname`` into a DataFrame and return it unchanged."""
    return pd.read_csv(fname)

def get_axes(n_rows, n_cols, width, height):
    """Create a figure holding an ``n_rows`` x ``n_cols`` grid of subplots.

    ``width`` and ``height`` are the size in inches of each grid cell; the
    figure is scaled to fit the whole grid. Returns the 2-D array of axes
    (always 2-D because ``squeeze=False``).
    """
    figure, grid = plt.subplots(nrows=n_rows, ncols=n_cols, squeeze=False)
    total_width = width*n_cols
    total_height = height*n_rows
    figure.set_size_inches(total_width, total_height)
    return grid

def plot_simulations(df,
                     group_by,
                     x, y,
                     subplots=None,
                     logx=2,
                     logy=False,
                     fixy=False,
                     n_cols=3,
                     width=8,
                     height=5,
                     scatter=False,
                     axs=None,
                     percent=None,
                     sep_figs=False,
                     add_markers=True,
                     int_x=False,
                     only_bottom_x=False,
                     save_tikz=False,
                     save_csv=False):
    """Plot column ``y`` against column ``x``, one series per group.

    Parameters
    ----------
    df : DataFrame holding the ``x``, ``y``, ``group_by`` (and ``subplots``) columns.
    group_by : list of column names; each distinct combination becomes one series
        and the names (comma-joined) become the legend title.
    x, y : names of the columns drawn on the horizontal / vertical axis.
    subplots : column name; one subplot is drawn per sorted distinct value.
        ``None`` draws a single, untitled plot.
    logx, logy : base of the logarithmic axis scale; a falsy value keeps the
        axis linear.
    fixy : give every axes the same y-limits (lowest lower bound, highest
        upper bound of the drawn subplots).
    n_cols, width, height : grid width and per-subplot size in inches, used
        only when the axes are created here.
    scatter : draw with ``ax.scatter`` instead of ``ax.plot``.
    axs : existing axes (array-like, or a single axes) to draw into.
    percent : string containing ``'x'`` and/or ``'y'``; those axes are
        formatted as percentages with 1.0 shown as 100%.
    sep_figs : create one single-axes figure per subplot instead of a grid.
    add_markers : cycle through line-style / marker combinations per series.
    int_x : restrict major x ticks to integers.
    only_bottom_x : call ``autofmt_xdate`` on every figure owning the axes.
    save_tikz : file stem; when truthy the figure is written to
        ``<save_tikz>.tex`` through tikzplotlib.
    save_csv : file stem; when truthy the plotted rows (all subplots, without
        constant columns) are written to ``<save_csv>.csv``. ``True`` keeps
        the historical file name ``1.csv``.

    Returns
    -------
    The axes that were drawn into (``axs`` as passed in, or the newly
    created array).
    """
    df = df.sort_values(by=[x])

    sub_keys = [None] if subplots is None else sorted(df[subplots].unique())
    n_plots = len(sub_keys)

    if axs is None:
        if sep_figs:
            axs = np.asarray([get_axes(1, 1, width, height) for _ in range(n_plots)])
        else:
            axs = get_axes(ceil(n_plots/n_cols), n_cols, width, height)

    # Accept any array-like of axes (or a single axes) supplied by the caller.
    flat_axes = np.asarray(axs).flatten()

    ylims = []

    for sub_key, ax in zip(sub_keys, flat_axes):
        # Restart the style cycle for every subplot so that the same group
        # position gets the same style across subplots.
        markers = itertools.cycle(
            cycler(ls=['-', '--', '-.', ':']) * cycler(marker=['.', '*', '+', 'x', '^', 'v', 'o', '>', '<'])
        ) if add_markers else itertools.repeat({})

        filtered = df if subplots is None else df[df[subplots] == sub_key]
        plotter = ax.scatter if scatter else ax.plot

        for key, grp in filtered.groupby(group_by):
            if grp.empty:
                continue

            # Identical (x, y) points add nothing to the drawing.
            minimal_grp = grp.drop_duplicates(subset=[x, y])

            plotter(minimal_grp[x], minimal_grp[y],
                    # Escape underscores (label text ends up in TeX output).
                    label=f'{key}'.replace('_', '\\_'),
                    **next(markers))

        ylims.append(ax.get_ylim())
        if subplots is not None:
            ax.set_title(f'{subplots}={sub_key}')
        if logx:
            ax.set_xscale('log', base=logx)
        if logy:
            ax.set_yscale('log', base=logy)
        ax.set_xlabel(x)
        ax.set_ylabel(y)
        # One titled legend per axes; no global plt.legend() call, which would
        # replace the titled legend of whichever axes happens to be current.
        ax.legend(title=','.join(group_by), loc='best')
        if percent:
            if 'x' in percent:
                ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.))
            if 'y' in percent:
                ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.))
        if int_x:
            ax.xaxis.set_major_locator(mtick.MaxNLocator(integer=True))

    if fixy and ylims:
        stacked = np.array(ylims).T
        ylim = (stacked[0].min(), stacked[1].max())
        for ax in flat_axes:
            ax.set_ylim(ylim)

    if only_bottom_x:
        # Format every figure that owns one of the axes, not just the
        # pyplot "current" figure.
        for fig in {ax.figure for ax in flat_axes}:
            fig.autofmt_xdate()
    if save_csv:
        csv_stem = 1 if save_csv is True else save_csv
        drop_redundant_columns(df).to_csv(f'{csv_stem}.csv')
    if save_tikz:
        import tikzplotlib
        tikzplotlib.save(f'{save_tikz}.tex')

    return axs

In [None]:
# Load the results CSV into the notebook-wide `table` frame.
table = load_df(results_path)
# Disabled derived columns, kept for reference. `rank_gain` is only created
# here, so it does not exist unless both lines are re-enabled (or the CSV
# already provides it -- TODO confirm).
# table['min_rank'] = table['m'] - table[['m', 'n']].min(1)
# table['rank_gain'] = table['rank'] - table['min_rank']
# Bare expression so the notebook shows the frame as the cell output.
table

In [None]:
# Plot `d` against `M` (column `m`, renamed for the axis label): one series
# per `code`, one subplot per value of `n`, restricted to rows with k == 3
# and m != d.
axs = plot_simulations(
    table[
        (table['k'] == 3) &
        (table['m'] != table['d'])
    ].rename(columns={'m':'M'}),
    group_by=['code'],
    x='M',
    y='d',
    subplots='n',
    fixy=True,     # same y-limits on every subplot
    logx=False,    # linear x axis
    int_x=True,    # integer x ticks only
)

# Optional export of the figure to PDF (disabled).
# axs.flatten()[0].figure.savefig('pd_zfd_compare_computer_search.pdf', bbox_inches='tight')

In [None]:
# Plot `rank_gain` against `M` for the PD code with n == 5, k == 0, d == 3
# and m != d, as a single plot (no `subplots` column).
# NOTE(review): `rank_gain` is only derived in the commented-out lines of the
# cell that loads `table`; unless the CSV already contains that column this
# call presumably fails with a KeyError -- confirm.
axs = plot_simulations(
    table[
            (table['n'] == 5) &
            (table['k'] == 0) &
            (table['d'] == 3) &
            (table['code'] == 'PD') &
            (table['m'] != table['d'])
    ].rename(columns={'m':'M'}),
    group_by=['code'],
    x='M',
    y='rank_gain',
    fixy=True,
    logx=False,    # linear x axis
    int_x=True,    # integer x ticks only
    n_cols=1,
)

## Interactive

In [None]:
# Frame explored by the interactive plot below.
out = table

# Column roles offered by the widgets. They stay sets; they are sorted only
# at the point where they are handed to a widget, so the option lists appear
# in the same order on every kernel run (set iteration order is arbitrary).
out_cols = set(out.columns) - {'code'}
in_cols = out_cols
sortable = in_cols
not_many = set(out.columns)

# Layout controls.
w_n_cols=widgets.IntSlider(value=3,
                           min=1,
                           max=5,
                           step=1,
                           description='n_cols')
w_width=widgets.IntSlider(value=8,
                          min=1,
                          max=15,
                          step=1,
                          description='width')
w_height=widgets.IntSlider(value=5,
                           min=1,
                           max=15,
                           step=1,
                           description='height')
# Column selectors.
w_group_by=widgets.SelectMultiple(options=sorted(not_many, key=str),
                                  value=['code'],
                                  rows=len(not_many),
                                  description='group_by(*)')
w_x=widgets.Select(options=sorted(sortable, key=str),
                   value='m',
                   rows=len(sortable),
                   description='x')
w_y=widgets.Select(options=sorted(out_cols, key=str),
                   value='d',
                   rows=len(out_cols),
                   description='y')
# `None` (listed first) means "no subplot split".
w_subplots=widgets.Select(options=[None, *sorted(not_many, key=str)],
                          value='n',
                          rows=len(not_many)+1,
                          description='subplots')
# Axis scale selectors: `None` keeps the axis linear.
w_logx=widgets.ToggleButtons(options=[None,2,10],
                             value=None,
                             description='logx base')
w_logy=widgets.ToggleButtons(options=[None,2,10],
                             value=None,
                             description='logy base')

In [None]:
@interact
def plot(
    group_by=w_group_by,
    x=w_x,
    y=w_y,
    subplots=w_subplots,
    logx=w_logx,
    logy=w_logy,
    scatter=False,
    n_cols=w_n_cols,
    width=w_width,
    height=w_height,
    fixy=True,
    percent_x=False,
    percent_y=False,
):
    """Redraw the plot of `out` from the current widget selections."""
    # Fixed row selection. The self-comparison on `k` is False only for NaN.
    # Optional extra filter (disabled): (out['k'] == 0)
    selected_rows = (
        (out['d'] == 4)
        & (out['code'] == 'PD')
        & (out['m'] != out['d'])
        & (out['k'] == out['k'])
    )
    # 'x' and/or 'y' for every axis whose percent checkbox is ticked.
    percent_axes = 'x'*percent_x + 'y'*percent_y

    plot_simulations(
        out[selected_rows],
        list(group_by),
        x,
        y,
        subplots=subplots,
        logx=logx,
        logy=logy,
        fixy=fixy,
        n_cols=n_cols,
        width=width,
        height=height,
        scatter=scatter,
        percent=percent_axes,
    )

# Lay the column selectors out side by side.
HBox([
    VBox([w_x, w_y]),
    w_group_by,
    w_subplots,
    VBox([w_logx, w_logy]),
])

# Set-Sync: IBLT splitting

## General (TODO: merge with the "General" helper cell of the code-search section)

In [None]:
import itertools
import numpy as np
import pandas as pd
from pathlib import Path
%matplotlib inline
import matplotlib.pyplot as plt
from math import ceil
import matplotlib.ticker as mtick
from ipywidgets import interact, HBox, VBox
import ipywidgets as widgets
from cycler import cycler


# Relative path of the results CSV that this section loads with load_df().
results_path = Path('csv/iblt_split_rates.csv')

def drop_redundant_columns(df):
    """Keep only the columns of ``df`` that take at least two distinct values."""
    def varies(column_name):
        # unique() lists every distinct entry, missing values included.
        distinct = df[column_name].unique()
        return len(distinct) > 1

    kept = list(filter(varies, df.columns))
    return df[kept]

def load_df(fname):
    """Load the success-rate CSV and reshape it to one row per method.

    The two columns ``noPriorSuccessRate`` and ``withPriorSuccessRate`` are
    melted into a ``method`` column (``'noPrior'`` / ``'withPrior'``) and a
    ``successRate`` column. Every other column is kept as an identifier, in
    its original CSV order, so the resulting column and row order is the
    same on every run.

    Parameters
    ----------
    fname : path or file-like object accepted by ``pandas.read_csv``.

    Returns
    -------
    The long-format DataFrame: identifier columns, then ``method``, then
    ``successRate``.
    """
    df = pd.read_csv(fname)
    # Ordered lists (not sets) keep the melt output deterministic.
    rates_cols = ['noPriorSuccessRate', 'withPriorSuccessRate']
    other_cols = [col for col in df.columns if col not in rates_cols]
    df = df.melt(id_vars=other_cols,
                 value_vars=rates_cols,
                 var_name='method',
                 value_name='successRate')
    # Strip the common 'SuccessRate' suffix to obtain the method name.
    df['method'] = df['method'].str[:-len('SuccessRate')]
    return df

def get_axes(n_rows, n_cols, width, height):
    """Build a new figure with ``n_rows`` x ``n_cols`` subplots and return its axes.

    Each subplot cell gets ``width`` x ``height`` inches. The axes come back
    as a 2-D array regardless of the grid shape (``squeeze=False``).
    """
    canvas, axes_grid = plt.subplots(n_rows, n_cols, squeeze=False)
    # Scale the figure so that every cell has the requested size.
    canvas.set_size_inches(width*n_cols, height*n_rows)
    return axes_grid

def plot_simulations(df,
                     group_by,
                     x, y,
                     subplots=None,
                     rolling=None,
                     logx=2,
                     logy=False,
                     fixy=False,
                     n_cols=3,
                     width=8,
                     height=5,
                     scatter=False,
                     axs=None,
                     percent=None,
                     sep_figs=False,
                     add_markers=True,
                     save_tikz=False,
                     save_csv=False):
    """Plot column ``y`` against column ``x``, one series per group.

    Parameters
    ----------
    df : DataFrame holding the ``x``, ``y``, ``group_by`` (and ``subplots``) columns.
    group_by : list of column names; each distinct combination becomes one series
        and the names (comma-joined) become the legend title.
    x, y : names of the columns drawn on the horizontal / vertical axis.
    subplots : column name; one subplot is drawn per sorted distinct value.
        ``None`` draws a single, untitled plot.
    rolling : window size of a rolling mean applied to each series' y values;
        ``True`` means a window of 3, a falsy value disables smoothing.
    logx, logy : base of the logarithmic axis scale; a falsy value keeps the
        axis linear.
    fixy : give every axes the same y-limits (lowest lower bound, highest
        upper bound of the drawn subplots).
    n_cols, width, height : grid width and per-subplot size in inches, used
        only when the axes are created here.
    scatter : draw with ``ax.scatter`` instead of ``ax.plot``.
    axs : existing axes (array-like, or a single axes) to draw into.
    percent : string containing ``'x'`` and/or ``'y'``; those axes are
        formatted as percentages with 1.0 shown as 100%.
    sep_figs : create one single-axes figure per subplot instead of a grid.
    add_markers : cycle through line-style / marker combinations per series.
    save_tikz : file stem; when truthy the figure is written to
        ``<save_tikz>.tex`` through tikzplotlib.
    save_csv : file stem; when truthy the plotted rows (all subplots, without
        constant columns) are written to ``<save_csv>.csv``. ``True`` keeps
        the historical file name ``1.csv``.

    Returns
    -------
    The axes that were drawn into (``axs`` as passed in, or the newly
    created array).
    """
    df = df.sort_values(by=[x])

    sub_keys = [None] if subplots is None else sorted(df[subplots].unique())
    n_plots = len(sub_keys)

    if axs is None:
        if sep_figs:
            axs = np.asarray([get_axes(1, 1, width, height) for _ in range(n_plots)])
        else:
            axs = get_axes(ceil(n_plots/n_cols), n_cols, width, height)

    # Accept any array-like of axes (or a single axes) supplied by the caller.
    flat_axes = np.asarray(axs).flatten()

    # `rolling=True` is shorthand for a 3-sample window.
    window = 3 if rolling is True else rolling

    ylims = []

    for sub_key, ax in zip(sub_keys, flat_axes):
        # Restart the style cycle for every subplot so that the same group
        # position gets the same style across subplots.
        markers = itertools.cycle(
            cycler(ls=['-', '--', '-.', ':']) * cycler(marker=['.', ',', 'o', '*', '+', 'x', '^', 'v', '>', '<'])
        ) if add_markers else itertools.repeat({})

        filtered = df if subplots is None else df[df[subplots] == sub_key]
        plotter = ax.scatter if scatter else ax.plot

        for key, grp in filtered.groupby(group_by):
            if grp.empty:
                continue

            # Identical (x, y) points add nothing to the drawing.
            minimal_grp = grp.drop_duplicates(subset=[x, y])

            y_vec = minimal_grp[y]
            if window:
                y_vec = y_vec.rolling(window).mean()

            plotter(minimal_grp[x], y_vec,
                    # Escape underscores (label text ends up in TeX output).
                    label=f'{key}'.replace('_', '\\_'),
                    **next(markers))

        ylims.append(ax.get_ylim())
        if subplots is not None:
            ax.set_title(f'{subplots}={sub_key}')
        # `base` is the keyword current matplotlib accepts for the log scale
        # on both the x and the y axis.
        if logx:
            ax.set_xscale('log', base=logx)
        if logy:
            ax.set_yscale('log', base=logy)
        ax.set_xlabel(x)
        ax.set_ylabel(y)
        # One titled legend per axes; no global plt.legend() call, which would
        # replace the titled legend of whichever axes happens to be current.
        ax.legend(title=','.join(group_by), loc='best')
        if percent:
            if 'x' in percent:
                ax.xaxis.set_major_formatter(mtick.PercentFormatter(1.))
            if 'y' in percent:
                ax.yaxis.set_major_formatter(mtick.PercentFormatter(1.))

    if fixy and ylims:
        stacked = np.array(ylims).T
        ylim = (stacked[0].min(), stacked[1].max())
        for ax in flat_axes:
            ax.set_ylim(ylim)

    # Format every figure that owns one of the axes, not just the pyplot
    # "current" figure.
    for fig in {ax.figure for ax in flat_axes}:
        fig.autofmt_xdate()

    if save_csv:
        csv_stem = 1 if save_csv is True else save_csv
        drop_redundant_columns(df).to_csv(f'{csv_stem}.csv')
    if save_tikz:
        import tikzplotlib
        tikzplotlib.save(f'{save_tikz}.tex')

    return axs

## Simulation evaluation

In [None]:
# Load the results CSV into the notebook-wide `table` frame.
table = load_df(results_path)
# Bare expression so the notebook shows the frame as the cell output.
table

In [None]:
# successRate against bothFraction: one series per method, one subplot per
# errorRate, for rows with totalTxsNum == 1000 and idealMemoryFrac == .95.
# The self-comparison on totalTxsNum is False only for NaN entries, so it
# adds nothing on top of the `== 1000` test.
plot_simulations(table[
    (table['totalTxsNum'] == 1000) &
    (table['idealMemoryFrac'] == .95) &
    (table['totalTxsNum'] == table['totalTxsNum'])
],
                 group_by=['method'],
                 x='bothFraction',
                 y='successRate',
                 subplots='errorRate',
                 percent='xy'  # both axes formatted as percentages
);  # trailing semicolon keeps the notebook from echoing the returned axes

In [None]:
# successRate against errorRate: one series per idealMemoryFrac, one subplot
# per method, for rows with bothFraction == .95 and totalTxsNum == 1000.
# The self-comparison on totalTxsNum is False only for NaN entries, so it
# adds nothing on top of the `== 1000` test.
plot_simulations(table[
    (table['bothFraction'] == .95) &
    (table['totalTxsNum'] == 1000) &
    (table['totalTxsNum'] == table['totalTxsNum'])
],
                 group_by=['idealMemoryFrac'],
                 x='errorRate',
                 y='successRate',
                 subplots='method',
                 n_cols=2,
                 percent='xy',  # both axes formatted as percentages
                 rolling=5,     # 5-sample rolling mean on each series
);  # trailing semicolon keeps the notebook from echoing the returned axes

In [None]:
# successRate against idealMemoryFrac: one series per errorRate, one subplot
# per method, for rows with bothFraction == .95 and totalTxsNum == 1000.
# The self-comparison on totalTxsNum is False only for NaN entries, so it
# adds nothing on top of the `== 1000` test.
plot_simulations(table[
    (table['bothFraction'] == .95) &
    (table['totalTxsNum'] == 1000) &
    (table['totalTxsNum'] == table['totalTxsNum'])
],
                 group_by=['errorRate'],
                 x='idealMemoryFrac',
                 y='successRate',
                 subplots='method',
                 n_cols=2,
                 percent='xy'  # both axes formatted as percentages
);  # trailing semicolon keeps the notebook from echoing the returned axes

In [None]:
# successRate against idealMemoryFrac: one series per method, one subplot per
# bothFraction value above .5, for rows with errorRate == .9 and
# totalTxsNum == 1000.
# The self-comparison on totalTxsNum is False only for NaN entries, so it
# adds nothing on top of the `== 1000` test.
plot_simulations(
    table[
        (table['errorRate'] == .9) &
        (table['bothFraction'] > .5) &
        (table['totalTxsNum'] == 1000) &
        (table['totalTxsNum'] == table['totalTxsNum'])
    ],
    group_by=['method'],
    x='idealMemoryFrac',
    y='successRate',
    subplots='bothFraction',
    n_cols=3,
    percent='xy',  # both axes formatted as percentages
    # Shared y-limits across subplots are currently disabled:
#     fixy=True
);  # trailing semicolon keeps the notebook from echoing the returned axes

## Interactive

In [None]:
# Frame explored by the interactive plot below.
out = table

# Column roles offered by the widgets. They stay sets; they are sorted only
# at the point where they are handed to a widget, so the option lists appear
# in the same order on every kernel run (set iteration order is arbitrary).
out_cols = {'successRate'}
in_cols = set(out.columns) - out_cols
sortable = in_cols
not_many = in_cols

# Layout controls.
w_n_cols=widgets.IntSlider(value=3,
                           min=1,
                           max=5,
                           step=1,
                           description='n_cols')
w_width=widgets.IntSlider(value=8,
                          min=1,
                          max=15,
                          step=1,
                          description='width')
w_height=widgets.IntSlider(value=5,
                           min=1,
                           max=15,
                           step=1,
                           description='height')
# Column selectors.
w_group_by=widgets.SelectMultiple(options=sorted(not_many, key=str),
                                  value=['method'],
                                  rows=len(not_many),
                                  description='group_by(*)')
w_x=widgets.Select(options=sorted(sortable, key=str),
                   value='idealMemoryFrac',
                   rows=len(sortable),
                   description='x')
w_y=widgets.Select(options=sorted(out_cols, key=str),
                   value='successRate',
                   rows=len(out_cols),
                   description='y')
# `None` (listed first) means "no subplot split".
w_subplots=widgets.Select(options=[None, *sorted(not_many, key=str)],
                          value='totalTxsNum',
                          rows=len(not_many)+1,
                          description='subplots')
# Axis scale selectors: `None` keeps the axis linear.
w_logx=widgets.ToggleButtons(options=[None,2,10],
                             value=None,
                             description='logx base')
w_logy=widgets.ToggleButtons(options=[None,2,10],
                             value=None,
                             description='logy base')

In [None]:
@interact
def plot(
    group_by=w_group_by,
    x=w_x,
    y=w_y,
    subplots=w_subplots,
    logx=w_logx,
    logy=w_logy,
    scatter=False,
    n_cols=w_n_cols,
    width=w_width,
    height=w_height,
    fixy=True,
    percent_x=False,
    percent_y=False,
):
    """Redraw the plot of `out` from the current widget selections."""
    # Fixed row selection. The self-comparison on totalTxsNum is False only
    # for NaN. Optional extra filters (disabled):
    #     (out['method'] == 'withPrior')
    #     (out['errorRate'] == 1)
    #     (out['bothFraction'] == .7)
    #     (out['errorRate'] > .5)
    #     (out['bothFraction'] > .5)
    selected_rows = (
        (out['totalTxsNum'] == 1000)
        & (out['totalTxsNum'] == out['totalTxsNum'])
    )
    # 'x' and/or 'y' for every axis whose percent checkbox is ticked.
    percent_axes = 'x'*percent_x + 'y'*percent_y

    plot_simulations(
        out[selected_rows],
        list(group_by),
        x,
        y,
        subplots=subplots,
        logx=logx,
        logy=logy,
        fixy=fixy,
        n_cols=n_cols,
        width=width,
        height=height,
        scatter=scatter,
        percent=percent_axes,
    )

# Lay the column selectors out side by side.
HBox([
    VBox([w_x, w_y]),
    w_group_by,
    w_subplots,
    VBox([w_logx, w_logy]),
])