In [None]:
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from matplotlib import pyplot

import os
from parse import parse
import re

In [None]:
# Notebook-wide matplotlib styling: base font size, legend text/title size,
# and the font type used when figures are written as PDF.
plt.rcParams.update({
    'font.size': 16,
    'legend.fontsize': 14,
    'legend.title_fontsize': 14,
    'pdf.fonttype': 42,
})

In [None]:
# Display name used for the model in the figure legend.
coda_name = 'DUMBO'

# Input locations; traces and models are addressed relative to the
# simulation output directory.
results_dir = '../../results/simu_output'
traces_dir = results_dir + '/../../traces'
models_dir = results_dir + '/../5_pk'

In [None]:
# Experiment selection: heavy-hitter fraction, trace, protocol filter and
# the quantile whose MRE is extracted from the simulation outputs.
hh_frac = 0.02
trace = 'traces/caida/20160121-135000.UTC.anon.pcap'
proto = 'all'
q = 0.95

# The last two path components are the trace set and the capture file name.
trace_set, trace_name = trace.rsplit('/', 2)[-2:]
# Trace id = "<set>-<file name up to its first dot>".
trace_id = f"{trace_set}-{trace_name.partition('.')[0]}"
# Directory holding the per-model error files for this configuration.
folder = '/'.join([results_dir, trace_id, proto, f'top_{hh_frac}_pct', 'iat', 'error'])

In [None]:
def _read_rate(rates_path, key):
    """Return the rate stored on the first line of ``rates_path`` containing ``key``.

    The value is the second comma-separated field of that line. The file is
    opened in a ``with`` block so its handle is always closed.

    Raises:
        ValueError: if no line of the file contains ``key``.
    """
    with open(rates_path) as rates_file:
        for line in rates_file:
            if key in line:
                return float(line.split(",")[1])
    raise ValueError(f"no line containing {key!r} in {rates_path}")


def _read_text(path):
    """Return the full content of the text file at ``path``."""
    with open(path, 'r') as f:
        return f.read()


def _extract_mre(output, label, quantile, source):
    """Return the number following ``MRE <quantile> <label>:`` in ``output``.

    Args:
        output: text of a simulation output file.
        label: text between the quantile and the colon (e.g. ``'Model'``).
        quantile: quantile value as it is printed in the file.
        source: file path, only used in the error message.

    Raises:
        ValueError: if the entry is not present in ``output``.
    """
    # The quantile is escaped so that the "." in e.g. "0.95" only matches a
    # literal dot instead of any character.
    pattern = r'MRE {} {}:\s+([\d\.e-]+)'.format(re.escape(str(quantile)), label)
    match = re.search(pattern, output)
    if match is None:
        raise ValueError(f"no 'MRE {quantile} {label}' entry in {source}")
    return float(match.group(1))


# FNR -> FPR mapping for this trace, rounded to 3 decimals. The index column
# is selected by name so the mapping does not depend on the CSV column order.
fnr_to_fpr_file = f'{traces_dir}/{trace_set}/{os.path.splitext(trace_name)[0]}.{proto}.{hh_frac}_fnr_to_fpr.csv'
fnr_to_fpr = {
    round(k, 3): round(v, 3)
    for k, v in pd.read_csv(fnr_to_fpr_file, usecols=["fnr", "fpr"], index_col="fnr")
    .squeeze("columns")
    .to_dict()
    .items()
}

data_ap = []
data_fnr = []
data_fpr = []
baseline_1 = None
baseline_2 = None
baseline_3 = None
oracle = None
coda = None
pheavy = None

# FNR/FPR of the two models, read from their rates.txt files.
proto_dir = 'tcp_udp' if proto == 'all' else 'tcp'
coda_rates_file = f"{models_dir}/{proto_dir}/initial5min_pruning+feat_selection+quantization_0dryrun_{trace_set}/rates.txt"
pheavy_rates_file = f"{models_dir}/{proto_dir}/pheavy_5-20_thr0.6_0dryrun_{trace_set}/rates.txt"
coda_fnr = _read_rate(coda_rates_file, "FNR")
coda_fpr = _read_rate(coda_rates_file, "FPR")
pheavy_fnr = _read_rate(pheavy_rates_file, "FNR")
pheavy_fpr = _read_rate(pheavy_rates_file, "FPR")

for filename in os.listdir(folder):
    # Only files tagged _500KB, or baseline/coda/pheavy files, are considered.
    if not any(tag in filename for tag in ('_500KB', 'baseline', 'coda', 'pheavy')):
        continue
    model = filename.split('_')[0].replace('.txt', '')
    path = f"{folder}/{filename}"
    print(path)
    if model == 'baseline':
        output = _read_text(path)
        baseline_1 = _extract_mre(output, '32 bins 1 byte', q, path)
        baseline_2 = _extract_mre(output, '16 bins 2 bytes', q, path)
        baseline_3 = _extract_mre(output, '32 bins 2 bytes', q, path)
    elif model == 'oracle':
        oracle = _extract_mre(_read_text(path), 'Model', q, path)
    elif model == 'coda':
        coda = _extract_mre(_read_text(path), 'Model', q, path)
    elif model == 'pheavy':
        pheavy = _extract_mre(_read_text(path), 'Model', q, path)
    elif model == 'sim' and filename.split('_')[1][:2] == 'ap':
        ap, _size_kb = parse("sim_ap{:f}_{:d}KB.txt", filename)
        error = _extract_mre(_read_text(path), 'Model', q, path)
        data_ap.append([ap, error])
    elif model == 'sim' and filename.split('_')[1][:3] == 'fnr':
        fnr, fpr, _size_kb = parse("sim_fnr{:f}_fpr{:f}_{:d}KB.txt", filename)
        error = _extract_mre(_read_text(path), 'Model', q, path)
        # Keep only the (FNR, FPR) pairs present in the fnr_to_fpr mapping.
        if (fnr, fpr) in fnr_to_fpr.items():
            data_fnr.append([fnr, fpr, error])

errors_ap_df = pd.DataFrame(data_ap, columns=['AP', 'error'])
errors_fnr_df = pd.DataFrame(data_fnr, columns=['FNR', 'FPR', 'error'])

In [None]:
# Order both error tables by their x-axis variable.
errors_ap_df = errors_ap_df.sort_values(by='AP')
errors_fnr_df = errors_fnr_df.sort_values(by='FNR')

# The "16 bins 2 byte" baseline is drawn only when it is below three times
# the gap between the "32 bins 1 byte" and "2x memory" baselines.
plot_baseline_2 = baseline_2 < 3 * (baseline_1 - baseline_3)

fig, ax = plt.subplots(figsize=(6, 4))
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# Bottom axis: error of the synthetic hint-based runs versus FNR, plus a
# proxy handle for the legend.
ax.plot(errors_fnr_df['FNR'], errors_fnr_df['error'], color="tab:blue", zorder=0)
l_synth = mlines.Line2D([], [], label='hint-based (synth)', linestyle='-', marker='None', color="tab:blue")

# Twin axis: no data is plotted on it in this cell; it provides the top x-axis.
ax2 = ax.twiny()
ax2.set_prop_cycle(color=colors[1:])

# Horizontal baseline levels, in legend order.
baseline_lines = [(baseline_1, '--', '32 bins 1 byte')]
if plot_baseline_2:
    baseline_lines.append((baseline_2, ':', '16 bins 2 byte'))
baseline_lines.append((baseline_3, '-', '2x memory'))
for level, dash, caption in baseline_lines:
    ax.axhline(y=level, color='black', linestyle=dash, label=caption)

# Fixed tick positions on the bottom axis; the same positions are reused
# later for the top axis.
ax2.set_xlabel('Mice mispredictions')
elephant_ticks = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
ax.set_xticks(elephant_ticks, elephant_ticks)
mice_tick_locations = ax.get_xticks()
def tick_function(x_list, df, tol=1e-9):
    """Map FNR tick positions to the FPR values used as top-axis labels.

    Args:
        x_list: iterable of tick positions, compared against ``df["FNR"]``.
        df: DataFrame with ``"FNR"`` and ``"FPR"`` columns.
        tol: absolute tolerance for matching a tick to an ``FNR`` value.
            Exact ``==`` on floats fails on representation noise
            (e.g. ``0.1 + 0.2`` vs ``0.3``).

    Returns:
        One label per tick: the FPR of the first matching row rounded to
        3 decimals, or ``""`` when no row matches that tick.
    """
    ticks = []
    for v in x_list:
        matches = df.loc[(df["FNR"] - v).abs() <= tol, "FPR"]
        if matches.empty:
            # No data point at this tick: leave the label blank
            # instead of raising IndexError.
            ticks.append("")
        else:
            ticks.append(round(matches.iloc[0], 3))
    return ticks
# Set the bottom axis to its final range BEFORE mirroring it onto the twin
# axis. twiny() axes have independent x-limits, so copying the limits while
# the bottom axis is still autoscaled would leave the top ticks shifted
# relative to the bottom ticks once the bottom axis is set to [0, 1].
ax.set_xlim([0, 1])
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(mice_tick_locations)
ax2.set_xticklabels(tick_function(mice_tick_locations, errors_fnr_df))
ax2.set_xlabel("Mice mispredictions")

# DUMBO operating point and its legend proxy handle.
ax.scatter(coda_fnr, coda, marker="*", color="black", s=120, zorder=2)
l_coda = mlines.Line2D([], [], color='black', marker='*', linestyle='None', markersize=10, label=f'{coda_name}')
# The pHeavy marker is not drawn; see the caveat printed at the end of this cell.

# Vertical range: top is 5% of the plotted span above the highest drawn
# baseline; bottom is chosen so that bot_ref sits 25% of the way up the axis.
# bot_ref uses the error of the first row, i.e. the lowest FNR after sorting.
bot_ref = min(baseline_3, errors_fnr_df['error'].iloc[0])
top_ref = baseline_2 if plot_baseline_2 else baseline_1
y_max = top_ref + (top_ref - bot_ref) * 0.05
y_min = (bot_ref - 0.25 * y_max) / 0.75

# Legend for the model handles, created on the twin axis, which is the
# current axes at this point.
ax2.add_artist(
    ax2.legend(
        handles=[l_synth, l_coda],
        ncol=2, columnspacing=0.9, frameon=False,
        loc='lower center', borderaxespad=0.2
    )
)

# Legend for the baselines; its padding is derived from where baseline_1
# sits within the y-range.
ax.legend(
    ncol=1, columnspacing=0.9, frameon=False, loc='upper left', title='Baselines',
    borderaxespad=(y_max - baseline_1) / (y_max - y_min) * 14
)

ax.set_xlabel('Elephants mispredictions')
ax.set_ylabel('Mean relative error')
ax.grid(alpha=0.5)

ax.set_ylim([y_min, y_max])

plt.subplots_adjust(left=0.17, right=0.97, bottom=0.15, top=0.86)
# savefig does not create missing directories.
os.makedirs('out', exist_ok=True)
plt.savefig(f'out/iat_sim.{trace_set}.{proto}.{hh_frac}.q{q}.pdf', format='pdf', dpi=1200)
print(f'trace={trace_set} proto={proto}, frac={hh_frac}, quantile={q}')
print("ISSUE: hint-based (synth) and DUMBO are both based on a 20K Elpeh Tr. constraint but NOT pHeavy! \nHence the Mice mispredictions ticks are incorrect for pHeavy")
plt.show()