# **Live Demo Evaluation**

Small notebook that fetches the live results, plots query marks and computes the relative risk.


In [5]:
import pathlib
import sys

# honeyquest path hack: put the directory three levels above the working
# directory on sys.path (once) so that the module imports below work
__package__ = "honeyquest"
modulepath = (pathlib.Path.cwd() / ".." / ".." / "..").resolve().as_posix()
if modulepath not in sys.path:
    sys.path.append(modulepath)

##########################################################################

from pathlib import Path
from datetime import datetime, timezone, timedelta

from time import sleep
from dotenv import dotenv_values
import matplotlib.pyplot as plt

from honeyquest.data.ops import (
    counting,
    loading,
    cleaning,
    processing,
    transforming,
    visuals,
)

# notebook settings: entries from ".env.local" override those from ".env.shared"
NB_CONFIG = {**dotenv_values(".env.shared"), **dotenv_values(".env.local")}

# configured paths, joined onto the current working directory and made absolute
QUERY_DATA_PATH = pathlib.Path.cwd().joinpath(NB_CONFIG["QUERY_DATA_PATH"]).resolve().as_posix()
QUERY_INDEX_PATH = pathlib.Path.cwd().joinpath(NB_CONFIG["QUERY_INDEX_PATH"]).resolve().as_posix()

LIVE_URL = NB_CONFIG["LIVE_URL"]

# map every "LIVE_TOKENS__<name>" setting to <name>; split only once so that a
# name which itself contains "__" is kept whole instead of being truncated
LIVE_TOKENS = {
    key.split("__", 1)[1]: val
    for key, val in NB_CONFIG.items()
    if key.startswith("LIVE_TOKENS__")
}

In [6]:
def load_and_process(min_timestamp: "datetime | None" = None):
    """Load the live results, print the relative risk and save the query rating plots.

    Parses the queries below ``QUERY_DATA_PATH``, loads the experiments from
    ``LIVE_URL`` using ``LIVE_TOKENS``, filters them with
    ``cleaning.drop_timeframe`` and post-processes the marks. It then counts
    the responses flagged ``ans_hack_on_risk`` for the two hard-coded queries,
    prints the percentage change of the first count relative to the second
    (only if it is negative, otherwise ``-``), and writes one SVG rating plot
    per query to ``./outputs/queries/<query id>.svg``.

    Args:
        min_timestamp: Passed through to ``cleaning.drop_timeframe``.
            Presumably the earliest timestamp to keep, with ``None`` meaning
            no lower bound -- confirm against ``cleaning.drop_timeframe``.

    Returns:
        None. Returns early, without printing or plotting, when the flattened
        marks are empty.
    """
    # parse all queries and responses for all experiments
    QUERIES_DICT = loading.parse_all_queries(QUERY_DATA_PATH)
    RESULTS_DICT = loading.load_experiments(
        local_base="",
        local_paths={},
        live_url=LIVE_URL,
        live_tokens=LIVE_TOKENS,
    )

    cleaning.drop_timeframe(RESULTS_DICT, min_timestamp=min_timestamp)

    # flatten and postprocess the data
    MARKS_DF, _ = transforming.flatten_experiments(RESULTS_DICT)
    if MARKS_DF.empty:
        return

    MARKS_DF, RESPONSES_DF = processing.postprocess_marks(MARKS_DF, QUERIES_DICT)

    # dataframes just with the ids for easy merges
    MARK_IDS = MARKS_DF[["eid", "uid", "rid", "qid"]]
    RESPONSE_IDS = MARK_IDS.drop_duplicates().set_index("rid")
    assert MARK_IDS.index.is_unique
    assert RESPONSE_IDS.index.is_unique

    # the two queries that are compared and plotted
    # (the names qd/qr are referenced via "@" inside the query strings below)
    qd = "TR856.httpheaders.outdated-apache-server-1.0.3-and-httpheader-apiserver"
    qr = "TR150.httpheaders.outdated-apache-server-1.0.3"

    # count the responses flagged "ans_hack_on_risk" for each of the two queries
    df = RESPONSES_DF.merge(RESPONSE_IDS, on="rid", how="left")
    hacks_qd = df.query("ans_hack_on_risk and qid == @qd").shape[0]
    hacks_qr = df.query("ans_hack_on_risk and qid == @qr").shape[0]

    # relative risk as a truncated percentage change of qd versus qr;
    # NaN (no qr responses yet) and non-negative values are shown as "-"
    rr = int(((hacks_qd / hacks_qr) - 1) * 100) if hacks_qr > 0 else float("nan")
    rr = str(rr) if rr < 0 else "-"
    print("relative risk", rr)

    for query_id in [qd, qr]:
        rating_df = counting.get_query_rating(query_id, MARKS_DF, QUERIES_DICT)
        fig = visuals.answer.plot_query_rating(rating_df, show=False, cleanup=False)

        try:
            # anchor the relative risk label at the upper right corner of the
            # axes; use the axes of *this* figure rather than pyplot's global state
            ax = fig.gca()
            ax.text(
                0.99,
                0.99,
                rr,
                horizontalalignment="right",
                verticalalignment="bottom",
                transform=ax.transAxes,
                fontsize=8,
            )

            # save figure as svg
            path = f"./outputs/queries/{query_id}.svg"
            Path(path).parent.mkdir(parents=True, exist_ok=True)
            fig.savefig(path, dpi=fig.dpi, bbox_inches="tight", pad_inches=0)
        finally:
            # always release the figure, even if annotating or saving failed,
            # so that repeated calls do not accumulate open figures
            fig.clear()
            plt.close(fig)

In [None]:
# fixed UTC+02:00 offset used to anchor the start of the presentation
austria_tz = timezone(timedelta(hours=2))
presentation_start = datetime(2023, 5, 1, 21, 40, tzinfo=austria_tz)

# poll the live results until the kernel is interrupted
# (KeyboardInterrupt is not an Exception subclass, so it still stops the loop)
while True:
    try:
        load_and_process(min_timestamp=presentation_start)
    except Exception as exc:
        # keep the live demo running, but report the failure instead of hiding it
        print(f"load_and_process failed: {exc!r}")

    # pause after every attempt, successful or not, so that a persistent
    # error does not turn into a busy loop against the live endpoint
    sleep(3)