In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import sys
module_path = os.path.abspath(os.path.join('../../dil/crawl'))
if module_path not in sys.path:
    sys.path.append(module_path)
import glob

In [None]:
import pandas as pd
from http import HTTPStatus
import h2o
from pruner import fit_data, post_process_single, single_methods
import re
import contextlib

In [None]:
# Initialise H2O with a fixed thread/memory budget and turn its progress output off.
h2o.init(nthreads=100, max_mem_size="100G")
h2o.no_progress()

# Every entry of the MOJO export directory is imported as one model.
files = glob.glob("trees/2022-04-22-2/mojo/*")

# stdout is redirected to os.devnull while the models are being imported.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    models = [h2o.import_mojo(os.path.abspath(mojo_path)) for mojo_path in files]

In [None]:
# Show how many models were imported from the MOJO directory.
len(models)

In [None]:
from multiprocessing.pool import ThreadPool

In [None]:
def predict_model(args):
    """Run a single model on the shared frame and report its two predictions.

    args is an ``[hf, model]`` pair, packed into one argument so the function
    can be handed to ``pool.map``. The inclusion method, observation method and
    browser are parsed from the underscore-separated file name that follows
    ``mojo/`` in ``model.actual_params["path"]``.

    Returns ``[inc, method, browser, first, second]``. ``first``/``second`` are
    the ``predict`` values of rows 0 and 1 when that column holds exactly two
    distinct values; in every other case (including a failing prediction, whose
    exception is printed) both are None.
    """
    hf, model = args

    # These two method names contain an underscore themselves; rewrite them
    # with a dash so that splitting on "_" below yields exactly four parts.
    path = model.actual_params["path"]
    for underscored, dashed in (("fetch_response", "fetch-response"),
                                ("fetch_errormessage", "fetch-errormessage")):
        path = path.replace(underscored, dashed)
    inc, method, browser, _ = path.split("mojo/")[1].split("_")

    first, second = None, None
    try:
        res = h2o.as_list(model.predict(hf))
        # Only report the pair when the model separates the two rows.
        if res["predict"].nunique() == 2:
            first, second = res.iloc[0]["predict"], res.iloc[1]["predict"]
    except Exception as e:
        print(e)
    return [inc, method, browser, first, second]


def predict_trees(input_rows):
    """Get the predictions of every loaded model for the given rows.

    input_rows is converted to an H2OFrame, and predict_model is called once
    per entry of the global ``models`` list on a pool of 50 threads.

    Returns a DataFrame with the columns inc, method, browser, val-1 and val-2;
    rows that contain a missing value are dropped.
    """
    frame = h2o.H2OFrame(input_rows)
    # One job per model; every job shares the same frame.
    jobs = [[frame, model] for model in models]
    with ThreadPool(processes=50) as pool:
        rows = pool.map(predict_model, jobs)
    predictions = pd.DataFrame(rows, columns=["inc", "method", "browser", "val-1", "val-2"])
    return predictions.dropna()

In [None]:
def _empty_response(state, code, headers):
    """Build one response record with an empty body for the given state, status code and headers."""
    return {"url": "a", "resp_code": code, "state": state, "resp_headers": headers,
            "resp_body_info": "empty", "resp_body_hash": ""}


def test_single_key():
    """Test if single headers/status-codes can be distinguished.

    Two sweeps are run, each comparing a pair of empty-bodied responses that
    differ in exactly one attribute:

    * one header set vs. no headers, for the status codes 200, 302 and 404;
    * every HTTPStatus code plus 999 vs. a reference code (404 for 2XX codes,
      200 for all others).

    Returns:
        dict mapping a label of the difference (``"{header}-{code}"`` or
        ``"{code}-{compare_code}"``) to the result stored by test_af.
    """
    acc = {}
    single_headers = [
        {"cross-origin-resource-policy": "same-origin"}, {"content-disposition": "attachment"},
        {"content-type": "image/png"}, {"cross-origin-opener-policy": "same-origin"},
        {"x-content-type-options": "nosniff"}, {"x-frame-options": "deny"}
    ]
    for header in single_headers:
        # Compare headers for most common responses
        for code in [200, 302, 404]:
            af = pd.DataFrame([_empty_response("a", code, header),
                               _empty_response("b", code, {})])
            acc = test_af(af, f"{header}-{code}", acc)

    for code in [status.value for status in HTTPStatus] + [999]:
        # Compare all 2XX codes with 404 and all other codes with 200
        compare_code = 404 if str(code).startswith("2") else 200
        af = pd.DataFrame([_empty_response("a", code, {}),
                           _empty_response("b", compare_code, {})])
        acc = test_af(af, f"{code}-{compare_code}", acc)
    return acc
        
def test_af(af, diff, acc):
    """Check which methods can tell the two responses in af apart.

    af: pandas DataFrame of size 2, one row per response
    diff: label describing the difference between the two rows
    acc: accumulator dict; the result is stored under acc[diff]

    Returns acc (the same dict that was passed in).
    """
    # Each row is passed through fit_data before it is given to the models.
    fitted_rows = af.apply(fit_data, axis=1)
    acc[diff] = predict_trees(fitted_rows)
    return acc

In [None]:
# The single-key sweep is disabled here; assign `acc = test_single_key()`
# instead to run it.
# The placeholder is an empty dict so that it has the same type as the real
# result, which is indexed by key in the cell that displays it.
acc = {}

In [None]:
# Open several new bug reports based on these results?
# + test additional common combinations
# For every tested difference: its label, the number of results per browser,
# and the first three result rows.
for key in acc:
    leak_frame = acc[key]
    print(f"\n{key}")
    browser_counts = leak_frame.groupby(["browser"])["val-1"].count()
    display(browser_counts.to_frame())
    display(leak_frame.head(3))

## Application: distinguish two responses

In [None]:
import ipywidgets as widgets
from IPython.display import display, Markdown
import functools

In [None]:
# Selectable status codes, grouped by class; 999 is offered in addition.
codes = [
    100, 101, 102, 103,
    200, 201, 202, 203, 204, 205, 206, 207, 208, 226,
    300, 301, 302, 303, 304, 305, 307, 308,
    400, 401, 402, 403, 404, 405, 406, 407, 408, 409,
    410, 411, 412, 413, 414, 415, 416, 417, 418,
    421, 422, 423, 424, 425, 426, 428, 429, 431, 451,
    500, 501, 502, 503, 504, 505, 506, 507, 508, 510, 511,
    999,
]

# Selectable body descriptions; 'empty' stands for no body.
bodies = [
    'ecocnt_html=num_frames=1,input_id=test1',
    'ecocnt_html=num_frames=2',
    'ecocnt_html=post_message=mes1',
    'ecocnt_html=meta_refresh=0;http://172.17.0.1:8000',
    'ecocnt_html=paymentAPI=true',
    'ecocnt_css=h1 {color: blue}',
    'ecocnt_js=.,,.',
    'ecocnt_js=var a=5;',
    'ecocnt_img=width=50,height=50,type=png',
    'ecocnt_vid=width=100,height=100,duration=2',
    'ecocnt_audio=duration=1',
    'ecocnt_pdf=a=a',
    'empty',
]

# Selectable values per response header; 'empty' is the default choice in the form.
xcto_ops = ['nosniff', 'empty']
xfo_ops = ['deny', 'empty']
ct_ops = [
    'text/html',
    'text/css',
    'application/javascript',
    'video/mp4',
    'audio/wav',
    'image/png',
    'application/pdf',
    'empty',
]
cd_ops = ['attachment', 'empty']
coop_ops = ['same-origin', 'empty']
corp_ops = ['same-origin', 'empty']
loc_ops = ['http://localhost:8000/echo/', '/', 'empty']
csp_ops = ["frame-ancestors 'self'", "default-src 'self'", "empty"]

In [None]:
# Change font size of output: a <style> block for td/th cells is emitted as
# Markdown, followed by a one-cell dummy table.
# NOTE(review): the dummy table is presumably only there so the cell renders
# something visible - confirm before removing it.
_TABLE_FONT_STYLE = """<style>
td {
  font-size: 15px
}
th {
  font-size: 15px
}
</style>\n|Table|\n|--|\n|T|"""
display(Markdown(_TABLE_FONT_STYLE))

In [None]:
from IPython.display import display, HTML
# Output area that receives everything the click handler displays.
output = widgets.Output(width="60%")


def _response_record(selection):
    """Read the current values of one response's dropdowns into a flat record."""
    return {"URL": "", "Status-Code": selection["code"].value, "body": selection["body"].value,
            "Content-Type": selection["ct"].value, "X-Content-Type-Options": selection["xcto"].value,
            "X-Frame-Options": selection["xfo"].value, "Content-Disposition": selection["cd"].value,
            "Location": selection["loc"].value, "Cross-Origin-Opener-Policy": selection["coop"].value,
            "Cross-Origin-Resource-Policy": selection["corp"].value, "Content-Security-Policy": selection["csp"].value}


@output.capture()
def btn_eventhandler(obj, resp_dict, screenshot=True):
    """Click handler: predict how the two configured responses can be told apart.

    obj: the widget that triggered the event (unused).
    resp_dict: dict with the keys "one" and "two", each mapping the attribute
        keys (code, body, ct, xcto, xfo, cd, loc, coop, corp, csp) to the
        widget whose ``.value`` holds the current selection.
    screenshot: when True (the default, matching the previous hard-coded
        behaviour) the "events-fired-all" method is filtered out, the columns
        get presentation names and the table is appended to ``output``;
        when False the sorted raw predictions are displayed instead.

    The previous content of ``output`` is cleared on every call. If both
    responses are identical a hint is shown and no prediction is made.
    """
    output.clear_output()
    responses = [_response_record(resp_dict[key]) for key in ("one", "two")]
    if responses[0] == responses[1]:
        display("Responses are the same, please change at least one attribute!")
        return

    preds = predict_trees(pd.DataFrame(responses))
    if screenshot:
        preds = preds.loc[preds["method"] != "events-fired-all"]
    preds = preds.sort_values(["inc", "method", "browser"])
    if screenshot:
        preds = preds.rename(columns={"inc": "Inclusion method", "method": "Observation method", "browser": "Browser", "val-1": "Observation 1", "val-2": "Observation 2"})
        output.append_display_data(HTML(preds.to_html(index=False)))
    else:
        display(HTML(preds.to_html(index=False)))


# Widgets of the two configurable responses; the keys "one" and "two" are
# added once the dropdowns have been created.
resp_dict = dict()
# Style and layout settings shared by the cells of the configuration form.
table_style = dict(description_width='')
table_layout = dict(width='auto')


def create_header():
    """Create the label column of the configuration form.

    Returns a list of eleven Buttons used as plain labels: an untitled corner
    cell followed by one label per configurable response attribute, in the
    order code, body, ct, xcto, xfo, cd, loc, coop, corp, csp.
    """
    # Keyword arguments common to every label button.
    # NOTE(review): font_weight and grid_area are passed directly to Button -
    # confirm that they have an effect there.
    shared = dict(disabled=False, font_weight='bold', button_style="", tooltip="", icon="",
                  layout=table_layout, grid_area="header1")
    # Only the corner cell additionally receives the shared table style.
    corner = widgets.Button(description="", style=table_style, **shared)
    titles = [
        "Status-Code",
        "Body-Content",
        "Content-Type",
        "X-Content-Type-Options",
        "X-Frame-Options",
        "Content-Disposition",
        "Location",
        "Cross-Origin-Opener-Policy",
        "Cross-Origin-Resource-Policy",
        "Content-Security-Policy",
    ]
    return [corner] + [widgets.Button(description=title, **shared) for title in titles]

def create_responses(title):
    """Create the input column for one response.

    title: caption of the column; also passed on as grid_area to every widget.

    Returns a tuple ``(resp, resp_list)``: resp maps the attribute keys (code,
    body, ct, xcto, xfo, cd, loc, coop, corp, csp) to their Dropdown, and
    resp_list holds the caption Button followed by the same Dropdowns in that
    order.
    """
    shared = dict(layout=table_layout, style=table_style, grid_area=title)
    resp_title = widgets.Button(description=title, font_weight='bold', disabled=False,
                                button_style="", tooltip="", icon="", **shared)

    # (attribute key, selectable options, initially selected value)
    choices = [
        ("code", codes, 200),
        ("body", bodies, "empty"),
        ("ct", ct_ops, "empty"),
        ("xcto", xcto_ops, "empty"),
        ("xfo", xfo_ops, "empty"),
        ("cd", cd_ops, "empty"),
        ("loc", loc_ops, "empty"),
        ("coop", coop_ops, "empty"),
        ("corp", corp_ops, "empty"),
        ("csp", csp_ops, "empty"),
    ]
    resp = {}
    for key, options, initial in choices:
        resp[key] = widgets.Dropdown(options=options, value=initial, **shared)

    resp_list = [resp_title] + list(resp.values())
    return resp, resp_list

# Build the label column and the two response columns of the form.
header_list = create_header()
resp_dict["one"], resp1_list = create_responses("Response 1")
resp_dict["two"], resp2_list = create_responses("Response 2")

# Grid-area templates for a horizontal and a vertical arrangement of the form.
horizontal = '''
            "header header header header header header header header header header"
            "Response1 Response1 Response1 Response1 Response1 Response1 Response1 Response1 Response1 Response1"
            "Response2 Response2 Response2 Response2 Response2 Response2 Response2 Response2 Response2 Response2"
'''
vertical = '''
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"
            "header1 Response1 Response2"

'''

# Interleave the three columns so the children are ordered label, response 1,
# response 2 for each attribute, matching the three grid columns below.
grid = [cell for row in zip(header_list, resp1_list, resp2_list) for cell in row]

# One auto-sized grid row per attribute; derived from the label column so the
# row count cannot drift when attributes are added or removed.
row_template = " ".join(["auto"] * len(header_list))

# NOTE(review): grid_template_areas is passed to GridBox itself rather than to
# its Layout - confirm that it is applied.
conf = widgets.GridBox(grid, layout=widgets.Layout(width="100%", grid_template_rows=row_template, grid_template_columns="50% 25% 25%"),
                      grid_template_areas=vertical)
display(conf)

# The button runs the prediction for the current selection; results are shown
# in the output area displayed below it.
btn = widgets.Button(description='Distinguish!')
btn.on_click(functools.partial(btn_eventhandler, resp_dict=resp_dict))
display(btn)
display(output)