In [1]:
import glob
import json
from anytree.importer import JsonImporter
from anytree.exporter import UniqueDotExporter
from anytree import RenderTree, PreOrderIter
import pandas as pd

In [2]:
# TODO: Only paths with name/browser_id in them are interesting?
# Trees that are the same do not have to be analyzed
# ...

In [3]:
# Directory of one export run; it holds the anytree/, svg/ and obs/ subfolders.
base_dir = "trees/2023-11-23T13:48"

tree_files = []
svg_files = []
channel_files = []
# glob returns matches in arbitrary, filesystem-dependent order. Sorting keeps
# everything derived from the load order (tree numbering, the representative
# tree stored per fingerprint) reproducible across runs and machines.
# A separate loop variable is used so that `base_dir` is not rebound here.
for data_dir in [base_dir]:
    tree_files += sorted(glob.glob(f"{data_dir}/anytree/*"))
    svg_files += sorted(glob.glob(f"{data_dir}/svg/*"))
    channel_files += sorted(glob.glob(f"{data_dir}/obs/*"))
    print(len(svg_files))

# List of (file name, imported anytree root) pairs.
tree_list = []
for tree_file in tree_files:
    with open(tree_file, "r", encoding="utf-8") as tree_fh:
        # The exported "path" attribute is renamed to "path2" so that it does
        # not collide with the node's own `path` (the root-to-node chain that
        # create_path iterates over).
        tree_txt = tree_fh.read().replace('"path":', '"path2":')
    tree_name = tree_file.split("anytree/")[1]
    tree_list.append((tree_name, JsonImporter().import_(tree_txt)))

35


In [4]:
def create_path(leaf):
    """Condense the root-to-leaf path of ``leaf`` into one value per property.

    Every node on ``leaf.path`` carries a ``path2`` label of the form
    ``"<property>:<value>"``. The ``root`` entry is skipped. When a property
    occurs several times along the path, the shortest value string is kept
    (the first one wins on equal length).

    Returns a tuple ``(items, pred)`` where ``items`` is the sorted list of
    ``(property, value)`` pairs and ``pred`` is ``leaf.pred``.
    """
    prediction = leaf.pred
    shortest_value = {}
    for step in leaf.path:
        feature, feature_value = step.path2.split(":", maxsplit=1)
        if feature == "root":
            continue
        known = shortest_value.get(feature)
        # Only replace an existing entry by a strictly shorter value.
        if known is None or len(feature_value) < len(known):
            shortest_value[feature] = feature_value
    return sorted(shortest_value.items()), prediction
          

In [5]:
# paths_dict: JSON-encoded leaf path  -> trees it occurs in, predictions, original path
# trees_dict: JSON-encoded fingerprint -> trees sharing it, their paths, one example tree
# nodes_dict: node label               -> trees containing a node with that label
paths_dict = {}
trees_dict = {}
nodes_dict = {}
all_paths = 0
for tree_name, tree in tree_list:
    paths = [create_path(leaf) for leaf in tree.leaves]

    # Each distinct node label is counted once per tree.
    nodes = set(node.path2 for node in PreOrderIter(tree))
    for node in nodes:
        nodes_dict.setdefault(node, {"tree_names": []})["tree_names"].append(tree_name)

    # Have a more relaxed fingerprint? (Ignore some of the paths or something like that?)
    # ...
    # Two trees are treated as the same when their sorted leaf paths are equal
    # (predictions are not part of the fingerprint).
    path_fingerprint = sorted(path for path, _ in paths)
    fingerprint_key = json.dumps(path_fingerprint)
    tree_entry = trees_dict.setdefault(fingerprint_key, {"tree_names": [], "org_paths": [], "tree": tree})
    tree_entry["tree_names"].append(tree_name)
    tree_entry["org_paths"].append(paths)

    all_paths += len(paths)
    for path, pred in paths:
        path_string = json.dumps(path)
        path_entry = paths_dict.setdefault(path_string, {"tree_names": [], "preds": [], "org_path": path})
        path_entry["tree_names"].append(tree_name)
        path_entry["preds"].append(pred)


# One row per tree file: test name, observation number, tree number.
df = pd.DataFrame(columns=["test", "obs_num", "tree_num"])

# Sort paths by how often they occur (most frequent first); the secondary sort key is the path length
class reversor:
    """Sort-key wrapper that inverts the ordering of the wrapped value.

    Wrapping one component of a tuple sort key makes that component sort in
    descending order while the remaining components keep sorting ascending.
    """

    def __init__(self, obj):
        self.obj = obj

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on the missing ``obj`` attribute.
        if not isinstance(other, reversor):
            return NotImplemented
        return other.obj == self.obj

    def __lt__(self, other):
        if not isinstance(other, reversor):
            return NotImplemented
        # Operands are swapped on purpose: larger wrapped values sort first.
        return other.obj < self.obj
# Most frequent paths first; ties are broken by the shorter path.
paths_dict = dict(sorted(paths_dict.items(), key=lambda item: (reversor(len(item[1]["preds"])), len(item[1]["org_path"]))))
# Fingerprints shared by the most trees first. The position in this order is
# the tree number used everywhere below.
trees_dict = dict(sorted(trees_dict.items(), key=lambda item: reversor(len(item[1]["tree_names"]))))

enum_trees_dict = list(enumerate(trees_dict.items()))

# Collect all rows first and build the frame once; growing a DataFrame with
# df.loc[len(df)] copies the data on every insertion. This replaces the empty
# placeholder frame created earlier in this cell.
records = []
for num, (fp, vals) in enum_trees_dict:
    for tree_file_name in vals["tree_names"]:
        # File names look like "<test tuple>)<obs_num>.json". Splitting on the
        # LAST ")" keeps test names that themselves contain ")" intact.
        test, obs_num = tree_file_name.rsplit(")", 1)
        records.append((test + ")", obs_num.split(".json")[0], num))
df = pd.DataFrame(records, columns=["test", "obs_num", "tree_num"])

print(f"All paths: {all_paths}, all unique paths: {len(paths_dict.keys())}")
print(f"All trees: {len(tree_list)}, all unique trees: {len(trees_dict.keys())}, all svg trees: {len(svg_files)}, all channels: {len(channel_files)}")
print(f"All unique nodes: {len(nodes_dict.keys())}")

print("Keep in mind that binary outcomes result in one tree, and non-binary outcomes result in num-outcomes trees. So, there should be more trees than svg trees.")

All paths: 924, all unique paths: 641
All trees: 127, all unique trees: 95, all svg trees: 35, all channels: 35
All unique nodes: 221
Keep in mind that binary outcomes result in one tree, and non-binary outcomes result in num-outcomes trees. So, there should be more trees than svg trees.


In [6]:
def to_list(string):
    """Split the string form of an array, e.g. ``"[2 3 5]"``, into its tokens.

    The first and last character (the brackets) are dropped and the rest is
    split on whitespace, so the result is a list of strings.
    """
    inner = string[1:-1]
    tokens = inner.split()
    return tokens

# Show full tables: up to 137 rows and untruncated cell contents.
with pd.option_context("display.max_rows", 137, "display.max_colwidth", None):
    # Step 1: for every test, the set of tree numbers it produced.
    trees_per_test = df.groupby("test")["tree_num"].unique().to_frame().reset_index()
    display(trees_per_test)

    # Step 2: group tests that produced exactly the same set of trees. Arrays
    # are not hashable, so their string form serves as the grouping key.
    trees_per_test["tree_num_str"] = trees_per_test["tree_num"].apply(str)
    tests_per_tree_set = trees_per_test.groupby("tree_num_str")["test"].unique().to_frame().reset_index()
    tests_per_tree_set["tree_num"] = tests_per_tree_set["tree_num_str"].apply(to_list)
    tests_per_tree_set["len"] = tests_per_tree_set["tree_num"].str.len()
    tests_per_tree_set["tree_num_str_len"] = tests_per_tree_set["tree_num_str"].str.len()

    # Smallest tree sets first, then shorter keys, then the key text itself.
    sort_keys = ["len", "tree_num_str_len", "tree_num_str"]
    groups = tests_per_tree_set.sort_values(sort_keys)[["tree_num", "test"]].reset_index(drop=True)
    display(groups)

Unnamed: 0,test,tree_num
0,"('accesswindow_direct', 'direct')",[15]
1,"('fetch_GET', 'credendials')","[2, 3, 5, 6, 7, 8]"
2,"('fetch_GET', 'custom_headers')","[2, 3, 5, 6, 7, 8]"
3,"('fetch_GET', 'simple')","[2, 3, 5, 6, 7, 50, 67]"
4,"('fetch_TEST', 'custom_method')","[2, 3, 5, 6, 7, 8]"
5,"('framing_embed', 'direct')",[9]
6,"('framing_embed', 'nested')",[1]
7,"('framing_embed', 'sandbox')",[1]
8,"('framing_iframe', 'direct')",[44]
9,"('framing_iframe', 'nested')",[48]


Unnamed: 0,tree_num,test
0,[0],"[('fullscreen_iframe', 'child_sandbox')]"
1,[1],"[('framing_embed', 'nested'), ('framing_embed', 'sandbox'), ('framing_object', 'nested'), ('framing_object', 'sandbox')]"
2,[9],"[('framing_embed', 'direct'), ('framing_object', 'direct')]"
3,[15],"[('accesswindow_direct', 'direct')]"
4,[19],"[('script_execution_iframe', 'sandbox')]"
5,[21],"[('upgradeHSTS_subdomain', 'subdomain')]"
6,[44],"[('framing_iframe', 'direct')]"
7,[48],"[('framing_iframe', 'nested')]"
8,[57],"[('framing_iframe', 'sandbox')]"
9,[69],"[('upgradeHSTS_direct', 'direct')]"


In [7]:
def render(node):
    """Return the label used for ``node`` when a tree is printed.

    Inner nodes are shown by their ``path2`` label. Leaves are prefixed with
    their prediction, colored with ANSI escape codes: red when the prediction
    is below 0.5, green otherwise.
    """
    if not node.is_leaf:
        return node.path2
    ansi_color = "31" if node.pred < 0.5 else "32"
    return f"\x1b[{ansi_color}m{node.pred}\x1b[0m-{node.path2}"

# Print, for every group of tests, the trees that were not already shown for
# the directly preceding group. (The interactive viewer further down compares
# against all earlier groups instead.)
last_trees = []
for _, row in groups.iterrows():
    current_trees = row["tree_num"]
    tests = row["test"]
    print(f"Current Tests: {tests}, current trees: {current_trees}")
    # Tree numbers are strings here; sort them numerically so that e.g. "50"
    # does not come before "6".
    unique_trees = sorted(set(current_trees) - set(last_trees), key=int)
    for tree_num in unique_trees:
        # enum_trees_dict entries are (num, (fingerprint, vals)).
        print(RenderTree(enum_trees_dict[int(tree_num)][1][1]["tree"]).by_attr(render))
    example_tree = tests[0]
    last_trees = current_trees
    print("\n")


Current Tests: ["('fullscreen_iframe', 'child_sandbox')"], current trees: ['0']
root:root
├── [31m0.0[0m-status_code:['302']
└── [32m0.5[0m-status_code:['200']


Current Tests: ["('framing_embed', 'nested')" "('framing_embed', 'sandbox')"
 "('framing_object', 'nested')" "('framing_object', 'sandbox')"], current trees: ['1']
root:root
├── raw_header:['[[\'Content-Security-Policy\', "frame-ancestors \'none\'"], [\'X-Frame-Options\', \'INVALID\']]', '[[\'Content-Security-Policy\', "frame-ancestors \'none\'"], [\'X-Frame-Options\', \'SAMEORIGIN\']]', '[[\'Content-Security-Policy\', "frame-ancestors \'none\'"], [\'location\', \'https://sub.headers.websec.saarland/_hp/common/empty.html\']]', '[[\'Content-Security-Policy\', "frame-ancestors \'none\'"]]', '[[\'Content-Security-Policy\', "frame-ancestors \'none\', frame-ancestors *, frame-ancestors \'self\'"]]', '[[\'Content-Security-Policy\', "frame-ancestors \'self\'"]]', "[['Content-Security-Policy', '*'], ['X-Frame-Options', 'SAMEORIGIN

In [9]:
import asyncio

import ipywidgets as widgets
from IPython.display import SVG



# Output area the interactive viewer writes into; wide enough for the SVG trees.
out = widgets.Output(layout=widgets.Layout(width='1800px'))


def wait_for_click(btn):
    """Return a future that completes on the next click of ``btn``.

    The future's result is the description of the clicked button. The click
    handler registered here is one-shot: it unregisters itself when it fires.
    Without that, handlers from earlier calls would stay attached and try to
    complete their already finished futures on every later click, which
    raises ``asyncio.InvalidStateError``.
    """
    future = asyncio.Future()

    def on_button_clicked(clicked_btn):
        btn.on_click(on_button_clicked, remove=True)
        # Guard against the future having been completed or cancelled elsewhere.
        if not future.done():
            future.set_result(clicked_btn.description)

    btn.on_click(on_button_clicked)

    return future

btn = widgets.Button(description="Next Test")


async def f():
    """Step through the test groups, one per click on ``btn``.

    For each row of ``groups`` this writes the tests, their tree numbers, the
    trees not shown for any earlier row, and the SVG of the first test into
    ``out``. It then waits for a click and clears the output before moving on.
    The name of the test whose SVG is shown is stored in the module-level
    ``example_tree``.
    """
    global example_tree
    all_trees = set()
    for i, row in groups.iterrows():
        current_trees = row["tree_num"]
        tests = row["test"]
        out.append_stdout(f"{i+1}/{len(groups)}; Current tests: {tests}, \n current trees: {current_trees}\n")
        # Tree numbers are strings; sort them numerically, not lexicographically.
        unique_trees = sorted(set(current_trees) - all_trees, key=int)
        out.append_stdout(f"Trees different to any previous test: {unique_trees}\n")
        for tree_num in unique_trees:
            # enum_trees_dict entries are (num, (fingerprint, vals)).
            out.append_stdout(RenderTree(enum_trees_dict[int(tree_num)][1][1]["tree"]).by_attr(render))
            out.append_stdout("\n")
        example_tree = tests[0]
        out.append_stdout("\n")
        tree_name = f"{base_dir}/svg/{example_tree}.svg"
        out.append_stdout(tree_name)
        # Pass the path as `filename` so a missing file is reported as such
        # instead of the path being interpreted as SVG markup.
        out.append_display_data(SVG(filename=tree_name))
        all_trees.update(current_trees)
        await wait_for_click(btn)
        out.outputs = ()
        
# Keep a reference to the task: the event loop only holds weak references to
# tasks, so an unreferenced one may be garbage-collected while it is still
# waiting for the next click.
viewer_task = asyncio.ensure_future(f())

display(btn, out)

Button(description='Next Test', style=ButtonStyle())

Output(layout=Layout(width='1800px'))

## Path analysis?!

In [16]:
# Number of distinct leaf paths (as produced by create_path) over all trees.
len(paths_dict)

641

In [21]:
# Keep the paths whose JSON key mentions "browser". This is a plain substring
# test on the encoded path, so it matches property names and values alike.
paths_with_browser = {
    path: vals
    for path, vals in paths_dict.items()
    if "browser" in path
}
len(paths_with_browser)

419

In [26]:
# Number of distinct tree fingerprints (sorted leaf paths) over all trees.
len(trees_dict)

95

In [27]:
# Keep the trees whose JSON fingerprint mentions "browser" anywhere (plain
# substring test on the encoded fingerprint).
trees_with_browser = {
    fingerprint: vals
    for fingerprint, vals in trees_dict.items()
    if "browser" in fingerprint
}

len(trees_with_browser)

71

In [36]:
# Dump the example tree of every browser-related fingerprint, using the
# default node representation, separated by blank lines.
for browser_entry in trees_with_browser.values():
    print(RenderTree(browser_entry["tree"]))
    print("\n\n\n\n")

AnyNode(path2='root:root', split='raw_header')
├── AnyNode(path2='raw_header:[\'[[\\\'Content-Security-Policy\\\', "frame-ancestors \\\'none\\\'"], [\\\'X-Frame-Options\\\', \\\'INVALID\\\']]\', \'[[\\\'Content-Security-Policy\\\', "frame-ancestors \\\'none\\\'"], [\\\'X-Frame-Options\\\', \\\'SAMEORIGIN\\\']]\', \'[[\\\'Content-Security-Policy\\\', "frame-ancestors \\\'none\\\'"], [\\\'location\\\', \\\'https://sub.headers.websec.saarland/_hp/common/empty.html\\\']]\', \'[[\\\'Content-Security-Policy\\\', "frame-ancestors \\\'none\\\'"]]\', \'[[\\\'Content-Security-Policy\\\', "frame-ancestors \\\'none\\\', frame-ancestors *, frame-ancestors \\\'self\\\'"]]\', \'[[\\\'Content-Security-Policy\\\', "frame-ancestors \\\'self\\\'"]]\', "[[\'Content-Security-Policy\', \'*\'], [\'X-Frame-Options\', \'SAMEORIGIN\']]", "[[\'Content-Security-Policy\', \'default-src *\']]", "[[\'Content-Security-Policy\', \'frame-ancestors *\'], [\'location\', \'https://sub.headers.websec.saarland/_hp/common/em