In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
import pandas as pd
import re
import json
import h2o

from datetime import datetime

from utils import get_data, Config, clean_url, make_clickable, add_columns

from tree import make_tree

In [16]:
# Fetch every stored result together with its response and browser metadata.
# Rows belonging to the 'Unknown' browser (manual testing) are left out.
initial_data = """
SELECT "Result".*, 
"Response".raw_header, "Response".status_code, "Response".label, "Response".resp_type,
"Browser".name, "Browser".version, "Browser".headless_mode, "Browser".os, "Browser".automation_mode, "Browser".add_info
FROM "Result"
JOIN "Response" ON "Result".response_id = "Response".id JOIN "Browser" ON "Result".browser_id = "Browser".id
WHERE "Browser".name != 'Unknown';
"""
# Run the query and post-process the result in one step.
# NOTE(review): add_columns presumably derives helper columns used below
# (e.g. browser, clean_url) - confirm in utils.
df = add_columns(get_data(Config(), initial_data))

Connecting to the PostgreSQL database...
Connection successful


## Overview

In [17]:
# In total 3 runs + a couple of "repeat"-mode runs
# Lots of failures in xvfb mode!
# Number of result rows per browser configuration, largest first.
browser_config_columns = ["automation_mode", "browser_id", "name", "version", "os", "headless_mode"]
(
    df.groupby(browser_config_columns)["id"]
    .count()
    .sort_values(ascending=False)
    .to_frame()
    .reset_index()
)

Unnamed: 0,automation_mode,browser_id,name,version,os,headless_mode,id
0,selenium,14,firefox,119,Ubuntu 22.04,headless,506810
1,selenium,13,chrome,119,Ubuntu 22.04,headless-new,506394
2,selenium,15,edge,119,Ubuntu 22.04,headless-new,506394
3,selenium,17,brave,1.60.114 (119.0.6045.124),Ubuntu 22.04,headless-new,506357
4,selenium,20,edge,119,Ubuntu 22.04,xvfb,438937
5,selenium,19,firefox,119,Ubuntu 22.04,xvfb,412951
6,selenium,18,chrome,119,Ubuntu 22.04,xvfb,403415
7,selenium,22,brave,1.60.114 (119.0.6045.124),Ubuntu 22.04,xvfb,334811


## Error analysis

In [18]:
# Status codes: Fail (1), Timeout (2), Not-run (3)
# Keep only the browsers that produced at least two distinct test_status values.
filtered_df = df.groupby(["browser"])["test_status"].filter(lambda x: x.nunique() >= 2)
mixed_status_rows = df.loc[df.index.isin(filtered_df.index), ["browser", "test_status"]]
display(
    mixed_status_rows.groupby(["browser"])["test_status"]
    .value_counts()
    .to_frame()
)

# Same counts, but organised by status first and browser second.
df.groupby("test_status")["browser"].value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
browser,test_status,Unnamed: 2_level_1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,0,506068
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,1,288
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,2,1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,0,334679
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,1,132
chrome Ubuntu 22.04 119 selenium headless-new,0,506105
chrome Ubuntu 22.04 119 selenium headless-new,1,288
chrome Ubuntu 22.04 119 selenium headless-new,2,1
chrome Ubuntu 22.04 119 selenium xvfb,0,403225
chrome Ubuntu 22.04 119 selenium xvfb,1,188


Unnamed: 0_level_0,Unnamed: 1_level_0,count
test_status,browser,Unnamed: 2_level_1
0,chrome Ubuntu 22.04 119 selenium headless-new,506105
0,edge Ubuntu 22.04 119 selenium headless-new,506104
0,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,506068
0,firefox Ubuntu 22.04 119 selenium headless,505662
0,edge Ubuntu 22.04 119 selenium xvfb,438669
0,firefox Ubuntu 22.04 119 selenium xvfb,411975
0,chrome Ubuntu 22.04 119 selenium xvfb,403225
0,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,334679
1,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,288
1,chrome Ubuntu 22.04 119 selenium headless-new,288


In [19]:
# Teststatus == 1 (fail)


def _decode_raw_headers(raw_headers):
    """Turn a raw_header column (bytes-like JSON documents) into the str() form of the parsed JSON."""
    return raw_headers.apply(bytes).apply(lambda x: json.loads(x.decode("utf-8"))).astype(str)


def _browsers_by_value_set(rows, values_of, show_counts=False):
    """Group browsers by the exact set of unique values they produced.

    Args:
        rows: DataFrame with a "browser" column.
        values_of: callable that receives the rows of one browser and returns a Series.
        show_counts: if True, print each browser with its number of unique values.

    Returns:
        dict mapping frozenset(unique values) -> set of browsers that share exactly this set.
    """
    browsers_by_values = {}
    for browser, browser_rows in rows.groupby("browser"):
        unique_values = frozenset(values_of(browser_rows).unique())
        if show_counts:
            print(browser, len(unique_values))
        # setdefault replaces the separate "visited" bookkeeping: the dict keys are the visited sets
        browsers_by_values.setdefault(unique_values, set()).add(browser)
    return browsers_by_values


failed_rows = df.loc[df["test_status"] == 1]

# All browsers have it
display(failed_rows["browser"].value_counts().to_frame())

# Only for upgradeHSTS_direct and upgradeHSTS_subdomain, they always result in test status 1 in the same browser!
display(df.loc[df["full_url"].isin(failed_rows["full_url"])].groupby(["test_name", "response_id"])["test_status"].value_counts().to_frame().head())

# The responses that cause it all have \x00 in them!
offending_headers = _browsers_by_value_set(failed_rows, lambda rows: _decode_raw_headers(rows["raw_header"]))
for key, value in offending_headers.items():
    print(f"{value}:\n{key}\n\n")

# The offending URLs (without timeout is from the repeat run)
# Firefox has none without timeout as these tests failed for all browsers, thus create_repeat.py did not create repeat runs for Firefox!
offending_urls = _browsers_by_value_set(failed_rows, lambda rows: rows["clean_url"], show_counts=True)
for key, value in offending_urls.items():
    print(f"{value}:\n{key}\n\n")

# 9 URLs failed in all browsers, 9 only failed in Chromium-based browsers (~~18x in clean_url as we have one for repeat and one without~~ we now remove timeout in clean_url)
# Firefox only fails if \x00 is in the value, if it is in the header names it does not care?!
# (manual test: Safari seems to fail for HTTP only, does not fail if the response is received via HTTPS?, in addition Safari also only cares about \x00 in values and not in header names)

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
chrome Ubuntu 22.04 119 selenium headless-new,288
edge Ubuntu 22.04 119 selenium headless-new,288
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,288
edge Ubuntu 22.04 119 selenium xvfb,264
chrome Ubuntu 22.04 119 selenium xvfb,188
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,132
firefox Ubuntu 22.04 119 selenium headless,108
firefox Ubuntu 22.04 119 selenium xvfb,72


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
test_name,response_id,test_status,Unnamed: 3_level_1
upgradeHSTS_direct,29713,1,52
upgradeHSTS_direct,29745,1,52
upgradeHSTS_direct,29816,1,54
upgradeHSTS_direct,30133,1,52
upgradeHSTS_direct,30165,1,52


{'chrome Ubuntu 22.04 119 selenium headless-new', 'brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb', 'edge Ubuntu 22.04 119 selenium xvfb', 'edge Ubuntu 22.04 119 selenium headless-new', 'brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new'}:
frozenset({"[['strict-transport-security', '\\x00max-age=60']]", "[['strict-transport-security', 'max-age=60\\x00']]", "[['strict-transport-security', 'max-age=0\\x00']]", "[['\\x00strict-transport-security', 'max-age=60']]", "[['strict-transport-security\\x00', 'max-age=60']]", "[['\\x00strict-transport-security', 'max-age=0']]", "[['strict-transport-security', '\\x00max-age=0']]", "[['strict-transport-security', 'max-\\x00age=0']]", "[['strict-trans\\x00port-security', 'max-age=20; includeSubDomains']]", "[['strict-transport-security', 'max-a\\x00ge=60']]", "[['strict-trans\\x00port-security', 'max-age=60']]", "[['strict-transport-security\\x00', 'max-age=0']]", "[['strict-trans\\x00port-security', 'max-age=0']]", "[['\

In [20]:
# Teststatus == 2 (timeout)
timeout_mask = df["test_status"] == 2
fetch_timeout_rows = df.loc[timeout_mask & df["test_name"].isin(["fetch_GET", "fetch_TEST"])]

# Firefox a lot, others rarely
display(df[timeout_mask]["browser"].value_counts().to_frame())

# fetch_GET|TEST always timeout, others only sometimes (accidental timeout in other browsers!), upgradeHSTS (see below with teststatus==3)
timeout_urls = df.loc[timeout_mask, "full_url"]
display(
    df.loc[df["full_url"].isin(timeout_urls)]
    .groupby(["test_name"])["test_status"]
    .value_counts()
    .to_frame()
)

# The responses (for fetch_GET|TEST) that cause it all have \n in them?
decoded_headers = fetch_timeout_rows["raw_header"].apply(bytes).apply(lambda x: json.loads(x.decode("utf-8")))
display(decoded_headers.astype(str).unique())

# The offending URLs (without timeout is from the repeat run)
display(fetch_timeout_rows["clean_url"].unique())


# Firefox times out if \n in header?
# (Safari only times out for some of the tests, for the others fetch fails. Chrome always fetch fails?!)

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
firefox Ubuntu 22.04 119 selenium headless,860
firefox Ubuntu 22.04 119 selenium xvfb,760
edge Ubuntu 22.04 119 selenium xvfb,4
edge Ubuntu 22.04 119 selenium headless-new,2
chrome Ubuntu 22.04 119 selenium xvfb,2
chrome Ubuntu 22.04 119 selenium headless-new,1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,1


Unnamed: 0_level_0,Unnamed: 1_level_0,count
test_name,test_status,Unnamed: 2_level_1
fetch_GET,2,1134
fetch_TEST,2,378
framing_iframe,2,2
framing_iframe,0,2
fullscreen_iframe,0,23
fullscreen_iframe,2,5
oac_window.open,0,2
oac_window.open,2,1
perfAPI_img,0,2
perfAPI_img,2,2


array(["[['access-control-allow-credentials', 'true\\n'], ['Test', 'Test'], ['access-control-allow-origin', 'https://sub.headers.websec.saarland'], ['access-control-allow-methods', 'TEST'], ['access-control-allow-headers', 'Test'], ['access-control-expose-headers', 'Test']]",
       "[['\\naccess-control-allow-origin', '*'], ['Test', 'Test'], ['access-control-allow-credentials', 'true'], ['access-control-allow-methods', 'TEST'], ['access-control-allow-headers', 'Test'], ['access-control-expose-headers', 'Test']]",
       "[['access-control-allow-origin', 'https://sub.headers.websec.saarland\\n'], ['Test', 'Test'], ['access-control-allow-credentials', 'true'], ['access-control-allow-methods', 'TEST'], ['access-control-allow-headers', 'Test'], ['access-control-expose-headers', 'Test']]",
       "[['access-control-allow-headers', '*\\n'], ['Test', 'Test'], ['access-control-allow-origin', 'https://sub.headers.websec.saarland'], ['access-control-allow-credentials', 'true'], ['access-control

array(['http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAC&first_id=10759&last_id=10759&scheme=http',
       'https://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAC&first_id=10759&last_id=10759&scheme=https',
       'http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAO&first_id=8028&last_id=8028&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAO&first_id=9689&last_id=9689&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAH&first_id=13505&last_id=13505&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACEH&first_id=14593&last_id=14593&scheme=http',
       'https://sub.head

In [21]:
# Teststatus == 3 (not run)
not_run_rows = df.loc[df["test_status"] == 3]

# Only Firefox
display(not_run_rows["browser"].value_counts().to_frame())

# Only for upgradeHSTS_direct and upgradeHSTS_subdomain, the latter always is not run, the former is sometimes 2 (timeout)
rows_sharing_url = df.loc[df["full_url"].isin(not_run_rows["full_url"])]
display(
    rows_sharing_url.groupby(["test_name", "response_id"])["test_status"]
    .value_counts()
    .to_frame()
)

# The responses that cause it all have \n in them?
decoded_headers = not_run_rows["raw_header"].apply(bytes).apply(lambda x: json.loads(x.decode("utf-8")))
display(decoded_headers.astype(str).unique())

# The offending URLs (without timeout is from the repeat run)
display(not_run_rows["clean_url"].unique())

# Reason: if there is a newline in the response the fetch call will timeout in Firefox.
# As we implemented the testcases as promise tests, only the first test executes and all the others will always timeout.
# Chromium-based browsers do not time out and instead parse the fetch until the newline only and stop there. (same for safari, safari has loading icon in devtools however)

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
firefox Ubuntu 22.04 119 selenium headless,180
firefox Ubuntu 22.04 119 selenium xvfb,144


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
test_name,response_id,test_status,Unnamed: 3_level_1
upgradeHSTS_direct,29885,3,18
upgradeHSTS_direct,29885,2,18
upgradeHSTS_direct,30305,2,18
upgradeHSTS_direct,30305,3,18
upgradeHSTS_direct,30725,3,18
upgradeHSTS_direct,30725,2,18
upgradeHSTS_direct,31838,3,18
upgradeHSTS_direct,31838,2,18
upgradeHSTS_direct,32324,3,18
upgradeHSTS_direct,32324,2,18


array(["[['\\nstrict-transport-security', 'max-age=60']]",
       "[['\\nstrict-transport-security', 'max-age=0']]",
       "[['\\nstrict-transport-security', 'max-age=20; includeSubDomains']]",
       "[['strict-transport-security', 'max-age=0\\n']]",
       "[['strict-transport-security', 'max-age=20; includeSubDomains\\n']]",
       "[['strict-transport-security', 'max-age=60\\n']]"], dtype=object)

array(['http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=30305&last_id=30305&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=29885&last_id=29885&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=30725&last_id=30725&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=31838&last_id=31838&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=32695&last_id=32695&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=32324&last_id=32324&scheme=http'],
      dtype=object)

## General info/Test statistics

In [31]:
# Per browser: non-null entries per column among the distinct combinations of the key columns.
test_key_columns = [
    "browser",
    "test_name",
    "relation_info",
    "response_id",
    "org_scheme",
    "org_host",
    "resp_scheme",
    "resp_host",
]
df[test_key_columns].drop_duplicates().groupby("browser").count()

Unnamed: 0_level_0,test_name,relation_info,response_id,org_scheme,org_host,resp_scheme,resp_host
browser,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,168774,168774,168774,168774,168774,168774,168774
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,168774,168774,168774,168774,168774,168774,168774
chrome Ubuntu 22.04 119 selenium headless-new,168774,168774,168774,168774,168774,168774,168774
chrome Ubuntu 22.04 119 selenium xvfb,168770,168770,168770,168770,168770,168770,168770
edge Ubuntu 22.04 119 selenium headless-new,168774,168774,168774,168774,168774,168774,168774
edge Ubuntu 22.04 119 selenium xvfb,168774,168774,168774,168774,168774,168774,168774
firefox Ubuntu 22.04 119 selenium headless,168768,168756,168768,168768,168768,168756,168756
firefox Ubuntu 22.04 119 selenium xvfb,168768,168756,168768,168768,168768,168756,168756


In [22]:
# Number of distinct clean_url values seen by each browser configuration
(
    df.groupby("browser")["clean_url"]
    .nunique()
)

browser
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new    85128
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb            85128
chrome Ubuntu 22.04 119 selenium headless-new                         85128
chrome Ubuntu 22.04 119 selenium xvfb                                 85127
edge Ubuntu 22.04 119 selenium headless-new                           85128
edge Ubuntu 22.04 119 selenium xvfb                                   85128
firefox Ubuntu 22.04 119 selenium headless                            85128
firefox Ubuntu 22.04 119 selenium xvfb                                85128
Name: clean_url, dtype: int64

In [23]:
# Number of distinct response_id values seen by each browser configuration
(
    df.groupby("browser")["response_id"]
    .nunique()
)

browser
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new    44226
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb            44226
chrome Ubuntu 22.04 119 selenium headless-new                         44226
chrome Ubuntu 22.04 119 selenium xvfb                                 44225
edge Ubuntu 22.04 119 selenium headless-new                           44226
edge Ubuntu 22.04 119 selenium xvfb                                   44226
firefox Ubuntu 22.04 119 selenium headless                            44226
firefox Ubuntu 22.04 119 selenium xvfb                                44226
Name: response_id, dtype: int64

In [25]:
# Number of distinct response_id values for every (test_name, relation_info) combination
test_variant_columns = ["test_name", "relation_info"]
df.groupby(test_variant_columns)["response_id"].nunique()

test_name                   relation_info 
accesswindow_direct         direct            2656
fetch_GET                   credentials       8059
                            custom_headers    8059
                            simple            8059
fetch_TEST                  custom_method     8059
framing_iframe              direct            8780
                            nested            8780
fullscreen_iframe           child_allow       3880
                            direct            3880
imgloading_iframe           direct            2710
oac_window.open             window.open       1699
perfAPI_img                 direct            1601
referrer_iframe             iframe            2797
script_execution_iframe     direct            2726
subresourceloadingCOEP_img  direct            2637
subresourceloadingCORP_img  direct            3357
upgradeHSTS_direct          direct            3324
upgradeHSTS_subdomain       subdomain         3324
Name: response_id, dtype: int64

## Stability

- Apart from a handful of timeouts (~10/1M?), only a single test had a different outcome across the three test repetitions. Very stable!

In [34]:
# Keep the unfiltered frame in df_org and continue with test_status == 0 rows only
# (the statuses 1/2/3 are fail/timeout/not-run; 0 is presumably "ok" - confirm).
# Re-running this cell must not replace the backup with the already filtered frame,
# so df_org is only (re)assigned if no backup exists yet or df still holds non-zero statuses
# (i.e. it was freshly loaded).
if "df_org" not in globals() or (df["test_status"] != 0).any():
    df_org = df
df = df.loc[df["test_status"] == 0]

In [35]:
# Stability across repetitions: per OS, every test (key columns + response_id) should give the
# same outcome_str each time it ran in the same browser configuration.
for _, os_group in df.groupby("os"):
    stab = os_group.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "browser"])[["outcome_str"]].agg(["count", "nunique"])
    # Tests with more than one outcome
    diff_outcomes = stab.loc[stab[("outcome_str", "nunique")] != 1]
    print(f"{len(diff_outcomes)} tests have different outcomes!")
    diff_flat = diff_outcomes.reset_index()
    display(diff_flat["browser"].value_counts().to_frame())
    display(diff_flat[["test_name", "browser"]].value_counts().to_frame())

    # Collect the affected response ids for every test/browser combination
    diff_with_responses = diff_flat.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "browser"])["response_id"].agg(list).to_frame().reset_index()

    # The row index is not needed; `_` avoids shadowing the builtin `id`
    for _, (test_name, org_scheme, org_host, resp_scheme, resp_host, relation_info, browser, response_ids) in diff_with_responses.iterrows():
        # TODO: shortcut, only show the first result for each test_name/browser; results are usually always the same!
        print(test_name, response_ids)
        # Select from the current OS group and match on every grouping key (including org_host),
        # so the displayed rows are exactly the ones that were compared above.
        rows = os_group.loc[
            (os_group["test_name"] == test_name)
            & (os_group["org_scheme"] == org_scheme)
            & (os_group["org_host"] == org_host)
            & (os_group["resp_scheme"] == resp_scheme)
            & (os_group["resp_host"] == resp_host)
            & (os_group["relation_info"] == relation_info)
            & (os_group["response_id"].isin(response_ids))
            & (os_group["browser"] == browser)
        ]
        with pd.option_context("display.max_colwidth", 200):
            display(rows[["outcome_str", "name"]].value_counts().to_frame())
            # One example row per distinct outcome, with a clickable test URL
            disp = rows.drop_duplicates(subset="outcome_str")[["browser", "outcome_str", "raw_header", "test_name", "org_scheme", "org_host", "resp_scheme", "relation_info", "resp_host", "full_url"]]
            disp = disp.style.format({'full_url': make_clickable})
            display(disp)

1 tests have different outcomes!


Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,1


Unnamed: 0_level_0,Unnamed: 1_level_0,count
"(test_name, )","(browser, )",Unnamed: 2_level_1
accesswindow_direct,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,1


accesswindow_direct [23242]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,name,Unnamed: 2_level_1
"{'window.open.opener': 'object ""[object Window]""'}",brave,2
{'window.open.opener': 'null'},brave,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
2721199,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,"{'window.open.opener': 'object ""[object Window]""'}",,accesswindow_direct,https,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=parsing&browser_id=1&label=COOP&first_id=23242&last_id=23242&scheme=https
3563981,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,{'window.open.opener': 'null'},,accesswindow_direct,https,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=parsing&browser_id=1&label=COOP&first_id=23242&last_id=23242&scheme=https


## Xvfb vs headless?! 
- Simply use the same code as for stability but use `name` (browser_name) instead of `browser`
- This currently works as we only have one os and version per desktop browser!

- Almost no differences! (total of 23/(168774*4))
- Some are due to timeouts in Xvfb (e.g., all of the fullscreen_iframe and framing_iframe differences seem to be timeouts in Xvfb) -> headless only is better?
- One systematic difference? accesswindow_direct (COOP): if the response triggers a download, Brave headful/Xvfb records a window reference (as long as the download popup is not closed), whereas headless records null; all other browsers record null as well? -> thus only testing headless should be okay? (responses with \n in headers trigger a download)

In [42]:
# Xvfb vs. headless: same check as the stability analysis, but grouped by browser `name`, so the
# different modes (headless / xvfb) of one browser are compared against each other.
for _, os_group in df.groupby("os"):
    stab = os_group.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "name"])[["outcome_str"]].agg(["count", "nunique"])
    # Tests with more than one outcome
    diff_outcomes = stab.loc[stab[("outcome_str", "nunique")] != 1]
    print(f"{len(diff_outcomes)} tests have different outcomes!")
    diff_flat = diff_outcomes.reset_index()
    display(diff_flat["name"].value_counts().to_frame())
    display(diff_flat[["test_name", "name"]].value_counts().to_frame())

    # Collect the affected response ids for every test/browser-name combination
    diff_with_responses = diff_flat.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "name"])["response_id"].agg(list).to_frame().reset_index()

    # The row index is not needed; `_` avoids shadowing the builtin `id`
    for _, (test_name, org_scheme, org_host, resp_scheme, resp_host, relation_info, browser_name, response_ids) in diff_with_responses.iterrows():
        # TODO: shortcut, only show the first result for each test_name/browser; results are usually always the same!
        print(test_name, response_ids)
        # Select from the current OS group and match on every grouping key (including org_host),
        # so the displayed rows are exactly the ones that were compared above.
        rows = os_group.loc[
            (os_group["test_name"] == test_name)
            & (os_group["org_scheme"] == org_scheme)
            & (os_group["org_host"] == org_host)
            & (os_group["resp_scheme"] == resp_scheme)
            & (os_group["resp_host"] == resp_host)
            & (os_group["relation_info"] == relation_info)
            & (os_group["response_id"].isin(response_ids))
            & (os_group["name"] == browser_name)
        ]
        with pd.option_context("display.max_colwidth", 200):
            display(rows[["outcome_str", "browser"]].value_counts().to_frame())
            # One example row per distinct outcome, with a clickable test URL
            disp = rows.drop_duplicates(subset="outcome_str")[["browser", "outcome_str", "raw_header", "test_name", "org_scheme", "org_host", "resp_scheme", "relation_info", "resp_host", "full_url"]]
            disp = disp.style.format({'full_url': make_clickable})
            display(disp)

23 tests have different outcomes!


Unnamed: 0_level_0,count
name,Unnamed: 1_level_1
chrome,12
brave,11


Unnamed: 0_level_0,Unnamed: 1_level_0,count
"(test_name, )","(name, )",Unnamed: 2_level_1
accesswindow_direct,brave,11
fullscreen_iframe,chrome,10
framing_iframe,chrome,2


accesswindow_direct [21640, 22059, 22478, 23242, 23606, 23990]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
{'window.open.opener': 'null'},brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,18
"{'window.open.opener': 'object ""[object Window]""'}",brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,9


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
21273,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,{'window.open.opener': 'null'},,accesswindow_direct,http,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=parsing&browser_id=1&label=COOP&first_id=22059&last_id=22059&scheme=http
42685,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,"{'window.open.opener': 'object ""[object Window]""'}",,accesswindow_direct,http,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=parsing&browser_id=1&label=COOP&first_id=22059&last_id=22059&scheme=http


accesswindow_direct [21640, 22059, 22478, 23242, 23606]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
{'window.open.opener': 'null'},brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,15
"{'window.open.opener': 'object ""[object Window]""'}",brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,10
{'window.open.opener': 'null'},brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
63380,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,{'window.open.opener': 'null'},,accesswindow_direct,https,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=parsing&browser_id=1&label=COOP&first_id=22059&last_id=22059&scheme=https
76530,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,"{'window.open.opener': 'object ""[object Window]""'}",,accesswindow_direct,https,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=parsing&browser_id=1&label=COOP&first_id=22059&last_id=22059&scheme=https


framing_iframe [6163, 7109]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
message send,chrome Ubuntu 22.04 119 selenium headless-new,6
message timeout,chrome Ubuntu 22.04 119 selenium xvfb,2


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
1217718,chrome Ubuntu 22.04 119 selenium headless-new,message send,,framing_iframe,https,sub.headers.websec.saarland,https,nested,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/framing.sub.html?resp_type=parsing&browser_id=1&label=CSP-FA&first_id=6163&last_id=6163&scheme=https
2048899,chrome Ubuntu 22.04 119 selenium xvfb,message timeout,,framing_iframe,https,sub.headers.websec.saarland,https,nested,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/framing.sub.html?resp_type=parsing&browser_id=1&label=CSP-FA&first_id=6163&last_id=6163&scheme=https


fullscreen_iframe [36037, 37381, 37819, 38110, 38508]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
fullscreenEnabled: true,chrome Ubuntu 22.04 119 selenium headless-new,15
message timeout,chrome Ubuntu 22.04 119 selenium xvfb,5


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
1822472,chrome Ubuntu 22.04 119 selenium headless-new,fullscreenEnabled: true,,fullscreen_iframe,https,sub.headers.websec.saarland,https,child_allow,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/fullscreen-api-pp.sub.html?resp_type=parsing&browser_id=1&label=PP&first_id=37381&last_id=37381&scheme=https
2882440,chrome Ubuntu 22.04 119 selenium xvfb,message timeout,,fullscreen_iframe,https,sub.headers.websec.saarland,https,child_allow,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/fullscreen-api-pp.sub.html?resp_type=parsing&browser_id=1&label=PP&first_id=37381&last_id=37381&scheme=https


fullscreen_iframe [36037, 37381, 37819, 38110, 38508]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
fullscreenEnabled: true,chrome Ubuntu 22.04 119 selenium headless-new,15
message timeout,chrome Ubuntu 22.04 119 selenium xvfb,5


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
1822471,chrome Ubuntu 22.04 119 selenium headless-new,fullscreenEnabled: true,,fullscreen_iframe,https,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/fullscreen-api-pp.sub.html?resp_type=parsing&browser_id=1&label=PP&first_id=37381&last_id=37381&scheme=https
2882439,chrome Ubuntu 22.04 119 selenium xvfb,message timeout,,fullscreen_iframe,https,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,https://sub.headers.websec.saarland/_hp/tests/fullscreen-api-pp.sub.html?resp_type=parsing&browser_id=1&label=PP&first_id=37381&last_id=37381&scheme=https


## Browser differences!