In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import re
import json
import h2o

from datetime import datetime

from utils import get_data, Config, clean_url, make_clickable, add_columns

from tree import make_tree

In [97]:
# Load all data (except unknown/manual testing)
# Original Desktop Runs+Re-Runs were performed between 2024-02-05 and 2024-02-12
# Additional browsers + more than one response_id per parsing URL was run between 2024-02-16 and TBD
# Mobile runs performed between 2024-02-28 and TBD
initial_data = """
SELECT "Result".*, 
"Response".raw_header, "Response".status_code, "Response".label, "Response".resp_type,
"Browser".name, "Browser".version, "Browser".headless_mode, "Browser".os, "Browser".automation_mode, "Browser".add_info
FROM "Result"
JOIN "Response" ON "Result".response_id = "Response".id JOIN "Browser" ON "Result".browser_id = "Browser".id
WHERE "Browser".name != 'Unknown' and "Response".resp_type = 'basic' and ("Browser".os = 'iPadOS 17.3.1' or "Browser".os = 'macOS 14.3'); -- AND "Result".created_at < '2024-02-19';
"""
df = get_data(Config(), initial_data)
df = add_columns(df)

Connecting to the PostgreSQL database...
Connection successful


In [98]:
df["raw_header"] = df["raw_header"].apply(bytes).apply(lambda x: json.loads(x.decode("utf-8"))).astype(str)
def create_test_id(row):
    return f'{row["test_name"]}_{row["relation_info"]}_{row["org_scheme"]}_{row["org_host"]}_{row["resp_scheme"]}_{row["resp_host"]}_{row["response_id"]}_{row["resp_type"]}'

df["browser_id"] = df["browser_id"].astype("category")
# Takes a while (500s+) (might be faster to already do it with postgres but not too important)
df["test_id"] = df.apply(create_test_id, axis=1)
df["test_id"] = df["test_id"].astype("category")

In [99]:
responses = """
SELECT * from "Response";
"""
responses = get_data(Config(), responses)

Connecting to the PostgreSQL database...
Connection successful


# Overview

In [100]:
# Both basic + parsing mode
# Each test should have results 5+ times (such that we can better reason about noise + majority voting makes sense)
# Basic was executed 20x+ on Linux, 5x on Mac
# Parsing was executed 5+ on Linux, 2x on Mac (currently executing a third time)
# (In addition, a couple of "repeat"-mode runs were performed in the beginning
df.groupby(["automation_mode", "browser_id", "name", "version", "os", "headless_mode"], observed=True)["id"].count().sort_values(ascending=False).to_frame().reset_index()

Unnamed: 0,automation_mode,browser_id,name,version,os,headless_mode,id
0,selenium,46,safari,17.3,macOS 14.3,real,60984
1,intent,54,brave,1.62 (24.2.9.10),iPadOS 17.3.1,real,58312


# Error + Timeout Analysis
- Only timeout exist, no other systematic failures anymore
- Systematic timeout: TAO in Safari -> fixed
- Couple of random timeouts in other tests
  - More basic tests than parsing tests (even though less exist)
  - Code 302 more often than others
  - referrer-iframe, oac_*, framing_iframe most often

In [101]:
# Timeout (2), Fail (1), Not-run (3)
filtered_df = df.groupby(["browser"])["test_status"].filter(lambda x: x.nunique() >= 2)
display(df[["browser", "test_status"]].loc[df.index.isin(filtered_df.index)].groupby(["browser"])["test_status"].value_counts().to_frame())

Unnamed: 0_level_0,Unnamed: 1_level_0,count
browser,test_status,Unnamed: 2_level_1
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,51195
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,2,7117
safari macOS 14.3 17.3 selenium real,0,60752
safari macOS 14.3 17.3 selenium real,2,232


In [102]:
# Teststatus == 2 (timeout)

# Rare timeouts in all browsers; Safari some more than others
display(df[df["test_status"] == 2]["browser"].value_counts().to_frame())

# Mostly 302 redirects for basic (oac_iframe, oac_window.open), referrer_iframe 
# perfAPI_img/TAO (only Safari, fixed!)
# Others quite rare
with pd.option_context("display.max_rows", 80):
    display(df.loc[(df["test_status"] == 2)].groupby(["resp_type", "status_code", "test_name"])["browser"].value_counts().to_frame().sort_values(by="count", ascending=False))

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,7117
safari macOS 14.3 17.3 selenium real,232


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count
resp_type,status_code,test_name,browser,Unnamed: 4_level_1
basic,200,referrer_iframe,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,2723
basic,200,oac_window.open,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,1771
basic,200,oac_iframe,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,810
basic,302,oac_iframe,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,798
basic,302,referrer_iframe,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,616
basic,302,oac_window.open,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,399
basic,200,referrer_iframe,safari macOS 14.3 17.3 selenium real,118
basic,302,referrer_iframe,safari macOS 14.3 17.3 selenium real,74
basic,200,oac_iframe,safari macOS 14.3 17.3 selenium real,24
basic,302,oac_iframe,safari macOS 14.3 17.3 selenium real,10


In [103]:
# Teststatus == 3 (not-run)

# Rare timeouts in all browsers; Safari some more than others
display(df[df["test_status"] == 3]["browser"].value_counts().to_frame())

# Mostly 302 redirects for basic (oac_iframe, oac_window.open), referrer_iframe 
# perfAPI_img/TAO (only Safari, fixed!)
# Others quite rare
display(df.loc[(df["test_status"] == 3)].groupby(["resp_type", "status_code", "test_name"])["browser"].value_counts().to_frame().sort_values(by="count", ascending=False))

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count
resp_type,status_code,test_name,browser,Unnamed: 4_level_1


# General Test Statistics

In [104]:
df[["browser", "resp_type", "test_name", "relation_info", "response_id", "org_scheme", "org_host", "resp_scheme", "resp_host"]].drop_duplicates().groupby(["resp_type", "browser"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,test_name,relation_info,response_id,org_scheme,org_host,resp_scheme,resp_host
resp_type,browser,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
basic,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,10216,10216,10216,10216,10216,10216,10216
basic,safari macOS 14.3 17.3 selenium real,10456,10456,10456,10456,10456,10456,10456


In [74]:
168774 + 10456

179230

In [105]:
# Some browsers only used the more response_ids per page mode, thus these numbers don't say much
df.groupby("browser")["clean_url"].nunique()

browser
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real    353
safari macOS 14.3 17.3 selenium real                364
Name: clean_url, dtype: int64

In [106]:
# Number of response_ids tested (should be 44415)
# Many response_ids have more than one test (e.g., both iframe and window.open) thus this is not enough to show that all tests have results!
df.groupby("browser")["response_id"].nunique()

browser
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real    187
safari macOS 14.3 17.3 selenium real                189
Name: response_id, dtype: int64

In [107]:
df.loc[df["test_status"] == 0].groupby("browser")["test_id"].nunique()

browser
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real     9646
safari macOS 14.3 17.3 selenium real                10456
Name: test_id, dtype: int64

In [108]:
# Make sure that all responses actually have responses
# (create_repeat.py assumes that at least one browser collected the results)

r1 = df.groupby("label")["response_id"].nunique().sort_values()
r2 = responses.loc[responses["resp_type"] != "debug"].groupby("label")["id"].nunique().sort_values()

pd.concat([r1, r2], axis=1)

Unnamed: 0_level_0,response_id,id
label,Unnamed: 1_level_1,Unnamed: 2_level_1
CORS,6.0,6
CSPvsXFO,7.0,1447
COOP,11.0,2667
COEP,11.0,2648
CORP,11.0,3368
XFO,11.0,4607
TAO,13.0,1614
HSTS,14.0,3338
OAC,14.0,1713
RP,16.0,2813


In [109]:
# Number of response_ids for each "group": resp_type, test_name, relation_info
df.groupby(["resp_type", "test_name", "relation_info"])["response_id"].nunique().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,response_id
resp_type,test_name,relation_info,Unnamed: 3_level_1
basic,accesswindow_direct,direct,11
basic,fetch_GET,credentials,6
basic,fetch_GET,custom_headers,6
basic,fetch_GET,simple,6
basic,fetch_TEST,custom_method,6
basic,framing_embed,direct,35
basic,framing_embed,nested,35
basic,framing_embed,sandbox,35
basic,framing_iframe,direct,35
basic,framing_iframe,nested,35


# Stability (Results after 2x with only one response_id per parsing URL test)
- Basic tests:
    - 21/(10456x5) tests with different outcomes!
    - 13x firefox subresourceloadingCOEP_img (random behavior bug)
    - 6x timeout vs message 4(x edge, 2x brave)
    - 1x brave referrer different outcome
    - 1x safari COEP different outcome?
- Parsing tests:
    - 755/(168774x5) tests with different outcomes!
    - quite a lot! last time we only had 1 single test with a different outcome? (We used different versions of the browsers)
    - 3x accesswindow_direct (null vs accessible), 1x fullscreen_iframe (timeout vs message), 1x img_loading_iframe (timeout vs message)
    - All others (700+) upgradeHSTS_subdomain and upgradeHSTS_direct
        - Prevalence: `brave ~= edge > chrome >> firefox >> safari`
        - Hints at systematic issue: maybe the reset did not work or something similar?
        - Mostly timeout vs redirect=false; however some are also timeout vs redirect=false vs redirect=true!
        - Timeout maybe related to the HTTPS upgrade feature in chromium-based browsers which can take some time (>5s?)? (Non-Authoritative-Reason: HttpsUpgrades)
        - Unclear how redirect true vs false could happen? Maybe HSTS clearing did not work?
        - Manual testing always results in redirect=false for all tried clean_urls!

### Update: multiple response_ids per parsing URL test
- basic (123+)
    - mostly firefox subresourceloadingCOEP_img (random behavior bug)
    - some rare instances of other tests (timeouts)
- parsing (5000+)
    - fetch_* timeout issue in firefox (fixed)
    - subresourceloadingCOEP_img and imgloading_iframe timeout issues (other browsers) (probably fixed)
    - upgradeHSTS_subdomain + upgradeHSTS_direct
    - rare instances of other tests
- mostly timeouts?: Firefox fetch, other browsers subresourceloadingCOEP_img
- maybe we have too many tests per page/or the timeout is not high enough -> decreased the number of tests for some feature groups (now it seems better)
- (OT: if running `desktop_selenium.py` in the repeat mode, one has to increate `--timeout_task` (15*100 = 1500s > 1000) or decrease `--max_urls_until_restart` as we ran the repeat tests with a 3xTIMEOUT)

In [110]:
# Remove all tests that timed out (test_status == 0), each test has at least one result that did not time out in each browser
df_org = df
df = df.loc[df["test_status"] == 0]

In [111]:
details = True
for group_name, resp_type_group in df.groupby("resp_type"):
    #if group_name == "basic":
    #    continue
    print(group_name)
    stab = resp_type_group.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "browser"])[["outcome_str"]].agg(["count", "nunique"])    # Tests with more than one outcome
    diff_outcomes = stab.loc[stab[("outcome_str", "nunique")] != 1]
    print(f"{len(diff_outcomes)} tests have different outcomes!")
    display(diff_outcomes.reset_index()["browser"].value_counts().to_frame())
    with pd.option_context("display.max_rows", 100):
        display(diff_outcomes.reset_index()[["test_name", "browser"]].value_counts().to_frame())

    diff_with_responses = diff_outcomes.reset_index().groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "browser"])["response_id"].agg(list).to_frame().reset_index()

    if not details:
        continue
    for id, (test_name, org_scheme, org_host, resp_scheme, resp_host, relation_info, browser, response_ids) in diff_with_responses.iterrows():
        # TODO: shortcut, only show the first result for each test_name/browser; results are usually always the same!
        print(test_name, response_ids)
        rows = df.loc[(df["org_scheme"] == org_scheme) & (df["test_name"] == test_name) & (df["resp_scheme"] == resp_scheme) & (df["resp_host"] == resp_host) & (df["relation_info"] == relation_info) & (df["response_id"].isin(response_ids)) & (df["browser"] == browser)]
        with pd.option_context("display.max_colwidth", 200):
            display(rows[["outcome_str", "browser"]].value_counts().to_frame())
            disp = rows.drop_duplicates(subset="outcome_str")[["browser", "outcome_str", "raw_header", "test_name", "org_scheme", "org_host", "resp_scheme", "relation_info", "resp_host", "full_url"]]
            disp = disp.style.format({'full_url': make_clickable})
            display(disp)
        # input("Continue!")

basic
9 tests have different outcomes!


Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,7
safari macOS 14.3 17.3 selenium real,2


Unnamed: 0_level_0,Unnamed: 1_level_0,count
"(test_name, )","(browser, )",Unnamed: 2_level_1
subresourceloadingCOEP_img,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,7
accesswindow_direct,safari macOS 14.3 17.3 selenium real,2


accesswindow_direct [81]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
{'window.open.opener': 'null'},safari macOS 14.3 17.3 selenium real,6
"{'window.open.opener': 'object ""[object Window]""'}",safari macOS 14.3 17.3 selenium real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
5183,safari macOS 14.3 17.3 selenium real,"{'window.open.opener': 'object ""[object Window]""'}","[['Cross-Origin-Opener-Policy', 'same-origin'], ['location', 'https://sub.headers.websec.saarland/_hp/common/empty.html']]",accesswindow_direct,http,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=basic&browser_id=1&label=COOP&first_id=81&last_id=81&scheme=http
15442,safari macOS 14.3 17.3 selenium real,{'window.open.opener': 'null'},"[['Cross-Origin-Opener-Policy', 'same-origin'], ['location', 'https://sub.headers.websec.saarland/_hp/common/empty.html']]",accesswindow_direct,http,sub.headers.websec.saarland,https,direct,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=basic&browser_id=1&label=COOP&first_id=81&last_id=81&scheme=http


accesswindow_direct [81]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
{'window.open.opener': 'null'},safari macOS 14.3 17.3 selenium real,6
"{'window.open.opener': 'object ""[object Window]""'}",safari macOS 14.3 17.3 selenium real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
5181,safari macOS 14.3 17.3 selenium real,"{'window.open.opener': 'object ""[object Window]""'}","[['Cross-Origin-Opener-Policy', 'same-origin'], ['location', 'https://sub.headers.websec.saarland/_hp/common/empty.html']]",accesswindow_direct,http,sub.headers.websec.saarland,https,direct,sub.sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=basic&browser_id=1&label=COOP&first_id=81&last_id=81&scheme=http
15440,safari macOS 14.3 17.3 selenium real,{'window.open.opener': 'null'},"[['Cross-Origin-Opener-Policy', 'same-origin'], ['location', 'https://sub.headers.websec.saarland/_hp/common/empty.html']]",accesswindow_direct,http,sub.headers.websec.saarland,https,direct,sub.sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?resp_type=basic&browser_id=1&label=COOP&first_id=81&last_id=81&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,4
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
68635,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
79906,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,3
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,2


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
68627,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
79833,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,3
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,2


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
70764,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
79829,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,4
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
68631,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,sub.sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
112550,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,http,sandbox,sub.sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,4
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
68637,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,https,sandbox,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
101768,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,https,sandbox,headers.webappsec.eu,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,4
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
68625,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,https,sandbox,sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
101741,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,https,sandbox,sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


subresourceloadingCOEP_img [59]


Unnamed: 0_level_0,Unnamed: 1_level_0,count
outcome_str,browser,Unnamed: 2_level_1
"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,4
"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}",brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,1


Unnamed: 0,browser,outcome_str,raw_header,test_name,org_scheme,org_host,resp_scheme,relation_info,resp_host,full_url
68633,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,https,sandbox,sub.sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http
101749,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","[['Cross-Origin-Embedder-Policy', 'require-corp']]",subresourceloadingCOEP_img,http,sub.headers.websec.saarland,https,sandbox,sub.sub.headers.websec.saarland,http://sub.headers.websec.saarland/_hp/tests/subresource-loading-coep.sub.html?resp_type=basic&browser_id=1&label=COEP&first_id=59&last_id=59&scheme=http


## More stability?!
- How many outcomes did we collect per test in the different browsers? Also shows tests with only one outcome
- How many of them are different?

In [112]:
# How many outcomes did we collect for each test?!
# Should be between 5 and 20+ (less than 5 is concerning as we cannot do proper majority voting in such cases)

final_df = pd.DataFrame()

for group, d in df.groupby("browser"):
    # Calculate counts and unique counts
    aggs = ["count"] # ["count", "nunique"]
    res = d.groupby(["test_id"], observed=True)["outcome_str"].agg(aggs)
    # Get value counts for each combination of counts and unique counts
    counts = res.value_counts().to_frame()    
    counts = counts.rename(columns={"count": group})
    display(counts)
    final_df = pd.concat([final_df, counts], axis=1)

display(final_df)

Unnamed: 0_level_0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
count,Unnamed: 1_level_1
5,9152
2,56
12,50
3,43
9,41
21,40
13,35
10,31
6,26
1,24


Unnamed: 0_level_0,safari macOS 14.3 17.3 selenium real
count,Unnamed: 1_level_1
6,4066
5,3773
7,2203
4,313
9,84
3,10
2,4
8,3


Unnamed: 0_level_0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
count,Unnamed: 1_level_1,Unnamed: 2_level_1
5,9152,3773.0
2,56,4.0
12,50,
3,43,10.0
9,41,84.0
21,40,
13,35,
10,31,
6,26,4066.0
1,24,


In [113]:
# Which tests have how many outcomes?

grouped = df.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "browser", "resp_type"])['outcome_str'].count().reset_index()

with pd.option_context("display.max_rows", 256):
    res = grouped.groupby(["browser", "resp_type", "test_name"])["outcome_str"].agg(["mean", "min", "max", "count", "sum"]).sort_values(by="mean", ascending=False)
    display(res.describe())
    display(res.loc[res["mean"] > 0])

Unnamed: 0,mean,min,max,count,sum
count,36.0,36.0,36.0,36.0,36.0
mean,6.313374,5.0,8.416667,558.388889,3109.638889
std,2.589629,1.454058,9.156965,608.267388,3140.619249
min,4.153846,1.0,5.0,20.0,140.0
25%,5.0,5.0,5.0,176.0,880.0
50%,5.486607,5.0,6.0,280.0,1672.0
75%,7.0,5.0,7.0,544.0,3724.0
max,15.961905,7.0,44.0,1680.0,9240.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,min,max,count,sum
browser,resp_type,test_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,basic,oac_iframe,15.961905,4,44,210,3352
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,basic,oac_window.open,15.5,4,44,20,310
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,basic,referrer_iframe,10.479167,3,21,144,1509
safari macOS 14.3 17.3 selenium real,basic,accesswindow_direct,7.0,7,7,176,1232
safari macOS 14.3 17.3 selenium real,basic,subresourceloadingCOEP_img,7.0,7,7,352,2464
safari macOS 14.3 17.3 selenium real,basic,script_execution_iframe,7.0,7,7,544,3808
safari macOS 14.3 17.3 selenium real,basic,subresourceloadingCORP_img,7.0,7,7,528,3696
safari macOS 14.3 17.3 selenium real,basic,imgloading_iframe,7.0,7,7,272,1904
safari macOS 14.3 17.3 selenium real,basic,upgradeHSTS_direct,7.0,7,7,28,196
safari macOS 14.3 17.3 selenium real,basic,upgradeHSTS_subdomain,7.0,7,7,28,196


In [114]:
# Different outcomes (in percentage for a test group (test_name)
# (Does not take into account how often each test was executed)
grouped = df.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "browser", "resp_type"])['outcome_str'].nunique().reset_index()

grouped["diff_outcome"] = grouped["outcome_str"] > 1
with pd.option_context("display.max_rows", 100):
    res = grouped.groupby(["browser", "resp_type", "test_name"])["diff_outcome"].agg(["mean", "count", "sum"]).sort_values(by="mean", ascending=False)
    display(res.describe())
    display(res.loc[res["mean"] > 0])

Unnamed: 0,mean,count,sum
count,36.0,36.0,36.0
mean,0.000868,558.388889,0.25
std,0.00377,608.267388,1.204159
min,0.0,20.0,0.0
25%,0.0,176.0,0.0
50%,0.0,280.0,0.0
75%,0.0,544.0,0.0
max,0.019886,1680.0,7.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,count,sum
browser,resp_type,test_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,basic,subresourceloadingCOEP_img,0.019886,352,7
safari macOS 14.3 17.3 selenium real,basic,accesswindow_direct,0.011364,176,2


In [None]:
# Run the test groups with many different outcomes again?
# Groups with non-negligble different outcomes are subresourceloadingCOEP_img, upgradeHSTS_subdomain, upgradeHSTS_direct, imgloading_iframe, fetch_TEST, fetch_GET
test_files_to_repeat =  ["fetch-cors.sub.html", "subresource-loading-coep.sub.html", "subresource-loading-csp.sub.html", "upgrade-hsts.sub.html"]

# TODO: implement utils.py/desktop_selenium mode to only rerun these files?!
# For now, simply run everything again to get some more results for stability?!

In [None]:
# Majority voting for the tests with multiple outcomes
df.sample(100).groupby(["test_name", "relation_info", "browser", "org_host", "resp_scheme", "org_origin", "resp_origin", "response_id"])["outcome_str"].agg([pd.Series.mode, "nunique", "count"]).reset_index().sort_values("count")

# Browser differences!
- First perform majority voting
- Then diff the browsers

In [115]:
df = df_org
df = df.loc[df["test_status"] == 0]

In [116]:
df_stab = df

# Only keep one row for each test
# If the test had non-deterministic results, use majority voting! (only works reliably if there a enough repetitions of the tests runs, >= 5)

print("Original data entries", len(df))

# Majority voting (pd.Series.mode returns the most frequest item) (quite slow)

df = df.groupby(["test_id", "browser"], observed=True)["outcome_str"].agg(pd.Series.mode).reset_index()
print("Only one row for each test (per browser; majority voting)", len(df))

Original data entries 111947
Only one row for each test (per browser; majority voting) 20102


In [117]:
# Merge back additional required properties!
# ["name", "test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "resp_type", "label", 'org_origin', 'resp_origin', 'status_code', 'raw_header']

df = df.merge(df_stab.drop_duplicates(subset=["test_id", "browser"]), on=["test_id", "browser"], how="left", suffixes=["", "_ignore"])


In [118]:
# If several values occur the same, the mode is an np.ndarray and not a string!
# What to do in such cases? If we have more repetitions the issue should probably go away automatically?
# If we have exactly 5 repetitions for each test, the issue cannot exist! (in the debug phase we sometimes have more or less than 5 repetitions and the issue can exist)
print("Tests with the same mode (highest frequency outcome):")
display(df.loc[df['outcome_str'].apply(lambda x: isinstance(x, np.ndarray))].groupby(["browser", "resp_type", "test_name"])["outcome_str"].count())

# In the rare cases, where it actually matters, we can just take the first? (this might be noise in the generated trees)
df['outcome_str'] = df['outcome_str'].apply(lambda x: x[0] if isinstance(x, np.ndarray) else x)

Tests with the same mode (highest frequency outcome):


Series([], Name: outcome_str, dtype: int64)

In [119]:
# Most tests have exactly one outcome, quite some have two, couple have three!
res = df.groupby(["resp_type", "test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id", "test_id"], observed=True)
res = res["outcome_str"].agg("nunique")
display(res.value_counts().to_frame())

Unnamed: 0_level_0,count
outcome_str,Unnamed: 1_level_1
1,9878
2,578


In [120]:
# Basic:
# subresourceloadingCORP_object has different results for 100% of tests
# couple of others have 20%+ differences
# couple have 0%

# Parsing:
# oac_window.open has 99%
# Other test groups have between 0.5 - 7.5% of tests with different outcomes
r = res.reset_index()
r["diff"] = r["outcome_str"] != 1
r.groupby(["resp_type", "test_name", "relation_info"])["diff"].agg(["count", "sum", "mean"]).sort_values("mean", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum,mean
resp_type,test_name,relation_info,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
basic,accesswindow_direct,direct,176,176,1.0
basic,fullscreen_iframe,child_sandbox,384,176,0.458333
basic,fullscreen_iframe,child,384,132,0.34375
basic,perfAPI_img,direct,208,52,0.25
basic,referrer_iframe,window.open,256,25,0.097656
basic,oac_window.open,window.open,224,16,0.071429
basic,subresourceloadingCORP_img,direct,176,1,0.005682
basic,fetch_GET,custom_headers,96,0,0.0
basic,fetch_GET,credentials,96,0,0.0
basic,fetch_GET,simple,96,0,0.0


In [121]:
# Select only test rows with more than one outcome
tests_with_mult_outcomes = r.loc[r["diff"]].set_index(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id"])
df_mult = df.loc[df[["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id"]].apply(tuple, axis=1).isin(tests_with_mult_outcomes.index)]
tests_with_more_than_one_outcome = df_mult.drop_duplicates(subset=["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id"])


all_tests_should_be =  df_org.loc[df_org["test_status"] == 0]["test_id"].nunique() * df_org["browser"].nunique() # (num parsing tests + num_basic_tests) * num_browsers (168774+10456)
print(f"All test rows: {len(df)}, should be: {all_tests_should_be}, Rows with more than one outcome: {len(df_mult)}, Tests with more than one outcome: {len(tests_with_more_than_one_outcome)}")

All test rows: 20102, should be: 20912, Rows with more than one outcome: 1156, Tests with more than one outcome: 578


In [122]:
# Display difference groups
for grouping, group in df_mult.groupby(["test_name", "relation_info"]):
    print(grouping)
    df_list = []
    for _, one_test in group.groupby(["test_name", "org_scheme", "org_host", "resp_scheme", "resp_host", "relation_info", "response_id"]):
        browsers = one_test.groupby("outcome_str")["browser"].unique().apply(list).apply(sorted).to_frame()
        new = True
        for df_b in df_list:
            if df_b.equals(browsers):
                new = False
        if new:
            df_list.append(browsers)
    print(len(df_list))
    for df_b in df_list:
        with pd.option_context("display.max_colwidth", 1000):
            display(df_b)
    #input("Continue!")

('accesswindow_direct', 'direct')
2


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
"{'window.open.opener': 'object ""TypeError: null is not an object (evaluating \'w.opener\')""'}",[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]
"{'window.open.opener': 'object ""[object Window]""'}",[safari macOS 14.3 17.3 selenium real]


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
{'window.open.opener': 'null'},[safari macOS 14.3 17.3 selenium real]
"{'window.open.opener': 'object ""TypeError: null is not an object (evaluating \'w.opener\')""'}",[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


('fullscreen_iframe', 'child')
1


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
fullscreenEnabled: false,[safari macOS 14.3 17.3 selenium real]
fullscreenEnabled: true,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


('fullscreen_iframe', 'child_sandbox')
1


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
fullscreenEnabled: false,[safari macOS 14.3 17.3 selenium real]
fullscreenEnabled: true,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


('oac_window.open', 'window.open')
1


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
message timeout,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]
window.originAgentCluster: undefined,[safari macOS 14.3 17.3 selenium real]


('perfAPI_img', 'direct')
2


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
No performance entry,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]
{'requestStart != 0': True},[safari macOS 14.3 17.3 selenium real]


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
No performance entry,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]
{'requestStart != 0': False},[safari macOS 14.3 17.3 selenium real]


('referrer_iframe', 'window.open')
3


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
document.referrer: full_url,[safari macOS 14.3 17.3 selenium real]
message timeout,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
document.referrer:,[safari macOS 14.3 17.3 selenium real]
message timeout,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
document.referrer: http://sub.headers.websec.saarland/,[safari macOS 14.3 17.3 selenium real]
message timeout,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


('subresourceloadingCORP_img', 'direct')
1


Unnamed: 0_level_0,browser
outcome_str,Unnamed: 1_level_1
error,[safari macOS 14.3 17.3 selenium real]
load,[brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real]


In [123]:
# Other approach to list the difference groups
def get_uniques(df):
    browsers = f"{df['browser'].unique().tolist()}"
    try:
        outcome = unique_outcomes[df.name[0]]
    except KeyError:
        outcome = {}
        #outcome = []
    outcome[browsers] = df.name[1]
    #outcome.append(f"{df.name[1]} ({browsers})")
    unique_outcomes[df.name[0]] = outcome

for grouping, group in df_mult.groupby(["test_name", "relation_info"]):
    print(grouping)
    unique_outcomes = {}
    group.groupby(["browser"])["outcome_str"].value_counts(normalize=False).reset_index().groupby(["outcome_str", "count"], group_keys=True).apply(get_uniques)
    d = pd.DataFrame.from_dict(unique_outcomes, orient="index")
    if d.shape[1] != 1:
        display(d)
        print()

('accesswindow_direct', 'direct')


Unnamed: 0,['safari macOS 14.3 17.3 selenium real'],['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real']
{'window.open.opener': 'null'},32.0,
"{'window.open.opener': 'object ""[object Window]""'}",144.0,
"{'window.open.opener': 'object ""TypeError: null is not an object (evaluating \'w.opener\')""'}",,176.0



('fullscreen_iframe', 'child')


Unnamed: 0,['safari macOS 14.3 17.3 selenium real'],['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real']
fullscreenEnabled: false,132.0,
fullscreenEnabled: true,,132.0



('fullscreen_iframe', 'child_sandbox')


Unnamed: 0,['safari macOS 14.3 17.3 selenium real'],['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real']
fullscreenEnabled: false,176.0,
fullscreenEnabled: true,,176.0



('oac_window.open', 'window.open')


Unnamed: 0,['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real'],['safari macOS 14.3 17.3 selenium real']
message timeout,16.0,
window.originAgentCluster: undefined,,16.0



('perfAPI_img', 'direct')


Unnamed: 0,['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real'],['safari macOS 14.3 17.3 selenium real']
No performance entry,52.0,
{'requestStart != 0': False},,32.0
{'requestStart != 0': True},,20.0



('referrer_iframe', 'window.open')


Unnamed: 0,['safari macOS 14.3 17.3 selenium real'],['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real']
document.referrer:,3.0,
document.referrer: full_url,13.0,
document.referrer: http://sub.headers.websec.saarland/,9.0,
message timeout,,25.0



('subresourceloadingCORP_img', 'direct')


Unnamed: 0,['safari macOS 14.3 17.3 selenium real'],['brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real']
error,1.0,
load,,1.0





In [124]:
# Simmilarity between browsers and versions!

# Function to calculate absolute and percentage overlap
def calculate_overlap(df, col1, col2):
    df = df[[col1, col2]].dropna()
    df.columns.values[0] = 0
    df.columns.values[1] = 1
    absolute_overlap = (df[0] == df[1]).sum()
    unique_rows = len(df)
    diff_rows = unique_rows - absolute_overlap
    percentage_diff = (diff_rows / unique_rows) * 100
    return diff_rows, percentage_diff, unique_rows


def display_overlap(df, show_all=True, name="All"):
    print(name)
    sim_frame = df.set_index(["test_id", "browser"])["outcome_str"].unstack().astype("str")
    sim_frame = sim_frame.reset_index().drop(columns=["test_id"])
    # Create a dictionary to store results
    results = {}
    matrix = {}
    
    # Iterate through all pairs of columns
    for i, col1 in enumerate(sim_frame.columns):
        for col2 in sim_frame.columns:
            key = f"{col1}_vs_{col2}"
            results[key] = calculate_overlap(sim_frame, col1, col2)
            try:
                l = matrix[col1]
            except KeyError:
                l = {}
            # 0 for absolute count of differences, 1 for percentage
            l[col2] = results[key][0]
            matrix[col1] = l
            
    
    # Convert the results to a DataFrame for better visualization
    results_df = pd.DataFrame.from_dict(results, orient='index', columns=['Absolute Diff', 'Percentage Diff', "Count"])
    
    # Display the results
    # if show_all:
        # display(results_df)
    
    # Display difference matrix
    with pd.option_context("display.max_columns", 28):
        if show_all:
            display(pd.DataFrame(matrix))
        else:
            display(pd.DataFrame(matrix).drop_duplicates().T)


display_overlap(df)

for name, group in df.groupby("resp_type"):
    display_overlap(group, show_all=True, name=name)

for name, group in df.groupby("test_name"):
    display_overlap(group, show_all=False, name=name)

All


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,1388
safari macOS 14.3 17.3 selenium real,1388,0


basic


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,1388
safari macOS 14.3 17.3 selenium real,1388,0


accesswindow_direct


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,176
safari macOS 14.3 17.3 selenium real,176,0


fetch_GET


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


fetch_TEST


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


framing_embed


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


framing_iframe


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


framing_object


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


fullscreen_iframe


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,308
safari macOS 14.3 17.3 selenium real,308,0


imgloading_iframe


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


oac_iframe


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,238
safari macOS 14.3 17.3 selenium real,238,0


oac_window.open


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,220
safari macOS 14.3 17.3 selenium real,220,0


perfAPI_img


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,52
safari macOS 14.3 17.3 selenium real,52,0


referrer_iframe


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,393
safari macOS 14.3 17.3 selenium real,393,0


script_execution_iframe


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


subresourceloadingCOEP_img


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


subresourceloadingCORP_img


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,safari macOS 14.3 17.3 selenium real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0,1
safari macOS 14.3 17.3 selenium real,1,0


subresourceloadingCORP_object


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


upgradeHSTS_direct


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


upgradeHSTS_subdomain


Unnamed: 0,brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real
brave iPadOS 17.3.1 1.62 (24.2.9.10) intent real,0
safari macOS 14.3 17.3 selenium real,0


In [59]:
# How many rows exist for tree creation
df[["test_name", "label", "relation_info"]].value_counts()

test_name                   label       relation_info 
framing_iframe              XFO         direct            64344
                                        nested            64344
fullscreen_iframe           PP          child_allow       54320
                                        direct            54320
subresourceloadingCORP_img  CORP        direct            46998
upgradeHSTS_direct          HSTS        direct            46536
upgradeHSTS_subdomain       HSTS        subdomain         46536
referrer_iframe             RP          iframe            39158
framing_iframe              CSP-FA      direct            38416
                                        nested            38416
script_execution_iframe     CSP-SCRIPT  direct            38164
imgloading_iframe           CSP-IMG     direct            37940
accesswindow_direct         COOP        direct            37184
subresourceloadingCOEP_img  COEP        direct            36918
fetch_TEST                  CORS-ACAO   custom_me

In [227]:
h2o.init(nthreads=50, max_mem_size="100G", log_level="WARN")
h2o.no_progress()


# Limit on responses that have more than one outcome!
# This will remove all the branches that are the same in all browsers (and make trees large and confusing)
condition = df.groupby(["test_id"], observed=True)["outcome_str"].transform("nunique") != 1
tree_df = df.loc[condition]
print("Remove tests that are the same in all browsers", len(tree_df))
base_dir = f"trees/{datetime.today().strftime('%Y-%m-%dT%H:%M')}" 

for group_name, group in tree_df.groupby(["test_name", "relation_info"]):
    print(group_name)
    pred_cols = ["browser", "org_origin", "resp_origin", "status_code", "raw_header"]
    group = group[["outcome_str", *pred_cols]]
    tree = make_tree(group, pred_cols, group_name, base_dir)
    # print(tree)


Checking whether there is an H2O instance running at http://localhost:54321. connected.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Remove tests that are the same in all browsers 67632
('accesswindow_direct', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('accesswindow_direct', 'direct'), datapoints: 572
('fetch_GET', 'credentials')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fetch_GET', 'credentials'), datapoints: 688
('fetch_GET', 'custom_headers')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fetch_GET', 'custom_headers'), datapoints: 808
('fetch_GET', 'simple')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fetch_GET', 'simple'), datapoints: 832
('fetch_TEST', 'custom_method')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fetch_TEST', 'custom_method'), datapoints: 808
('framing_embed', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_embed', 'direct'), datapoints: 1673
('framing_embed', 'nested')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,100.0 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_embed', 'nested'), datapoints: 1079
('framing_embed', 'sandbox')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_embed', 'sandbox'), datapoints: 1079
('framing_iframe', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_iframe', 'direct'), datapoints: 2920
('framing_iframe', 'nested')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_iframe', 'nested'), datapoints: 1152
('framing_iframe', 'sandbox')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_iframe', 'sandbox'), datapoints: 72
('framing_object', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_object', 'direct'), datapoints: 1673
('framing_object', 'nested')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_object', 'nested'), datapoints: 1079
('framing_object', 'sandbox')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('framing_object', 'sandbox'), datapoints: 1079
('fullscreen_iframe', 'child')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fullscreen_iframe', 'child'), datapoints: 207
('fullscreen_iframe', 'child_allow')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 29 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fullscreen_iframe', 'child_allow'), datapoints: 3857
('fullscreen_iframe', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('fullscreen_iframe', 'direct'), datapoints: 3454
('imgloading_iframe', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('imgloading_iframe', 'direct'), datapoints: 1957
('oac_iframe', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('oac_iframe', 'direct'), datapoints: 1248
('oac_iframe', 'sandbox')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('oac_iframe', 'sandbox'), datapoints: 1248
('oac_window.open', 'window.open')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('oac_window.open', 'window.open'), datapoints: 28676
('perfAPI_img', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('perfAPI_img', 'direct'), datapoints: 759
('referrer_iframe', 'iframe')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('referrer_iframe', 'iframe'), datapoints: 952
('referrer_iframe', 'window.open')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('referrer_iframe', 'window.open'), datapoints: 1033
('script_execution_iframe', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('script_execution_iframe', 'direct'), datapoints: 1728
('script_execution_iframe', 'sandbox')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('script_execution_iframe', 'sandbox'), datapoints: 183
('subresourceloadingCOEP_img', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('subresourceloadingCOEP_img', 'direct'), datapoints: 1381
('subresourceloadingCOEP_img', 'sandbox')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('subresourceloadingCOEP_img', 'sandbox'), datapoints: 72
('subresourceloadingCORP_img', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('subresourceloadingCORP_img', 'direct'), datapoints: 553
('subresourceloadingCORP_object', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('subresourceloadingCORP_object', 'direct'), datapoints: 1552
('upgradeHSTS_direct', 'direct')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('upgradeHSTS_direct', 'direct'), datapoints: 2450
('upgradeHSTS_subdomain', 'subdomain')
Connecting to H2O server at http://localhost:54321 ... successful.
Please download and install the latest version from: https://h2o-release.s3.amazonaws.com/h2o/latest_stable.html


0,1
H2O_cluster_uptime:,8 days 1 hour 30 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.44.0.2
H2O_cluster_version_age:,3 months and 22 days
H2O_cluster_name:,H2O_from_python_ubuntu_u7y7iq
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,99.9 Gb
H2O_cluster_total_cores:,160
H2O_cluster_allowed_cores:,50


Create tree: ('upgradeHSTS_subdomain', 'subdomain'), datapoints: 808


# Manual analysis

In [290]:
# Chromium-based treats host-source as case sensitive even though it should not? (URLs and CSP is case-insensitive!)
# Play around: https://observer.sectec.rocks/opg/iframe/?url=https://echo.sectec.rocks/echo/?content-security-policy=img-src%20HTTPS://echo.sectec.rocks&ecocnt_css=%3Cimg%20src=https://echo.sectec.rocks/%3E&content-type=text/html
# Issue is in the scheme match which should be case-insensitive but is in Chromium-based browsers https://w3c.github.io/webappsec-csp/#match-schemes
# host-part matching was fixed here https://issues.chromium.org/issues/41412675, seems like they forgot fixing scheme-sensitivity 
df.loc[df["raw_header"].str.contains("img-src HTTPS://")][["clean_url", "raw_header", "outcome_str", "browser"]].value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count
clean_url,raw_header,outcome_str,browser,Unnamed: 4_level_1
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,brave Ubuntu 22.04 v1.62.156 (121.0.6167.139) selenium headless-new,5
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,chrome Ubuntu 22.04 120 selenium headless-new,5
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,chrome Ubuntu 22.04 121 selenium headless-new,5
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,chrome Ubuntu 22.04 122 selenium headless-new,5
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,edge Ubuntu 22.04 121 selenium headless-new,5
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,firefox Ubuntu 22.04 121 selenium headless,5
http://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=http,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,firefox Ubuntu 22.04 122 selenium headless,5
https://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=https,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,brave Ubuntu 22.04 v1.62.156 (121.0.6167.139) selenium headless-new,5
https://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=https,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,chrome Ubuntu 22.04 120 selenium headless-new,5
https://sub.headers.websec.saarland/_hp/tests/subresource-loading-csp.sub.html?resp_type=parsing&browser_id=1&label=CSP-IMG&first_id=28274&last_id=28278&scheme=https,"[['content-security-policy', 'img-src HTTPS://SUB.HEADERS.WEBSEC.SAARLAND']]",error,chrome Ubuntu 22.04 121 selenium headless-new,5


In [60]:
bf = df.set_index(["test_id", "browser"])["outcome_str"].unstack().astype("str")

def com_browsers(df, b1, b2):
    bf["cr"] = bf[b1] != bf[b2]
    with pd.option_context("display.max_colwidth", None):
        display(bf.loc[bf["cr"]][[b1, b2]])

In [62]:
# Chromium 122 vs 121 vs 120 (desktop linux)
com_browsers(bf, "chrome Ubuntu 22.04 120 selenium headless-new", "chrome Ubuntu 22.04 121 selenium headless-new")
com_browsers(bf, "chrome Ubuntu 22.04 120 selenium headless-new", "chrome Ubuntu 22.04 122 selenium headless-new")
df.loc[df["response_id"] == 40058]["raw_header"].iloc[0]

browser,chrome Ubuntu 22.04 120 selenium headless-new,chrome Ubuntu 22.04 121 selenium headless-new
test_id,Unnamed: 1_level_1,Unnamed: 2_level_1
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_40058_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer:


browser,chrome Ubuntu 22.04 120 selenium headless-new,chrome Ubuntu 22.04 122 selenium headless-new
test_id,Unnamed: 1_level_1,Unnamed: 2_level_1
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_40058_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer:


"[['referrer-policy', 'no-referrer']]"

In [63]:
# Firefox 120 vs 121 (desktop linux)
com_browsers(bf, "firefox Ubuntu 22.04 121 selenium headless", "firefox Ubuntu 22.04 122 selenium headless")
df.loc[df["response_id"] == 65]["raw_header"].iloc[0]

browser,firefox Ubuntu 22.04 121 selenium headless,firefox Ubuntu 22.04 122 selenium headless
test_id,Unnamed: 1_level_1,Unnamed: 2_level_1
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_18975_parsing,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_18977_parsing,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_18978_parsing,"{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20261_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20262_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20263_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20264_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20265_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20266_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"
subresourceloadingCOEP_img_direct_https_sub.headers.websec.saarland_https_headers.webappsec.eu_20267_parsing,"{'image-events': {'swag.jpg': 'error', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}","{'image-events': {'swag.jpg': 'load', 'swag-same-site.jpg': 'error', 'swag-cross-site.jpg': 'load', 'swag-same-origin.jpg': 'error'}, 'window.crossOriginIsolated': False}"


IndexError: single positional indexer is out-of-bounds

In [64]:
# Brave 121 vs Chrome 121
with pd.option_context("display.max_rows", 72):
    com_browsers(bf, "brave Ubuntu 22.04 v1.62.156 (121.0.6167.139) selenium headless-new", "chrome Ubuntu 22.04 121 selenium headless-new")

browser,brave Ubuntu 22.04 v1.62.156 (121.0.6167.139) selenium headless-new,chrome Ubuntu 22.04 121 selenium headless-new
test_id,Unnamed: 1_level_1,Unnamed: 2_level_1
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39027_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39081_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39212_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39260_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39391_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39416_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39444_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39897_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39918_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url
referrer_iframe_iframe_https_sub.headers.websec.saarland_https_headers.webappsec.eu_39936_parsing,document.referrer: https://headers.webappsec.eu/,document.referrer: full_url


In [None]:
pivot_df = df.pivot_table(index='test_name', columns='browser', values='test_id', aggfunc='count', fill_value=0)
# Calculate the difference between the counts of the first and second columns
pivot_df['Diff'] = pivot_df.iloc[:, 0] - pivot_df.iloc[:, 1]
display(pivot_df)

In [212]:
df.loc[df["test_id"] == "referrer_iframe_window.open_http_sub.headers.websec.saarland_http_sub.headers.websec.saarland_196_basic"]["clean_url"].iloc[0]

'http://sub.headers.websec.saarland/_hp/tests/referrer-access-rp.sub.html?resp_type=basic&browser_id=1&label=RP&first_id=196&last_id=196&scheme=http'

# Test improved repeat mode

In [113]:
def create_test_id(row):
    return f'{row["test_name"]}_{row["relation_info"]}_{row["org_scheme"]}_{row["org_host"]}_{row["resp_scheme"]}_{row["resp_host"]}_{row["response_id"]}_{row["resp_type"]}'

df["browser_id"] = df["browser_id"].astype("category")
# Takes a while (500s+) (might be faster to already do it with postgres but not too important)
df["test_id"] = df.apply(create_test_id, axis=1)
df["test_id"] = df["test_id"].astype("category")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["browser_id"] = df["browser_id"].astype("category")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["test_id"] = df.apply(create_test_id, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["test_id"] = df["test_id"].astype("category")


In [144]:
# Luckily this is now instant
test_counts = df.groupby(["test_id"], observed=True)["browser_id"].value_counts()

In [156]:
tests_to_repeat = test_counts.loc[test_counts < 5].reset_index()
tests_to_repeat = tests_to_repeat.loc[tests_to_repeat["browser_id"] != 12]
tests_to_repeat.head()

Unnamed: 0,test_id,browser_id,count
4147,accesswindow_direct_direct_https_sub.headers.w...,46,3
4198,accesswindow_direct_direct_https_sub.headers.w...,46,3
4234,accesswindow_direct_direct_https_sub.headers.w...,46,3
4532,accesswindow_direct_direct_https_sub.headers.w...,46,3
4755,accesswindow_direct_direct_https_sub.headers.w...,46,3


In [157]:
tests_to_repeat[["browser_id", "count"]].value_counts()

browser_id  count
46          3        19879
            2            4
Name: count, dtype: int64

In [164]:
rep = tests_to_repeat.merge(df.drop_duplicates(subset=["test_id"]), on=["test_id"], how="left")

In [166]:
rep["full_url"].iloc[0]

'https://sub.headers.websec.saarland/_hp/tests/window-references-coop.sub.html?timeout=5&resp_type=parsing&browser_id=42&label=COOP&first_id=22971&last_id=22971&scheme=https'

In [32]:
tsv = pd.read_csv("browser_count.tsv", sep=" ")
tsv.sort_values(by=["browser_id", "count", "value_count"])

Unnamed: 0,browser_id,count,value_count
9,2,0,1281
3,2,1,133057
4,2,2,32561
6,2,3,1497
12,2,4,764
15,3,0,268
24,3,1,24
19,3,2,126
14,3,3,360
11,3,4,863
