In [1]:
%load_ext autoreload
%autoreload 2

In [13]:
import pandas as pd
import re
import json
import h2o

from datetime import datetime

from utils import get_data, Config, clean_url, make_clickable, add_columns

from tree import make_tree

In [5]:
# Load all data (except unknown/manual testing).
# The query returns every column of "Result" plus selected columns of the
# "Response" that was served and the "Browser" that executed the test; rows
# whose browser name is 'Unknown' are excluded by the WHERE clause.
initial_data = """
SELECT "Result".*, 
"Response".raw_header, "Response".status_code, "Response".label, "Response".resp_type,
"Browser".name, "Browser".version, "Browser".headless_mode, "Browser".os, "Browser".automation_mode, "Browser".add_info
FROM "Result"
JOIN "Response" ON "Result".response_id = "Response".id JOIN "Browser" ON "Result".browser_id = "Browser".id
WHERE "Browser".name != 'Unknown';
"""
# Execute the query through the project helper `get_data` with a `Config()`
# built without arguments (the cell output shows a PostgreSQL connection).
df = get_data(Config(), initial_data)
# `add_columns` is a project helper from `utils`; presumably it derives the
# extra columns used by the cells below (e.g. `browser`, `clean_url`) -- TODO confirm.
df = add_columns(df)

Connecting to the PostgreSQL database...
Connection successful


## Overview

In [16]:
# Number of result rows per browser configuration, largest first.
# In total 3 runs + a couple of "repeat"-mode runs.
# Lots of failures in xvfb mode!
browser_config_columns = ["automation_mode", "browser_id", "name", "version", "os", "headless_mode"]
rows_per_browser = df.groupby(browser_config_columns)["id"].count()
rows_per_browser.sort_values(ascending=False).to_frame().reset_index()

Unnamed: 0,automation_mode,browser_id,name,version,os,headless_mode,id
0,selenium,14,firefox,119,Ubuntu 22.04,headless,506810
1,selenium,13,chrome,119,Ubuntu 22.04,headless-new,506394
2,selenium,15,edge,119,Ubuntu 22.04,headless-new,506394
3,selenium,17,brave,1.60.114 (119.0.6045.124),Ubuntu 22.04,headless-new,506357
4,selenium,20,edge,119,Ubuntu 22.04,xvfb,438937
5,selenium,19,firefox,119,Ubuntu 22.04,xvfb,412951
6,selenium,18,chrome,119,Ubuntu 22.04,xvfb,403415
7,selenium,22,brave,1.60.114 (119.0.6045.124),Ubuntu 22.04,xvfb,334811


## Error analysis

In [114]:
# Test status codes seen below: Fail (1), Timeout (2), Not-run (3)

# Restrict to browsers that produced at least two distinct test statuses and
# show how often each status occurred per browser.
status_by_browser = df.groupby(["browser"])["test_status"]
filtered_df = status_by_browser.filter(lambda statuses: statuses.nunique() >= 2)
in_mixed_browser = df.index.isin(filtered_df.index)
mixed_status_rows = df[["browser", "test_status"]].loc[in_mixed_browser]
display(mixed_status_rows.groupby(["browser"])["test_status"].value_counts().to_frame())

# The same counts over all rows, keyed by test status first.
df.groupby("test_status")["browser"].value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,count
browser,test_status,Unnamed: 2_level_1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,0,506068
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,1,288
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,2,1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,0,334679
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,1,132
chrome Ubuntu 22.04 119 selenium headless-new,0,506105
chrome Ubuntu 22.04 119 selenium headless-new,1,288
chrome Ubuntu 22.04 119 selenium headless-new,2,1
chrome Ubuntu 22.04 119 selenium xvfb,0,403225
chrome Ubuntu 22.04 119 selenium xvfb,1,188


Unnamed: 0_level_0,Unnamed: 1_level_0,count
test_status,browser,Unnamed: 2_level_1
0,chrome Ubuntu 22.04 119 selenium headless-new,506105
0,edge Ubuntu 22.04 119 selenium headless-new,506104
0,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,506068
0,firefox Ubuntu 22.04 119 selenium headless,505662
0,edge Ubuntu 22.04 119 selenium xvfb,438669
0,firefox Ubuntu 22.04 119 selenium xvfb,411975
0,chrome Ubuntu 22.04 119 selenium xvfb,403225
0,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,334679
1,brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,288
1,chrome Ubuntu 22.04 119 selenium headless-new,288


In [113]:
# Test status == 1 (fail)


def _decode_raw_headers(raw_headers):
    """Return the stored header blobs as strings of their decoded JSON value.

    Each element is converted with ``bytes(...)``, decoded as UTF-8, parsed
    with ``json.loads`` and finally cast to ``str`` so the values are hashable
    and can be collected into sets.
    """
    return raw_headers.apply(lambda raw: json.loads(bytes(raw).decode("utf-8"))).astype(str)


def _browsers_by_value_set(rows, column, transform=None, report_sizes=False):
    """Group browsers by the exact set of distinct values they have in ``column``.

    Args:
        rows: DataFrame with a ``browser`` column and the column named by ``column``.
        column: Name of the column whose distinct values are compared.
        transform: Optional callable applied to the per-browser Series before
            the distinct values are taken.
        report_sizes: If true, print each browser and its number of distinct values.

    Returns:
        dict mapping ``frozenset`` of distinct values -> ``set`` of browsers
        that share exactly this set.
    """
    browsers_by_values = {}
    for browser, browser_rows in rows.groupby("browser"):
        values = browser_rows[column]
        if transform is not None:
            values = transform(values)
        value_set = frozenset(values.unique())
        if report_sizes:
            print(browser, len(value_set))
        # The dict keys already record which sets were seen; no separate bookkeeping needed.
        browsers_by_values.setdefault(value_set, set()).add(browser)
    return browsers_by_values


# Select the failing rows once; every step below works on this subset.
status_fail_df = df.loc[df["test_status"] == 1]

# All browsers have it
display(status_fail_df["browser"].value_counts().to_frame())

# Only for upgradeHSTS_direct and upgradeHSTS_subdomain, they always result in test status 1 in the same browser!
display(
    df.loc[df["full_url"].isin(status_fail_df["full_url"])]
    .groupby(["test_name", "response_id"])["test_status"]
    .value_counts()
    .to_frame()
    .head()
)

# The responses that cause it all have \x00 in them!
offending_headers = _browsers_by_value_set(status_fail_df, "raw_header", transform=_decode_raw_headers)
for key, value in offending_headers.items():
    print(f"{value}:\n{key}\n\n")

# The offending URLs (without timeout is from the repeat run)
# Firefox has none without timeout as these tests failed for all browsers, thus create_repeat.py did not create repeat runs for Firefox!
offending_urls = _browsers_by_value_set(status_fail_df, "clean_url", report_sizes=True)
for key, value in offending_urls.items():
    print(f"{value}:\n{key}\n\n")

# 9 URLs failed in all browsers, 9 only failed in Chromium-based browsers (18x in clean_url as we have one for repeat and one without)
# Firefox only fails if \x00 is in the value, if it is in the header names it does not care?!
# (manual test: Safari seems to fail for HTTP only, does not fail if the response is received via HTTPS?, in addition Safari also only cares about \x00 in values and not in header names)

# TODO: change the test to properly catch such issues!

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
chrome Ubuntu 22.04 119 selenium headless-new,288
edge Ubuntu 22.04 119 selenium headless-new,288
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,288
edge Ubuntu 22.04 119 selenium xvfb,264
chrome Ubuntu 22.04 119 selenium xvfb,188
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb,132
firefox Ubuntu 22.04 119 selenium headless,108
firefox Ubuntu 22.04 119 selenium xvfb,72


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
test_name,response_id,test_status,Unnamed: 3_level_1
upgradeHSTS_direct,29713,1,52
upgradeHSTS_direct,29745,1,52
upgradeHSTS_direct,29816,1,54
upgradeHSTS_direct,30133,1,52
upgradeHSTS_direct,30165,1,52


{'edge Ubuntu 22.04 119 selenium xvfb', 'chrome Ubuntu 22.04 119 selenium headless-new', 'brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium xvfb', 'brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new', 'edge Ubuntu 22.04 119 selenium headless-new'}:
frozenset({"[['strict-trans\\x00port-security', 'max-age=60']]", "[['strict-transport-security', '\\x00max-age=60']]", "[['strict-transport-security', 'max-age=20; in\\x00cludeSubDomains']]", "[['strict-trans\\x00port-security', 'max-age=20; includeSubDomains']]", "[['\\x00strict-transport-security', 'max-age=20; includeSubDomains']]", "[['strict-transport-security', 'max-age=20; includeSubDomains\\x00']]", "[['strict-transport-security', 'max-age=0\\x00']]", "[['strict-transport-security', 'max-\\x00age=0']]", "[['\\x00strict-transport-security', 'max-age=0']]", "[['strict-transport-security', '\\x00max-age=0']]", "[['strict-transport-security\\x00', 'max-age=0']]", "[['strict-transport-security', '\\x00max-age=20; inclu

In [120]:
# Test status == 2 (timeout)
is_timeout = df["test_status"] == 2

# Firefox a lot, others rarely
display(df.loc[is_timeout, "browser"].value_counts().to_frame())

# fetch_GET|TEST always time out, others only sometimes (accidental timeout in other browsers!), upgradeHSTS (see below with test status == 3)
timeout_urls = df.loc[is_timeout, "full_url"]
rows_for_timeout_urls = df.loc[df["full_url"].isin(timeout_urls)]
display(rows_for_timeout_urls.groupby(["test_name"])["test_status"].value_counts().to_frame())

# The responses (for fetch_GET|TEST) that cause it all have \n in them?
fetch_timeouts = df.loc[is_timeout & df["test_name"].isin(["fetch_GET", "fetch_TEST"])]
display(
    fetch_timeouts["raw_header"]
    .apply(bytes)
    .apply(lambda raw: json.loads(raw.decode("utf-8")))
    .astype(str)
    .unique()
)

# The offending URLs (without timeout is from the repeat run)
display(fetch_timeouts["clean_url"].unique())


# Firefox times out if \n in header?
# TODO: add a custom timeout to record it as a normal result?!
# (Safari only times out for some of the tests?!)

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
firefox Ubuntu 22.04 119 selenium headless,860
firefox Ubuntu 22.04 119 selenium xvfb,760
edge Ubuntu 22.04 119 selenium xvfb,4
edge Ubuntu 22.04 119 selenium headless-new,2
chrome Ubuntu 22.04 119 selenium xvfb,2
chrome Ubuntu 22.04 119 selenium headless-new,1
brave Ubuntu 22.04 1.60.114 (119.0.6045.124) selenium headless-new,1


Unnamed: 0_level_0,Unnamed: 1_level_0,count
test_name,test_status,Unnamed: 2_level_1
fetch_GET,2,1134
fetch_TEST,2,378
framing_iframe,2,2
framing_iframe,0,2
fullscreen_iframe,0,23
fullscreen_iframe,2,5
oac_window.open,0,2
oac_window.open,2,1
perfAPI_img,0,2
perfAPI_img,2,2


array(["[['access-control-allow-credentials', 'true\\n'], ['Test', 'Test'], ['access-control-allow-origin', 'https://sub.headers.websec.saarland'], ['access-control-allow-methods', 'TEST'], ['access-control-allow-headers', 'Test'], ['access-control-expose-headers', 'Test']]",
       "[['\\naccess-control-allow-origin', '*'], ['Test', 'Test'], ['access-control-allow-credentials', 'true'], ['access-control-allow-methods', 'TEST'], ['access-control-allow-headers', 'Test'], ['access-control-expose-headers', 'Test']]",
       "[['access-control-allow-origin', 'https://sub.headers.websec.saarland\\n'], ['Test', 'Test'], ['access-control-allow-credentials', 'true'], ['access-control-allow-methods', 'TEST'], ['access-control-allow-headers', 'Test'], ['access-control-expose-headers', 'Test']]",
       "[['\\naccess-control-expose-headers', 'Test'], ['Test', 'Test'], ['access-control-allow-origin', 'https://sub.headers.websec.saarland'], ['access-control-allow-credentials', 'true'], ['access-con

array(['http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=CORS-ACAC&first_id=10759&last_id=10759&scheme=http',
       'https://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=CORS-ACAC&first_id=10759&last_id=10759&scheme=https',
       'http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAC&first_id=10759&last_id=10759&scheme=http',
       'https://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?resp_type=parsing&browser_id=1&label=CORS-ACAC&first_id=10759&last_id=10759&scheme=https',
       'http://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=CORS-ACAO&first_id=8028&last_id=8028&scheme=http',
       'https://sub.headers.websec.saarland/_hp/tests/fetch-cors.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=CORS-ACAO&first_id=9689&last_id=

In [18]:
# Timeouts per browser / OS / test, most frequent first.
timeouts_per_test = (
    df.loc[df["test_status"] == 2]
    .groupby(["browser", "os", "test_name"])["id"]
    .count()
    .to_frame()
    .sort_values(by="id", ascending=False)
)
display(timeouts_per_test)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,id
browser,os,test_name,Unnamed: 3_level_1
firefox Ubuntu 22.04 119 selenium headless,Ubuntu 22.04,fetch_GET,600
firefox Ubuntu 22.04 119 selenium xvfb,Ubuntu 22.04,fetch_GET,534
firefox Ubuntu 22.04 119 selenium headless,Ubuntu 22.04,fetch_TEST,200
firefox Ubuntu 22.04 119 selenium xvfb,Ubuntu 22.04,fetch_TEST,178
firefox Ubuntu 22.04 119 selenium headless,Ubuntu 22.04,upgradeHSTS_direct,60
firefox Ubuntu 22.04 119 selenium xvfb,Ubuntu 22.04,upgradeHSTS_direct,48
chrome Ubuntu 22.04 119 selenium xvfb,Ubuntu 22.04,perfAPI_img,2
edge Ubuntu 22.04 119 selenium headless-new,Ubuntu 22.04,fullscreen_iframe,2
edge Ubuntu 22.04 119 selenium xvfb,Ubuntu 22.04,framing_iframe,2
chrome Ubuntu 22.04 119 selenium headless-new,Ubuntu 22.04,fullscreen_iframe,1


In [83]:
# Test status == 3 (not run)
not_run_rows = df.loc[df["test_status"] == 3]

# Only Firefox
display(not_run_rows["browser"].value_counts().to_frame())

# Only for upgradeHSTS_direct and upgradeHSTS_subdomain; the latter is always not run, the former is sometimes 2 (timeout)
rows_for_not_run_urls = df.loc[df["full_url"].isin(not_run_rows["full_url"])]
display(
    rows_for_not_run_urls
    .groupby(["test_name", "response_id"])["test_status"]
    .value_counts()
    .to_frame()
)

# The responses that cause it all have \n in them?
display(
    not_run_rows["raw_header"]
    .apply(bytes)
    .apply(lambda raw: json.loads(raw.decode("utf-8")))
    .astype(str)
    .unique()
)

# The offending URLs (without timeout is from the repeat run)
display(not_run_rows["clean_url"].unique())

# Reason: if there is a newline in the response, the fetch call will time out in Firefox.
# As we implemented the test cases as promise tests, only the first test executes and all the others will always time out.
# Chromium-based browsers do not time out and instead parse the fetch until the newline only and stop there. (same for Safari, Safari has a loading icon in devtools however)
# TODO: maybe add an explicit timeout to also run the other tests? However, the results will be the same, the other requests will also time out!

Unnamed: 0_level_0,count
browser,Unnamed: 1_level_1
firefox Ubuntu 22.04 119 selenium headless,180
firefox Ubuntu 22.04 119 selenium xvfb,144


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count
test_name,response_id,test_status,Unnamed: 3_level_1
upgradeHSTS_direct,29885,3,18
upgradeHSTS_direct,29885,2,18
upgradeHSTS_direct,30305,2,18
upgradeHSTS_direct,30305,3,18
upgradeHSTS_direct,30725,3,18
upgradeHSTS_direct,30725,2,18
upgradeHSTS_direct,31838,3,18
upgradeHSTS_direct,31838,2,18
upgradeHSTS_direct,32324,3,18
upgradeHSTS_direct,32324,2,18


array(["[['\\nstrict-transport-security', 'max-age=60']]",
       "[['\\nstrict-transport-security', 'max-age=0']]",
       "[['\\nstrict-transport-security', 'max-age=20; includeSubDomains']]",
       "[['strict-transport-security', 'max-age=0\\n']]",
       "[['strict-transport-security', 'max-age=60\\n']]",
       "[['strict-transport-security', 'max-age=20; includeSubDomains\\n']]"],
      dtype=object)

array(['http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=HSTS&first_id=30305&last_id=30305&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=30305&last_id=30305&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=HSTS&first_id=29885&last_id=29885&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=29885&last_id=29885&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?timeout=5&resp_type=parsing&browser_id=1&label=HSTS&first_id=30725&last_id=30725&scheme=http',
       'http://sub.headers.websec.saarland/_hp/tests/upgrade-hsts.sub.html?resp_type=parsing&browser_id=1&label=HSTS&first_id=30725&last_id=30725&scheme=http',
       'ht