In [1]:
import random
import re
import httpx
import glob
import os
from itertools import permutations, product

In [2]:
def randomize_casing(input_string, seed=42):
    """Randomize the casing of a string (fixed with a seed)."""
    random.seed(seed)
    return ''.join(random.choice([c.upper(), c.lower()]) for c in input_string)

def insert_char_middle(input_string, char):
    """Insert a char in the middle of a string."""
    middle_index = len(input_string) // 2
    return input_string[:middle_index] + char + input_string[middle_index:]


In [3]:
# All the mutations we apply on strings such as header names and header values
# Some of them may have no effect and result in duplicates; however such duplicates are not saved in the db

all_upper = lambda x: x.upper()
all_lower = lambda x: x.lower()
random_case = lambda x: randomize_casing(x)
leadtrail_space = lambda x: " " + x + " "
in_double_quotes = lambda x: '"' + x + '"'
in_single_quotes = lambda x: "'" + x + "'"
remove_whitespace = lambda x: x.replace(" ", "")
double_spaces = lambda x: x.replace(" ", "  ")
space_to_tab = lambda x: x.replace(" ", "\t")

lead_seqs = []
trail_seqs = []
middle_seqs = []

# All all ASCII chars as leading, trailing, and middle char
ascii_chars = [char for char in map(chr, range(128))]
# Alternatively use less ones? All/Some control chars? + space, comma, ....
# ascii_chars = [char for char in map(chr, range(32))] + [" ", ",", ";", ":"]
for seq in ascii_chars:
    lead_seqs.append(lambda x, s=seq: s + x)
    trail_seqs.append(lambda x, s=seq: x + s)
    middle_seqs.append(lambda x, s=seq: insert_char_middle(x, s))

# Add other interesting leading, trailing, middle chars: Double space, non-breaking space, full-width comma
# TODO: add more?
other_chars = ["  ", "\u00A0a", "\uFF0C"]
for seq in other_chars:
    lead_seqs.append(lambda x, s=seq: s + x)
    trail_seqs.append(lambda x, s=seq: x + s)
    middle_seqs.append(lambda x, s=seq: insert_char_middle(x, s))

# Replace some chars with others
replace_chars = [
    "", " ", ";", ",", ":", "=", "-", "_",
    "'", '"', "`", "´",
    '\u2018',  # Left Single Quotation Mark
    '\u2019',  # Right Single Quotation Mark
    '\u201A',  # Single Low-9 Quotation Mark
    '\u201B',  # Single High-Reversed-9 Quotation Mark
    '\u201C',  # Left Double Quotation Mark
    '\u201D',  # Right Double Quotation Mark
    '\u201E',  # Double Low-9 Quotation Mark
    '\u201F',  # Double High-Reversed-9 Quotation Mark
]
chars_to_replace = [";", ",", ":", "=", "'", '"', "-", "_"]
replace_funcs = []
for char in chars_to_replace:
    for rp in replace_chars:
        if rp == char:
            continue
        replace_funcs.append(lambda x, c1=char, c2=rp: x.replace(c1, c2))



general_mutations = [
    all_upper,
    all_lower,
    random_case,
    leadtrail_space,
    in_double_quotes,
    in_single_quotes,
    remove_whitespace,
    double_spaces,
    space_to_tab,
    *lead_seqs,
    *trail_seqs,
    *middle_seqs,
    *replace_funcs,
]


def mutate_header_name(header_name):
    header_names = set()
    for mutation in general_mutations:
        header_names.add(mutation(header_name))
    return header_names

def mutate_header_value(header_value):
    header_values = set()
    for mutation in general_mutations:
        header_values.add(mutation(header_value))
    return header_values

In [4]:
l = mutate_header_value("D EN'Y")
print(l)
print(list(l)[0])


{"D E~N'Y", "D ERN'Y", "<D EN'Y", "D E\\N'Y", "D EN'Y|", "D EN'Y\x0c", 'D EN’Y', "D E\x1bN'Y", "D E\x13N'Y", " D EN'Y ", ">D EN'Y", "D EWN'Y", "D EkN'Y", "sD EN'Y", "xD EN'Y", "D E  N'Y", "OD EN'Y", "D ErN'Y", "aD EN'Y", "D E\x0cN'Y", "D EUN'Y", "d en'y", "\x7fD EN'Y", "D EVN'Y", "2D EN'Y", "D E\x12N'Y", "5D EN'Y", "\x08D EN'Y", "D EN'YK", 'D EN‘Y', "D\tEN'Y", "D EN'Y\x1c", "KD EN'Y", "\x12D EN'Y", "~D EN'Y", "D E\x1cN'Y", "D ESN'Y", "D EiN'Y", "!D EN'Y", "\x10D EN'Y", 'D EN”Y', "VD EN'Y", "\x0bD EN'Y", "，D EN'Y", "D E)N'Y", "/D EN'Y", "lD EN'Y", "D EN'Y\x11", "QD EN'Y", "D EN'Y\x15", "D E>N'Y", "D ENN'Y", "D E\x04N'Y", "D EN'Y\x01", "D E\x15N'Y", 'D EN‚Y', "D E5N'Y", "\x15D EN'Y", "D EaN'Y", "D EN'Y\x19", "D EN'Y\x0f", "\x17D EN'Y", "D EN'Yr", "D EN'Yx", "D EDN'Y", "D EN'Y\x05", "D E\x10N'Y", "'D EN'Y", "D EhN'Y", "D EN'Y\x14", "D E#N'Y", "D EN'Yh", "D EN'Y[", "D E'N'Y", "D EN'YJ", "D E=N'Y", "yD EN'Y", '"D EN\'Y', "-D EN'Y", "D EcN'Y", "D EN'Yi", "ZD EN'Y", "D EQN'Y", "D EyN'Y", "D E

In [5]:
class HeaderTests:
    def __init__(self, label: str, header_name: str, alt_names: list[str], block_values: list[str], allow_values: list[str], partial_values: list[str], legacy_values: list[str], other_values: list[str]):
        """"HeaderTests class to create lots of responses
        label (str): Additional information about these responses (e.g., XFO)
        header_name (str): The correct lower-case name of the header
        alt_names (list[str]): Legacy and other (invalid) alternative header names
        block_values (list[str]): Valid values to set the header to "blocking" (e.g., DENY for XFO means always disallow framing)
        allow_values (list[str]): Valid values to set the header to "allowing" (e.g., unsafe-none  for COOP means do not activate COOP/always allow)
        partial_values (list[str]): Valid values to set the header to an intermediate mode (e.g., SAMEORIGIN for XFO means allow framing only for same-origin)
        legacy_values (list[str]): Legacy values that should not work anymore
        other_values (list[str]): Other values we want to test as well (can include valid ones if we do not want to put too many in the other categories)
        """
        self.label = label
        self.header_name = header_name
        self.alt_names = alt_names
        self.block_values = block_values
        self.allow_values = allow_values
        self.partial_values = partial_values
        self.legacy_values = legacy_values
        self.other_values = other_values
        self.responses = []

    def create_response(self, header, label, status_code=200, resp_type="parsing"):
        self.responses.append((header, label, status_code, resp_type))

    def save_responses(self):
        #with Session() as session:
        #for header in header_list:
         #r, created = get_or_create(session, Response, raw_header=header, status_code=status_code, label=label, resp_type=resp_type)
         #if created:
            #print(r)
        print(len(self.responses))
        print(self.responses)

    def header_name_tests(self):
        """Test all block, allow, partial values (correct values)
            with the correct header names, with all alternative/legacy header names, and with mutated versions of the correct header_name
        """
        for value_group in [self.block_values, self.allow_values, self.partial_values]:
            for header_value in value_group:
                # Original header name
                self.create_response([(self.header_name, header_value)], self.label)
                # Alt header names
                for header_name in self.alt_names:
                    self.create_response([(header_name, header_value)], self.label)
                # Mutated header names
                for header_name in mutate_header_name(self.header_name):
                    self.create_response([(header_name, header_value)], self.label)

    def parsing_tests(self):
        """Test all header values + mutated versions.
        """
        # Test all legacy and other values (block, allow, partial do not have to be tested as they are already tests by header_name_tests)
        for value_group in [self.legacy_values, self.other_values]:
            for header_value in value_group:
                self.create_response([(self.header_name, header_value)], self.label)
        
        # Mutate/change header values (block, allow, partial)
        for value_group in [self.block_values, self.allow_values, self.partial_values]:
            for org_header_value in value_group:
                # Other status codes
                for code in [201, 203, 204, 300, 302, 400, 403, 404, 418, 500]:
                    if 300 <= code < 400:
                        self.create_response([(self.header_name, org_header_value), redirect_empty], self.label, status_code=code)
                    else:
                        self.create_response([(self.header_name, org_header_value)], self.label, status_code=code)
                # Mutated header values
                for header_value in mutate_header_value(org_header_value):
                    self.create_response([(self.header_name, header_value)], self.label)

    def mult_headers_tests(self):
        """Test involving multiple headers/values
        """
        all_valid_values = self.block_values + self.allow_values + self.partial_values
        all_orders = list(permutations(all_valid_values))
        # Basic1: all legal values in a list in all possible orders (Comma, space, semicolon-separated)
        for order in all_orders:
            self.create_response([(self.header_name, ", ".join(order))], self.label)
            self.create_response([(self.header_name, "; ".join(order))], self.label)
            self.create_response([(self.header_name, " ".join(order))], self.label)
        
        # Basic2: all legal values in separate headers in all possible orders
        for order in all_orders:
            headers = [(self.header_name, header_value) for header_value in order]
            self.create_response(headers, self.label)
        # Basic3: all legal values in both separate headers and in one header with comma?!
        # Only if at least 3 values; split in first and all others and last and all others
        for order in all_orders:
            if len(order) >= 3:
                first, rest1 = order[0], ", ".join(order[1:])
                rest2, last = ", ".join(order[:-1]), order[-1]
                self.create_response([(self.header_name, first), (self.header_name, rest1)], self.label)
                self.create_response([(self.header_name, rest2), (self.header_name, last)], self.label)

        # Basic4: all legal values duplicated
        for value in all_valid_values:
            self.create_response([(self.header_name, value), (self.header_name, value)], self.label)
            self.create_response([(self.header_name, f"{value}, {value}")], self.label)
            
            # Could be extended with mutated versions once, e.g., X-Frame-Options: ALLOWALL, allowall;
            # Browsers should first lowercase and then put each value in a set https://html.spec.whatwg.org/multipage/document-lifecycle.html#the-x-frame-options-header
            # Which means no blocking should occur, if they forget the lowercasing part, the set size would be two and it would be blocked
            # Other extensions possible as well
            self.create_response([(self.header_name, value), (self.header_name, value.lower())], self.label)
            self.create_response([(self.header_name, value), (self.header_name, value.upper())], self.label)
        
        # Advanced1: use different header names (e.g., if a browser accepts both x-frame-options and X-FRAME-OPTIONS which takes precedence?; might be none if the browser first lower-cases or something like that)
        # Currently only either uppercase the first header or all except the first header (other mutations and header duplication strategies could be added in the future)
        # Could be extended with clearly invalid headers (e.g., leading or trailing space, ...)
        for order in all_orders:
            for (header1, header2) in [(self.header_name, self.header_name.upper()), (self.header_name.upper(), self.header_name)]:
                headers = [(self.header_name, header_value) for header_value in order]
                headers = []
                for i, header_value in enumerate(order):
                    if i == 0:
                        headers.append((header1, header_value))
                    else:
                        headers.append((header2, header_value))
                self.create_response(headers, self.label)
    
        # Advanced2: use invalid values as well (e.g., a browser might always take the first header while another browser takes the first valid header?)
        for valid_value in all_valid_values:
            # Only use the first two invalid values (should be empty and a clearly invalid value ("INVALID")
            # Could be extended with more complex approaches
            for invalid_value in self.other_values[:2]:
                self.create_response([(self.header_name, valid_value), (self.header_name, invalid_value)], self.label)
                self.create_response([(self.header_name, invalid_value), (self.header_name, valid_value)], self.label)
                self.create_response([(self.header_name, f"{valid_value}, {invalid_value}")], self.label)
                self.create_response([(self.header_name, f"{invalid_value}, {valid_value}")], self.label)

        # TODO special: XFO and CSP-FA

                
    def create_all_tests(self):
        self.header_name_tests()
        self.parsing_tests()
        self.mult_headers_tests()
        self.save_responses()

In [52]:
redirect_empty = ("location", "https://sub.headers.websec.saarland/_hp/common/empty.html")
site = "sub.headers.websec.saarland"
origin_s = "https://sub.headers.websec.saarland"
origin_s_upper = "HTTPS://SUB.HEADERS.WEBSEC.SAARLAND"
origin_s_path = f"{origin_s}/abc/"
origin_s_query = f"{origin_s}/?a=a"
origin = "http://sub.headers.websec.saarland"
origin_sp = f"{origin_s}:443"
home = f"{origin_s}/"
home_p = f"{origin_sp}/"
parent = "https://headers.websec.saarland"
child = "https://sub.sub.headers.websec.saarland"
parent_childs = "*.headers.websec.saarland"
self_childs = "*.sub.headers.websec.saarland"
self_childs_https = "https://*.sub.headers.websec.saarland"
cross_site_origin = "https://headers.webappsec.eu"
all_replacements = [site, origin_s, origin_s_upper, origin_s_path, origin_s_query, origin, origin_sp, home, home_p, parent, child, parent_childs, self_childs, self_childs_https, cross_site_origin]
URL_REP = "<!URL!>"

In [35]:
def expand_urls(other_values):
    """Use different URL, origins, sites variations if the value should allow some sites"""
    return_values = []
    for value in other_values:
        if not URL_REP in value:
            return_values.append(value)
        else:
            # TODO: use combinations if more than one URL in value!
            # Only if less than 2 occurrences? Else chose a random value for each?
            for url_like in all_replacements:
                return_values.append(value.replace(URL_REP, url_like))
    return return_values

In [8]:
label = ""
header_name = ""
alt_names = []
block_values = []
allow_values = []
partial_values = []
legacy_values = []
# Always start with the empty value and then an INVALID value (e.g., "INVALID"), after that both valid and invalid values can be added
# We use the first two in `mult_headers_test`
other_values = ["", "INVALID"]
other_values = expand_urls(other_values)

In [9]:
def get_values(url, pattern):
    content = httpx.get(url).text
    content = content.replace("TESTURI", URL_REP)
    content = content.replace("http://randomorigin.com/", URL_REP)
    content = content.replace("http://randomorigin.com", URL_REP)
    content = content.replace("http://much.ninja", URL_REP)
    content = content.replace("http://random.ninja", URL_REP)
    content = content.replace("https://much.ninja", URL_REP)
    content = content.replace("http://*.ninja", URL_REP)
    content = content.replace("*.ninja", URL_REP)
    content = content.replace("much.ninja", URL_REP)
    #content = re.sub(r"https?://(.*/|.*\.com|.*\.ninja)", URL_REP, content)
    matches = list(set(re.findall(pattern, content)))
    return matches

siewert_xfo = get_values("https://raw.githubusercontent.com/hen95/HTTPHeaderBrowserTesting/main/transform_to_testcase.py", r"\'X-Frame-Options: (.*?)\'")
print(siewert_xfo)
print()
siewert_csp = get_values("https://raw.githubusercontent.com/hen95/HTTPHeaderBrowserTesting/main/transform_to_testcase.py", r"\'Content-Security-Policy: (.*?)\'")
print(siewert_csp)

['sameorigin, sameorigin', 'sameorigin; sameorigin', 'sameorigin, allow-from <!URL!>', 'allow-from <!URL!>; deny', 'allow-from <!URL!>; allow-from <!URL!>', 'sameorigin', 'deny, sameorigin', 'sameorigin, deny', 'allowall', 'deny; sameorigin', 'deny; deny', 'allow-from <!URL!>, sameorigin', 'RANDOMDIRECTIVE', 'allow-from <!URL!>', 'allow-from <!URL!>, allow-from <!URL!>', 'allow-from <!URL!>, deny', 'deny; allow-from <!URL!>', 'sameorigin; deny', 'deny, allow-from <!URL!>', 'deny', 'deny, deny']

['frame-ancestors https:;', 'frame-ancestors <!URL!>;', 'frame-ancestors <!URL!> <!URL!>;', 'frame-ancestors http:;']


In [10]:
def get_wpt_values(dir_path, pattern=r'headerValue: `(.*)`|headerValue2: `(.*)`'):
    # Initialize a list to store matching strings
    values = set()
    # Use glob to find all files with the specified extension recursively
    file_paths = glob.glob(os.path.join(dir_path, '*.html'), recursive=False)
    # Iterate through the list of file paths
    for file_path in file_paths:
        with open(file_path, 'r') as f:
            content = f.read()
            content = content.replace("https://example.com/", URL_REP)
            matches = re.findall(pattern, content)
            for match1, match2 in matches:
                values.add(match1)
                values.add(match2)
    return list(values)

wpt_xfo = get_wpt_values('../../../x-frame-options/')
wpt_xfo

['',
 'SAMEORIGIN',
 '"DENY"',
 'sameOriGin',
 'ALLOW-FROM <!URL!>',
 'sameOrigin',
 'ALLOW-FROM=<!URL!>',
 ',SAMEORIGIN,,DENY,',
 '  DENY ',
 '  SAMEORIGIN,    DENY',
 '"SAMEORIGIN"',
 'ALLOWALL',
 'INVALID',
 '  SAMEORIGIN ',
 'denY',
 'allowAll',
 '"SAMEORIGIN,DENY"',
 'DENY']

In [69]:
def limit_url_occurrences(input_string, max_occurrences):
    # Define the pattern to match <!URL!>
    url_pattern = re.compile(rf'{URL_REP}(?:(\s|,)*(ALLOW-FROM )?(allow-from )?{URL_REP})+')
    #url_pattern = re.compile(rf'{URL_REP}(?:(\s|,)*{URL_REP})+')

    # Find all consecutive occurrences of <!URL!>
    consecutive_url_matches = url_pattern.findall(input_string)

    # Calculate the number of replacements needed
    replacements_needed = max(0, len(consecutive_url_matches) - max_occurrences)

    # Replace excess consecutive occurrences with <!URL!>
    replaced_string = url_pattern.sub('', input_string, count=replacements_needed)

    return replaced_string

def cut_after_n_spaces(input_string, n):
    # Find the index of the fourth space
    fourth_space_index = -1
    for i in range(n):
        fourth_space_index = input_string.find(' ', fourth_space_index + 1)
        if fourth_space_index == -1:
            # Less than four spaces, no need to cut
            return input_string

    # Cut off everything after the fourth space
    result = input_string[:fourth_space_index]

    return result

def get_crawler_values(url):
    content = httpx.get(url).text
    content = re.sub(r"(http(s)?|HTTP(S)?)://[\w.*/\-:?=]*|([\w*\-/]+\.)+[\w*\-:/?=]+", URL_REP, content)
    return list(set([limit_url_occurrences(row.rsplit(" ", maxsplit=1)[0], 4) for row in content.split("\r\n")[1:]]))

crawler_ninja_xfo = get_crawler_values("https://crawler.ninja/files/xfo-values.txt")
print(len(crawler_ninja_xfo))

290


In [74]:
label = "XFO"
header_name = "x-frame-options"
alt_names = ["frame-options", "x-frame-option", "x-frames-options", "content-security-policy", "x_frame_options", "xframeoptions"]
block_values = ["DENY"]
allow_values = ["ALLOWALL"] # This value does not really exist but has some special meaning for processing multiple values (https://html.spec.whatwg.org/multipage/document-lifecycle.html#the-x-frame-options-header)
partial_values = ["SAMEORIGIN"]
legacy_values = [f"ALLOW-FROM {origin_s}"]

# Always start with the empty value and then an INVALID value (e.g., "INVALID"), after that both valid and invalid values can be added
# We use the first two in `mult_headers_test`
basic_values = ["", "INVALID", "null", "*", URL_REP]
# https://wpt.fyi/results/x-frame-options?label=master&label=experimental&aligned&q=x-frame
wpt_values = get_wpt_values('../../../x-frame-options/')
# https://github.com/hen95/HTTPHeaderBrowserTesting
siewert_values = get_values("https://raw.githubusercontent.com/hen95/HTTPHeaderBrowserTesting/main/transform_to_testcase.py", r"\'X-Frame-Options: (.*?)\'")
# https://crawler.ninja/files/xfo-values.txt
crawler_ninja_values = get_crawler_values("https://crawler.ninja/files/xfo-values.txt")
own_values = []
other_values = basic_values + wpt_values + siewert_values + crawler_ninja_values + own_values
other_values = expand_urls(other_values)

In [71]:
other_values

['',
 'INVALID',
 'null',
 '*',
 'sub.headers.websec.saarland',
 'https://sub.headers.websec.saarland',
 'HTTPS://SUB.HEADERS.WEBSEC.SAARLAND',
 'https://sub.headers.websec.saarland/abc/',
 'https://sub.headers.websec.saarland/?a=a',
 'http://sub.headers.websec.saarland',
 'https://sub.headers.websec.saarland:443',
 'https://sub.headers.websec.saarland/',
 'https://sub.headers.websec.saarland:443/',
 'https://headers.websec.saarland',
 'https://sub.sub.headers.websec.saarland',
 '*.headers.websec.saarland',
 '*.sub.headers.websec.saarland',
 'https://*.sub.headers.websec.saarland',
 'https://headers.webappsec.eu',
 '',
 'SAMEORIGIN',
 '"DENY"',
 'sameOriGin',
 'ALLOW-FROM sub.headers.websec.saarland',
 'ALLOW-FROM https://sub.headers.websec.saarland',
 'ALLOW-FROM HTTPS://SUB.HEADERS.WEBSEC.SAARLAND',
 'ALLOW-FROM https://sub.headers.websec.saarland/abc/',
 'ALLOW-FROM https://sub.headers.websec.saarland/?a=a',
 'ALLOW-FROM http://sub.headers.websec.saarland',
 'ALLOW-FROM https://sub.

In [72]:
# TODO: add next header
label = "CSP"
header_name = ""
alt_names = []
block_values = []
allow_values = []
partial_values = []
legacy_values = []
# Always start with the empty value and then an INVALID value (e.g., "INVALID"), after that both valid and invalid values can be added
# We use the first two in `mult_headers_test`
other_values = ["", "INVALID"]
other_values = expand_urls(other_values)

In [75]:
ht = HeaderTests(label, header_name, alt_names, block_values, allow_values, partial_values, legacy_values, other_values)
ht.create_all_tests()
#ht.save_responses()

4391
[([('x-frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frame-option', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frames-options', 'DENY')], 'XFO', 200, 'parsing'), ([('content-security-policy', 'DENY')], 'XFO', 200, 'parsing'), ([('x_frame_options', 'DENY')], 'XFO', 200, 'parsing'), ([('xframeoptions', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frame-optionsr', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('4x-frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frame\x0b-options', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frameF-options', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frame-optionsG', 'DENY')], 'XFO', 200, 'parsing'), ([('3x-frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('x-frame-optionss', 'DENY')], 'XFO', 200, 'parsing'), ([('sx-frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('Px-frame-options', 'DENY')], 'XFO', 200, 'parsing'), ([('\x13x-frame-options', 