In [37]:
import re
from collections import defaultdict
import json

def extract_error_paths(file_path, sanitizer_type):
    """Parse a sanitizer crash report into one record per reported crash.

    The report is split into sections on a dashed separator line, and each
    section is split into paragraphs on blank lines. The first paragraph must
    contain a ``File name: <name>`` line and the sanitizer's error headline;
    the function names of its stack frames become the crash path. For ASan
    the stack frames of the second paragraph are recorded as the allocation
    path. Frames whose function name starts with ``_`` are dropped.

    Sections without a blank line are not parsed: the text following their
    last ``...`` is printed and the section is skipped. Sections whose first
    paragraph lacks a file name or an error headline are skipped silently.

    Args:
        file_path: Path of the text report to read.
        sanitizer_type: ``'ASan'`` or ``'MSan'``. Selects the headline regex
            and whether ``allocated_by`` is collected.

    Returns:
        A list of dicts with keys ``"file"``, ``"error"``, ``"path"`` and,
        for ASan only, ``"allocated_by"``.

    Raises:
        ValueError: If ``sanitizer_type`` is not one of the supported values.
    """
    # Headline regex per sanitizer; group 1 captures the error kind.
    error_patterns = {
        'ASan': r".*? AddressSanitizer: (\S+) on address",
        'MSan': r".*? MemorySanitizer: (\S+)\n",
    }
    if sanitizer_type not in error_patterns:
        # Fail fast. Without this check an unknown type left the error
        # pattern undefined and only blew up (or silently returned nothing)
        # once parsing was already under way.
        raise ValueError(
            "Unsupported sanitizer_type {!r}; expected one of {}".format(
                sanitizer_type, sorted(error_patterns)
            )
        )

    error_pattern = re.compile(error_patterns[sanitizer_type])
    filename_pattern = re.compile(r".*?File name: (\S+)\n")
    path_pattern = re.compile(r"    #\d+ 0x[0-9a-f]+ in (\S+)")

    with open(file_path, 'r') as f:
        content = f.read()

    errors = []

    # Split content by errors
    for raw_section in content.split("----------------------------------------------------"):
        paragraphs = raw_section.split("\n\n")
        if len(paragraphs) < 2:
            # No blank line in this section, so there is no stack paragraph
            # to parse. Echo whatever follows the last "..." and move on.
            last_three_points = raw_section.rfind("...")
            print(raw_section[last_three_points + 3:])
            continue

        error_section = paragraphs[0]
        second_section = paragraphs[1]

        # Both the file name and the error headline are required.
        filename_match = filename_pattern.search(error_section)
        if not filename_match:
            continue
        error_match = error_pattern.search(error_section)
        if not error_match:
            continue

        record = {
            "file": filename_match.group(1),
            "error": error_match.group(1),
            "path": [
                frame for frame in path_pattern.findall(error_section)
                if not frame.startswith('_')
            ],
        }
        if sanitizer_type == 'ASan':
            # The second paragraph's stack is stored as the allocation path.
            record["allocated_by"] = [
                frame for frame in path_pattern.findall(second_section)
                if not frame.startswith('_')
            ]
        errors.append(record)

    return errors

def find_unique_errors(results):
    """Group crash records that share the same signature.

    A signature is the error kind plus the crash path and, when the record
    has one, the allocation path. Records with the same signature are merged
    into a single entry that lists every file that triggered it.

    The input records are not modified; each returned entry is a new dict
    (the ``path`` / ``allocated_by`` list objects are shared with the first
    record seen for that signature).

    Args:
        results: Iterable of dicts with keys ``"file"``, ``"error"``,
            ``"path"`` and optionally ``"allocated_by"``.

    Returns:
        A list of dicts, in first-seen order, each holding the record's keys
        except ``"file"`` plus ``"files"``: the sorted list of distinct file
        names that produced that signature.
    """
    unique_errors = []
    # signature -> aggregated entry, so a duplicate is merged into exactly
    # the entry it was deduplicated against (including allocated_by).
    entry_by_signature = {}

    for result in results:
        signature = (result['error'], tuple(result['path']))
        if 'allocated_by' in result:
            signature += (tuple(result['allocated_by']),)

        entry = entry_by_signature.get(signature)
        if entry is None:
            # Copy instead of mutating the caller's record, so the function
            # can safely be re-run on the same input.
            entry = {key: value for key, value in result.items() if key != 'file'}
            entry['files'] = set()
            entry_by_signature[signature] = entry
            unique_errors.append(entry)
        entry['files'].add(result['file'])

    for entry in unique_errors:
        entry['files'] = sorted(entry['files'])

    return unique_errors

## Analyze ASan errors

In [None]:
# AddressSanitizer crash report to analyse
file_path = "./_crash_report_asan.txt"

# Parse every crash record, then collapse records that share a signature
results = extract_error_paths(sanitizer_type='ASan', file_path=file_path)
unique_results = find_unique_errors(results=results)

==1094107==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x503000000057 at pc 0x61e74649668f bp 0x7ffc29c32a10 sp 0x7ffc29c321c8
READ of size 59 at 0x503000000057 thread T0

==1691427==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x502000000053 at pc 0x574734493eec bp 0x7fff8cd8adc0 sp 0x7fff8cd8a568
READ of size 4 at 0x502000000053 thread T0

==2280281==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x502000000053 at pc 0x644e0b979eec bp 0x7ffd5512b5e0 sp 0x7ffd5512ad88
READ of size 4 at 0x502000000053 thread T0

==2365957==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x502000000038 at pc 0x60647463f68f bp 0x7ffca9b9a4f0 sp 0x7ffca9b99ca8
READ of size 37 at 0x502000000038 thread T0

==3806229==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x502000000053 at pc 0x5e226570eeec bp 0x7ffd6e57c9a0 sp 0x7ffd6e57c148
READ of size 4 at 0x502000000053 thread T0

==3916543==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x50200000

In [39]:
# Report the number of parsed crash records, the number of distinct
# signatures after de-duplication, and the grouped records as indented JSON.
# NOTE: `results` / `unique_results` are plain kernel globals that are also
# assigned by the other load cell, so this prints whichever report was loaded
# last -- run the ASan load cell directly before this one.
print('Total errors: ', len(results))
print('Total unique errors: ', len(unique_results))
print(json.dumps(unique_results, indent=4))

Total errors:  11017
Total unique errors:  5
[
    {
        "error": "heap-buffer-overflow",
        "path": [
            "memcpy",
            "memcpy",
            "read_buf",
            "fill_window",
            "deflate_slow",
            "deflate",
            "compress2",
            "write_chunks_before_idat",
            "spng_encode_chunks",
            "spng_encode_image",
            "fuzz_spng_write",
            "main"
        ],
        "allocated_by": [
            "malloc",
            "get_random_string",
            "fuzz_spng_write",
            "main"
        ],
        "files": [
            "basi0g01",
            "basi3p08",
            "basn0g02",
            "basn3p08",
            "ccwn3p08",
            "ch2n3p08",
            "ctgn0g04",
            "cthn0g04",
            "ctjn0g04",
            "ctzn0g04",
            "g04n0g16",
            "g05n3p04",
            "g25n0g16",
            "g25n3p04",
            "tbbn3p08",
            "tbgn2c16",
    

## Analyze MSan errors

In [None]:
# MemorySanitizer crash report to analyse
file_path = "./_crash_report_msan.txt"

# Parse every crash record, then collapse records that share a signature
results = extract_error_paths(sanitizer_type='MSan', file_path=file_path)
unique_results = find_unique_errors(results=results)

 Uninitialized bytes in strlen at offset 8 inside [0x701000000006, 9)




In [41]:
# Report the number of parsed crash records, the number of distinct
# signatures after de-duplication, and the grouped records as indented JSON.
# NOTE: `results` / `unique_results` are plain kernel globals that are also
# assigned by the other load cell, so this prints whichever report was loaded
# last -- run the MSan load cell directly before this one.
print('Total errors: ', len(results))
print('Total unique errors: ', len(unique_results))
print(json.dumps(unique_results, indent=4))

Total errors:  11387
Total unique errors:  2
[
    {
        "error": "use-of-uninitialized-value",
        "path": [
            "read_non_idat_chunks",
            "read_chunks",
            "spng_get_plte",
            "fuzz_spng_read",
            "main"
        ],
        "files": [
            "ct1n0g04",
            "cten0g04",
            "ctfn0g04",
            "ctgn0g04",
            "cthn0g04",
            "ctjn0g04",
            "ctzn0g04"
        ]
    },
    {
        "error": "use-of-uninitialized-value",
        "path": [
            "memcmp",
            "trns_row",
            "spng_decode_scanline",
            "spng_decode_row",
            "spng_decode_image",
            "fuzz_spng_read",
            "main"
        ],
        "files": [
            "tbbn2c16",
            "tbgn2c16",
            "tbrn2c08"
        ]
    }
]
