In [1]:
import itertools
import os
import random
import json
import string
from json import JSONEncoder

def _default(self, obj):
    """Fallback serializer installed on ``JSONEncoder``.

    When the class of ``obj`` defines ``to_json``, that callable is invoked
    with ``obj``; otherwise the encoder's previous ``default`` implementation
    (stored on ``_default.default`` below) handles the object.
    """
    serializer = getattr(obj.__class__, "to_json", _default.default)
    return serializer(obj)


# Capture the stock handler first, then swap our hook in, so objects without
# ``to_json`` still fall through to the stock handler.
_default.default = JSONEncoder().default
JSONEncoder.default = _default

from pathos import multiprocessing
import logging
import copy
from pathlib import Path
from typing import Tuple
import re
from z3.z3 import Solver, And, Or, Not, Bool, Int, sat

# Locations of the two result files. The defaults are the original workstation
# paths; set BASELINES_JSON / EXPERIMENTAL_RESULTS_JSON in the environment to
# point the notebook at other copies without editing this cell.
baselines_path: Path = Path(os.environ.get("BASELINES_JSON", "C:/Users/kisam/b.json"))
experimental_results_path: Path = Path(os.environ.get("EXPERIMENTAL_RESULTS_JSON", "C:/Users/kisam/d.json"))

# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(baselines_path, encoding="utf-8") as f:
    baselines = json.load(f)

with open(experimental_results_path, encoding="utf-8") as f:
    experimental_results = json.load(f)

# Untouched deep copies of the freshly loaded data; the normalization loop
# further down rewrites fields of `experimental_results` in place.
lonely_baselines = copy.deepcopy(baselines)
lonely_experimental_results = copy.deepcopy(experimental_results)

class IntRange:
    """Half-open integer interval ``[lower_bound_inclusive, upper_bound_exclusive)``.

    Supports ``in`` tests and serializes (via ``to_json``) to the same text
    that ``str()`` produces, e.g. ``"[1:5)"``.
    """

    def __init__(self, lower_bound_inclusive, upper_bound_exclusive):
        self.lower_bound_inclusive = lower_bound_inclusive
        self.upper_bound_exclusive = upper_bound_exclusive

    def __contains__(self, item):
        """Return True only for ``int`` items with ``lower <= item < upper``."""
        return isinstance(item, int) and (self.lower_bound_inclusive <= item < self.upper_bound_exclusive)

    def __repr__(self):
        # Format each bound on its own: interpolating both inside one pair of
        # braces builds a tuple and renders as "IntRange((1, 5))".
        return f"IntRange({self.lower_bound_inclusive!r}, {self.upper_bound_exclusive!r})"

    def __str__(self):
        return f"[{self.lower_bound_inclusive}:{self.upper_bound_exclusive})"

    def to_json(self):
        """Return the ``str()`` form; looked up by name by the JSON encoder hook."""
        return str(self)


def _to_int_range(raw, first_token):
    """Build an ``IntRange`` from a colon-separated string.

    ``raw.split(':')[first_token]`` is taken as the first line and the token
    after it as the last line (inclusive), hence the ``+ 1`` on the upper bound.

    Raises:
        AttributeError: ``raw`` has no ``split`` (it is not a string).
        IndexError: fewer tokens than required.
        ValueError: a token is not an integer.
    """
    toks = raw.split(':')
    return IntRange(int(toks[first_token]), int(toks[first_token + 1]) + 1)


# Replace the textual line fields of every experimental result with objects
# that support `in`: an IntRange when the text parses, an empty list otherwise.
for e in experimental_results:
    # Values that are already an IntRange/list come from an earlier run of this
    # cell; skipping them keeps the cell re-runnable.
    if not isinstance(e['original_line'], (IntRange, list)):
        try:
            e['original_line'] = _to_int_range(e['original_line'], 0)
        except (AttributeError, IndexError, ValueError):
            e['original_line'] = []

    if not isinstance(e['function_line_range'], (IntRange, list)):
        if e['function_line_range'] == 'ERROR':
            e['function_line_range'] = []
        else:
            try:
                # The first token is skipped (e.g. "GLOBAL" in "GLOBAL:1:29").
                e['function_line_range'] = _to_int_range(e['function_line_range'], 1)
            except (AttributeError, IndexError, ValueError):
                logging.exception(f"e was {e}")
                # Do not leave the raw string behind: a later `int in str`
                # membership test would raise TypeError.
                e['function_line_range'] = []
    e['presence_condition'] = str(e['presence_condition'])

print(f"We have {len(baselines)} baseline results.")
print(f"We have {len(experimental_results)} experimental results.")

We have 62 baseline results.
We have 85 experimental results.


In [2]:
import tqdm


def _configuration_is_compatible(configuration, presence_condition: str) -> bool:
    """Ask z3 whether ``configuration`` can hold together with ``presence_condition``.

    Args:
        configuration: iterable of names starting with ``DEF`` (asserted true)
            or ``UNDEF`` (the name with its leading ``UN`` removed is asserted
            false). If a name occurs both ways, the later entry wins.
        presence_condition: Python expression text that is evaluated to a z3
            constraint.

    Returns:
        True iff the solver reports ``sat``. The solver state (plus the model
        when satisfiable) is printed as a side effect.

    Raises:
        RuntimeError: a configuration entry starts with neither prefix.
        NameError: an undefined name in the condition cannot be resolved.
    """
    assignments = {}
    for var in configuration:
        if var.startswith('DEF'):
            assignments[var] = True
        elif var.startswith('UNDEF'):
            assignments[re.sub(r"^UN(DEF_.*)", r"\1", var)] = False
        else:
            raise RuntimeError(f"Don't know how to handle variable {var}")

    s = Solver()
    for name, is_defined in assignments.items():
        symbol = Bool(name)
        s.add(symbol if is_defined else Not(symbol))

    # Names visible to the evaluated condition. An explicit dict replaces
    # exec() into the function's implicit locals: bindings made that way are
    # not guaranteed to be visible to a later eval() (they are discarded under
    # Python 3.13's locals() semantics), which made the retry loop below spin
    # forever.
    # SECURITY: the condition text comes from the results file and is passed
    # to eval(); emptying __builtins__ narrows what it can reach but is not a
    # sandbox. Only run this on trusted input.
    namespace = {"__builtins__": {}, "And": And, "Or": Or, "Not": Not, "Bool": Bool, "Int": Int}
    for name in re.findall("DEF_[a-zA-Z0-9_]+", presence_condition):
        namespace[name] = Bool(name)
    for name in re.findall("USE_[a-zA-Z0-9_]+", presence_condition):
        namespace[name] = Int(name)

    while True:
        try:
            s.add(eval(presence_condition, namespace))  # TODO Definitely need to do more transformation here.
            break
        except NameError as ne:
            # Any other identifier in the condition is declared as an integer.
            missing = re.search("name '(.*)' is not defined", str(ne))
            if missing is None or missing.group(1) in namespace:
                # Cannot make progress; re-raise instead of looping forever.
                raise
            namespace[missing.group(1)] = Int(missing.group(1))

    compatible = s.check() == sat
    if compatible:
        print(s, s.model())
    else:
        print(s, compatible)
    return compatible


def match_stats(baseline_result: dict, experimental_result: dict) -> Tuple:
    """
    Returns a vector of different match information.
    (a, b, c)
    a = True iff baseline and experimental have the same message and file (compared up to the
        first '.' of the path) and baseline's line lies in experimental's 'original_line'.
    b = True iff baseline and experimental have the same message and file, and baseline's line
        lies in experimental's 'function_line_range'.
    c = True iff a or b holds, experimental's presence condition is usable (it is not 'None' and
        does not contain 'Or(None'), and baseline's configuration is compatible with it.
    """
    same_report = (baseline_result['message'] == experimental_result['sanitized_message'] and
                   baseline_result['input_file'].split('.')[0] == experimental_result['input_file'].split('.')[0])

    # Membership is only tested once message and file agree.
    a = same_report and baseline_result['input_line'] in experimental_result['original_line']
    b = same_report and baseline_result['input_line'] in experimental_result['function_line_range']

    c = False
    presence_condition = experimental_result['presence_condition']
    # Don't bother doing the expensive solver step when the file and line number are different.
    if (a or b) and 'Or(None' not in presence_condition and presence_condition != 'None':
        c = _configuration_is_compatible(baseline_result['configuration'], presence_condition)
    return a, b, c

def tupleize(func, args):
    """Call ``func`` with ``args`` unpacked and return ``(result, tuple(args))``."""
    outcome = func(*args)
    return outcome, tuple(args)

summary = {}

# Tally of baseline reports per match level. Key order matters: each baseline
# is counted once, at the first level (in this order) that any experimental
# result reaches for it, and lower levels are not examined afterwards.
#  E.g. a baseline with a (True, True, True) match is never also counted under
#  (False, True, True), (True, False, True), etc.
result_hierarchy = {
    (True, True, True): 0,
    (False, True, True): 0,
    (True, False, True): 0,
    (True, True, False): 0,
    (False, True, False): 0,
    (False, False, True): 0,
    (True, False, False): 0,
    (False, False, False): 0,
}

report = []
for baseline in tqdm.tqdm(baselines):
    # Match vector of this baseline against every experimental result.
    outcomes = [match_stats(baseline, experimental) for experimental in experimental_results]
    best = next((level for level in result_hierarchy if level in outcomes), None)
    if best is None:
        continue
    result_hierarchy[best] += 1
    # -----
    # Collect the baselines you want to inspect here. As written, every
    # baseline whose best level is neither an exact (True, True, True) nor a
    # partial (False, True, True) match is added to `report`.
    if best not in ((True, True, True), (False, True, True)):
        report.append(baseline)


  0%|                                                                                           | 0/62 [00:00<?, ?it/s]

[DEF_ENABLE_FEATURE_MDEV_CONF,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP,
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__,
                    And(USE___OPTIMIZE__ > 0,
                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                            And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                                Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP)))))))))] False
[DEF_ENABLE_FEATURE_MDEV_CONF,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP,
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                     And(DEF_ENABLE_FEATURE_MDEV_CONF,
                         And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                             Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP)))),
                 And(DEF__FORTIFY_SOURCE,
                     And(Not(USE__FORTIFY_SOURCE > 0),
                         And(DEF_ENABLE_FEATURE_MDEV_CONF,
                           

  5%|████                                                                               | 3/62 [00:00<00:03, 16.45it/s]

[DEF_ENABLE_FEATURE_MDEV_CONF,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP,
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(Not(Or(Or(And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(DEF___OPTIMIZE__,
                              And(USE___OPTIMIZE__ > 0,
                                  Not(DEF_ENABLE_FEATURE_MDEV_CONF))))),
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(DEF___OPTIMIZE__,
                              And(USE___OPTIMIZE__ > 0,
                                  And(DEF_ENABLE_FEATURE_MDEV_CONF,
                                      Not(DEF_ENABLE_FEATURE_MDEV_RENAME))))))),
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           And(USE___OPTIMIZE__ > 0,
                               And(DEF_ENABLE_FEATURE_MDEV_CONF,
                           

 10%|████████                                                                           | 6/62 [00:00<00:02, 20.71it/s]

[DEF_ENABLE_FEATURE_MDEV_CONF,
 Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP),
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(Or(Or(Or(Or(Or(Or(Or(Or(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                                        Not(DEF_ENABLE_FEATURE_MDEV_CONF)),
                                        And(Not(DEF__FORTIFY_SOURCE),
                                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                                        Not(DEF_ENABLE_FEATURE_MDEV_RENAME)))),
                                      And(Not(DEF__FORTIFY_SOURCE),
                                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                                        And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                                        Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP))))),
                                   And(DEF__FORTIFY_SOURCE,
                                       And(Not(USE__FORTIFY_SOURCE >
                                        0),
                                        N

[Not(DEF_ENABLE_FEATURE_STAT_FORMAT),
 DEF_ENABLE_SELINUX,
 Or(And(Not(And(DEF__FORTIFY_SOURCE,
                And(USE__FORTIFY_SOURCE > 0,
                    And(DEF___OPTIMIZE__,
                        And(USE___OPTIMIZE__ > 0,
                            DEF_ENABLE_FEATURE_STAT_FORMAT))))),
        And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__,
                    And(USE___OPTIMIZE__ > 0,
                        Not(DEF_ENABLE_FEATURE_STAT_FORMAT))))),
        And(DEF___STRICT_ANSI__,
            And(DEF__FORTIFY_SOURCE,
                And(USE__FORTIFY_SOURCE > 0,
                    And(DEF___OPTIMIZE__,
                        And(USE___OPTIMIZE__ > 0,
                            And(DEF_ENABLE_SELINUX,
                                Not(DEF_ENABLE_FEATURE_STAT_FORMAT))))))),
        And(DEF___STRICT_ANSI__,
            And(DEF__FORTIFY_SOURCE,
                And(USE__FORTIFY_SOURCE > 0,
                    And(DEF___

 24%|███████████████████▊                                                              | 15/62 [00:00<00:01, 33.57it/s]

[USE___OPTIMIZE__ = 1,
 USE__FORTIFY_SOURCE = 1,
 DEF___STRICT_ANSI__ = True,
 DEF___OPTIMIZE__ = True,
 DEF__FORTIFY_SOURCE = True,
 DEF_ENABLE_SELINUX = True,
 DEF_ENABLE_FEATURE_STAT_FORMAT = False]
[DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF,
 DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
 Or(And(And(DEF___STRICT_ANSI__,
            And(DEF__FORTIFY_SOURCE,
                And(USE__FORTIFY_SOURCE > 0,
                    And(DEF___OPTIMIZE__,
                        And(USE___OPTIMIZE__ > 0,
                            And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                                Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF)))))))))] False
[DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF,
 DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
 Or(And(Or(Or(Or(And(DEF___STRICT_ANSI__,
                     And(Not(DEF__FORTIFY_SOURCE),
                         And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                             Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_

 40%|█████████████████████████████████                                                 | 25/62 [00:00<00:01, 35.08it/s]

[DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING,
 Or(And(Or(Or(Or(And(DEF___STRICT_ANSI__,
                     And(Not(DEF__FORTIFY_SOURCE),
                         Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING))),
                 And(DEF___STRICT_ANSI__,
                     And(DEF__FORTIFY_SOURCE,
                         And(Not(USE__FORTIFY_SOURCE > 0),
                             Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING))))),
              And(DEF___STRICT_ANSI__,
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(Not(DEF___OPTIMIZE__),
                              Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING)))))),
           And(DEF___STRICT_ANSI__,
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           And(Not(USE___OPTIMIZE__ > 0),
                               Not(DEF_CONFIG_FEATURE_INSMOD

 47%|██████████████████████████████████████▎                                           | 29/62 [00:01<00:01, 26.85it/s]

[DEF_CROSS_COMPILE,
 DEF_WIN32,
 DEF_OS2,
 Or(And(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                     And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                         And(Not(DEF_CROSS_COMPILE),
                             And(Not(DEF_OS2),
                                 Not(DEF_NEED_ENHANCED_ESCAPES))))),
                 And(DEF__FORTIFY_SOURCE,
                     And(Not(USE__FORTIFY_SOURCE > 0),
                         And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                             And(Not(DEF_CROSS_COMPILE),
                                 And(Not(DEF_OS2),
                                     Not(DEF_NEED_ENHANCED_ESCAPES))))))),
              And(DEF__FORTIFY_SOURCE,
                  And(USE__FORTIFY_SOURCE > 0,
                      And(Not(DEF___OPTIMIZE__),
                          And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                              And(Not(DEF_CROSS_COMPILE),
                                  And(Not(DEF_OS2),
    

 52%|██████████████████████████████████████████▎                                       | 32/62 [00:01<00:01, 24.55it/s]

[Not(DEF_CROSS_COMPILE),
 DEF_WIN32,
 DEF_OS2,
 Or(And(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                     And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                         And(Not(DEF_CROSS_COMPILE),
                             And(Not(DEF_OS2),
                                 Not(DEF_NEED_ENHANCED_ESCAPES))))),
                 And(DEF__FORTIFY_SOURCE,
                     And(Not(USE__FORTIFY_SOURCE > 0),
                         And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                             And(Not(DEF_CROSS_COMPILE),
                                 And(Not(DEF_OS2),
                                     Not(DEF_NEED_ENHANCED_ESCAPES))))))),
              And(DEF__FORTIFY_SOURCE,
                  And(USE__FORTIFY_SOURCE > 0,
                      And(Not(DEF___OPTIMIZE__),
                          And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                              And(Not(DEF_CROSS_COMPILE),
                                  And(Not(DEF_OS2),

 63%|███████████████████████████████████████████████████▌                              | 39/62 [00:01<00:00, 27.55it/s]

[Not(DEF_SHARED_MODULE),
 Or(And(And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__, USE___OPTIMIZE__ > 0))),
        Not(And(DEF___STRICT_ANSI__,
                And(DEF__FORTIFY_SOURCE,
                    And(USE__FORTIFY_SOURCE > 0,
                        And(DEF___OPTIMIZE__,
                            And(USE___OPTIMIZE__ > 0,
                                DEF_SHARED_MODULE)))))),
        Not(Or(Or(Or(Not(DEF__FORTIFY_SOURCE),
                     And(DEF__FORTIFY_SOURCE,
                         Not(USE__FORTIFY_SOURCE > 0))),
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          Not(DEF___OPTIMIZE__)))),
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           Not(USE___OPTIMIZE__ > 0)))))),
        And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOUR

100%|██████████████████████████████████████████████████████████████████████████████████| 62/62 [00:01<00:00, 38.29it/s]

[Not(DEF_CONFIG_OF_IRQ),
 DEF_CONFIG_TWL4030_CORE,
 Or(And(Or(And(Not(DEF_CONFIG_TWL4030_CORE),
               DEF_CONFIG_IRQ_DOMAIN),
           DEF_CONFIG_TWL4030_CORE),
        DEF_CONFIG_TWL4030_CORE,
        Not(And(DEF_CONFIG_TWL4030_CORE, DEF_CONFIG_OF_IRQ)),
        And(DEF_CONFIG_TWL4030_CORE, Not(DEF_CONFIG_OF_IRQ)),
        DEF_CONFIG_TWL4030_CORE,
        Not(And(Not(DEF_CONFIG_TWL4030_CORE),
                DEF_CONFIG_IRQ_DOMAIN)),
        DEF_CONFIG_TWL4030_CORE))] [DEF_CONFIG_IRQ_DOMAIN = False,
 DEF_CONFIG_TWL4030_CORE = True,
 DEF_CONFIG_OF_IRQ = False]
[Not(DEF_CONFIG_IPV6),
 DEF_CONFIG_NETPOLL,
 Or(And(DEF_CONFIG_NETPOLL,
        Not(And(DEF_CONFIG_NETPOLL, DEF_CONFIG_IPV6)),
        And(DEF_CONFIG_NETPOLL, Not(DEF_CONFIG_IPV6)),
        DEF_CONFIG_NETPOLL,
        DEF_CONFIG_NETPOLL,
        Not(Not(DEF_CONFIG_NETPOLL)),
        DEF_CONFIG_NETPOLL))] [DEF_CONFIG_NETPOLL = True, DEF_CONFIG_IPV6 = False]
[Not(DEF_CONFIG_BF60x), Or(And(Not(DEF_CONFIG_BF60x)))] [DEF_CON




# Results

In [3]:
# Adjust this cell to change how the collected reports are presented. With the
# matching loop as written, `report` holds the baselines that found neither an
# exact nor a partial match, and they are dumped here as JSON.
print(json.dumps(report, indent=2))

# Occurrences of each distinct (message, file, line) triple in the report;
# the number of distinct triples is printed.
_4488 = dict()
for entry in report:
    triple = (entry['message'], entry['input_file'], entry['input_line'])
    _4488[triple] = _4488.get(triple, 0) + 1
print(len(_4488))
print('-----------')

exact_level = (True, True, True)
partial_level = (False, True, True)
unmatched_total = sum(
    count for level, count in result_hierarchy.items() if level not in [exact_level, partial_level]
)
print(f"Number of baseline results: {len(baselines)}")
print(f"Number of desugared results: {len(experimental_results)}")
print(f"Number of exact matches: {result_hierarchy[exact_level]}")
print(f"Number of partial matches: {result_hierarchy[partial_level]}")
print(f"Number of unmatched: {unmatched_total}")

[
  {
    "id": "0",
    "input_file": "/targets/VarBugsPatches/BUSYBOX/199501f2a00.c",
    "input_line": 21,
    "original_line": "ERROR",
    "function_line_range": "GLOBAL:1:29",
    "message": "Null pointer passed to 2nd parameter expecting 'nonnull'",
    "sanitized_message": "Null pointer passed to 2nd parameter expecting 'nonnull'",
    "presence_condition": null,
    "feasible": null,
    "configuration": [
      "UNDEF_ENABLE_FEATURE_MODPROBE_MULTIPLE_OPTIONS"
    ],
    "analysis_time": 0.7946839332580566,
    "desugaring_time": null,
    "get_recommended_space": true,
    "remove_errors": false,
    "verified": null,
      [
        7,
        9,
        21,
        27
      ]
    ],
  },
  {
    "id": "0",
    "input_file": "/targets/VarBugsPatches/BUSYBOX/b62bd7b261b.c",
    "input_line": 16,
    "original_line": "ERROR",
    "function_line_range": "GLOBAL:1:46",
    "message": "Value stored to 'val' is never read",
    "sanitized_message": "Value stored to 'val' is never 

In [4]:
# Minimum number of configurations the average is taken over.
# NOTE(review): presumably the size of the sampled configuration space — confirm.
TOTAL_CONFIGURATIONS = 1000

# Number of baseline warnings per configuration (keyed by the configuration's str()).
results = dict()
for b in baselines:
    conf = str(b.get('configuration'))
    results[conf] = results.get(conf, 0) + 1

print(f"Number of configurations with warnings: {len(results)}")

# Configurations without warnings count as zero. Widening the divisor gives
# the same average as padding `results` with randomly named zero entries,
# without the nondeterministic keys.
configuration_count = max(len(results), TOTAL_CONFIGURATIONS)
print(f"Average warnings per config is {float(sum(results.values())) / float(configuration_count)}")



In [5]:
# Reverse direction of the earlier pass: for every experimental result, find
# the best match level that any baseline reaches. Levels are tried in the
# order listed and each experimental result is counted once.
result_hierarchy = dict.fromkeys(
    [
        (True, True, True),
        (False, True, True),
        (True, False, True),
        (True, True, False),
        (False, True, False),
        (False, False, True),
        (True, False, False),
        (False, False, False),
    ],
    0,
)

for experimental in tqdm.tqdm(experimental_results):
    outcomes = [match_stats(baseline, experimental) for baseline in baselines]
    for level in result_hierarchy:
        if level in outcomes:
            result_hierarchy[level] += 1
            break  # count only the highest level reached

exact_level = (True, True, True)
partial_level = (False, True, True)
unmatched_total = sum(
    count for level, count in result_hierarchy.items() if level not in [exact_level, partial_level]
)

print('-----------')
print(f"Number of desugared results: {len(experimental_results)}")
print(f"Number of baseline results: {len(baselines)}")
print(f"Number of exact matches: {result_hierarchy[exact_level]}")
print(f"Number of partial matches: {result_hierarchy[partial_level]}")
print(f"Number of unmatched: {unmatched_total}")

  0%|                                                                                           | 0/85 [00:00<?, ?it/s]

[DEF_ENABLE_FEATURE_MDEV_CONF,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP,
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__,
                    And(USE___OPTIMIZE__ > 0,
                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                            And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                                Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP)))))))))] False
[DEF_ENABLE_FEATURE_MDEV_CONF,
 Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP),
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__,
                    And(USE___OPTIMIZE__ > 0,
                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                            And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                                Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP)))))))))] [USE___OPTIMIZE__ = 1,
 USE__FORTIFY_SOURCE = 1,
 DEF

  5%|███▉                                                                               | 4/85 [00:00<00:03, 26.32it/s]

[DEF_ENABLE_FEATURE_MDEV_CONF,
 Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP),
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(Or(Or(Or(Or(Or(Or(Or(Or(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                                        Not(DEF_ENABLE_FEATURE_MDEV_CONF)),
                                        And(Not(DEF__FORTIFY_SOURCE),
                                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                                        Not(DEF_ENABLE_FEATURE_MDEV_RENAME)))),
                                      And(Not(DEF__FORTIFY_SOURCE),
                                        And(DEF_ENABLE_FEATURE_MDEV_CONF,
                                        And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                                        Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP))))),
                                   And(DEF__FORTIFY_SOURCE,
                                       And(Not(USE__FORTIFY_SOURCE >
                                        0),
                                        N

[DEF_ENABLE_FEATURE_MDEV_CONF,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP,
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(Not(Or(Or(And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(DEF___OPTIMIZE__,
                              And(USE___OPTIMIZE__ > 0,
                                  Not(DEF_ENABLE_FEATURE_MDEV_CONF))))),
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(DEF___OPTIMIZE__,
                              And(USE___OPTIMIZE__ > 0,
                                  And(DEF_ENABLE_FEATURE_MDEV_CONF,
                                      Not(DEF_ENABLE_FEATURE_MDEV_RENAME))))))),
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           And(USE___OPTIMIZE__ > 0,
                               And(DEF_ENABLE_FEATURE_MDEV_CONF,
                           

 11%|████████▊                                                                          | 9/85 [00:00<00:02, 37.28it/s]

 [USE___OPTIMIZE__ = 1,
 USE__FORTIFY_SOURCE = 1,
 DEF___STRICT_ANSI__ = True,
 DEF___CORRECT_ISO_CPP_STRING_H_PROTO = True,
 DEF___OPTIMIZE__ = True,
 DEF__FORTIFY_SOURCE = True,
 DEF_ENABLE_FEATURE_MDEV_RENAME = True,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP = True,
 DEF_ENABLE_FEATURE_MDEV_CONF = True]
[DEF_ENABLE_FEATURE_MDEV_CONF,
 DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP,
 DEF_ENABLE_FEATURE_MDEV_RENAME,
 Or(And(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                     And(DEF_ENABLE_FEATURE_MDEV_CONF,
                         And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                             Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP)))),
                 And(DEF__FORTIFY_SOURCE,
                     And(Not(USE__FORTIFY_SOURCE > 0),
                         And(DEF_ENABLE_FEATURE_MDEV_CONF,
                             And(DEF_ENABLE_FEATURE_MDEV_RENAME,
                                 Not(DEF_ENABLE_FEATURE_MDEV_RENAME_REGEXP)))))),
              And(DEF__FORTIFY_SOURCE,
          

 20%|████████████████▍                                                                 | 17/85 [00:00<00:01, 48.62it/s]

[Not(DEF_CONFIG_FEATURE_CLEAN_UP),
 Or(And(DEF_CONFIG_FEATURE_CLEAN_UP,
        Or(Or(Or(And(DEF___STRICT_ANSI__,
                     And(Not(DEF__FORTIFY_SOURCE),
                         DEF_CONFIG_FEATURE_CLEAN_UP)),
                 And(DEF___STRICT_ANSI__,
                     And(DEF__FORTIFY_SOURCE,
                         And(Not(USE__FORTIFY_SOURCE > 0),
                             DEF_CONFIG_FEATURE_CLEAN_UP)))),
              And(DEF___STRICT_ANSI__,
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(Not(DEF___OPTIMIZE__),
                              DEF_CONFIG_FEATURE_CLEAN_UP))))),
           And(DEF___STRICT_ANSI__,
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           And(Not(USE___OPTIMIZE__ > 0),
                               DEF_CONFIG_FEATURE_CLEAN_UP)))))),
        Not(And(DEF___STRIC

[DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF,
 DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
 Or(And(And(DEF___STRICT_ANSI__,
            And(DEF__FORTIFY_SOURCE,
                And(USE__FORTIFY_SOURCE > 0,
                    And(DEF___OPTIMIZE__,
                        And(USE___OPTIMIZE__ > 0,
                            And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                                Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF)))))))))] False
[DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF,
 Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF),
 Or(And(And(DEF___STRICT_ANSI__,
            And(DEF__FORTIFY_SOURCE,
                And(USE__FORTIFY_SOURCE > 0,
                    And(DEF___OPTIMIZE__,
                        And(USE___OPTIMIZE__ > 0,
                            And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                                Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF)))))))))] False
[Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF),
 No

 26%|█████████████████████▏                                                            | 22/85 [00:00<00:01, 45.25it/s]

[Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF),
 Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF),
 Or(And(Or(Or(Or(And(DEF___STRICT_ANSI__,
                     And(Not(DEF__FORTIFY_SOURCE),
                         And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                             Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF)))),
                 And(DEF___STRICT_ANSI__,
                     And(DEF__FORTIFY_SOURCE,
                         And(Not(USE__FORTIFY_SOURCE > 0),
                             And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                                 Not(DEF_CONFIG_FEATURE_HDPARM_HDIO_SCAN_HWIF)))))),
              And(DEF___STRICT_ANSI__,
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(Not(DEF___OPTIMIZE__),
                              And(DEF_CONFIG_FEATURE_HDPARM_HDIO_UNREGISTER_HWIF,
                                  Not(DEF_CONFIG_FEATURE_HDPARM_HDIO

 39%|███████████████████████████████▊                                                  | 33/85 [00:00<00:00, 59.17it/s]

[DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING,
 Or(And(Or(Or(Or(And(DEF___STRICT_ANSI__,
                     And(Not(DEF__FORTIFY_SOURCE),
                         Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING))),
                 And(DEF___STRICT_ANSI__,
                     And(DEF__FORTIFY_SOURCE,
                         And(Not(USE__FORTIFY_SOURCE > 0),
                             Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING))))),
              And(DEF___STRICT_ANSI__,
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(Not(DEF___OPTIMIZE__),
                              Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING)))))),
           And(DEF___STRICT_ANSI__,
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           And(Not(USE___OPTIMIZE__ > 0),
                               Not(DEF_CONFIG_FEATURE_INSMOD

 46%|█████████████████████████████████████▌                                            | 39/85 [00:00<00:00, 51.14it/s]

[DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING,
 Or(And(Or(Or(Or(And(DEF___STRICT_ANSI__,
                     And(Not(DEF__FORTIFY_SOURCE),
                         Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING))),
                 And(DEF___STRICT_ANSI__,
                     And(DEF__FORTIFY_SOURCE,
                         And(Not(USE__FORTIFY_SOURCE > 0),
                             Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING))))),
              And(DEF___STRICT_ANSI__,
                  And(DEF__FORTIFY_SOURCE,
                      And(USE__FORTIFY_SOURCE > 0,
                          And(Not(DEF___OPTIMIZE__),
                              Not(DEF_CONFIG_FEATURE_INSMOD_VERSION_CHECKING)))))),
           And(DEF___STRICT_ANSI__,
               And(DEF__FORTIFY_SOURCE,
                   And(USE__FORTIFY_SOURCE > 0,
                       And(DEF___OPTIMIZE__,
                           And(Not(USE___OPTIMIZE__ > 0),
                               Not(DEF_CONFIG_FEATURE_INSMOD

 54%|████████████████████████████████████████████▍                                     | 46/85 [00:01<00:00, 43.48it/s]

[DEF_CROSS_COMPILE,
 DEF_WIN32,
 Not(DEF_OS2),
 Or(And(And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__,
                    And(USE___OPTIMIZE__ > 0,
                        And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                            And(Not(DEF_CROSS_COMPILE),
                                And(Not(DEF_OS2),
                                    Not(DEF_NEED_ENHANCED_ESCAPES))))))))))] False
[DEF_CROSS_COMPILE,
 Not(DEF_WIN32),
 DEF_OS2,
 Or(And(And(DEF__FORTIFY_SOURCE,
            And(USE__FORTIFY_SOURCE > 0,
                And(DEF___OPTIMIZE__,
                    And(USE___OPTIMIZE__ > 0,
                        And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                            And(Not(DEF_CROSS_COMPILE),
                                And(Not(DEF_OS2),
                                    Not(DEF_NEED_ENHANCED_ESCAPES))))))))))] False
[DEF_CROSS_COMPILE,
 Not(DEF_WIN32),
 Not(DEF_OS2),
 Or(And(And(DEF__F

 60%|█████████████████████████████████████████████████▏                                | 51/85 [00:01<00:00, 34.22it/s]

[Not(DEF_CROSS_COMPILE),
 DEF_WIN32,
 Not(DEF_OS2),
 Or(And(Or(Or(Or(And(Not(DEF__FORTIFY_SOURCE),
                     And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                         And(Not(DEF_CROSS_COMPILE),
                             And(Not(DEF_OS2),
                                 Not(DEF_NEED_ENHANCED_ESCAPES))))),
                 And(DEF__FORTIFY_SOURCE,
                     And(Not(USE__FORTIFY_SOURCE > 0),
                         And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                             And(Not(DEF_CROSS_COMPILE),
                                 And(Not(DEF_OS2),
                                     Not(DEF_NEED_ENHANCED_ESCAPES))))))),
              And(DEF__FORTIFY_SOURCE,
                  And(USE__FORTIFY_SOURCE > 0,
                      And(Not(DEF___OPTIMIZE__),
                          And(Not(DEF___CORRECT_ISO_CPP_STRING_H_PROTO),
                              And(Not(DEF_CROSS_COMPILE),
                                  And(Not(DEF_

[DEF_APR_HAS_SHARED_MEMORY,
 DEF_APU_HAS_LDAP,
 Or(And(Not(And(DEF_APU_HAS_LDAP,
                Not(DEF_APR_HAS_SHARED_MEMORY))),
        And(DEF_APU_HAS_LDAP, DEF_APR_HAS_SHARED_MEMORY),
        Not(Or(Or(Or(Or(And(Not(DEF___STRICT_ANSI__),
                            And(Not(DEF__FORTIFY_SOURCE),
                                And(DEF_APU_HAS_LDAP,
                                    DEF_APR_HAS_SHARED_MEMORY))),
                        And(Not(DEF___STRICT_ANSI__),
                            And(DEF__FORTIFY_SOURCE,
                                And(Not(USE__FORTIFY_SOURCE >
                                        0),
                                    And(DEF_APU_HAS_LDAP,
                                        DEF_APR_HAS_SHARED_MEMORY))))),
                     And(Not(DEF___STRICT_ANSI__),
                         And(DEF__FORTIFY_SOURCE,
                             And(USE__FORTIFY_SOURCE > 0,
                                 And(Not(DEF___OPTIMIZE__),
                 

 89%|█████████████████████████████████████████████████████████████████████████▎        | 76/85 [00:01<00:00, 73.28it/s]

[DEF___OPTIMIZE__ = True,
 USE__FORTIFY_SOURCE = 1,
 USE___OPTIMIZE__ = 0,
 DEF__FORTIFY_SOURCE = True,
 DEF___STRICT_ANSI__ = True,
 DEF_HAVE_TLSV1_X = True]
[DEF_ENABLE_AUTO_BED_LEVELING,
 Or(And(DEF_ENABLE_AUTO_BED_LEVELING))] [DEF_ENABLE_AUTO_BED_LEVELING = True]
[DEF_CONFIG_SND_FSI_DA7210,
 DEF_CONFIG_I2C,
 DEF_CONFIG_SND_FSI_AK4642,
 Or(And(Or(And(Not(DEF_CONFIG_SND_FSI_DA7210),
               DEF_CONFIG_SND_SOC_DA7210),
           DEF_CONFIG_SND_FSI_DA7210)))] [DEF_CONFIG_SND_FSI_DA7210 = True,
 DEF_CONFIG_SND_SOC_DA7210 = False,
 DEF_CONFIG_SND_FSI_AK4642 = True,
 DEF_CONFIG_I2C = True]
[DEF_CONFIG_SND_FSI_DA7210,
 DEF_CONFIG_I2C,
 Not(DEF_CONFIG_SND_FSI_AK4642),
 Or(And(Or(And(Not(DEF_CONFIG_SND_FSI_DA7210),
               DEF_CONFIG_SND_SOC_DA7210),
           DEF_CONFIG_SND_FSI_DA7210)))] [DEF_CONFIG_SND_FSI_DA7210 = True,
 DEF_CONFIG_SND_SOC_DA7210 = False,
 DEF_CONFIG_SND_FSI_AK4642 = False,
 DEF_CONFIG_I2C = True]
[DEF_CONFIG_SND_FSI_DA7210,
 DEF_CONFIG_I2C,
 DEF_CONFIG_S

100%|██████████████████████████████████████████████████████████████████████████████████| 85/85 [00:01<00:00, 53.33it/s]

[Not(DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES),
 DEF_CONFIG_UNIX98_PTYS,
 Or(And(DEF_CONFIG_UNIX98_PTYS))] [DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES = False,
 DEF_CONFIG_UNIX98_PTYS = True]
[DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES,
 DEF_CONFIG_UNIX98_PTYS,
 Or(And(DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES))] [DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES = True,
 DEF_CONFIG_UNIX98_PTYS = True]
[DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES,
 Not(DEF_CONFIG_UNIX98_PTYS),
 Or(And(DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES))] [DEF_CONFIG_DEVPTS_MULTIPLE_INSTANCES = True,
 DEF_CONFIG_UNIX98_PTYS = False]
-----------
Number of desugared results: 85
Number of baseline results: 62
Number of exact matches: 33
Number of partial matches: 2
Number of unmatched: 50





In [6]:
# Count the distinct (message, input_file, input_line) triples among the baseline reports.
print(len({(report['message'], report['input_file'], report['input_line']) for report in baselines}))

42


In [7]:
# Bucket the baseline reports by (message, input_file, input_line); reports that
# share all three fields land in the same list, in their original order.
results = {}
for report in baselines:
    key = (report['message'], report['input_file'], report['input_line'])
    results.setdefault(key, []).append(report)

# Write only the first report of every bucket, i.e. the de-duplicated baselines.
with open("C:/Users/kisam/dedup_varbugsBaselineClang.json", 'w') as out_file:
    json.dump([bucket[0] for bucket in results.values()], out_file, indent=2)

At this point in the notebook, we have a few structures:
- summary: A dictionary mapping each 3-tuple that classifies a result to the list of result pairs with that classification.
- lonely_baselines: A list of baseline results for which no matching experimental result was found.
- lonely_experimental_results: A list of experimental results for which no matching baseline was found.

# Sample

This code randomly samples a result from each classification and prints it for inspection.

In [8]:
# Emits a single empty line; no sampling is performed in this cell.
print()




In [9]:
# Report how many entries each summary bucket holds; keys are stringified so
# that they can serve as JSON object keys.
print(json.dumps({"summary": {str(bucket_key): len(entries) for bucket_key, entries in summary.items()}}))

{"summary": {}}


In [10]:
# Print one randomly drawn entry for every summary bucket whose key has its
# first or second element set and its third element unset, followed by the
# number of unmatched results on each side.
# `random` and `json` are imported in the notebook's first cell.
for k, v in summary.items():
    if not (k[0] or k[1]) or k[2]:
        continue
    print(str(k))
    # min() caps the sample at the bucket size. The former max(1, len(v))
    # returned the whole bucket in shuffled order and made random.sample raise
    # ValueError for an empty bucket.
    print(json.dumps(random.sample(v, k=min(1, len(v))), indent=2))
    print("-----------------------------------------------")
print(f"Lonely baselines: {len(lonely_baselines)}, Lonely exps: {len(lonely_experimental_results)}")

Lonely baselines: 62, Lonely exps: 85


In [11]:
# Dump every unmatched baseline report as JSON, ordered by its sanitized message.
print(
    "Types of lonely baselines: \n"
    + json.dumps(
        sorted(lonely_baselines, key=lambda report: report['sanitized_message']),
        indent=2,
    )
)

Types of lonely baselines: 
[
  {
    "id": "1",
    "input_file": "/targets/VarBugsPatches/APACHE/1b48bb3c1af.c",
    "input_line": 14,
    "original_line": "ERROR",
    "function_line_range": "GLOBAL:1:25",
    "message": "1st function call argument is an uninitialized value",
    "sanitized_message": "1st function call argument is an uninitialized value",
    "presence_condition": null,
    "feasible": null,
    "configuration": [
      "DEF_APR_HAS_SHARED_MEMORY",
      "DEF_APU_HAS_LDAP"
    ],
    "analysis_time": 0.32108020782470703,
    "desugaring_time": null,
    "get_recommended_space": true,
    "remove_errors": false,
    "verified": null,
      [
        11,
        14,
        22
      ]
    ],
  },
  {
    "id": "6",
    "input_file": "/targets/VarBugsPatches/LINUX/7acf6cd80b2.c",
    "input_line": 44,
    "original_line": "ERROR",
    "function_line_range": "GLOBAL:1:55",
    "message": "1st function call argument is an uninitialized value",
    "sanitized_message": "1

In [12]:
# Show the experimental results whose input file path contains "BUSYBOX/eef".
print(json.dumps(
    list(filter(lambda result: "BUSYBOX/eef" in result['input_file'], experimental_results)),
    indent=2,
))

[
  {
    "id": "0",
    "input_file": "/targets/VarBugsPatches/BUSYBOX/eef2317b9f5.desugared.c",
    "input_line": 926,
    "original_line": "[30:31)",
    "function_line_range": "[5:36)",
    "message": "Potential leak of memory pointed to by '__buf_649'",
    "sanitized_message": "Potential leak of memory pointed to by 'buf'",
    "presence_condition": "Or(And((DEF_CONFIG_FEATURE_CLEAN_UP),Or(Or(Or(And((DEF___STRICT_ANSI__) ,And( Not(DEF__FORTIFY_SOURCE) , (DEF_CONFIG_FEATURE_CLEAN_UP) )),And( (DEF___STRICT_ANSI__) ,And( (DEF__FORTIFY_SOURCE) ,And( Not(USE__FORTIFY_SOURCE > 0) , (DEF_CONFIG_FEATURE_CLEAN_UP) )))),And( (DEF___STRICT_ANSI__) ,And( (DEF__FORTIFY_SOURCE) ,And( (USE__FORTIFY_SOURCE > 0) ,And( Not(DEF___OPTIMIZE__) , (DEF_CONFIG_FEATURE_CLEAN_UP) ))))),And( (DEF___STRICT_ANSI__) ,And( (DEF__FORTIFY_SOURCE) ,And( (USE__FORTIFY_SOURCE > 0) ,And( (DEF___OPTIMIZE__) ,And( Not(USE___OPTIMIZE__ > 0) , (DEF_CONFIG_FEATURE_CLEAN_UP))))))),Not(And((DEF___STRICT_ANSI__) ,And( (DEF_