Merge pull request #120 from ethereum/perf/codegen_for_prestates

Perf/codegen for prestates
ethereum · Nov 21, 2018 · 75ba4de · 75ba4de
2 parents 935a383 + 119c9a1
commit 75ba4de
Show file tree

Hide file tree

Showing 3 changed files with 117 additions and 41 deletions.
diff --git a/evmlab/tools/statetests/templates/statetest.py b/evmlab/tools/statetests/templates/statetest.py
@@ -3,20 +3,16 @@
 # Author : <github.com/tintinweb>
 import random
 import json
+import logging
 from types import SimpleNamespace
 from evmlab.tools.statetests import rndval, randomtest
 from evmlab.tools.statetests.rndval.base import WeightedRandomizer
 
 from evmlab.tools.statetests.rndval import RndCodeBytes
 
-PRECOMPILES = ["0x0000000000000000000000000000000000000001",
-                "0x0000000000000000000000000000000000000002",
-                "0x0000000000000000000000000000000000000003",
-                "0x0000000000000000000000000000000000000004",
-                "0x0000000000000000000000000000000000000005",
-                "0x0000000000000000000000000000000000000006",
-                "0x0000000000000000000000000000000000000007",
-                "0x0000000000000000000000000000000000000008"]
+
+logger = logging.getLogger("evmlab.tools.statetest")
+
 
 class Account(object):
 
@@ -55,6 +51,9 @@ def __init__(self, nonce=None, codegenerators={}, datalength=None,
         self.codegens = codegenerators  # sets _codegenerators and _codegenerators_weighted
         self.datalength = datalength  # sets _datalength
 
+        # other
+        self._fill_counter = 0  # track how often we've filled from this template
+
         ### info
         self._info = SimpleNamespace(fuzzer="evmlab",
                                      comment=self._config_get("info.comment", "evmlab"),
@@ -112,17 +111,25 @@ def _random_storage(self, _min=0, _max=10):
 
 
     def _autofill_prestates_from_transaction(self, tx):
+        logger.debug("autofill from tx.to")
+
         if tx.to in self.pre:
-            # already there
-            return self
+            # prestate already exists, renew it?
+            if self._fill_counter % self._config_getint("prestate.txto.renew.every.x.rounds", default=1) != 0:
+                # do not renew
+                logger.debug("autofill from tx.to - not renewing prestate due to prestate.txto.renew.every.x.rounds")
+                return
 
-        self._autofill_prestate(tx.to)
+        # force renewal of prestate
+        self._autofill_prestate(tx.to, force=True)
 
         return self
 
-    def _autofill_prestate(self, address):
-        if address in self.pre:
+    def _autofill_prestate(self, address, force=False):
+        logger.debug("autofill prestate")
+        if address in self.pre and not force:
             # already there
+            logger.debug("autofill prestate - skipping address already exists (and not using force)")
             return self
 
         if address.replace("0x","") not in rndval.RndAddress.addresses[rndval.RndAddressType.SENDING_ACCOUNT]+rndval.RndAddress.addresses[rndval.RndAddressType.STATE_ACCOUNT]:
@@ -151,8 +158,14 @@ def _autofill_prestate(self, address):
 
         return self
 
-    def _autofill_prestates_from_stack_arguments(self):
+    def _autofill_prestates_from_stack_arguments(self, tx):
         # todo: hacky hack
+        logger.debug("autofill from stack")
+        # config
+        config_renew_every_x_rounds = self._config_getint("prestate.other.renew.every.x.rounds", default=1) or 1  # 1== every round
+        config_renew_limit = self._config_getint("prestate.other.renew.limit.per.round", default=0)
+
+        nr_of_prestates_renewed_this_round = nr_of_prestates_added_this_round = 0
         all_addresses = set()
 
         for cg in self._codegenerators.values():
@@ -163,9 +176,38 @@ def _autofill_prestates_from_stack_arguments(self):
                 #print(ae)
                 pass
 
+        # do not handle precompiled accounts.
+        # also drop tx.to so it is not renewed here; it is handled in _autofill_prestates_from_transaction
+        all_addresses = list(all_addresses.difference(rndval.RndAddress.addresses[rndval.RndAddressType.PRECOMPILED] + [tx.to.replace("0x","")]))
+
+        # shuffle the list so the renewal limit does not always skip the same addresses (set iteration order is not random)
+        random.shuffle(all_addresses)
+
         for addr in all_addresses:
             #print(addr)
-            self._autofill_prestate(addr)
+            if "0x%s"%addr.replace("0x","") in self.pre:
+                # address exists, renew it in this round?
+                if self._fill_counter % config_renew_every_x_rounds != 0:
+                    # do not renew, skip
+                    logger.debug(
+                        "autofill from stack - not renewing prestate due to prestate.other.renew.every.x.rounds")
+                    continue
+                # address exists, did we already hit the renewal limit? only renew up to <limit> accounts (but add new ones)
+                # config_renew_limit == 0 - disabled, otherwise max nr of accounts to renew in this filling round
+                if config_renew_limit and nr_of_prestates_renewed_this_round >= config_renew_limit:
+                    # do not renew, skip
+                    logger.debug(
+                        "autofill from stack - not renewing prestate due to prestate.other.renew.limit.per.round")
+                    continue
+                nr_of_prestates_renewed_this_round += 1
+            # force overwriting the prestate; dups are handled in the loop.
+            self._autofill_prestate(addr, force=True)
+            nr_of_prestates_added_this_round += 1
+
+        logger.info("nr_of_prestates_added_this_round = %d" % nr_of_prestates_added_this_round)
+        logger.info("nr_of_prestates_renewed_this_round = %d" % nr_of_prestates_renewed_this_round)
+
+
 
     def _build(self):
         # clone the tx namespace and replace the generator with a concrete value (we can then refer to that value later)
@@ -181,7 +223,7 @@ def _build(self):
             self._autofill_prestates_from_transaction(tx)
 
         if self._fill_prestate_for_args:
-            self._autofill_prestates_from_stack_arguments()
+            self._autofill_prestates_from_stack_arguments(tx)
 
         self.add_prestate(address=env.currentCoinbase, code="")
 
@@ -247,6 +289,16 @@ def add_prestate(self, address, balance=None, code=None, nonce=None, storage=Non
                       storage=storage)
         self.pre[acc.address] = acc
 
+    def add_precomipled_prestates(self, force=False):
+        # It's better to already have the precompiles there, otherwise it just adds
+        # false positives due to the precompiles not existing in the trie.
+        # That will lead to consensus errors, but it's not an issue on mainnet,
+        # because all precompiles already exist there
+        for addr in rndval.RndAddress.addresses[rndval.RndAddressType.PRECOMPILED]:
+            addr = "0x%s"%addr.replace("0x","")
+            if addr not in self._pre or force:
+                self.add_prestate(address=addr, balance="0x01", code="")
+
     def pick_codegen(self, name=None):
         if name:
             return self._codegenerators[name]
@@ -268,20 +320,9 @@ def __iter__(self):
     def json(self):
         return json.dumps(self.__dict__, cls=randomtest.RandomTestsJsonEncoder)
 
-    def fill(self, reset_prestate=False):
-        # todo: performance
-        if reset_prestate:
-            self.pre = {}
-            # It's better to already have the precompiles there, otherwise it just addres
-            # false positives due to the precompiles not existing in the trie. 
-            # That will lead to consensus errors, but it's not an issue on mainnet, 
-            # because all precompiles already exist there
-            for a in PRECOMPILES:
-                self.pre[a] = Account(address = a, 
-                    balance = "0x01", nonce = "0x00")
-
-
-            # will be filled by _build
+    def fill(self):
+        self._fill_counter += 1
+        # will be filled by _build
         return json.loads(self.json())
 
 

diff --git a/statetests.ini b/statetests.ini
@@ -46,6 +46,13 @@ info.comment = evmlab statetest
 #prestate.random.code.length.min = 50
 #prestate.random.code.length.max = 500
 
+## performance: prestate regeneration behavior
+# renew.every.x.rounds  ... regenerate existing prestates only every x-th round. 1 = every round
+# renew.limit.per.round ... max number of existing prestates to regenerate in a round. 0 = no limit
+#prestate.txto.renew.every.x.rounds = 1
+#prestate.other.renew.every.x.rounds = 1
+#prestate.other.renew.limit.per.round = 0
+
 # transaction gas limit
 #transaction.gaslimit.random.min = 476000
 #transaction.value.random.min = 0
@@ -62,7 +69,7 @@ engine.RndCodeSmart2.enabled = true
 ## probabilities for each codegen to be chosen
 engine.RndCodeBytes.weight = 5
 engine.RndCodeInstr.weight = 25
-engine.RndCodeSmart2.weight = 75
+engine.RndCodeSmart2.weight = 70
 
 # [[RndCodeInstr]]
 # 99.0%

diff --git a/utilities/fuzzer.py b/utilities/fuzzer.py
@@ -92,6 +92,18 @@ def resolve(path):
             if value is not None:
                 self._config.set(uname, arg, str(value))
 
+        for override in self.cmdline_args.set_config:
+            if "=" not in override:
+                logger.warning("skipping config override (format error): %s"%override)
+                continue
+            key, value = override.strip().split("=",1)
+            section, key = key.strip().split(".",1)
+
+            logger.info("overriding: [%s] %s=%s"%(section, key,value))
+            self._config.set(section.strip(), key.strip(), value.strip())
+
+
+
         self.force_save = self._config.get(uname, 'force_save', fallback=False)
         self.enable_reporting = self._config.get(uname, 'enable_reporting', fallback=False)
         self.docker_force_update_image = self._config.get(uname, 'docker_force_update_image', fallback=None)
@@ -330,7 +342,9 @@ def postprocess_test(self, test, reporting=False):
             ))
 
     def startFuzzing(self):
+        print_stats_every_x_seconds = 90
         self.stats["start_time"] = time.time()
+        next_stats_print = self.stats["start_time"] + print_stats_every_x_seconds
         # This is the max cap of parallelism, it's just to prevent
         # things getting out of hand if tests start piling up
         # We don't expect to actually reach it
@@ -394,6 +408,15 @@ def startFuzzing(self):
                     self.stats["num_active_tests"] = self.stats["num_active_tests"] - 1
                     self.postprocess_test(test, reporting=self._fuzzer._config.enable_reporting)
 
+            if time.time()> next_stats_print:
+                logger.info("=" * 25)
+                logger.info("current status: %r"%self.status())
+                logger.info("tracelength distribution (top 10): %r" % dict(collections.Counter(self.traceLengths).most_common(10)))
+                logger.info("=" * 25)
+                next_stats_print = time.time() + print_stats_every_x_seconds
+
+
+
     def dry_run(self):
         tstart = time.time()
         self.stats["start_time"] = tstart
@@ -465,6 +488,7 @@ def __init__(self, config=None):
                                                               fill_prestate_for_tx_to=True,
                                                               _config=self._config)
         self.statetest_template.info.fuzzer = "evmlab tin"
+        self.statetest_template.add_precomipled_prestates()
 
     def docker_remove_image(self, image, force=True):
         self._dockerclient.images.remove(image=image, force=force)
@@ -541,7 +565,8 @@ def generate_tests(self):
         def createATest():
             counter = 0
             while True:
-                test_obj = self.statetest_template.fill(reset_prestate=True)
+                # prestates are reused and regenerated according to the settings in prestate.txto.*, prestate.other.*
+                test_obj = self.statetest_template.fill()
                 s = StateTest(test_obj, counter, config=self._config)
                 ## testing
                 # print(test_obj.keys())
@@ -566,7 +591,7 @@ def benchmark(self, method=None, duration=None):
         counter = 0
 
         def default_method():
-            return self.statetest_template.fill(reset_prestate=True)
+            return self.statetest_template.fill()
 
         method = method or default_method
 
@@ -582,7 +607,7 @@ def default_method():
             print("to: %s --> pre: %r" % (test_obj["randomStatetest"]["transaction"]["to"],
                                           set(test_obj["randomStatetest"]["pre"].keys())))
             s_per_test = x1-x0
-            tot_per_s = counter / (x1 - start)
+            tot_per_s = counter / (x1 - start + 1e-30)  # avoid div/0
             print("%d %f (tot %f/s)" % (counter, s_per_test, tot_per_s))
 
             counter = counter + 1
@@ -701,8 +726,9 @@ def end_processes(self, test):
             if tracelen==0:
                 self._num_zero_traces += 1
             t2 = time.time()
-            logger.info("Processed %s steps for %s on test %s, pTime:%.02f ms "
-                        % (tracelen, client_name, test.identifier, 1000 * (t2 - t1)))
+            logger.info("Processed %s steps for %s on test %s, pTime:%.02f ms (depth: %s, ConstantinopleOps: %s)"
+                        % (tracelen, client_name, test.identifier, 1000 * (t2 - t1),
+                        stats.result().get("maxDepth","nA"), stats.result().get("constatinopleOps","nA")))
 
         # print(stats)
         # print(canon_steps)
@@ -825,13 +851,14 @@ def configFuzzer():
     # <required> configuration file: statetests.ini
     parser.add_argument("-c", "--configfile", default="statetests.ini", required=True,
                         help="path to configuration file (default: statetests.ini)")
+    parser.add_argument("-s", "--set-config", default=[], nargs='*', help="override settings in ini as <section>.<value>=<value>")
     parser.add_argument("-D", "--dry-run", default=False, action="store_true",
                         help="Simulate and print the output instead of running it with the docker backend (default: False)")
     parser.add_argument("-B", "--benchmark", default=False, action="store_true",
                         help="Benchmark test generation (default: False)")
 
     grp_artefacts = parser.add_argument_group('Configure Output Artefacts and Reporting')
-    grp_artefacts.add_argument("-x", "--force-save", default=None, action="store_true",
+    grp_artefacts.add_argument("-x", "--preserve-files", default=None, action="store_true",
                                help="Keep tracefiles/logs/testfiles for non-failing testcases (watch disk space!) (default: False)")
     grp_artefacts.add_argument("-r", "--enable-reporting", default=None, action="store_true",
                                help="Output testrun statistics (num of passes/fails and speed (default: False)")
@@ -863,8 +890,8 @@ def configFuzzer():
 
         # benchmark new method
         logger.info("new method: %ssec duration"%duration)
-        avg = fuzzer.benchmark(duration=duration)
-        logger.info("new method avg generation time: %f (%f tests/s)" % (avg, 1 / avg))
+        avg_new = fuzzer.benchmark(duration=duration)
+
 
         # benchmark old method
         from evmlab.tools.statetests import templates
@@ -875,8 +902,9 @@ def old_method():
             return json.loads(json.dumps(t, cls=randomtest.RandomTestsJsonEncoder))
 
         logger.info("old method: %ssec duration" % duration)
-        avg = fuzzer.benchmark(old_method, duration=duration)
-        logger.info("old method avg generation time: %f (%f tests/s)" % (avg, 1/avg))
+        avg_old = fuzzer.benchmark(old_method, duration=duration)
+        logger.info("old method avg generation time: %f (%f tests/s)" % (avg_old, 1/avg_old))
+        logger.info("new method avg generation time: %f (%f tests/s)" % (avg_new, 1 / avg_new))
 
         sys.exit(0)