Merge pull request #1114 from firesim/build-driver-command

add firesim builddriver command; plus a few metasim fixes and doc updates
firesim · Jul 6, 2022 · b0a234d · b0a234d
2 parents 6f6339f + d6beb65
commit b0a234d
Show file tree

Hide file tree

Showing 7 changed files with 217 additions and 73 deletions.
diff --git a/deploy/firesim b/deploy/firesim
@@ -292,6 +292,11 @@ def buildbitstream(build_config_file: BuildConfigFile) -> None:
     # run builds, then terminate instances
     execute(parallel_build_helper, build_config_file, hosts=build_config_file.build_ip_set)
 
+@register_task
+def builddriver(runtime_conf: RuntimeConfig) -> None:
+    """ Only perform the driver build (host-processor side of an FPGA sim or
+    an entire metasim)."""
+    runtime_conf.build_driver()
 
 @register_task
 # XXX this needs to be renamed or rethought, perhaps this is a backend-specific task?

diff --git a/deploy/runtools/firesim_topology_with_passes.py b/deploy/runtools/firesim_topology_with_passes.py
@@ -396,6 +396,10 @@ def infrasetup_node_wrapper(run_farm: RunFarm) -> None:
         execute(instance_liveness, hosts=all_run_farm_ips)
         execute(infrasetup_node_wrapper, self.run_farm, hosts=all_run_farm_ips)
 
+    def build_driver_passes(self) -> None:
+        """ Only run passes to build drivers. """
+        self.pass_build_required_drivers()
+
     def boot_simulation_passes(self, use_mock_instances_for_testing: bool, skip_instance_binding: bool = False) -> None:
         """ Passes that setup for boot and boot the simulation.
         skip instance binding lets users not call the binding pass on the run_farm

diff --git a/deploy/runtools/runtime_config.py b/deploy/runtools/runtime_config.py
@@ -289,7 +289,8 @@ def __init__(self, name: str, build_recipe_dict: Dict[str, Any],
                  metasimulation_only_plusargs: str,
                  metasimulation_only_vcs_plusargs: str) -> None:
         self.name = name
-        self.agfi = "Metasim" # for __str__ to work
+        self.agfi = None
+        self.xclbin = None
         self.deploytriplet = build_recipe_dict['DESIGN'] + "-" + build_recipe_dict['TARGET_CONFIG'] + "-" + build_recipe_dict['PLATFORM_CONFIG']
 
         self.customruntimeconfig = build_recipe_dict['metasim_customruntimeconfig']
@@ -361,8 +362,12 @@ class RuntimeHWDB:
     """ This class manages the hardware configurations that are available
     as endpoints on the simulation. """
     hwconf_dict: Dict[str, RuntimeHWConfig]
+    config_file_name: str
+    simulation_mode_string: str
 
     def __init__(self, hardwaredbconfigfile: str) -> None:
+        self.config_file_name = hardwaredbconfigfile
+        self.simulation_mode_string = "FPGA simulation"
 
         agfidb_configfile = None
         with open(hardwaredbconfigfile, "r") as yaml_file:
@@ -372,7 +377,13 @@ def __init__(self, hardwaredbconfigfile: str) -> None:
 
         self.hwconf_dict = {s: RuntimeHWConfig(s, v) for s, v in agfidb_dict.items()}
 
+    def keyerror_message(self, name: str) -> str:
+        """ Return the error message for lookup errors."""
+        return f"'{name}' not found in '{self.config_file_name}', which is used to specify target design descriptions for {self.simulation_mode_string}s."
+
     def get_runtimehwconfig_from_name(self, name: str) -> RuntimeHWConfig:
+        if name not in self.hwconf_dict:
+            raise KeyError(self.keyerror_message(name))
         return self.hwconf_dict[name]
 
     def __str__(self) -> str:
@@ -386,6 +397,8 @@ def __init__(self, build_recipes_config_file: str,
                  metasim_host_simulator: str,
                  metasimulation_only_plusargs: str,
                  metasimulation_only_vcs_plusargs: str) -> None:
+        self.config_file_name = build_recipes_config_file
+        self.simulation_mode_string = "Metasimulation"
 
         recipes_configfile = None
         with open(build_recipes_config_file, "r") as yaml_file:
@@ -584,6 +597,10 @@ def infrasetup(self) -> None:
         use_mock_instances_for_testing = False
         self.firesim_topology_with_passes.infrasetup_passes(use_mock_instances_for_testing)
 
+    def build_driver(self) -> None:
+        """ directly called by top-level builddriver command. """
+        self.firesim_topology_with_passes.build_driver_passes()
+
     def boot(self) -> None:
         """ directly called by top-level boot command. """
         use_mock_instances_for_testing = False

diff --git a/docs/Advanced-Usage/Debugging-in-Software/RTL-Simulation.rst b/docs/Advanced-Usage/Debugging-in-Software/RTL-Simulation.rst
@@ -3,13 +3,14 @@
 Debugging & Testing with Metasimulation
 =========================================
 
-When we speak of RTL simulation in FireSim, we are generally referring to
-`metasimulation`: simulating the FireSim simulator's RTL, typically using VCS or
-Verilator. In contrast, we we'll refer to native simulation of the target's RTL
-as `target-level` simulation. Target-level simulation in Chipyard is described at length
-`here <https://chipyard.readthedocs.io/en/latest/Simulation/Software-RTL-Simulation.html>`_.
-
-Meta-simulation is the most productive way to catch bugs
+When discussing RTL simulation in FireSim, we are generally referring to
+`metasimulation`: simulating the FireSim simulator's RTL, typically using VCS
+or Verilator. In contrast, we'll refer to simulation of the target's unmodified
+(by GoldenGate decoupling, host and target transforms) RTL as `target-level`
+simulation. Target-level simulation in Chipyard is described at length `here
+<https://chipyard.readthedocs.io/en/latest/Simulation/Software-RTL-Simulation.html>`_.
+
+Metasimulation is the most productive way to catch bugs
 before generating an AGFI, and a means for reproducing bugs seen on the FPGA.
 By default, metasimulation uses an abstract but fast model of the host: the
 FPGA's DRAM controllers are modeled with DRAMSim2, the PCI-E subsystem is not
@@ -18,48 +19,54 @@ verilog DPI. Since FireSim simulations are robust against timing differences
 across hosts, target behavior observed in an FPGA-hosted simulation should be
 exactly reproducible in a metasimulation.
 
-Generally, meta-simulators are only slightly slower than target-level
-ones. This illustrated in the chart below.
+As a final note, metasimulations are generally only slightly slower than
+target-level simulations. Example performance numbers can be found at
+:ref:`metasimulation-performance`.
 
-====== ===== =======  ========= ============= =============
-Type   Waves VCS      Verilator Verilator -O1 Verilator -O2
-====== ===== =======  ========= ============= =============
-Target Off   4.8 kHz  3.9 kHz   6.6 kHz       N/A
-Target On    0.8 kHz  3.0 kHz   5.1 kHz       N/A
-Meta   Off   3.8 kHz  2.4 kHz   4.5 kHz       5.3 KHz
-Meta   On    2.9 kHz  1.5 kHz   2.7 kHz       3.4 KHz
-====== ===== =======  ========= ============= =============
+.. _metasimulation-supported-host-sims:
 
-Note that using more aggressive optimization levels when compiling the
-Verilated-design dramatically lengthens compile time:
+Supported Host Simulators
+----------------------------------------------------
 
-====== ===== =======  ========= ============= =============
-Type   Waves VCS      Verilator Verilator -O1 Verilator -O2
-====== ===== =======  ========= ============= =============
-Meta   Off   35s      48s       3m32s         4m35s
-Meta   On    35s      49s       5m27s         6m33s
-====== ===== =======  ========= ============= =============
+Currently, the following host simulators are supported for metasimulation:
 
-Notes: Default configurations of a single-core, Rocket-based instance running
-rv64ui-v-add. Frequencies are given in target-Hz. Presently, the default
-compiler flags passed to Verilator and VCS differ from level to level. Hence,
-these numbers are only intended to ball park simulation speeds, not provide a
-scientific comparison between simulators. VCS numbers collected on a local Berkeley machine,
-Verilator numbers collected on a c4.4xlarge. (metasimulation Verilator version: 4.002, target-level
-Verilator version: 3.904)
+* `Verilator <https://www.veripool.org/verilator/>`_
+
+  * FOSS, automatically installed during FireSim setup.
+
+  * Referred to throughout the codebase as ``verilator``.
+
+* `Synopsys VCS <https://www.synopsys.com/verification/simulation/vcs.html>`_
 
+  * License required.
 
-Running Metasimulations Through The FireSim Manager
+  * Referred to throughout the codebase as ``vcs``.
+
+
+Pull requests to add support for other simulators are welcome.
+
+
+Running Metasimulations using the FireSim Manager
 ----------------------------------------------------
 
-In addition to the default ``make`` API to run metasimulations,
-there is now support in the FireSim manager for distributed metasimulations.
-Assuming you have a pre-setup ``config_runtime.yaml`` that is setup for FPGA-accelerated simulations,
-a few modifications can convert it to distributed metasimulation.
+The FireSim manager supports running metasimulations using the standard
+``firesim {launchrunfarm, infrasetup, runworkload, terminaterunfarm}`` flow
+that is also used for FPGA-accelerated simulations. Rather than using FPGAs,
+these metasimulations run within one of the aforementioned software simulators
+(:ref:`metasimulation-supported-host-sims`) on standard compute hosts (i.e.
+those without FPGAs). This allows users to write a single definition of
+a target (configured design and software workload), while seamlessly moving
+between software-only metasimulations and FPGA-accelerated simulations.
 
-Modify the existing ``metasimulation`` mapping in ``config_runtime.yaml`` to the following:
+As an example, if you have the default ``config_runtime.yaml`` that is set up for
+FPGA-accelerated simulations (e.g. the one used for the 8-node networked
+simulation from the :ref:`cluster-sim` section), a few modifications to the
+configuration files can convert it to running a distributed metasimulation.
 
-::
+First, modify the existing ``metasimulation`` mapping in
+``config_runtime.yaml`` to the following:
+
+.. code-block:: yaml
 
     metasimulation:
         metasimulation_enabled: true
@@ -70,14 +77,36 @@ Modify the existing ``metasimulation`` mapping in ``config_runtime.yaml`` to the
         # plusargs passed to the simulator ONLY FOR vcs metasimulations
         metasimulation_only_vcs_plusargs: "+vcs+initreg+0 +vcs+initmem+0"
 
-This will enable you to run Verilator metasimulations for the given ``config_runtime.yaml``.
-This includes you being able to run NIC simulations, and use existing FireSim debugging tools (i.e. AutoCounter, TracerV, etc).
 
-The number of metasimulations that are run is determined by the Run Farm in conjunction with the ``topology`` in ``config_runtime.yaml``.
-When you are specifying a Run Farm host to use using the ``run_farm_hosts_to_use`` mapping, a specification must include a number of metasimulations
-to support (i.e. ``num_metasims``). For example, in the AWS EC2 ``aws_ec2.yaml`` run farm case:
-
-::
+This configures the manager to run Verilator-hosted metasimulations (without
+waveform generation) for the target specified in ``config_runtime.yaml``.  When
+in metasimulation mode, the ``default_hw_config`` that you specify in
+``target_config`` references an entry in ``config_build_recipes.yaml`` instead
+of an entry in ``config_hwdb.yaml``.
+
+As is the case when the manager runs FPGA-accelerated simulations, the number
+of metasimulations that are run is determined by the parameters in the
+``target_config`` section, e.g. ``topology`` and ``no_net_num_nodes``. Many
+parallel metasimulations can then be run by writing a FireMarshal workload with
+a corresponding number of jobs.
+
+In metasimulation mode, the run farm configuration must be able to support the
+required number of metasimulations (see
+:ref:`run-farm-config-in-config-runtime` for details). The ``num_metasims``
+parameter on a run farm host specification defines how many metasimulations are
+allowed to run on a particular host. This corresponds with the ``num_fpgas``
+parameter used in FPGA-accelerated simulation mode. However, ``num_metasims``
+does not correspond as tightly with any physical property of the host; it can
+be tuned depending on the complexity of your design and the compute/memory
+resources on a host.
+
+For example, in the case of the AWS EC2 run farm (``aws_ec2.yaml``), we define
+three instance types (``z1d.{3, 6, 12}xlarge``) by default that loosely
+correspond with ``f1.{2, 4, 16}xlarge`` instances, but instead have no FPGAs
+and run only metasims (of course, the ``f1.*`` instances could run metasims,
+but this would be wasteful):
+
+.. code-block:: yaml
 
     run_farm_hosts_to_use:
         - z1d.3xlarge: 0
@@ -98,31 +127,57 @@ to support (i.e. ``num_metasims``). For example, in the AWS EC2 ``aws_ec2.yaml``
             num_metasims: 8
             use_for_switch_only: false
 
-In this case, the Run Farm will use a ``z1d.12xlarge`` instance to host
-8 metasimulations (determined by the specification).
-
-Other than these changes, the rest of the manager is the same between FPGA simulations and
-metasimulations.
-In other words, outputs are stored in ``deploy/result-workload``, FireMarshal SW workloads are used,
-screen sessions are run, etc.
 
-If you are interested in getting a waveform back from the metasimulations
-when running with ``*-debug``, make sure to add ``waveform.vpd`` to the ``common_simulation_outputs`` area of the workload JSON file.
-Additionally, unlike the normal FPGA simulation case, there are two output logs.
-First, there is a ``metasim_stderr.out`` file that holds ``stderr`` coming out of the metasimulation.
-Second, there is a ``uartlog`` file that holds the ``stdout`` from the metasimulation (like normal FPGA simulations).
-If you want to copy them back, you must also add them to the ``common_simulation_outputs`` of the workload JSON.
-
-Understanding A Metasimulation Waveform
+In this case, the run farm will use a ``z1d.12xlarge`` instance to host
+8 metasimulations.
+
+To generate waveforms in a metasimulation, change
+``metasimulation_host_simulator`` to a simulator ending in ``-debug`` (e.g.
+``verilator-debug``).  When running with a simulator with waveform generation,
+make sure to add ``waveform.vpd`` to the ``common_simulation_outputs`` area of
+your workload JSON file, so that the waveform is copied back to your manager
+host when the simulation completes.
+
+A last notable point is that unlike the normal FPGA simulation case, there are
+two output logs in metasimulations.  There is the expected ``uartlog`` file
+that holds the ``stdout`` from the metasimulation (as in FPGA-based
+simulations).  However, there will also be a ``metasim_stderr.out`` file that
+holds ``stderr`` coming out of the metasimulation, commonly populated by
+``printf`` calls in the RTL, including those that are not marked for ``printf``
+synthesis.  If you want to copy ``metasim_stderr.out`` to your manager when
+a simulation completes, you must add it to the ``common_simulation_outputs`` of
+the workload JSON.
+
+Other than the changes discussed in this section, manager behavior is identical
+between FPGA-based simulations and metasimulations. For example, simulation
+outputs are stored in ``deploy/results-workload/`` on your manager host,
+FireMarshal workload definitions are used to supply target software, etc.  All
+standard manager functionality is supported in metasimulations, including
+running networked simulations and using existing FireSim debugging tools (e.g.
+AutoCounter, TracerV, etc.).
+
+Once the configuration changes discussed thus far in this section are made, the
+standard ``firesim {launchrunfarm, infrasetup, runworkload, terminaterunfarm}``
+set of commands will run metasimulations.
+
+If you are planning to use FireSim metasimulations as your primary simulation
+tool while developing a new target design, see the (optional) ``firesim
+builddriver`` command, which can build metasimulations through the manager
+without requiring run farm hosts to be launched or accessible. More about this
+command is found in the :ref:`firesim-builddriver` section.
+
+
+Understanding a Metasimulation Waveform
 ----------------------------------------
 
 Module Hierarchy
 ++++++++++++++++
 To build out a simulator, Golden Gate adds multiple layers of module hierarchy
-to the target design and performs additional hierarchy mutations to implement bridges and
-resource optimizations. Metasimulation uses the ``FPGATop`` module as the
-top-level module, which excludes the platform shim layer (``F1Shim``, for EC2 F1).
-The original top-level of the input design is nested three levels below FPGATop:
+to the target design and performs additional hierarchy mutations to implement
+bridges and resource optimizations. Metasimulation uses the ``FPGATop`` module
+as the top-level module, which excludes the platform shim layer (``F1Shim``,
+for EC2 F1).  The original top-level of the input design is nested three levels
+below FPGATop:
 
 .. figure:: /img/metasim-module-hierarchy.png
 
@@ -172,10 +227,13 @@ designs, including all of the MIDAS examples and a handful of Chipyard-based
 designs. This is described in greater detail
 in the :ref:`Developer documentation <Scala Integration Tests>`.
 
-Running Metasimulations Through Make
+Running Metasimulations through Make
 ------------------------------------
 
-Meta-simulations are run out of the ``firesim/sim`` directory.
+.. Warning:: This section is for advanced developers; most metasimulation users
+   should use the manager-based metasimulation flow described above.
+
+Metasimulations are run out of the ``firesim/sim`` directory.
 
 ::
 
@@ -229,26 +287,64 @@ Run all RISCV-tools assembly and benchmark tests on a Verilated simulator.
     make -j run-asm-tests
     make -j run-bmark-tests
 
-Run all RISCV-tools assembly and benchmark tests on a Verilated simulator with waveform dumping.
+Run all RISCV-tools assembly and benchmark tests on a Verilated simulator with
+waveform dumping.
 
 ::
 
     make verilator-debug
     make -j run-asm-tests-debug
     make -j run-bmark-tests-debug
 
-Run rv64ui-p-simple (a single assembly test) on a Verilated simulator.
+Run ``rv64ui-p-simple`` (a single assembly test) on a Verilated simulator.
 
 ::
 
     make
     make $(pwd)/output/f1/FireSim-FireSimRocketConfig-BaseF1Config/rv64ui-p-simple.out
 
-Run rv64ui-p-simple (a single assembly test) on a VCS simulator with waveform dumping.
+Run ``rv64ui-p-simple`` (a single assembly test) on a VCS simulator with
+waveform dumping.
 
 ::
 
     make vcs-debug
     make EMUL=vcs $(pwd)/output/f1/FireSim-FireSimRocketConfig-BaseF1Config/rv64ui-p-simple.vpd
 
 
+.. _metasimulation-performance:
+
+Metasimulation vs. Target simulation performance
+---------------------------------------------------------
+
+Generally, metasimulations are only slightly slower than target-level
+simulations. This is illustrated in the table below.
+
+====== ===== =======  ========= ============= =============
+Type   Waves VCS      Verilator Verilator -O1 Verilator -O2
+====== ===== =======  ========= ============= =============
+Target Off   4.8 kHz  3.9 kHz   6.6 kHz       N/A
+Target On    0.8 kHz  3.0 kHz   5.1 kHz       N/A
+Meta   Off   3.8 kHz  2.4 kHz   4.5 kHz       5.3 kHz
+Meta   On    2.9 kHz  1.5 kHz   2.7 kHz       3.4 kHz
+====== ===== =======  ========= ============= =============
+
+Note that using more aggressive optimization levels when compiling the
+Verilated-design dramatically lengthens compile time:
+
+====== ===== =======  ========= ============= =============
+Type   Waves VCS      Verilator Verilator -O1 Verilator -O2
+====== ===== =======  ========= ============= =============
+Meta   Off   35s      48s       3m32s         4m35s
+Meta   On    35s      49s       5m27s         6m33s
+====== ===== =======  ========= ============= =============
+
+Notes: Default configurations of a single-core, Rocket-based instance running
+``rv64ui-v-add``. Frequencies are given in target-Hz. Presently, the default
+compiler flags passed to Verilator and VCS differ from level to level. Hence,
+these numbers are only intended to give ballpark simulation speeds, not provide
+a scientific comparison between simulators. VCS numbers collected on a local
+Berkeley machine, Verilator numbers collected on a ``c4.4xlarge``.
+(metasimulation Verilator version: 4.002, target-level Verilator version:
+3.904)
+