ceph · harriscr · Jul 8, 2025 · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,6 @@
 *.pyc
 *.pyo
+*.venv
+*.code-workspace
+.devcontainer
+pyproject.toml
diff --git a/benchmarkfactory.py b/benchmarkfactory.py
@@ -1,7 +1,5 @@
-import copy
-import itertools
-
 import settings
+from common import all_configs
 from benchmark.radosbench import Radosbench
 from benchmark.fio import Fio
 from benchmark.hsbench import Hsbench
@@ -22,33 +20,6 @@ def get_all(archive, cluster, iteration):
             current.update(default)
             yield get_object(archive, cluster, benchmark, current)
 
-
-def all_configs(config):
-    """
-    return all parameter combinations for config
-    config: dict - list of params
-    iterate over all top-level lists in config
-    """
-    cycle_over_lists = []
-    cycle_over_names = []
-    default = {}
-
-    for param, value in list(config.items()):
-        # acceptable applies to benchmark as a whole, no need to it to
-        # the set for permutation
-        if param == 'acceptable':
-            default[param] = value
-        elif isinstance(value, list):
-            cycle_over_lists.append(value)
-            cycle_over_names.append(param)
-        else:
-            default[param] = value
-
-    for permutation in itertools.product(*cycle_over_lists):
-        current = copy.deepcopy(default)
-        current.update(list(zip(cycle_over_names, permutation)))
-        yield current
-
 def get_object(archive, cluster, benchmark, bconfig):
     benchmarks = {
         'nullbench': Nullbench,

diff --git a/cli_options.py b/cli_options.py
@@ -0,0 +1,56 @@
+"""
+A class to encapsulate a set of configuration options that can be used to
+construct the CLI to use to run a benchmark
+"""
+
+from collections import UserDict
+from logging import Logger, getLogger
+from typing import Optional
+
+log: Logger = getLogger("cbt")
+
+
+class CliOptions(UserDict[str, Optional[str]]):
+    """
+    This class encapsulates a set of CLI options that can be passed to a
+    command line invocation. It is based on a python dictionary, but with
+    behaviour modified so that:
+
+    * the first value stored for a key wins - later assignments to the same
+      key, including those made through update(), are ignored
+    * looking up a key that does not exist returns None instead of raising
+      KeyError
+    """
+
+    def __setitem__(self, key: str, value: Optional[str]) -> None:
+        """
+        Add an entry to the configuration.
+        If key already exists the stored value is kept and the attempt is
+        logged at debug level
+        """
+        if key in self.data:
+            log.debug("Not adding %s:%s to configuration. A value is already set", key, value)
+            return
+
+        self.data[key] = value
+
+    def __update__(self, key_value_pair: tuple[str, str]) -> None:
+        """
+        Add a single (key, value) pair to the configuration.
+        If the entry exists then don't update it
+
+        Despite its name this is not a hook that python calls: update()
+        stores its items through __setitem__. This method only runs when it
+        is called explicitly.
+        """
+        key, value = key_value_pair
+        if key in self.data:
+            log.debug("Not Updating %s:%s in configuration. Value already exists", key, value)
+            return
+
+        self.data[key] = value
+
+    def __getitem__(self, key: str) -> Optional[str]:
+        """
+        Get the value for key in the configuration.
+        Return None and log a debug message if the key does not exist
+        """
+        try:
+            return self.data[key]
+        except KeyError:
+            log.debug("Key %s does not exist in configuration", key)
+            return None
+
+    def clear(self) -> None:
+        """
+        Clear the configuration
+        """
+        # Rebind rather than empty in place, so an object that still holds a
+        # reference to the previous dictionary is not affected
+        self.data = {}
diff --git a/command/__init__.py b/command/__init__.py
diff --git a/command/command.py b/command/command.py
@@ -0,0 +1,94 @@
+"""
+A class to deal with a command that will run a single instance of
+a benchmark executable
+
+It will return the full executable string that can be used to run a
+cli command using whatever method the Benchmark chooses
+"""
+
+from abc import ABCMeta, abstractmethod
+from logging import Logger, getLogger
+from typing import Optional
+
+from cli_options import CliOptions
+
+log: Logger = getLogger("cbt")
+
+
+class Command(metaclass=ABCMeta):
+    """
+    Abstract base for a single CLI command that can be run on a system.
+
+    This class holds the executable and the parsed options. Subclasses
+    provide the option parsing, the output directory layout and the final
+    command string through the abstract hooks at the bottom of the class.
+    """
+
+    def __init__(self, options: dict[str, str]) -> None:
+        self._executable: Optional[str] = None
+        self._output_directory: str = ""
+        # Parsed last, so the attributes above already exist when the
+        # subclass implementation of _parse_options runs
+        self._options: CliOptions = self._parse_options(options)
+
+    # ----- public interface -----
+
+    def get(self) -> str:
+        """
+        Return the full cli string for one run of the benchmark executable,
+        including all of its options.
+
+        If no executable has been set yet an error is logged and an empty
+        string is returned.
+        """
+        if self._executable is not None:
+            return self._generate_full_command()
+
+        log.error("Executable has not yet been set for this command.")
+        return ""
+
+    def get_output_directory(self) -> str:
+        """
+        Return the output directory used by this command, as produced by
+        _generate_output_directory_path()
+        """
+        return self._generate_output_directory_path()
+
+    def set_executable(self, executable_path: str) -> None:
+        """
+        Record the executable that this command will run
+        """
+        self._executable = executable_path
+
+    def set_global_options(self, global_options: dict[str, str]) -> None:
+        """
+        Convert global_options with _parse_global_options() and merge the
+        result into the options held by this command
+        """
+        self._options.update(self._parse_global_options(global_options))
+
+    def update_options(self, new_options: dict[str, str]) -> None:
+        """
+        Merge the new_options dictionary into the options held by this
+        command
+        """
+        self._options.update(new_options)
+
+    # ----- hooks implemented by each command type -----
+
+    @abstractmethod
+    def _parse_options(self, options: dict[str, str]) -> CliOptions:
+        """
+        Translate the options read from the configuration yaml file into
+        the key/value pairs that are passed to the benchmark executable
+        """
+
+    @abstractmethod
+    def _parse_global_options(self, options: dict[str, str]) -> CliOptions:
+        """
+        Translate the set of global options into the format used by this
+        command type
+        """
+
+    @abstractmethod
+    def _generate_full_command(self) -> str:
+        """
+        Build the full cli command that will be sent to the client to run
+        the benchmark
+        """
+
+    @abstractmethod
+    def _generate_output_directory_path(self) -> str:
+        """
+        Build the part of the output directory path that is specific to
+        this command.
+
+        The format depends on the Command implementation
+        """
diff --git a/command/fio_command.py b/command/fio_command.py
@@ -0,0 +1,184 @@
+"""
+A class to deal with a command that will run a single instance of the
+FIO I/O exerciser
+
+It will return the full executable string that can be used to run a
+cli command using whatever method the calling Benchmark chooses.
+
+It deals with the FIO options that are common to all I/O engine types. For
+options that are specific to a particular I/O engine e.g. rbd a subclass
+should be created that parses these options
+"""
+
+from abc import ABCMeta, abstractmethod
+from logging import Logger, getLogger
+from typing import Optional
+
+from cli_options import CliOptions
+from command.command import Command
+
+log: Logger = getLogger("cbt")
+
+
+class FioCommand(Command, metaclass=ABCMeta):
+    """
+    The FIO command class. This class represents a single FIO command
+    line that can be run on a local or remote client system.
+    """
+
+    # Options that are added to every fio command line
+    _REQUIRED_OPTIONS = {"invalidate": "0", "direct": "1"}
+    # Options copied to the fio command line under the same name
+    _DIRECT_TRANSLATIONS: list[str] = ["numjobs", "iodepth"]
+
+    def __init__(self, options: dict[str, str], workload_output_directory: str) -> None:
+        # These are read by _parse_options()/_generate_*(), so they must be
+        # set before the base class constructor triggers the parsing
+        self._target_number: int = int(options["target_number"])
+        self._total_iodepth: Optional[str] = options.get("total_iodepth", None)
+        self._workload_output_directory: str = workload_output_directory
+        super().__init__(options)
+
+    @abstractmethod
+    def _parse_ioengine_specific_parameters(self, options: dict[str, str]) -> dict[str, str]:
+        """
+        Get any options that are specific to the I/O engine being used
+        for this fio run and add them to the CliOptions for this workload
+        """
+
+    def _parse_global_options(self, options: dict[str, str]) -> CliOptions:
+        """
+        The global options need no translation for fio: they are wrapped in
+        a CliOptions unchanged
+        """
+        return CliOptions(options)
+
+    def _parse_options(self, options: dict[str, str]) -> CliOptions:
+        """
+        Translate the workload options from the configuration yaml into fio
+        command line options.
+
+        The order in which options are stored is the order in which they
+        appear on the generated command line. CliOptions keeps the first
+        value stored for a key, so earlier entries take precedence.
+        """
+        fio_cli_options: CliOptions = CliOptions()
+
+        fio_cli_options.update(self._parse_ioengine_specific_parameters(options))
+        fio_cli_options.update(self._REQUIRED_OPTIONS)
+
+        # Copy only the options that were actually supplied. Storing an empty
+        # placeholder for a missing option would stop the numjobs default
+        # below, and any later update_options(), from ever taking effect, and
+        # would make _generate_output_directory_path() fail on int("")
+        for option in self._DIRECT_TRANSLATIONS:
+            if option in options:
+                fio_cli_options[option] = options[option]
+
+        fio_cli_options["rw"] = options.get("mode", "write")
+        fio_cli_options["output-format"] = options.get("fio_out_format", "json,normal")
+
+        fio_cli_options["numjobs"] = options.get("numjobs", "1")
+        fio_cli_options["bs"] = options.get("op_size", "4194304")
+        fio_cli_options["end_fsync"] = f"{options.get('end_fsync', '0')}"
+
+        # (yaml option name, fio option name): copied only when a value is set
+        for yaml_name, fio_name in (
+            ("random_distribution", "random_distribution"),
+            ("log_avg_msec", "log_avg_msec"),
+            ("time", "runtime"),
+            ("ramp", "ramp_time"),
+            ("rate_iops", "rate_iops"),
+        ):
+            value = options.get(yaml_name, None)
+            if value is not None:
+                fio_cli_options[fio_name] = value
+
+        # NOTE(review): bool() of any non-empty string is True, so the flags
+        # tested with bool() presumably arrive as real booleans - confirm
+        if bool(options.get("time_based", False)) is True:
+            fio_cli_options["time_based"] = ""
+
+        # "sudo" is not an fio option: _generate_full_command() turns it into
+        # a prefix for the command
+        if bool(options.get("no_sudo", False)) is False:
+            fio_cli_options["sudo"] = ""
+
+        if options.get("norandommap", None) is not None:
+            fio_cli_options["norandommap"] = ""
+
+        if "recovery_test" in options:
+            fio_cli_options["time_based"] = ""
+
+        # Secondary options
+        if fio_cli_options["rw"] in ("readwrite", "randrw"):
+            read_percent: str = options.get("rwmixread", "50")
+            write_percent: str = f"{100 - int(read_percent)}"
+            fio_cli_options["rwmixread"] = read_percent
+            fio_cli_options["rwmixwrite"] = write_percent
+
+        # Markers only: _setup_logging() replaces each one with the matching
+        # write_*_log option once the output file name is known
+        for log_flag in ("log_iops", "log_bw", "log_lat"):
+            if bool(options.get(log_flag, True)):
+                fio_cli_options[log_flag] = ""
+
+        processes_per_volume: int = int(options.get("procs_per_volume", 1))
+
+        fio_cli_options["name"] = self._get_job_name(options["name"], processes_per_volume)
+
+        return fio_cli_options
+
+    def _generate_full_command(self) -> str:
+        """
+        Build the fio command line from the stored options.
+
+        The result is "[sudo ]<executable> --<option>[=<value>] ... > <file>"
+        where <file> is output.<target_number> in the output directory of
+        this command. Calling this more than once returns the same string.
+        """
+        output_file: str = f"{self._generate_output_directory_path()}/output.{self._target_number:d}"
+        self._setup_logging(output_file)
+
+        parts: list[str] = []
+
+        # The "sudo" entry is left in the options rather than deleted, so a
+        # repeated call still produces the sudo prefix
+        if "sudo" in self._options:
+            parts.append("sudo")
+
+        parts.append(f"{self._executable}")
+
+        for name, value in self._options.items():
+            if name == "sudo":
+                # Already emitted as the command prefix
+                continue
+
+            if name == "name" and value is not None:
+                # One --name argument for each job name in the list
+                parts.extend(f"--{name}={jobname}" for jobname in value.strip().split(" "))
+            elif value != "":
+                parts.append(f"--{name}={value}")
+            else:
+                # An empty value marks an option that takes no argument
+                parts.append(f"--{name}")
+
+        parts.append(f"> {output_file}")
+
+        return " ".join(parts)
+
+    def _generate_output_directory_path(self) -> str:
+        """
+        For an FIO command the output format is:
+        <workload_output_directory>/numjobs-<numjobs>/total_iodepth-<total_iodepth>/iodepth-<iodepth>
+        if total_iodepth was used in the options, otherwise:
+        <workload_output_directory>/numjobs-<numjobs>/iodepth-<iodepth>
+
+        numjobs is zero padded to 3 digits and iodepth to 6 digits
+        """
+        output_path: str = f"{self._workload_output_directory}/numjobs-{int(str(self._options['numjobs'])):03d}/"
+
+        if self._total_iodepth is not None:
+            output_path += f"total_iodepth-{self._total_iodepth}/"
+
+        output_path += f"iodepth-{int(str(self._options['iodepth'])):06d}"
+
+        return output_path
+
+    def _get_job_name(self, parent_workload_name: str, processes_per_volume: int) -> str:
+        """
+        Get the job names for this command to give to FIO.
+        One name is generated for each process, each followed by a single
+        space, in the format:
+
+        cbt-fio-<workload_name>-`hostname`-file-<process_number>
+
+        The `hostname` text is emitted literally, backticks included;
+        presumably it is expanded by the shell that runs the command.
+        """
+        return "".join(
+            f"cbt-fio-{parent_workload_name}-`hostname`-file-{process_number} "
+            for process_number in range(processes_per_volume)
+        )
+
+    def _setup_logging(self, output_file_name: str) -> None:
+        """
+        Set up the additional FIO log paths if required.
+
+        Each log_* marker present in the options is removed and replaced by
+        the matching write_*_log option pointing at output_file_name
+        """
+        for log_flag, fio_log_option in (
+            ("log_iops", "write_iops_log"),
+            ("log_bw", "write_bw_log"),
+            ("log_lat", "write_lat_log"),
+        ):
+            if log_flag in self._options:
+                self._options.pop(log_flag)
+                self._options[fio_log_option] = output_file_name