From a9665f0dbc3fdd53902e3414ebd323472845e112 Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Fri, 5 Apr 2024 00:07:47 -0700 Subject: [PATCH] stdlib: Massive refactor of stats Change-Id: I9c52130ec76eaad551d59b235471dd940d025156 WIP: Come up with better description for this. --- src/python/m5/ext/pystats/abstract_stat.py | 3 + src/python/m5/ext/pystats/group.py | 79 +++++- .../m5/ext/pystats/serializable_stat.py | 2 + src/python/m5/ext/pystats/statistic.py | 2 +- src/python/m5/stats/gem5stats.py | 164 +++++++---- tests/gem5/stats/configs/array_stats.py | 260 ++++++++++++++++++ tests/gem5/stats/test_array_stats.py | 44 +++ 7 files changed, 490 insertions(+), 64 deletions(-) create mode 100644 tests/gem5/stats/configs/array_stats.py create mode 100644 tests/gem5/stats/test_array_stats.py diff --git a/src/python/m5/ext/pystats/abstract_stat.py b/src/python/m5/ext/pystats/abstract_stat.py index bae327fcf92..c94346dbcaf 100644 --- a/src/python/m5/ext/pystats/abstract_stat.py +++ b/src/python/m5/ext/pystats/abstract_stat.py @@ -99,3 +99,6 @@ def find(self, regex: Union[str, Pattern]) -> List["AbstractStat"]: return self.children( lambda _name: re.match(pattern, _name), recursive=True ) + + def __getitem__(self, index: Union[int, str, float]) -> "AbstractStat": + return self.__dict__[str(index)] diff --git a/src/python/m5/ext/pystats/group.py b/src/python/m5/ext/pystats/group.py index 5b2e760b32c..6e5118b57ed 100644 --- a/src/python/m5/ext/pystats/group.py +++ b/src/python/m5/ext/pystats/group.py @@ -25,6 +25,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from typing import ( + Any, + Callable, Dict, List, Optional, @@ -42,23 +44,78 @@ class Group(AbstractStat): map of labeled Groups, Statistics, Lists of Groups, or List of Statistics. 
""" - type: Optional[str] time_conversion: Optional[TimeConversion] def __init__( self, - type: Optional[str] = None, + type: str = "Group", time_conversion: Optional[TimeConversion] = None, - **kwargs: Dict[ - str, Union["Group", Statistic, List["Group"], List["Statistic"]] - ], + **kwargs: Dict, ): - if type is None: - self.type = "Group" - else: - self.type = type - - self.time_conversion = time_conversion + self.type = type + if time_conversion: + self.time_conversion = time_conversion for key, value in kwargs.items(): setattr(self, key, value) + + def __getitem__(self, index: Union[int, str, float]) -> AbstractStat: + if not hasattr(self, str(index)): + raise KeyError(f"Index {index} not found in Group") + return getattr(self, str(index)) + + +class SimObjectGroup(Group): + """ + TODO: Add description + """ + + def __init__(self, **kwargs: Dict[str, Union[Group, Statistic]]): + super().__init__(type="SimObject", **kwargs) + + +class SimObjectVectorGroup(Group): + """ + TODO: Add description + """ + + def __init__(self, value: List[AbstractStat], **kwargs: Dict[str, Any]): + assert isinstance(value, list), "Value must be a list" + super().__init__(type="SimObjectVector", value=value, **kwargs) + + def __getitem__(self, index: Union[int, str, float]) -> AbstractStat: + if not isinstance(index, int): + raise KeyError( + f"Index {index} not found in int. Cannot index Array with non-int" + ) + return self.value[index] + + def __iter__(self): + return iter(self.value) + + def __len__(self): + return len(self.value) + + def get_all_stats_of_name(self, name: str) -> List[AbstractStat]: + """ + Get all the stats in the vector of that name. Useful for performing + operations on all the stats of the same name in a vector. + """ + to_return = [] + for stat in self.value: + if hasattr(stat, name): + to_return.append(getattr(stat, name)) + + # If the name is in the format "sim.bla.whatever", we are looking for + # the "bla.whatever" stats in the "sim" group. 
+ name_split = name.split(".") + if len(name_split) == 1: + return to_return + + if name_split[0] not in self: + return to_return + + to_return.extend( + self[name_split[0]].get_all_stats_of_name(".".join(name_split[1:])) + ) + return to_return diff --git a/src/python/m5/ext/pystats/serializable_stat.py b/src/python/m5/ext/pystats/serializable_stat.py index 4520f1ab08d..8f41a97f3b9 100644 --- a/src/python/m5/ext/pystats/serializable_stat.py +++ b/src/python/m5/ext/pystats/serializable_stat.py @@ -93,6 +93,8 @@ def __process_json_value( return d elif isinstance(value, StorageType): return str(value.name) + elif isinstance(value, List): + return [self.__process_json_value(v) for v in value] return None diff --git a/src/python/m5/ext/pystats/statistic.py b/src/python/m5/ext/pystats/statistic.py index d2318193f37..c4f720a2964 100644 --- a/src/python/m5/ext/pystats/statistic.py +++ b/src/python/m5/ext/pystats/statistic.py @@ -95,7 +95,7 @@ class Vector(Statistic): def __init__( self, - value: Dict[Union[str, int, float], Scalar], + value: Dict[str, Scalar], type: Optional[str] = None, description: Optional[str] = None, ): diff --git a/src/python/m5/stats/gem5stats.py b/src/python/m5/stats/gem5stats.py index e0381fef0ef..328991d639a 100644 --- a/src/python/m5/stats/gem5stats.py +++ b/src/python/m5/stats/gem5stats.py @@ -29,6 +29,8 @@ the Python Stats model. """ +import calendar +import re from datetime import datetime from typing import ( IO, @@ -41,6 +43,7 @@ from m5.ext.pystats.statistic import * from m5.ext.pystats.storagetype import * from m5.objects import * +from m5.params import SimObjectVector import _m5.stats @@ -83,33 +86,6 @@ def dump(self, roots: Union[List[SimObject], Root]) -> None: simstat.dump(fp=fp, **self.json_args) -def get_stats_group(group: _m5.stats.Group) -> Group: - """ - Translates a gem5 Group object into a Python stats Group object. A Python - statistic Group object is a dictionary of labeled Statistic objects. 
Any - gem5 object passed to this will have its ``getStats()`` and ``getStatGroups`` - function called, and all the stats translated (inclusive of the stats - further down the hierarchy). - - :param group: The gem5 _m5.stats.Group object to be translated to be a Python - stats Group object. Typically this will be a gem5 SimObject. - - :returns: The stats group object translated from the input gem5 object. - """ - - stats_dict = {} - - for stat in group.getStats(): - statistic = __get_statistic(stat) - if statistic is not None: - stats_dict[stat.name] = statistic - - for key in group.getStatGroups(): - stats_dict[key] = get_stats_group(group.getStatGroups()[key]) - - return Group(**stats_dict) - - def __get_statistic(statistic: _m5.stats.Info) -> Optional[Statistic]: """ Translates a _m5.stats.Info object into a Statistic object, to process @@ -300,8 +276,88 @@ def _prepare_stats(group: _m5.stats.Group): _prepare_stats(child) +def _process_simobject_object(simobject: SimObject) -> SimObjectGroup: + """ + Processes the stats of a SimObject, and returns a dictionary of the stats + for the SimObject with PyStats objects when appropriate. + + :param simobject: The SimObject to process the stats for. + + :returns: A dictionary of the PyStats stats for the SimObject. + """ + + assert isinstance( + simobject, SimObject + ), "simobject param must be a SimObject." + + stats = ( + { + "name:": simobject.get_name(), + } + if simobject.get_name() + else {} + ) + + for stat in simobject.getStats(): + val = __get_statistic(stat) + if val: + stats[stat.name] = val + + for name, child in simobject._children.items(): + to_add = _process_simobject_stats(child) + if to_add: + stats[name] = to_add + + for name, child in sorted(simobject.getStatGroups().items()): + # Note: We are using the name of the group to determine if we have + # already processed the group as a child simobject or a statistic. + # This is to avoid SimObjectVector's being processed twice. 
It is far + # from an ideal solution, but it works for now. + if not any( + re.compile(f"{to_match}" + r"\d*").search(name) + for to_match in stats.keys() + ): + stats[name] = Group(**_process_simobject_stats(child)) + + return SimObjectGroup(**stats) + + +def _process_simobject_stats( + simobject: Union[ + SimObject, SimObjectVector, List[Union[SimObject, SimObjectVector]] + ] +) -> Union[List[Dict], Dict]: + """ + Processes the stats of a SimObject, SimObjectVector, or List of either, and + returns a dictionary of the PyStats for the SimObject. + + :param simobject: The SimObject to process the stats for. + + :returns: A dictionary of the stats for the SimObject. + """ + + if isinstance(simobject, SimObject): + return _process_simobject_object(simobject) + + if isinstance(simobject, (list, SimObjectVector)): + stats_list = [] + for obj in simobject: + stats_list.append(_process_simobject_stats(obj)) + return SimObjectVectorGroup(value=stats_list) + + raise TypeError( + "Object (" + str(simobject) + ") passed is not a " + "SimObject. " + __name__ + " only processes " + "SimObjects, SimObjectVector, or a list of SimObjects." + ) + + def get_simstat( - root: Union[SimObject, List[SimObject]], prepare_stats: bool = True + root: Union[ + Union[SimObject, SimObjectVector], + List[Union[SimObject, SimObjectVector]], + ], + prepare_stats: bool = True, ) -> SimStat: """ This function will return the SimStat object for a simulation given a @@ -321,40 +377,44 @@ def get_simstat( :Returns: The SimStat Object of the current simulation.
""" - stats_map = {} - creation_time = datetime.now() - time_converstion = None # TODO https://gem5.atlassian.net/browse/GEM5-846 + + creation_time = Scalar( + value=calendar.timegm(datetime.now().timetuple()), + description="Unix Timestamp of SimStats Creation", + unit="seconds", + datatype=StorageType["f64"], + ) final_tick = Root.getInstance().resolveStat("finalTick").value sim_ticks = Root.getInstance().resolveStat("simTicks").value - simulated_begin_time = int(final_tick - sim_ticks) - simulated_end_time = int(final_tick) + simulated_begin_time = Scalar( + value=int(final_tick - sim_ticks), + description="Tick these stats began recording", + unit="Tick", + datatype=StorageType["f64"], + ) + simulated_end_time = Scalar( + value=int(final_tick), + unit="Tick", + description="Tick these stats stopped recording", + datatype=StorageType["f64"], + ) if prepare_stats: _m5.stats.processDumpQueue() - for r in root: - if isinstance(r, Root): - # The Root is a special case, we jump directly into adding its - # constituent Groups. - if prepare_stats: - _prepare_stats(r) - for key in r.getStatGroups(): - stats_map[key] = get_stats_group(r.getStatGroups()[key]) - elif isinstance(r, SimObject): - if prepare_stats: - _prepare_stats(r) - stats_map[r.get_name()] = get_stats_group(r) + if prepare_stats: + if isinstance(root, list): + for obj in root: + _prepare_stats(obj) else: - raise TypeError( - "Object (" + str(r) + ") passed is not a " - "SimObject. " + __name__ + " only processes " - "SimObjects, or a list of SimObjects." 
- ) + _prepare_stats(root) + + stats = _process_simobject_stats(root).__dict__ + stats["name"] = root.get_name() if root.get_name() else "root" return SimStat( creation_time=creation_time, - time_conversion=time_converstion, simulated_begin_time=simulated_begin_time, simulated_end_time=simulated_end_time, - **stats_map, + **stats, ) diff --git a/tests/gem5/stats/configs/array_stats.py b/tests/gem5/stats/configs/array_stats.py new file mode 100644 index 00000000000..bff2c580754 --- /dev/null +++ b/tests/gem5/stats/configs/array_stats.py @@ -0,0 +1,260 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import m5 +from m5.objects import ( + Root, + ScalarStatTester, + VectorStatTester, +) +from m5.stats.gem5stats import get_simstat + +root = Root(full_system=False) +root.stat_testers = [ + ScalarStatTester(name="placeholder", value=11), + ScalarStatTester( + name="placeholder", value=22, description="Index 2 desc." + ), + ScalarStatTester(name="placeholder", value=33), + VectorStatTester( + name="index_4", + values=[44, 55, 66], + description="A vector within a vector.", + ), +] +root.other_stat = ScalarStatTester(value=44) +root.stat_testers[0].sub_scaler = ScalarStatTester(value=101) + +m5.instantiate() +m5.simulate() + +# This is the JSON output of the stats. 
+# Obtained with `print(get_simstat(root).dumps(indent=2))`` +""" +{ + "creation_time": { + "value": 1712320158, + "type": "Scalar", + "description": "Unix Timestamp of SimStats Creation", + "unit": "seconds", + "datatype": "f64" + }, + "time_conversion": null, + "simulated_begin_time": { + "value": 0, + "type": "Scalar", + "description": "Tick these stats began recording", + "unit": "Tick", + "datatype": "f64" + }, + "simulated_end_time": { + "value": 18446744073709551616, + "type": "Scalar", + "description": "Tick these stats stopped recording", + "unit": "Tick", + "datatype": "f64" + }, + "type": "SimObject", + "simTicks": { + "value": 1.8446744073709552e+19, + "type": "Scalar", + "description": "Number of ticks simulated", + "unit": "Tick", + "datatype": "f64" + }, + "finalTick": { + "value": 1.8446744073709552e+19, + "type": "Scalar", + "description": "Number of ticks from beginning of simulation (restored from checkpoints and never reset)", + "unit": "Tick", + "datatype": "f64" + }, + "simFreq": { + "value": 1000000000000.0, + "type": "Scalar", + "description": "The number of ticks per simulated second", + "unit": "(Tick/Second)", + "datatype": "f64" + }, + "hostSeconds": { + "value": 0.00045200000000000004, + "type": "Scalar", + "description": "Real time elapsed on the host", + "unit": "Second", + "datatype": "f64" + }, + "hostMemory": { + "value": 404481088.0, + "type": "Scalar", + "description": "Number of bytes of host memory used", + "unit": "Byte", + "datatype": "f64" + }, + "stat_testers": { + "type": "SimObjectVector", + "value": [ + { + "type": "SimObject", + "name:": "stat_testers0", + "placeholder": { + "value": 11.0, + "type": "Scalar", + "description": "", + "unit": "Count", + "datatype": "f64" + }, + "sub_scaler": { + "type": "SimObject", + "name:": "sub_scaler", + "stat": { + "value": 101.0, + "type": "Scalar", + "description": "", + "unit": "Count", + "datatype": "f64" + } + } + }, + { + "type": "SimObject", + "name:": "stat_testers1", + 
"placeholder": { + "value": 22.0, + "type": "Scalar", + "description": "Index 2 desc.", + "unit": "Count", + "datatype": "f64" + } + }, + { + "type": "SimObject", + "name:": "stat_testers2", + "placeholder": { + "value": 33.0, + "type": "Scalar", + "description": "", + "unit": "Count", + "datatype": "f64" + } + }, + { + "type": "SimObject", + "name:": "stat_testers3", + "index_4": { + "value": { + "0": { + "value": 44.0, + "type": "Scalar", + "description": "A vector within a vector.", + "unit": "Count", + "datatype": "f64" + }, + "1": { + "value": 55.0, + "type": "Scalar", + "description": "A vector within a vector.", + "unit": "Count", + "datatype": "f64" + }, + "2": { + "value": 66.0, + "type": "Scalar", + "description": "A vector within a vector.", + "unit": "Count", + "datatype": "f64" + } + }, + "type": "Vector", + "description": "A vector within a vector." + } + } + ] + }, + "other_stat": { + "type": "SimObject", + "name:": "other_stat", + "stat": { + "value": 44.0, + "type": "Scalar", + "description": "", + "unit": "Count", + "datatype": "f64" + } + }, + "name": "root" +} +""" +simstat = get_simstat(root) + + +# 'stat_testers' is a list of SimObjects +assert hasattr(simstat, "stat_testers"), "No stat_testers attribute found." +assert len(simstat.stat_testers) == 4, "stat_testers list is not of length 3." + +# Accessable by index. +simobject = simstat.stat_testers[0] + +# We can directly access the statistic we're interested in and it's "str" +# representation should be the same as the value we set. In this case "11.0". +# TODO: A way to just get the int out instead of weridly converting to a +# string? +assert ( + str(simobject.placeholder) == "11.0" +), "placeholder value is not 11.0 ()." + +# They can also be accessed like so +# TODO: "Where is "stat" coming from? +assert ( + str(simstat["other_stat"]["stat"]) == "44.0" +), "other_stat value is not 44." + +# We can also access other state date like type and description. 
+assert simstat.stat_testers[1].placeholder.description == "Index 2 desc." +assert simstat.stat_testers[1].placeholder.type == "Scalar" + +# We iterate through the stats and sum things. +total = 0 +for simobject in simstat.stat_testers: + if hasattr(simobject, "placeholder"): + total += simobject.placeholder.value +assert total == 11 + 22 + 33, "Sum of values is not 66." + +# We can also get all the stats in a vector which conform to a certain name. +total = sum( + stats.value + for stats in simstat.stat_testers.get_all_stats_of_name("placeholder") +) +assert total == 11 + 22 + 33, "Sum of values is not 66." + +# Vector stats can also be accessed in this way. +assert ( + simstat.stat_testers[3].index_4[0].value == 44 +), "index_4[0] value is not 44." + +# Sub stats can be accessed, they are just children of SimObjects. +assert simstat.stat_testers[ + 0 +].sub_scaler.stat.value == 101, "sub_scaler value is not 101." diff --git a/tests/gem5/stats/test_array_stats.py b/tests/gem5/stats/test_array_stats.py new file mode 100644 index 00000000000..a8ba3abc8a6 --- /dev/null +++ b/tests/gem5/stats/test_array_stats.py @@ -0,0 +1,44 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from testlib import * + +gem5_verify_config( + name="pystat-array-test", + fixtures=(), + verifiers=[], + config=joinpath( + config.base_dir, + "tests", + "gem5", + "stats", + "configs", + "array_stats.py", + ), + config_args=[], + valid_isas=(constants.all_compiled_tag,), + length=constants.quick_tag, +)