Skip to content

Commit

Permalink
[GR-50424] Add custom rss percentile tracker, based on the ps command.
Browse files Browse the repository at this point in the history
PullRequest: mx/1725
  • Loading branch information
Andrija Kolic committed Jan 3, 2024
2 parents c250e12 + 4cea84e commit ee6fc8f
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 8 deletions.
14 changes: 7 additions & 7 deletions common.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@
"labsjdk-ee-21Debug": {"name": "labsjdk", "version": "ee-21.0.1+11-jvmci-23.1-b26-debug", "platformspecific": true },
"labsjdk-ee-21-llvm": {"name": "labsjdk", "version": "ee-21.0.1+11-jvmci-23.1-b26-sulong", "platformspecific": true },

"oraclejdk-latest": {"name": "jpg-jdk", "version": "23", "build_id": "1", "release": true, "platformspecific": true, "extrabundles": ["static-libs"]},
"labsjdk-ce-latest": {"name": "labsjdk", "version": "ce-23+1-jvmci-b01", "platformspecific": true },
"labsjdk-ce-latestDebug": {"name": "labsjdk", "version": "ce-23+1-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ce-latest-llvm": {"name": "labsjdk", "version": "ce-23+1-jvmci-b01-sulong", "platformspecific": true },
"labsjdk-ee-latest": {"name": "labsjdk", "version": "ee-23+1-jvmci-b01", "platformspecific": true },
"labsjdk-ee-latestDebug": {"name": "labsjdk", "version": "ee-23+1-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ee-latest-llvm": {"name": "labsjdk", "version": "ee-23+1-jvmci-b01-sulong", "platformspecific": true }
"oraclejdk-latest": {"name": "jpg-jdk", "version": "23", "build_id": "2", "release": true, "platformspecific": true, "extrabundles": ["static-libs"]},
"labsjdk-ce-latest": {"name": "labsjdk", "version": "ce-23+2-jvmci-b01", "platformspecific": true },
"labsjdk-ce-latestDebug": {"name": "labsjdk", "version": "ce-23+2-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ce-latest-llvm": {"name": "labsjdk", "version": "ce-23+2-jvmci-b01-sulong", "platformspecific": true },
"labsjdk-ee-latest": {"name": "labsjdk", "version": "ee-23+2-jvmci-b01", "platformspecific": true },
"labsjdk-ee-latestDebug": {"name": "labsjdk", "version": "ee-23+2-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ee-latest-llvm": {"name": "labsjdk", "version": "ee-23+2-jvmci-b01-sulong", "platformspecific": true }
},

"eclipse": {
Expand Down
2 changes: 1 addition & 1 deletion src/mx/_impl/mx.py
Original file line number Diff line number Diff line change
Expand Up @@ -19247,7 +19247,7 @@ def alarm_handler(signum, frame):
abort(1, killsig=signal.SIGINT)

# The version must be updated for every PR (checked in CI) and the comment should reflect the PR's issue
version = VersionSpec("7.5.1") # GR-51052 Reenable parallel compilation on darwin-amd64
version = VersionSpec("7.5.2") # GR-50424 Add custom rss percentile tracker, based on the ps command.

_mx_start_datetime = datetime.utcnow()
_last_timestamp = _mx_start_datetime
Expand Down
121 changes: 121 additions & 0 deletions src/mx/_impl/mx_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2811,11 +2811,132 @@ def map_command(self, cmd):
def get_rules(self, bmSuiteArgs):
    """Return the rules of the rss sub-tracker followed by those of the psrecord sub-tracker."""
    rss_rules = self.rss.get_rules(bmSuiteArgs)
    psrecord_rules = self.psrecord.get_rules(bmSuiteArgs)
    return rss_rules + psrecord_rules

class RssPercentilesTracker(Tracker):
    """Calculates percentile rss metrics from the rss samples gathered by ps_poller.

    ``map_command`` wraps the benchmark command in an invocation of the ``ps_poller.py`` script
    located next to this module. The script writes its samples to a text file whose path is
    remembered in ``most_recent_text_output``; ``RssPercentilesRule`` later parses that file,
    deletes it and emits one ``rss`` datapoint per entry of ``interesting_percentiles``.
    """
    # rss metric will be calculated for these percentiles
    interesting_percentiles = [100, 99, 98, 97, 96, 95, 90, 75, 50, 25]
    # the time period between two polls, in seconds
    poll_interval = 0.1

    def __init__(self, bmSuite, skip=0):
        """
        :param bmSuite: the benchmark suite this tracker is attached to
        :param skip: the number of RSS entries to skip from each poll
        """
        super().__init__(bmSuite)
        # path of the sample file produced for the most recently mapped command (None until then)
        self.most_recent_text_output = None
        self.skip = skip # the number of RSS entries to skip from each poll (used to skip entries of other trackers)

    def map_command(self, cmd):
        """Prefix ``cmd`` with the ps_poller invocation that samples its RSS.

        Returns ``cmd`` unchanged when ``_use_tracker`` is falsy or when the OS is neither
        linux nor darwin (a warning is emitted in the latter case).
        """
        if not _use_tracker:
            return cmd

        if mx.get_os() != "linux" and mx.get_os() != "darwin":
            mx.warn(f"Ignoring the '{self.__class__.__name__}' tracker since it is not supported on {mx.get_os()}")
            return cmd

        import datetime
        bench_name = self.bmSuite.currently_running_benchmark() if self.bmSuite else "benchmark"
        if self.bmSuite:
            bench_name = f"{self.bmSuite.name()}-{bench_name}"
        # The timestamp keeps sample files of successive runs in the same directory apart
        ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        text_output = os.path.join(os.getcwd(), f"ps_{bench_name}_{ts}.txt")

        self.most_recent_text_output = text_output
        ps_poller_script_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ps_poller.py")
        return ["python3", ps_poller_script_path, "-f", text_output, "-i", str(RssPercentilesTracker.poll_interval)] + cmd

    def get_rules(self, bmSuiteArgs):
        """Return the single rule that turns the gathered samples into percentile datapoints."""
        return [RssPercentilesTracker.RssPercentilesRule(self, bmSuiteArgs)]

    class RssPercentilesRule(CSVBaseRule):
        """Rule that reads the sample file of its tracker and produces rss percentile datapoints."""

        def __init__(self, tracker, bmSuiteArgs, **kwargs):
            """
            :param tracker: the RssPercentilesTracker whose sample file is parsed
            :param bmSuiteArgs: the benchmark suite arguments used to fill in the datapoint template
            :param kwargs: forwarded to the CSVBaseRule constructor
            """
            replacement = {
                "benchmark": tracker.bmSuite.currently_running_benchmark(),
                "bench-suite": tracker.bmSuite.benchSuiteName(bmSuiteArgs) if mx_benchmark_compatibility().bench_suite_needs_suite_args() else tracker.bmSuite.benchSuiteName(),
                "config.vm-flags": ' '.join(tracker.bmSuite.vmArgs(bmSuiteArgs)),
                "metric.name": "rss",
                "metric.value": ("<metric_value>", int),
                "metric.unit": "MB",
                "metric.type": "numeric",
                "metric.score-function": "id",
                "metric.better": "lower",
                "metric.percentile": ("<metric_percentile>", int),
                "metric.iteration": 0
            }
            # Every line of the sample file is read as a single 'rss_kb' column
            super().__init__(["rss_kb"], replacement, delimiter=' ', skipinitialspace=True, **kwargs)
            self.tracker = tracker

        def getCSVFiles(self, text):
            """Return the tracker's most recent sample file, or nothing if no command was mapped."""
            file = self.tracker.most_recent_text_output
            return [file] if file else []

        def parseResults(self, text):
            """Parse the sample file, delete it and return the percentile entries.

            Each returned entry is a dict with the string values ``metric_percentile`` and
            ``metric_value`` (in MB). An empty list is returned when the poller reported a
            failure or when no sample was gathered.
            """
            rows = super().parseResults(text)

            # The rows are already in memory, so the sample file is no longer needed
            temp_text_output = self.tracker.most_recent_text_output
            if temp_text_output is not None:
                os.remove(temp_text_output)
                mx.log(f"Temporary output file {temp_text_output} deleted.")

            values = []
            acc = 0
            skips_left = self.tracker.skip
            # At every 'RSS' row append the previously accumulated value
            # After the 'RSS' row, skip self.tracker.skip numerical rows (to ignore other trackers)
            # After self.tracker.skip skips, accumulate all the numerical rows until the next 'RSS'
            for r in rows:
                if r["rss_kb"].isnumeric():
                    if skips_left == 0:
                        acc += float(r["rss_kb"])
                    else:
                        skips_left -= 1
                else:
                    if r["rss_kb"] == "FAILED":
                        mx.warn(f"Tracker {self.tracker.__class__.__name__} failed at polling the benchmark process for RSS! No 'rss' metric will be emitted.")
                        return []
                    if acc > 0:
                        values.append(acc)
                    acc = 0
                    skips_left = self.tracker.skip
            # The last poll is not followed by another 'RSS' row, so flush it explicitly
            if acc > 0:
                values.append(acc)

            if len(values) == 0:
                mx.log("\tDidn't get any RSS samples.")
                return []

            sorted_values = sorted(values)
            last_index = len(sorted_values) - 1

            def pc(k): # k-percentile with linear interpolation between closest ranks
                x = last_index * k / 100
                fr = int(x) # closest rank below (or at) x
                cl = min(fr + 1, last_index) # closest rank above x, clamped to the last sample
                # Always interpolate: rounding x to pick the upper rank would silently drop the
                # interpolation whenever the fractional part of x is below 0.5
                v = sorted_values[fr] + (sorted_values[cl] - sorted_values[fr]) * (x - fr)
                v = v / 1024 # convert to MB
                return {"metric_percentile": str(k), "metric_value": str(int(v))}

            percentiles = [pc(perc) for perc in RssPercentilesTracker.interesting_percentiles]
            for rss_percentile in percentiles:
                mx.log(f"\t{rss_percentile['metric_percentile']}th RSS percentile (MB): {rss_percentile['metric_value']}")
            return percentiles


class RssPercentilesAndMaxTracker(Tracker):
    """Tracker that chains an RssTracker with an RssPercentilesTracker."""

    def __init__(self, bmSuite):
        super().__init__(bmSuite)
        self.rss_max_tracker = RssTracker(bmSuite)
        # skip RSS of the 'time' command
        self.rss_percentiles_tracker = RssPercentilesTracker(bmSuite, skip=1)

    def map_command(self, cmd):
        """Wrap cmd with the max-rss tracker first, then wrap the result with the percentiles tracker."""
        max_rss_cmd = self.rss_max_tracker.map_command(cmd)
        return self.rss_percentiles_tracker.map_command(max_rss_cmd)

    def get_rules(self, bmSuiteArgs):
        """Return the max-rss rules followed by the percentile rules."""
        max_rss_rules = self.rss_max_tracker.get_rules(bmSuiteArgs)
        percentile_rules = self.rss_percentiles_tracker.get_rules(bmSuiteArgs)
        return max_rss_rules + percentile_rules


# Maps each tracker identifier (string key) to the tracker class registered under it.
_available_trackers = {
    "rss": RssTracker,
    "psrecord": PsrecordTracker,
    "psrecord+maxrss": PsrecordMaxrssTracker,
    "rsspercentiles": RssPercentilesTracker,
    "rsspercentiles+maxrss": RssPercentilesAndMaxTracker,
}


Expand Down
108 changes: 108 additions & 0 deletions src/mx/_impl/ps_poller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# ----------------------------------------------------------------------------------------------------
#
# Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
# ----------------------------------------------------------------------------------------------------

import argparse
import datetime
import os
import subprocess
import sys
import time


def _parse_args(args):
    """Parse the command line of the ps_poller script.

    :param args: the full argument vector, program name first (``main`` is called with
                 ``sys.argv``); ``None`` makes argparse fall back to ``sys.argv``
    :return: the tuple ``(output_file, poll_interval, target_cmd)``; ``output_file`` defaults
             to a timestamped file in the current working directory when ``-f`` is not given
    """
    parser = argparse.ArgumentParser(
        prog="ps_poller",
        description="Run target_cmd and periodically poll it for RSS using ps",
        usage="ps_poller [OPTIONS] <target_cmd>",
        epilog="The target_cmd process is ran in a new session, and the RSS data of every process in that session is summed up for each poll",
    )

    parser.add_argument("-f", "--output-file", help="File to which to write the polled RSS data (in KB)")
    parser.add_argument(
        "-i", "--poll-interval", type=float, help="Interval between subsequent polling, in seconds", default=0.1
    )
    parser.add_argument("target_cmd", nargs=argparse.REMAINDER, help="Command to run and poll for RSS data")

    # Honour the 'args' parameter instead of implicitly reading sys.argv; the first element is
    # the program name and must not be handed to argparse
    parsed = parser.parse_args(None if args is None else list(args)[1:])

    output_file = parsed.output_file
    if output_file is None:
        ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        output_file = os.path.join(os.getcwd(), f"ps_poller_rss_samples_{ts}.txt")

    return output_file, parsed.poll_interval, parsed.target_cmd


def _start_target_process(target_cmd):
    """Announce target_cmd on stdout, launch it in a new session and return its Popen handle."""
    command_line = " ".join(target_cmd)
    print(f'Starting and attaching to command: "{command_line}"')
    process = subprocess.Popen(target_cmd, start_new_session=True)
    return process


def _poll_session(sid, out_file):
    """Run ``ps`` once for the given session and send its output to ``out_file``.

    :param sid: id of the session whose processes are listed
    :param out_file: open file that is passed to ``ps`` as its standard output
    :return: the return code of ``ps``, or 1 if ``ps`` could not be started
    """
    # Get RSS for every process in session
    args = ["ps", "-g", str(sid), "-o", "rss"]
    try:
        ps_return_code = subprocess.run(args, stdout=out_file, check=False).returncode
    except (OSError, ValueError) as exc:
        # OSError covers a missing or non-executable 'ps'; ValueError covers invalid Popen arguments
        print(f"An exception occurred when trying to start subprocess with {args}: {exc}")
        return 1
    if ps_return_code != 0:
        print(f"Command {args} failed with return code {ps_return_code}!")
    return ps_return_code


def main(args):
    """Run the target command while periodically polling its session for RSS.

    :param args: the argument vector handed to ``_parse_args``
    :return: the return code of the target process
    """
    samples_path, interval, command = _parse_args(args)

    with open(samples_path, "w") as samples:
        proc = _start_target_process(command)
        # The pid of the target process is what gets passed to the poller as the session id
        session_id = proc.pid

        started = time.time()
        exit_status = proc.poll()
        poll_rc = 0
        # Keep sampling until the target terminates or a poll fails
        while poll_rc == 0 and exit_status is None:
            time.sleep(interval)
            poll_rc = _poll_session(session_id, samples)
            exit_status = proc.poll()
        finished = time.time()

        if poll_rc == 0:
            print(f"Rss samples saved in file: {samples_path}")
            print(f"Elapsed time: {finished - started:.2f}s")
        else:
            # Communicate to tracker that the RSS polling was unsuccessful
            samples.write("FAILED")
            print(
                "Polling for RSS failed! Any samples gathered until this moment will be ignored. Waiting for the target process without RSS polling..."
            )
            exit_status = proc.wait()

    print(f"Target process return code: {exit_status}")
    # Propagate target process exit code
    return exit_status


# Script entry point: run the poller with the full argument vector and exit with the
# value returned by main (the return code of the target process).
if __name__ == "__main__":
    sys.exit(main(sys.argv))

0 comments on commit ee6fc8f

Please sign in to comment.