Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partition SuperPMI replay task #66065

Merged
merged 2 commits into from
Mar 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 20 additions & 9 deletions src/coreclr/scripts/superpmi-replay.proj
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,34 @@
</ItemGroup>

<ItemGroup Condition="'$(Architecture)' == 'x64'">
<SPMI_Partition Include="win-x64" Platform="windows" Architecture="x64" />
<SPMI_Partition Include="win-arm64" Platform="windows" Architecture="arm64" />
<SPMI_Partition Include="unix-x64" Platform="Linux" Architecture="x64" />
<SPMI_Partition Include="linux-arm64" Platform="Linux" Architecture="arm64" />
<SPMI_Partition Include="osx-arm64" Platform="OSX" Architecture="arm64" />
<!-- Use 2 partitions for each run on an x64 machine -->
<SPMI_Partition Include="win-x64-1" Platform="windows" Architecture="x64" Partition="1" PartitionCount="2"/>
<SPMI_Partition Include="win-x64-2" Platform="windows" Architecture="x64" Partition="2" PartitionCount="2"/>
<SPMI_Partition Include="win-arm64-1" Platform="windows" Architecture="arm64" Partition="1" PartitionCount="2"/>
<SPMI_Partition Include="win-arm64-2" Platform="windows" Architecture="arm64" Partition="2" PartitionCount="2"/>
<SPMI_Partition Include="unix-x64-1" Platform="Linux" Architecture="x64" Partition="1" PartitionCount="2"/>
<SPMI_Partition Include="unix-x64-2" Platform="Linux" Architecture="x64" Partition="2" PartitionCount="2"/>
<SPMI_Partition Include="linux-arm64-1" Platform="Linux" Architecture="arm64" Partition="1" PartitionCount="2"/>
<SPMI_Partition Include="linux-arm64-2" Platform="Linux" Architecture="arm64" Partition="2" PartitionCount="2"/>
<SPMI_Partition Include="osx-arm64-1" Platform="OSX" Architecture="arm64" Partition="1" PartitionCount="2"/>
<SPMI_Partition Include="osx-arm64-2" Platform="OSX" Architecture="arm64" Partition="2" PartitionCount="2"/>
</ItemGroup>

<ItemGroup Condition="'$(Architecture)' == 'x86'">
<SPMI_Partition Include="win-x86" Platform="windows" Architecture="x86" />
<SPMI_Partition Include="unix-arm" Platform="Linux" Architecture="arm" />
<!-- The x86 machine replays are slower than x64, so use 3 partitions for each run on x86 -->
<SPMI_Partition Include="win-x86-1" Platform="windows" Architecture="x86" Partition="1" PartitionCount="3"/>
<SPMI_Partition Include="win-x86-2" Platform="windows" Architecture="x86" Partition="2" PartitionCount="3"/>
<SPMI_Partition Include="win-x86-3" Platform="windows" Architecture="x86" Partition="3" PartitionCount="3"/>
<SPMI_Partition Include="unix-arm-1" Platform="Linux" Architecture="arm" Partition="1" PartitionCount="3"/>
<SPMI_Partition Include="unix-arm-2" Platform="Linux" Architecture="arm" Partition="2" PartitionCount="3"/>
<SPMI_Partition Include="unix-arm-3" Platform="Linux" Architecture="arm" Partition="3" PartitionCount="3"/>
</ItemGroup>

<ItemGroup>
<HelixWorkItem Include="@(SPMI_Partition)">
<Command>$(WorkItemCommand) -arch %(HelixWorkItem.Architecture) -platform %(HelixWorkItem.Platform) -log_directory $(SuperpmiLogsLocation)</Command>
<Command>$(WorkItemCommand) -arch %(HelixWorkItem.Architecture) -platform %(HelixWorkItem.Platform) -partition %(HelixWorkItem.Partition) -partition_count %(HelixWorkItem.PartitionCount) -log_directory $(SuperpmiLogsLocation)</Command>
<Timeout>$(WorkItemTimeout)</Timeout>
<DownloadFilesFromResults>superpmi_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture).log</DownloadFilesFromResults>
<DownloadFilesFromResults>superpmi_%(HelixWorkItem.Platform)_%(HelixWorkItem.Architecture)_%(HelixWorkItem.Partition).log</DownloadFilesFromResults>
</HelixWorkItem>
</ItemGroup>
</Project>
67 changes: 61 additions & 6 deletions src/coreclr/scripts/superpmi_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
parser.add_argument("-platform", help="OS platform")
parser.add_argument("-jit_directory", help="path to the directory containing clrjit binaries")
parser.add_argument("-log_directory", help="path to the directory containing superpmi log files")
parser.add_argument("-partition", help="Partition number specifying which set of flags to use: between 1 and the `-partition_count` value")
parser.add_argument("-partition_count", help="Count of the total number of partitions we are using: should be <= 9 (number of jit_flags_all elements)")

jit_flags = [
jit_flags_all = [
"JitStressRegs=0",
# JitStressRegs=1 disabled due to https://github.com/dotnet/runtime/issues/65332
# "JitStressRegs=1",
Expand All @@ -38,6 +40,20 @@
"JitStressRegs=0x1000",
]

def split(a, n):
    """ Splits array `a` in `n` partitions.

    Slightly modified from https://stackoverflow.com/a/2135920.

    The partitions are contiguous slices of `a`, returned in the original
    order. Their lengths differ by at most one: the first `len(a) % n`
    partitions each receive one extra element. When `n` is larger than
    `len(a)`, the trailing partitions are empty.

    Args:
        a (list): sequence to split; must support `len()` and slicing
        n (int): number of partitions to produce; must be >= 1

    Returns:
        list: `n` slices of `a`

    Raises:
        ValueError: if `n` is less than 1

    """
    # Fail with a clear message instead of a ZeroDivisionError from divmod()
    # (n == 0) or a silently empty result (n < 0).
    if n < 1:
        raise ValueError("Number of partitions must be >= 1, got: " + str(n))

    # k is the base partition size; the first m partitions get one extra element.
    k, m = divmod(len(a), n)
    return [a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)]


def setup_args(args):
""" Setup the args for SuperPMI to use.
Expand Down Expand Up @@ -69,9 +85,39 @@ def setup_args(args):

coreclr_args.verify(args,
"log_directory",
lambda log_directory: True,
lambda log_directory: os.path.isdir(log_directory),
"log_directory doesn't exist")

coreclr_args.verify(args,
"partition",
lambda partition: True,
"Unable to set partition")

coreclr_args.verify(args,
"partition_count",
lambda partition: True,
"Unable to set partition_count")

try:
coreclr_args.partition = int(coreclr_args.partition)
except ValueError as e:
print("Illegal `-partition` value: " + str(coreclr_args.partition))
sys.exit(1)

try:
coreclr_args.partition_count = int(coreclr_args.partition_count)
except ValueError as e:
print("Illegal `-partition_count` value: " + str(coreclr_args.partition_count))
sys.exit(1)

if coreclr_args.partition_count <= 0:
print("Illegal `-partition_count` value: " + str(coreclr_args.partition_count))
sys.exit(1)

if coreclr_args.partition < 1 or coreclr_args.partition > coreclr_args.partition_count:
print("Illegal `-partition` value: " + str(coreclr_args.partition))
sys.exit(1)

return coreclr_args


Expand All @@ -81,7 +127,6 @@ def main(main_args):
Args:
main_args ([type]): Arguments to the script
"""

python_path = sys.executable
cwd = os.path.dirname(os.path.realpath(__file__))
coreclr_args = setup_args(main_args)
Expand All @@ -94,9 +139,19 @@ def main(main_args):
os_name = "universal" if arch_name.startswith("arm") else os_name
jit_path = os.path.join(coreclr_args.jit_directory, 'clrjit_{}_{}_{}.dll'.format(os_name, arch_name, host_arch_name))

jit_flags_partitioned = split(jit_flags_all, coreclr_args.partition_count)
jit_flags = jit_flags_partitioned[coreclr_args.partition - 1] # partition number is 1-based

print("Running superpmi.py download")
run_command([python_path, os.path.join(cwd, "superpmi.py"), "download", "--no_progress", "-target_os", platform_name,
"-target_arch", arch_name, "-core_root", cwd, "-spmi_location", spmi_location], _exit_on_fail=True)
run_command([python_path,
os.path.join(cwd, "superpmi.py"),
"download",
"--no_progress",
"-target_os", platform_name,
"-target_arch", arch_name,
"-core_root", cwd,
"-spmi_location", spmi_location,
"-log_level", "debug"], _exit_on_fail=True)

failed_runs = []
for jit_flag in jit_flags:
Expand All @@ -122,7 +177,7 @@ def main(main_args):
failed_runs.append("Failure in {}".format(log_file))

# Consolidate all superpmi_*.logs in superpmi_platform_architecture_partition.log
final_log_name = os.path.join(log_directory, "superpmi_{}_{}.log".format(platform_name, arch_name))
final_log_name = os.path.join(log_directory, "superpmi_{}_{}_{}.log".format(platform_name, arch_name, coreclr_args.partition))
print("Consolidating final {}".format(final_log_name))
with open(final_log_name, "a") as final_superpmi_log:
for superpmi_log in os.listdir(log_directory):
Expand Down