From 313de564cd1ee2a2f3a1fa208316f523b3da0ca2 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Thu, 14 Mar 2024 14:00:43 +0100 Subject: [PATCH] utils/calc+info: rework --best-memattr to allow multiple nodes --best-memattr was very strict when selecting best nodes. The node had to be the best for the entire set of input CPUs. On a dual-socket machine with HBM in each socket, each HBM is the best for its local socket but not for the remote one. Hence we'd report no best for the entire machine. Now we return both HBMs for the entire machine by default, but one may go back to the previous behavior by adding ",strict" after the attribute name. Adding ",default" also allows returning all nodes if no best was found (if no attribute values are found). Thanks to Antoine Morvan for the report. Refs #652 Signed-off-by: Brice Goglin --- utils/hwloc/hwloc-calc.1in | 15 +++- utils/hwloc/hwloc-calc.c | 39 +++++---- utils/hwloc/hwloc-info.1in | 13 ++- utils/hwloc/hwloc-info.c | 35 ++++---- utils/hwloc/misc.h | 166 +++++++++++++++++++++++++++---------- 5 files changed, 190 insertions(+), 78 deletions(-) diff --git a/utils/hwloc/hwloc-calc.1in b/utils/hwloc/hwloc-calc.1in index 46770e0e49..f62fa19822 100644 --- a/utils/hwloc/hwloc-calc.1in +++ b/utils/hwloc/hwloc-calc.1in @@ -1,5 +1,5 @@ .\" -*- nroff -*- -.\" Copyright © 2010-2023 Inria. All rights reserved. +.\" Copyright © 2010-2024 Inria. All rights reserved. .\" Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. .\" See COPYING in top-level directory. .TH HWLOC-CALC "1" "%HWLOC_DATE%" "%PACKAGE_VERSION%" "%PACKAGE_NAME%" @@ -248,7 +248,7 @@ This option enables \fB\-\-local\-memory\fR. .TP \fB\-\-best\-memattr\fR Enable the listing of local memory nodes with \fB\-\-local\-memory\fR, -but only display the local node that has the best value for the memory +but only display the local nodes that have the best value for the memory attribute given by \fI\fR (or as an index). 
If the memory attribute values depend on the initiator, the hwloc-calc @@ -262,6 +262,15 @@ All existing attributes in the current topology may be listed with If combined with \fB\-\-object\-output\fR, the object index is prefixed with its type (e.g. \fINUMANode:0\fR instead of \fI0\fR). + +\fI\fR may be suffixed with flags to tune the selection of best nodes, +for instance as \fBbandwidth,strict,default\fR. +\fBdefault\fR means that all local nodes are reported if no best could be found. +\fBstrict\fR means that nodes are selected only if their performance is the best +for all the input CPUs. On a dual-socket machine with HBM in each socket, +both HBMs are the best for their local socket, but not for the remote socket. +Hence both HBMs are also considered best for the entire machine by default, +but none is if \fBstrict\fR is given. .TP \fB\-\-sep \fR Change the field separator in the output. @@ -393,7 +402,7 @@ whose locality is exactly equal to a Package: $ hwloc-calc --local-memory-flags 0 --physical-output pack:1 4,7 -To display the best-capacity NUMA node, by physical indexes, +To display the best-capacity NUMA node(s), by physical indexes, whose locality is exactly equal to a Package: $ hwloc-calc --local-memory-flags 0 --best-memattr capacity --physical-output pack:1 diff --git a/utils/hwloc/hwloc-calc.c b/utils/hwloc/hwloc-calc.c index 1132d107b0..7d1885f169 100644 --- a/utils/hwloc/hwloc-calc.c +++ b/utils/hwloc/hwloc-calc.c @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2023 Inria. All rights reserved. + * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright © 2023 Université de Reims Champagne-Ardenne. All rights reserved. 
@@ -79,6 +79,7 @@ static struct hwloc_calc_level *hierlevels; static int local_numanodes = 0; static unsigned long local_numanode_flags = HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY | HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY; static hwloc_memattr_id_t best_memattr_id = (hwloc_memattr_id_t) -1; +static unsigned long best_node_flags = 0; static int showobjs = 0; static int no_smt = -1; static int singlify = 0; @@ -232,35 +233,33 @@ hwloc_calc_output(hwloc_topology_t topology, const char *sep, hwloc_bitmap_t set } else if (local_numanodes) { unsigned nrnodes; hwloc_obj_t *nodes; + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc_full(); /* show all nodes by default */ nrnodes = hwloc_bitmap_weight(hwloc_topology_get_topology_nodeset(topology)); nodes = malloc(nrnodes * sizeof(*nodes)); - if (nodes) { + if (nodeset && nodes) { int err; struct hwloc_location loc; loc.type = HWLOC_LOCATION_TYPE_CPUSET; loc.location.cpuset = set; err = hwloc_get_local_numanode_objs(topology, &loc, &nrnodes, nodes, local_numanode_flags); if (!err) { - unsigned i; + unsigned i, first = 1; if (best_memattr_id != (hwloc_memattr_id_t) -1) { - int best = hwloc_utils_get_best_node_in_array_by_memattr(topology, best_memattr_id, nrnodes, nodes, &loc); - if (best == -1) { - /* no perf info found, report nothing */ - nrnodes = 0; - } else { - /* only report the best nodes */ - nodes[0] = nodes[best]; - nrnodes = 1; - } + err = hwloc_utils_get_best_node_in_array_by_memattr(topology, best_memattr_id, nrnodes, nodes, &loc, best_node_flags, nodeset); + /* on error, nodeset is zeroed, and we report nothing below (except if default flag is set) */ } if (!sep) sep = ","; for(i=0; ios_index)) + continue; hwloc_obj_type_snprintf(type, sizeof(type), nodes[i], HWLOC_OBJ_SNPRINTF_FLAG_LONG_NAMES); idx = logicalo ? 
nodes[i]->logical_index : nodes[i]->os_index; - if (i>0) + if (first) + first = 0; + else printf("%s", sep); if (objecto) { char types[64]; @@ -270,8 +269,9 @@ hwloc_calc_output(hwloc_topology_t topology, const char *sep, hwloc_bitmap_t set printf("%u", idx); } } - free(nodes); } + free(nodes); + hwloc_bitmap_free(nodeset); printf("\n"); } else { @@ -685,6 +685,17 @@ int main(int argc, char *argv[]) } if (best_memattr_str) { + char *tmp; + tmp = strstr(best_memattr_str, ",default"); + if (tmp) { + memmove(tmp, tmp+8, strlen(tmp+8)+1); + best_node_flags |= HWLOC_UTILS_BEST_NODE_FLAG_DEFAULT; + } + tmp = strstr(best_memattr_str, ",strict"); + if (tmp) { + memmove(tmp, tmp+7, strlen(tmp+7)+1); + best_node_flags |= HWLOC_UTILS_BEST_NODE_FLAG_STRICT; + } best_memattr_id = hwloc_utils_parse_memattr_name(topology, best_memattr_str); if (best_memattr_id == (hwloc_memattr_id_t) -1) { fprintf(stderr, "unrecognized memattr %s\n", best_memattr_str); diff --git a/utils/hwloc/hwloc-info.1in b/utils/hwloc/hwloc-info.1in index 9e98d68e16..a9359ed2e4 100644 --- a/utils/hwloc/hwloc-info.1in +++ b/utils/hwloc/hwloc-info.1in @@ -137,10 +137,19 @@ This option enables \fB\-\-local\-memory\fR. .TP \fB\-\-best\-memattr\fR Enable the listing of local memory nodes with \fB\-\-local\-memory\fR, -but only display the local node that has the best value for the memory +but only display the local nodes that have the best value for the memory attribute given by \fI\fR (or as an index). If the memory attribute values depend on the initiator, the object given to hwloc-info is used as the initiator. + +\fI\fR may be suffixed with flags to tune the selection of best nodes, +for instance as \fBbandwidth,strict,default\fR. +\fBdefault\fR means that all local nodes are reported if no best could be found. +\fBstrict\fR means that nodes are selected only if their performance is the best +for all the input CPUs. 
On a dual-socket machine with HBM in each socket, +both HBMs are the best for their local socket, but not for the remote socket. +Hence both HBMs are also considered best for the entire machine by default, +but none is if \fBstrict\fR is given. .TP \fB\-\-first\fR For each input object, only report the first matching output object @@ -311,7 +320,7 @@ To list the NUMA nodes that are local a PU: type = NUMANode ... -To show the best-bandwidth node among NUMA nodes local to a PU: +To show the best-bandwidth node(s) among NUMA nodes local to a PU: $ hwloc-info --local-memory --best-memattr bandwidth pu:25 NUMANode L#7 = local memory #1 of PU L#25 diff --git a/utils/hwloc/hwloc-info.c b/utils/hwloc/hwloc-info.c index 744e647d47..c8943f5bab 100644 --- a/utils/hwloc/hwloc-info.c +++ b/utils/hwloc/hwloc-info.c @@ -83,6 +83,7 @@ static int show_first_only = 0; static int show_local_memory = 0; static int show_local_memory_flags = HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY | HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY; static hwloc_memattr_id_t best_memattr_id = (hwloc_memattr_id_t) -1; +static unsigned long best_node_flags = 0; static unsigned current_obj; void usage(const char *name, FILE *where) @@ -582,9 +583,10 @@ hwloc_calc_process_location_info_cb(struct hwloc_calc_location_context_s *lconte } else if (show_local_memory) { unsigned nrnodes; hwloc_obj_t *nodes; + hwloc_nodeset_t nodeset = hwloc_bitmap_alloc_full(); /* show all nodes by default */ nrnodes = hwloc_bitmap_weight(hwloc_topology_get_topology_nodeset(topology)); nodes = malloc(nrnodes * sizeof(*nodes)); - if (nodes) { + if (nodeset && nodes) { struct hwloc_location loc; int err; loc.type = HWLOC_LOCATION_TYPE_OBJECT; @@ -593,28 +595,21 @@ hwloc_calc_process_location_info_cb(struct hwloc_calc_location_context_s *lconte if (!err) { unsigned i; if (best_memattr_id != (hwloc_memattr_id_t) -1) { - /* only keep the best one for that memattr */ - int best; + /* only keep the best ones for that memattr */ /* won't work if obj is 
CPU-less: perf from I/O is likely different from perf from CPU objects */ loc.type = HWLOC_LOCATION_TYPE_CPUSET; loc.location.cpuset = obj->cpuset; - best = hwloc_utils_get_best_node_in_array_by_memattr(topology, best_memattr_id, - nrnodes, nodes, &loc); - if (best == -1) { - /* no perf info found, report nothing */ + err = hwloc_utils_get_best_node_in_array_by_memattr(topology, best_memattr_id, + nrnodes, nodes, &loc, best_node_flags, nodeset); + if (err < 0) { if (verbose > 0) fprintf(stderr, "Failed to find a best local node for memory attribute.\n"); - nrnodes = 0; - } else { - /* only report the best node, but keep the index intact */ - for(i=0; ios_index)) continue; if (show_index_prefix) snprintf(prefix, sizeof(prefix), "%u.%u: ", current_obj, i); @@ -626,6 +621,7 @@ hwloc_calc_process_location_info_cb(struct hwloc_calc_location_context_s *lconte } else { fprintf(stderr, "Failed to allocate array of local NUMA nodes\n"); } + hwloc_bitmap_free(nodeset); free(nodes); } else { hwloc_info_show_single_obj(topology, obj, objs, prefix, verbose); @@ -963,8 +959,19 @@ main (int argc, char *argv[]) } if (best_memattr_str) { + char *tmp; if (!show_local_memory) fprintf(stderr, "--best-memattr is ignored without --local-memory.\n"); + tmp = strstr(best_memattr_str, ",default"); + if (tmp) { + memmove(tmp, tmp+8, strlen(tmp+8)+1); + best_node_flags |= HWLOC_UTILS_BEST_NODE_FLAG_DEFAULT; + } + tmp = strstr(best_memattr_str, ",strict"); + if (tmp) { + memmove(tmp, tmp+7, strlen(tmp+7)+1); + best_node_flags |= HWLOC_UTILS_BEST_NODE_FLAG_STRICT; + } best_memattr_id = hwloc_utils_parse_memattr_name(topology, best_memattr_str); if (best_memattr_id == (hwloc_memattr_id_t) -1) { fprintf(stderr, "unrecognized memattr %s\n", best_memattr_str); diff --git a/utils/hwloc/misc.h b/utils/hwloc/misc.h index 972fb6c82c..5df89253d3 100644 --- a/utils/hwloc/misc.h +++ b/utils/hwloc/misc.h @@ -1,6 +1,6 @@ /* * Copyright © 2009 CNRS - * Copyright © 2009-2023 Inria. All rights reserved. 
+ * Copyright © 2009-2024 Inria. All rights reserved. * Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2023 Université de Reims Champagne-Ardenne. All rights reserved. @@ -770,70 +770,144 @@ hwloc_utils_parse_memattr_name(hwloc_topology_t topo, const char *str) return id; } +#define HWLOC_UTILS_BEST_NODE_FLAG_DEFAULT (1UL<<0) /* report all nodes if no best found */ +#define HWLOC_UTILS_BEST_NODE_FLAG_STRICT (1UL<<1) /* report only best with same initiator */ + +static __hwloc_inline void +hwloc_utils__update_best_node(hwloc_obj_t newnode, uint64_t newvalue, + uint64_t *bestvalue, hwloc_bitmap_t bestnodeset, + unsigned long mflags) +{ + if (hwloc_bitmap_iszero(bestnodeset)) { + /* first */ + *bestvalue = newvalue; + hwloc_bitmap_only(bestnodeset, newnode->os_index); + + } else if (mflags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST) { + if (newvalue > *bestvalue) { + /* higher */ + *bestvalue = newvalue; + hwloc_bitmap_only(bestnodeset, newnode->os_index); + } else if (newvalue == *bestvalue) { + /* as high */ + hwloc_bitmap_set(bestnodeset, newnode->os_index); + } + + } else { + assert(mflags & HWLOC_MEMATTR_FLAG_LOWER_FIRST); + if (newvalue < *bestvalue) { + /* lower */ + *bestvalue = newvalue; + hwloc_bitmap_only(bestnodeset, newnode->os_index); + } else if (newvalue == *bestvalue) { + /* as low */ + hwloc_bitmap_set(bestnodeset, newnode->os_index); + } + } +} + +/* fill best_nodeset with best nodes. + * if STRICT flag, only the really local ones are returned. + * if none is best (they don't have values), return empty. + * if none is best and DEFAULT flag, return all nodes. + * on error, return empty. 
+ */ static __hwloc_inline int hwloc_utils_get_best_node_in_array_by_memattr(hwloc_topology_t topology, hwloc_memattr_id_t id, unsigned nbnodes, hwloc_obj_t *nodes, - struct hwloc_location *initiator) + struct hwloc_location *initiator, + unsigned long flags, + hwloc_nodeset_t best_nodeset) { - unsigned nbtgs, i, j; - hwloc_obj_t *tgs; - int best; - hwloc_uint64_t *values, bestvalue; + unsigned i, j; + hwloc_uint64_t *values, bestvalue = 0; unsigned long mflags; int err; + hwloc_bitmap_zero(best_nodeset); + err = hwloc_memattr_get_flags(topology, id, &mflags); if (err < 0) goto out; - nbtgs = 0; - err = hwloc_memattr_get_targets(topology, id, initiator, 0, &nbtgs, NULL, NULL); - if (err < 0) - goto out; + if (mflags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) { + /* iterate over targets, and then on their initiators */ + for(i=0; itype != initiators[j].type) + continue; + switch (initiator->type) { + case HWLOC_LOCATION_TYPE_OBJECT: + if (initiator->location.object->type != initiators[j].location.object->type + || initiator->location.object->gp_index != initiators[j].location.object->gp_index) + continue; + break; + case HWLOC_LOCATION_TYPE_CPUSET: + if (flags & HWLOC_UTILS_BEST_NODE_FLAG_STRICT) { + if (!hwloc_bitmap_isincluded(initiator->location.cpuset, initiators[j].location.cpuset)) + continue; + } else { + if (!hwloc_bitmap_intersects(initiator->location.cpuset, initiators[j].location.cpuset)) + continue; + } + break; + default: + abort(); + } - best = -1; - bestvalue = 0; - for(i=0; i bestvalue) { - best = i; - bestvalue = values[j]; - } - } else { - assert(mflags & HWLOC_MEMATTR_FLAG_LOWER_FIRST); - if (values[j] < bestvalue) { - best = i; - bestvalue = values[j]; + hwloc_utils__update_best_node(nodes[i], values[j], + &bestvalue, best_nodeset, + mflags); } + + free(initiators); + free(values); + } + + } else { + /* no initiator, just iterate over targets */ + for(i=0; ios_index); + } + return 0; - out_with_arrays: - free(tgs); - free(values); out: + 
hwloc_bitmap_zero(best_nodeset); return -1; } @@ -849,6 +923,8 @@ hwloc_utils_get_best_node_in_nodeset_by_memattr(hwloc_topology_t topology, hwloc unsigned long mflags; int err; + // TODO update + err = hwloc_memattr_get_flags(topology, id, &mflags); if (err < 0) goto out;