Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/mock_mlc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# arguments:
# --loaded_latency
# --bandwidth_matrix
# --idle_latency
# --max_bandwidth

if [ "$1" == "--loaded_latency" ]; then
cat <<EOT
Expand Down Expand Up @@ -37,6 +39,22 @@ Delay (ns) MB/sec
09000 105.60 1844.7
20000 105.48 1148.7

EOT
elif [ "$1" == "--idle_latency" ]; then
cat <<EOT
Intel(R) Memory Latency Checker - v3.10
Command line parameters: --idle_latency -b20 -t10 -c1 -i1

Each iteration took 1.5 base frequency clocks ( 1.5 ns)

EOT
elif [ "$1" == "--max_bandwidth" ]; then
cat <<EOT
Intel(R) Memory Latency Checker - v3.10
Command line parameters: --max_bandwidth -b20

Peak bandwidth: 7430968.4 MB/sec

EOT
elif [ "$1" == "--bandwidth_matrix" ]; then
cat <<EOT
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ See [`perfspect benchmark -h`](docs/perfspect_benchmark.md) for all options.
| power | runs stress-ng to load all cpus to 100% for 60s. Uses [turbostat](https://github.com/torvalds/linux/tree/master/tools/power/x86/turbostat) to measure power. |
| temperature | runs the same micro benchmark as 'power', but extracts maximum temperature from turbostat output. |
| frequency | runs [avx-turbo](https://github.com/travisdowns/avx-turbo) to measure scalar and AVX frequencies across the processor's cores. **Note:** Runtime increases with core count. |
| memory | runs [Intel(r) Memory Latency Checker](https://www.intel.com/content/www/us/en/download/736633/intel-memory-latency-checker-intel-mlc.html) (MLC) to measure memory bandwidth and latency across a load range. **Note: MLC is not included with PerfSpect.** It can be downloaded from [Intel Memory Latency Checker](https://www.intel.com/content/www/us/en/download/736633/intel-memory-latency-checker-intel-mlc.html). Once downloaded, extract the Linux executable and place it in the perfspect/tools/x86_64 directory. |
| numa | runs Intel(r) Memory Latency Checker(MLC) to measure bandwidth between NUMA nodes. See Note above about downloading MLC. |
| memory | runs [Intel(r) Memory Latency Checker](https://www.intel.com/content/www/us/en/download/736633/intel-memory-latency-checker-intel-mlc.html) (MLC) to measure memory bandwidth and latency. **Note: MLC is not included with PerfSpect.** It can be downloaded from [Intel Memory Latency Checker](https://www.intel.com/content/www/us/en/download/736633/intel-memory-latency-checker-intel-mlc.html). Once downloaded, extract the Linux executable and place it in the perfspect/tools/x86_64 directory. |
| cache | runs MLC to measure L1, L2, and L3 cache idle latency (ns) and maximum bandwidth (GB/s). See Note above about downloading MLC. |
| storage | runs [fio](https://github.com/axboe/fio) for 2 minutes across multiple I/O patterns to measure storage latency, IOPS, and bandwidth. Use --storage-dir to override the default location (/tmp). Minimum 32GB disk space required. |

#### Telemetry Command
Expand Down
27 changes: 19 additions & 8 deletions cmd/benchmark/benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ var (
flagTemperature bool
flagFrequency bool
flagMemory bool
flagNuma bool
flagCache bool
flagStorage bool

flagNoSystemSummary bool
Expand All @@ -76,7 +76,7 @@ const (
flagTemperatureName = "temperature"
flagFrequencyName = "frequency"
flagMemoryName = "memory"
flagNumaName = "numa"
flagCacheName = "cache"
flagStorageName = "storage"

flagNoSystemSummaryName = "no-summary"
Expand All @@ -91,8 +91,15 @@ var categories = []app.Category{
{FlagName: flagPowerName, FlagVar: &flagPower, DefaultValue: false, Help: "power consumption benchmark", Tables: []table.TableDefinition{tableDefinitions[PowerBenchmarkTableName]}},
{FlagName: flagTemperatureName, FlagVar: &flagTemperature, DefaultValue: false, Help: "temperature benchmark", Tables: []table.TableDefinition{tableDefinitions[TemperatureBenchmarkTableName]}},
{FlagName: flagFrequencyName, FlagVar: &flagFrequency, DefaultValue: false, Help: "turbo frequency benchmark", Tables: []table.TableDefinition{tableDefinitions[FrequencyBenchmarkTableName]}},
{FlagName: flagMemoryName, FlagVar: &flagMemory, DefaultValue: false, Help: "memory latency and bandwidth benchmark", Tables: []table.TableDefinition{tableDefinitions[MemoryBenchmarkTableName]}},
{FlagName: flagNumaName, FlagVar: &flagNuma, DefaultValue: false, Help: "NUMA bandwidth matrix benchmark", Tables: []table.TableDefinition{tableDefinitions[NUMABenchmarkTableName]}},
{FlagName: flagMemoryName, FlagVar: &flagMemory, DefaultValue: false, Help: "memory latency and bandwidth benchmark",
Tables: []table.TableDefinition{
tableDefinitions[MemoryLoadedLatencyBenchmarkTableName],
tableDefinitions[MemoryBandwidthMatrixBenchmarkName],
tableDefinitions[MemoryLatencyMatrixBenchmarkName]}},
{FlagName: flagCacheName, FlagVar: &flagCache, DefaultValue: false, Help: "L1/L2/L3 cache idle latency and maximum bandwidth benchmark",
Tables: []table.TableDefinition{
tableDefinitions[CacheIdleLatencyBenchmarkTableName],
tableDefinitions[CacheMaxBandwidthBenchmarkTableName]}},
{FlagName: flagStorageName, FlagVar: &flagStorage, DefaultValue: false, Help: "storage performance benchmark", Tables: []table.TableDefinition{tableDefinitions[StorageBenchmarkTableName]}},
}

Expand Down Expand Up @@ -258,9 +265,13 @@ func runCmd(cmd *cobra.Command, args []string) error {
}

report.RegisterHTMLRenderer(FrequencyBenchmarkTableName, frequencyBenchmarkTableHtmlRenderer)
report.RegisterHTMLRenderer(MemoryBenchmarkTableName, memoryBenchmarkTableHtmlRenderer)
report.RegisterHTMLRenderer(MemoryLoadedLatencyBenchmarkTableName, memoryBenchmarkTableHtmlRenderer)
report.RegisterHTMLRenderer(MemoryBandwidthMatrixBenchmarkName, memoryNUMABandwidthMatrixTableHtmlRenderer)
report.RegisterHTMLRenderer(MemoryLatencyMatrixBenchmarkName, memoryNUMALatencyMatrixTableHtmlRenderer)
report.RegisterHTMLRenderer(CacheIdleLatencyBenchmarkTableName, func(tv table.TableValues, _ string) string { return report.DefaultHTMLTableRendererFunc(tv) })
report.RegisterHTMLRenderer(CacheMaxBandwidthBenchmarkTableName, func(tv table.TableValues, _ string) string { return report.DefaultHTMLTableRendererFunc(tv) })

report.RegisterHTMLMultiTargetRenderer(MemoryBenchmarkTableName, memoryBenchmarkTableMultiTargetHtmlRenderer)
report.RegisterHTMLMultiTargetRenderer(MemoryLoadedLatencyBenchmarkTableName, memoryBenchmarkTableMultiTargetHtmlRenderer)

return reportingCommand.Run()
}
Expand All @@ -275,7 +286,7 @@ func benchmarkSummaryFromTableValues(allTableValues []table.TableValues, outputs
allCoreMaxFreq = allCoreMaxFreq + " GHz"
}
// get the maximum memory bandwidth from the memory latency table
memLatTableValues := getTableValues(allTableValues, MemoryBenchmarkTableName)
memLatTableValues := getTableValues(allTableValues, MemoryLoadedLatencyBenchmarkTableName)
var bandwidthValues []string
if len(memLatTableValues.Fields) > 1 {
bandwidthValues = memLatTableValues.Fields[1].Values
Expand All @@ -296,7 +307,7 @@ func benchmarkSummaryFromTableValues(allTableValues []table.TableValues, outputs
maxMemBW = fmt.Sprintf("%.1f GB/s", maxBandwidth)
}
// get the minimum memory latency
minLatency := getValueFromTableValues(getTableValues(allTableValues, MemoryBenchmarkTableName), "Latency (ns)", 0)
minLatency := getValueFromTableValues(getTableValues(allTableValues, MemoryLoadedLatencyBenchmarkTableName), "Latency (ns)", 0)
if minLatency != "" {
minLatency = minLatency + " ns"
}
Expand Down
114 changes: 114 additions & 0 deletions cmd/benchmark/benchmark_renderers.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package benchmark

import (
"fmt"
"html"
"log/slog"
"strconv"

Expand Down Expand Up @@ -125,3 +126,116 @@ func memoryBenchmarkTableMultiTargetHtmlRenderer(allTableValues []table.TableVal
}
return report.RenderScatterChart(data, datasetNames, chartConfig)
}

// renderNUMAMatrixHeatmapTable renders a NUMA matrix (bandwidth or latency) as an HTML table
// whose numeric cells carry a heatmap background color.
//
// Field 0 of tableValues is treated as the row-header column: it is never parsed as a
// number and is rendered bold. Every other field is a data column. Cells whose text does
// not parse as a float are rendered as escaped text without a background color.
//
// higherIsBetter selects the direction of the color scale: true maps the largest value to
// green (e.g. bandwidth); false maps the smallest value to green (e.g. latency).
//
// The heatmap is only applied when the parsed values actually differ. With a single data
// cell, or with all values equal, every cell would otherwise be painted with the "worst"
// color, which is misleading.
//
// An empty string is returned (and an error is logged) when there are fewer than two fields.
func renderNUMAMatrixHeatmapTable(tableValues table.TableValues, higherIsBetter bool) string {
	if len(tableValues.Fields) < 2 {
		slog.Error("insufficient fields for NUMA matrix", slog.String("table", tableValues.Name), slog.Int("fields", len(tableValues.Fields)))
		return ""
	}
	// The row count is taken from the row-header column.
	rows := len(tableValues.Fields[0].Values)
	cols := len(tableValues.Fields)
	// Parse numeric matrix (skip field 0 = row header column)
	type cell struct {
		text string
		val  float64
		ok   bool
	}
	matrix := make([][]cell, rows)
	var minVal, maxVal float64
	first := true
	for r := range rows {
		matrix[r] = make([]cell, cols)
		for c := range cols {
			values := tableValues.Fields[c].Values
			if r >= len(values) {
				// Ragged input: this column is shorter than the row-header column.
				// Leave the cell empty and unstyled instead of indexing out of range.
				continue
			}
			matrix[r][c].text = values[r]
			if c == 0 {
				continue
			}
			v, err := strconv.ParseFloat(values[r], 64)
			if err != nil {
				// Non-numeric data cell: shown as text, excluded from the color scale.
				continue
			}
			matrix[r][c].val = v
			matrix[r][c].ok = true
			if first {
				minVal, maxVal = v, v
				first = false
				continue
			}
			minVal = min(minVal, v)
			maxVal = max(maxVal, v)
		}
	}
	// Build headers and rows for RenderHTMLTable
	headers := make([]string, cols)
	for c := range cols {
		headers[c] = tableValues.Fields[c].Name
	}
	// Only color the cells when there is a real range to map onto the scale. This also
	// guarantees span is non-zero, so the normalization below never divides by zero.
	applyHeatmap := maxVal > minVal
	span := maxVal - minVal
	tableRows := make([][]string, rows)
	valuesStyles := make([][]string, rows)
	for r := range rows {
		tableRows[r] = make([]string, cols)
		valuesStyles[r] = make([]string, cols)
		for c := range cols {
			tableRows[r][c] = html.EscapeString(matrix[r][c].text)
			if c == 0 {
				valuesStyles[r][c] = "font-weight:bold"
				continue
			}
			if !applyHeatmap || !matrix[r][c].ok {
				continue
			}
			// Normalize to [0,1] where 1 is always the "best" end of the scale.
			v := matrix[r][c].val
			t := (v - minVal) / span
			if !higherIsBetter {
				t = (maxVal - v) / span
			}
			valuesStyles[r][c] = heatmapCellStyle(t)
		}
	}
	return report.RenderHTMLTable(headers, tableRows, "pure-table pure-table-striped", valuesStyles)
}

// heatmapCellStyle maps a normalized score t onto a CSS background-color declaration.
// t is clamped to [0,1]; 0 yields the red end of the scale, 1 the green end, and values
// in between are linearly interpolated per RGB channel (fractions are truncated).
func heatmapCellStyle(t float64) string {
	// Endpoints of the gradient: red #e03131 and green #2f9e44.
	const (
		redR, redG, redB       = 224, 49, 49
		greenR, greenG, greenB = 47, 158, 68
	)
	switch {
	case t < 0:
		t = 0
	case t > 1:
		t = 1
	}
	red := uint8(redR - (redR-greenR)*t)
	green := uint8(redG + (greenG-redG)*t)
	blue := uint8(redB + (greenB-redB)*t)
	return fmt.Sprintf("background-color: rgb(%d,%d,%d)", red, green, blue)
}

// memoryNUMABandwidthMatrixTableHtmlRenderer renders the NUMA bandwidth matrix as a heatmap
// table in which higher values are shown as better (green). targetName is not used.
func memoryNUMABandwidthMatrixTableHtmlRenderer(tableValues table.TableValues, targetName string) string {
	// For bandwidth, larger numbers are the good end of the color scale.
	const higherIsBetter = true
	return renderNUMAMatrixHeatmapTable(tableValues, higherIsBetter)
}

// memoryNUMALatencyMatrixTableHtmlRenderer renders the NUMA latency matrix as a heatmap
// table in which lower values are shown as better (green). targetName is not used.
func memoryNUMALatencyMatrixTableHtmlRenderer(tableValues table.TableValues, targetName string) string {
	// For latency, smaller numbers are the good end of the color scale.
	const higherIsBetter = false
	return renderNUMAMatrixHeatmapTable(tableValues, higherIsBetter)
}
Loading
Loading