From 4153784a666894342f65de6681fc48a2d4abe870 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Thu, 29 Jun 2023 10:49:31 -0700 Subject: [PATCH 1/4] update html report --- README.md | 17 +++-- _version.txt | 2 +- perf-postprocess.py | 4 +- src/base.html | 157 ++++++++++++++++++++++++-------------------- 4 files changed, 101 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index f0fb26f1..d8cb9b05 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,17 @@ -# PerfSpect · [![Build](https://github.com/intel/PerfSpect/actions/workflows/build.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/build.yml)[![CodeQL](https://github.com/intel/PerfSpect/actions/workflows/codeql.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/codeql.yml)[![License](https://img.shields.io/badge/License-BSD--3-blue)](https://github.com/intel/PerfSpect/blob/master/LICENSE) +
-[Quick Start](#quick-start-requires-perf-installed) | [Output](#output) | [Requirements](#requirements) | [Build from source](#build-from-source) +
+
    +

    PerfSpect

    +
+
+ +[![Build](https://github.com/intel/PerfSpect/actions/workflows/build.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/build.yml)[![CodeQL](https://github.com/intel/PerfSpect/actions/workflows/codeql.yml/badge.svg)](https://github.com/intel/PerfSpect/actions/workflows/codeql.yml)[![License](https://img.shields.io/badge/License-BSD--3-blue)](https://github.com/intel/PerfSpect/blob/master/LICENSE) + +[![Static Badge](https://img.shields.io/badge/Live_Demo-red?style=for-the-badge)](https://intel.github.io/PerfSpect/) + +[Quick Start](#quick-start-requires-perf-installed) | [Output](#output) | [Deploy in Kubernetes](#deploy-in-kubernetes) | [Requirements](#requirements) | [Build from source](#build-from-source) +
PerfSpect is a system performance characterization tool built on top of linux perf. Most metrics and events come from [perfmon](https://github.com/intel/perfmon) and [TMA v4.5](https://www.intel.com/content/www/us/en/docs/vtune-profiler/cookbook/2023-1/top-down-microarchitecture-analysis-method.html). It contains two parts: @@ -40,8 +51,6 @@ perf-postprocess outputs: 2. `metric_out.sys.csv`: metric values at every 5 second interval 3. `metric_out.html`: html view of a few select metrics -[live example report](https://intel.github.io/PerfSpect/) - ![basic_stats](https://raw.githubusercontent.com/wiki/intel/PerfSpect/newhtml.gif) ## Deploy in Kubernetes diff --git a/_version.txt b/_version.txt index 1892b926..31e5c843 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.2 +1.3.3 diff --git a/perf-postprocess.py b/perf-postprocess.py index 904f8fa2..49360bf0 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -612,7 +612,9 @@ def generate_metrics_averages( def row(df, name): if name in df.index: - return json.dumps(df.loc[name, :].values.flatten().tolist()) + timeseries = df.loc[[name]].to_dict("split") + timeseries["columns"] = map(lambda x: round(float(x), 1), timeseries["columns"]) + return json.dumps(list(zip(timeseries["columns"], timeseries["data"][0]))) else: return "[]" diff --git a/src/base.html b/src/base.html index 7c58ac1d..6aef7209 100644 --- a/src/base.html +++ b/src/base.html @@ -34,6 +34,7 @@ createTheme, Typography, Button, + IconButton, ButtonGroup, Slider, Grid, @@ -77,15 +78,7 @@ ].join(","), }, palette: { - primary: { - main: "#556cd6", - }, - secondary: { - main: "#19857b", - }, - error: { - main: colors.red.A400, - }, + mode: 'light', }, }); @@ -159,6 +152,8 @@ function App() { const [systemTabs, setSystemTabs] = React.useState(0); const [openlink, setOpenlink] = React.useState(true); + const [maxdiff, set_maxdiff] = React.useState(0) + const [mindiff, set_mindiff] = React.useState(0) const handleChange = (event, 
newSystemTabs) => { setSystemTabs(newSystemTabs); @@ -267,11 +262,14 @@ const base_line = { xAxis: { - name: "time (s)" + name: "time (s)", + min: "dataMin", + max: "dataMax", }, yAxis: {}, tooltip: { - trigger: 'axis' + trigger: 'axis', + valueFormatter: (value) => value.toFixed(2), }, legend: {}, } @@ -281,7 +279,7 @@ series: [ { type: 'line', - data: CPUUTIL.map((e, i) => [i * 5, e]), + data: CPUUTIL, } ] } @@ -291,7 +289,7 @@ series: [ { type: 'line', - data: CPIDATA.map((e, i) => [i * 5, e]), + data: CPIDATA, } ] } @@ -301,7 +299,7 @@ series: [ { type: 'line', - data: CPUFREQ.map((e, i) => [i * 5, e]), + data: CPUFREQ, } ] } @@ -311,7 +309,7 @@ series: [ { type: 'line', - data: REMOTENUMA.map((e, i) => [i * 5, e]), + data: REMOTENUMA, } ] } @@ -322,17 +320,17 @@ { name: "L1D", type: 'line', - data: L1DATA.map((e, i) => [i * 5, e]), + data: L1DATA, }, { name: "L2", type: 'line', - data: L2DATA.map((e, i) => [i * 5, e]), + data: L2DATA, }, { name: "LLC Data", type: 'line', - data: LLCDATA.map((e, i) => [i * 5, e]), + data: LLCDATA, }, ] } @@ -343,17 +341,17 @@ { name: "Read", type: 'line', - data: READDATA.map((e, i) => [i * 5, e]), + data: READDATA, }, { name: "Write", type: 'line', - data: WRITEDATA.map((e, i) => [i * 5, e]), + data: WRITEDATA, }, { name: "Total", type: 'line', - data: TOTALDATA.map((e, i) => [i * 5, e]), + data: TOTALDATA, }, ] } @@ -363,7 +361,7 @@ series: [ { type: 'line', - data: PKGPOWER.map((e, i) => [i * 5, e]), + data: PKGPOWER, } ] } @@ -373,27 +371,38 @@ series: [ { type: 'line', - data: DRAMPOWER.map((e, i) => [i * 5, e]), + data: DRAMPOWER, } ] } const diffreport = (e) => { - console.log(e) - var reader = new FileReader(); - + let reader = new FileReader(); reader.onload = (e) => { let new_metrics = JSON.parse(e.target.result.split("\n").filter(e => e.includes("const all_metrics ="))[0].split("const all_metrics =")[1]) let copy = JSON.parse(JSON.stringify(current_metrics)) + let temp_mindiff = 0 + let temp_maxdiff = 0 for (const 
metric of copy) { for (const other_metric of new_metrics) { if (metric.metrics === other_metric.metrics) { - console.log(metric) - console.log(current_metrics) metric.other = other_metric["0"] + if (other_metric["0"] < metric["0"]) { + metric.diff = ((other_metric["0"] / metric["0"]) - 1) * 100 + } else { + metric.diff = ((metric["0"] / other_metric["0"]) - 1) * -100 + } + if (temp_maxdiff == 0 || temp_maxdiff < metric.diff) { + temp_maxdiff = metric.diff + } + if (temp_mindiff == 0 || temp_mindiff > metric.diff) { + temp_mindiff = metric.diff + } } } } + set_maxdiff(temp_maxdiff) + set_mindiff(temp_mindiff) setCurrent_metrics(copy) }; @@ -413,8 +422,7 @@ onChange={handleChange} variant="scrollable" > - - + @@ -427,7 +435,7 @@ value={systemTabs} index={0} > - + Top-down Microarchitecture Analysis Method (TMAM) @@ -484,27 +492,27 @@ data: [ { name: "Bad Speculation", - value: BADSPECULATION, + value: Math.round(BADSPECULATION * 10) / 10, }, { name: "Retiring", - value: RETIRING, + value: Math.round(RETIRING * 10) / 10, }, { name: "Frontend", - value: FRONTEND, + value: Math.round(FRONTEND * 10) / 10, }, { name: "Backend", - value: BACKEND, + value: Math.round(BACKEND * 10) / 10, children: [ { name: "Core", - value: COREDATA, + value: Math.round(COREDATA * 10) / 10, }, { name: "Memory", - value: MEMORY, + value: Math.round(MEMORY * 10) / 10, }, ], }, @@ -517,18 +525,13 @@ }} /> - - Pressure Stall Information (PSI) - Your workload spent an average of {(PSI_MEM + PSI_CPU + PSI_IO).toFixed(2)}% of time stalled waiting on a hardware resource. + Your workload spent an average of {(PSI_MEM + PSI_CPU + PSI_IO).toFixed(1)}% of time stalled waiting on a hardware resource.
  • @@ -572,28 +575,28 @@ data: [ { name: "CPU stall", - value: PSI_CPU, + value: Math.round(PSI_CPU * 10) / 10, itemStyle: { color: "#ee6666" } }, { name: "Memory stall", - value: PSI_MEM, + value: Math.round(PSI_MEM * 10) / 10, itemStyle: { color: "#fac858" } }, { name: "IO stall", - value: PSI_IO, + value: Math.round(PSI_IO * 10) / 10, itemStyle: { color: "#73c0de" } }, { name: "Not stalled", - value: 100 - (PSI_MEM + PSI_CPU + PSI_IO), + value: Math.round((100 - (PSI_MEM + PSI_CPU + PSI_IO)) * 10) / 10, itemStyle: { color: "#91cc75" } @@ -610,7 +613,7 @@ @@ -632,7 +635,6 @@ {transactions ? "Cycles per transaction retired; indicating how much time each executed transaction took; in units of cycles. Often this metric shows how efficiently applications are using the underlying hardware. A lower \"Cycles per TXN\" could indicate that transactions are not hitting bottlenecks and retiring quickly." : "Cycles per instruction retired; indicating how much time each executed instruction took; in units of cycles. Often this metric shows how efficiently applications are using the underlying hardware. A lower CPI could indicate that instructions are not hitting bottlenecks and retiring quickly."} - @@ -655,7 +657,7 @@ @@ -699,7 +701,7 @@ @@ -730,10 +732,10 @@ - - + +
    Metadata @@ -749,7 +751,9 @@ {row[0]} - {row[1]} + + {row[1]} + ))} @@ -758,16 +762,20 @@ - - -
    + + + TMA metrics are a hierarchy where each sub-metric contains more periods "..." to designate its depth in the tree + + +
    - Value - Metric - {current_metrics[0].hasOwnProperty("other") && Other} + Value + Metric + {current_metrics[0].hasOwnProperty("other") && Other} + {current_metrics[0].hasOwnProperty("other") && Diff} @@ -778,21 +786,24 @@ sx={{ '&:last-child td, &:last-child th': { border: 0 } }} > - - - {row.metrics} - + + {description.hasOwnProperty(row.metrics) && + help + } + {!description.hasOwnProperty(row.metrics) && + help + } + {row.metrics} - - {Number(row["0"]).toFixed(2)} - + {Number(row["0"]).toFixed(1)} - {row.hasOwnProperty("other") && - - {Number(row["other"]).toFixed(2)} - + {row.hasOwnProperty("other") && + {Number(row["other"]).toFixed(1)} + } + {row.hasOwnProperty("other") && 0 ? "rgba(255,0,0," + (row.diff / maxdiff * .5) + ")" : "rgba(0,0,255," + (row.diff / mindiff * .5) + ")") }}> + {Math.round(Number(row["diff"]))}% } ))} From 44624a22ae808204452b50872121ac92e61a0e98 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Tue, 8 Aug 2023 08:58:59 -0700 Subject: [PATCH 2/4] improve VM filtering --- Makefile | 1 - README.md | 2 +- _version.txt | 2 +- events/bdx.txt | 4 +- events/clx_skx.txt | 5 +-- events/icx.txt | 2 +- events/metric_bdx.json | 4 +- events/metric_icx.json | 78 +++++++++++++++++----------------- events/metric_skx_clx.json | 86 +++++++++++++++++++------------------- events/metric_spr.json | 76 ++++++++++++++++----------------- events/spr.txt | 2 +- src/prepare_perf_events.py | 57 +++++++++++++++---------- 12 files changed, 165 insertions(+), 154 deletions(-) diff --git a/Makefile b/Makefile index d4021995..0a1b0e19 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,6 @@ dist/$(PACKAGE_EXTERNAL): build_dir build/libtsc build-public/collect build-publ cp build/$(BINARY_COLLECT) dist/$(BINARY_FINAL)/$(BINARY_COLLECT) cp build/$(BINARY_POSTPROCESS) dist/$(BINARY_FINAL)/$(BINARY_POSTPROCESS) cp LICENSE dist/$(BINARY_FINAL)/ - cp README.md dist/$(BINARY_FINAL)/README.md cd dist && tar -czf $(PACKAGE_EXTERNAL) $(BINARY_FINAL) cd dist && cp -r 
$(BINARY_FINAL) ../build/ rm -rf dist/$(BINARY_FINAL)/ diff --git a/README.md b/README.md index d0baa132..45ceb49e 100644 --- a/README.md +++ b/README.md @@ -90,4 +90,4 @@ Requires recent python. On successful build, binaries will be created in `dist` ``` pip3 install -r requirements.txt make -``` +``` \ No newline at end of file diff --git a/_version.txt b/_version.txt index 31e5c843..d0149fef 100644 --- a/_version.txt +++ b/_version.txt @@ -1 +1 @@ -1.3.3 +1.3.4 diff --git a/events/bdx.txt b/events/bdx.txt index 7cc20250..2e1ffd87 100644 --- a/events/bdx.txt +++ b/events/bdx.txt @@ -3,8 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# Broadwell event list (default, with extensive TMA collection) - +# Broadwell event list cpu/event=0xc2,umask=0x02,period=2000003,name='UOPS_RETIRED.RETIRE_SLOTS'/, cpu/event=0xc5,umask=0x00,name='BR_MISP_RETIRED.ALL_BRANCHES'/, cpu/event=0xc3,umask=0x01,name='MACHINE_CLEARS.COUNT'/, @@ -173,4 +172,3 @@ imc/event=0x04,umask=0x0c,name='UNC_M_CAS_COUNT.WR'/; #power related power/energy-pkg/, power/energy-ram/; - diff --git a/events/clx_skx.txt b/events/clx_skx.txt index ed2b2c51..9c2abbb4 100644 --- a/events/clx_skx.txt +++ b/events/clx_skx.txt @@ -3,8 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# Cascadelake event list (default, with extensive TMA collection) - +# Cascadelake event list #avx related power levels cpu/event=0x28,umask=0x07,period=200003,name='CORE_POWER.LVL0_TURBO_LICENSE'/, cpu/event=0x28,umask=0x18,period=200003,name='CORE_POWER.LVL1_TURBO_LICENSE'/, @@ -239,4 +238,4 @@ iio/event=0x83,umask=0x01,ch_mask=0x08,fc_mask=0x07,name='UNC_IIO_DATA_REQ_OF_CP upi/event=0x2,umask=0x0f,name='UNC_UPI_TxL_FLITS.ALL_DATA'/, upi/event=0x2,umask=0x97,name='UNC_UPI_TxL_FLITS.NON_DATA'/, 
upi/event=0x1,umask=0x0,name='UNC_UPI_CLOCKTICKS'/, -upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; +upi/event=0x21,umask=0x0,name='UNC_UPI_L1_POWER_CYCLES'/; \ No newline at end of file diff --git a/events/icx.txt b/events/icx.txt index 8a5cdef0..590e01bd 100644 --- a/events/icx.txt +++ b/events/icx.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# Icelake event list (default) +# Icelake event list cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, cpu/event=0xd1,umask=0x01,period=1000003,name='MEM_LOAD_RETIRED.L1_HIT'/, cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, diff --git a/events/metric_bdx.json b/events/metric_bdx.json index b39fa827..921d8d90 100644 --- a/events/metric_bdx.json +++ b/events/metric_bdx.json @@ -29,7 +29,7 @@ "name": "metric_locks retired per instr", "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { @@ -386,4 +386,4 @@ "expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] )/ (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count]))", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/metric_icx.json b/events/metric_icx.json index 1fd9f225..9448c37b 100644 --- a/events/metric_icx.json +++ b/events/metric_icx.json @@ -14,22 +14,22 @@ }, { "name": "metric_CPI", - "name-txn": "metric_cycles per txn", + "name-txn": "metric_cycles per txn", "expression": "[cpu-cycles] / [instructions]", - "expression-txn": "[cpu-cycles] / [TXN]" + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", - "name-txn": "metric_kernel_cycles per txn", + "name-txn": "metric_kernel_cycles per txn", "expression": 
"[cpu-cycles:k] / [instructions:k]", - "expression-txn": "[cpu-cycles:k] / [TXN]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_IPC", - "name-txn": "metric_txn per cycles", + "name-txn": "metric_txn per cycles", "expression": "[instructions] / [cpu-cycles]", - "expression-txn": "[instructions] / [TXN]", + "expression-txn": "[instructions] / [TXN]", "origin": "perfspect" }, { @@ -39,52 +39,52 @@ }, { "name": "metric_locks retired per instr", - "name-txn": "metric_locks retired per txn", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", "expression": "[L1D.REPLACEMENT] / [instructions]", - "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "name-txn": "metric_L1D demand data read hits per txn", + "name-txn": "metric_L1D demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", - "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "name-txn": "metric_L2 demand data read hits 
per txn", + "name-txn": "metric_L2 demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", "expression": "[L2_LINES_IN.ALL] / [instructions]", - "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "name-txn": "metric_L2 demand data read misses per txn", + "name-txn": "metric_L2 demand data read misses per txn", "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "name-txn": "metric_L2 demand code misses per txn", + "name-txn": "metric_L2 demand code misses per txn", "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", - "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_Average LLC data read miss latency (in clks)", @@ -160,27 +160,27 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_CRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_CRD_PREF]) / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read 
(demand+prefetch) misses per txn", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]" }, { "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]" }, { "name": "metric_Average LLC demand data read miss latency (in ns)", @@ -196,27 +196,27 @@ }, { "name": "metric_ITLB (2nd level) MPI", - "name-txn": "metric_ITLB (2nd level) misses per txn", + "name-txn": "metric_ITLB (2nd level) misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd 
level) load MPI", - "name-txn": "metric_DTLB (2nd level) load misses per txn", + "name-txn": "metric_DTLB (2nd level) load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) 2MB large page load MPI", - "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB (2nd level) store MPI", - "name-txn": "metric_DTLB (2nd level) store misses per txn", + "name-txn": "metric_DTLB (2nd level) store misses per txn", "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_NUMA %_Reads addressed to local DRAM", @@ -394,4 +394,4 @@ "name": "metric_TMA_Info_System_SMT_2T_Utilization", "expression": "1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED] if [SOCKET_COUNT] > 1 else 0" } -] +] \ No newline at end of file diff --git a/events/metric_skx_clx.json b/events/metric_skx_clx.json index 5887e67b..4caf3fae 100644 --- a/events/metric_skx_clx.json +++ b/events/metric_skx_clx.json @@ -14,97 +14,97 @@ }, { "name": "metric_CPI", - "name-txn": "metric_cycles per txn", + "name-txn": "metric_cycles per txn", "expression": "[cpu-cycles] / [instructions]", - "expression-txn": "[cpu-cycles] / [TXN]" + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", - "name-txn": "metric_kernel_cycles per txn", + "name-txn": "metric_kernel_cycles per txn", "expression": 
"[cpu-cycles:k] / [instructions:k]", - "expression-txn": "[cpu-cycles:k] / [TXN]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_locks retired per instr", - "name-txn": "metric_locks retired per txn", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", "expression": "[L1D.REPLACEMENT] / [instructions]", - "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D demand data read hits per instr", - "name-txn": "metric_L1D demand data read hits per txn", + "name-txn": "metric_L1D demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", - "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "name-txn": "metric_L2 demand data read hits per txn", + "name-txn": "metric_L2 demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_L2 misses per txn 
(includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", "expression": "[L2_LINES_IN.ALL] / [instructions]", - "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" + "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "name-txn": "metric_L2 demand data read misses per txn", + "name-txn": "metric_L2 demand data read misses per txn", "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", - "exression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + "exression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "name-txn": "metric_L2 demand code misses per txn", + "name-txn": "metric_L2 demand code misses per txn", "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", - "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_LLC MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_LLC misses per txn (includes code+data+rfo w/ prefetches)", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [TXN]", + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] + [UNC_CHA_TOR_INSERTS.IA_MISS.0x12C40033]) / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", - "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [TXN]" + "name-txn": "metric_LLC code read (demand+prefetch) 
misses per txn", + "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [instructions]", + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12CC0233] / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [TXN]" + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS.0x12D40433] / [TXN]" }, { "name": "metric_LLC total HITM (per instr)", "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [instructions]", - "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [TXN]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [instructions]", - "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [TXN]", + "expression-txn": "[OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD] / [TXN]", "origin": "perfspect" }, { @@ -129,47 +129,47 @@ }, { "name": "metric_ITLB MPI", - "name-txn": "metric_ITLB misses per txn", + "name-txn": "metric_ITLB misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_ITLB large page MPI", - "name-txn": "metric_ITLB large page misses per txn", + "name-txn": "metric_ITLB large page misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": 
"[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB load MPI", - "name-txn": "metric_DTLB load misses per txn", + "name-txn": "metric_DTLB load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB 4KB page load MPI", - "name-txn": "metric_DTLB 4KB page load misses per txn", + "name-txn": "metric_DTLB 4KB page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_4K] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB 2MB large page load MPI", - "name-txn": "metric_DTLB 2MB large page load misses per txn", + "name-txn": "metric_DTLB 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB 1GB large page load MPI", "name-txn": "metric_DTLB 1GB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]", + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_1G] / [TXN]", "origin": "perfspect" }, { "name": "metric_DTLB store MPI", - "name-txn": "metric_DTLB store misses per txn", + "name-txn": "metric_DTLB store misses per txn", "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB load miss latency (in core clks)", @@ -461,4 +461,4 @@ 
"expression": "100 * (([UOPS_RETIRED.RETIRE_SLOTS] / [UOPS_ISSUED.ANY]) * [IDQ.MS_UOPS] / (4 * ([CPU_CLK_UNHALTED.THREAD_ANY] / [const_thread_count])))", "origin": "perfspect" } -] +] \ No newline at end of file diff --git a/events/metric_spr.json b/events/metric_spr.json index ff40466e..76e803e5 100644 --- a/events/metric_spr.json +++ b/events/metric_spr.json @@ -14,22 +14,22 @@ }, { "name": "metric_CPI", - "name-txn": "metric_cycles per txn", + "name-txn": "metric_cycles per txn", "expression": "[cpu-cycles] / [instructions]", - "expression-txn": "[cpu-cycles] / [TXN]" + "expression-txn": "[cpu-cycles] / [TXN]" }, { "name": "metric_kernel_CPI", - "name-txn": "metric_kernel_cycles per txn", + "name-txn": "metric_kernel_cycles per txn", "expression": "[cpu-cycles:k] / [instructions:k]", - "expression-txn": "[cpu-cycles:k] / [TXN]", + "expression-txn": "[cpu-cycles:k] / [TXN]", "origin": "perfspect" }, { "name": "metric_IPC", - "name-txn": "metric_txn per cycle", + "name-txn": "metric_txn per cycle", "expression": "[instructions] / [cpu-cycles]", - "expression-txn": "[TXN] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", "origin": "perfspect" }, { @@ -39,52 +39,52 @@ }, { "name": "metric_locks retired per instr", - "name-txn": "metric_locks retired per txn", + "name-txn": "metric_locks retired per txn", "expression": "[MEM_INST_RETIRED.LOCK_LOADS] / [instructions]", - "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", + "expression-txn": "[MEM_INST_RETIRED.LOCK_LOADS] / [TXN]", "origin": "perfmon website" }, { "name": "metric_L1D MPI (includes data+rfo w/ prefetches)", - "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", - "expression": "[L1D.REPLACEMENT] / [instructions]", - "expression-txn": "[L1D.REPLACEMENT] / [TXN]" + "name-txn": "metric_L1D misses per txn (includes data+rfo w/ prefetches)", + "expression": "[L1D.REPLACEMENT] / [instructions]", + "expression-txn": "[L1D.REPLACEMENT] / [TXN]" }, { "name": "metric_L1D 
demand data read hits per instr", - "name-txn": "metric_L1D demand data read hits per txn", + "name-txn": "metric_L1D demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L1_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L1_HIT] / [TXN]" }, { "name": "metric_L1-I code read misses (w/ prefetches) per instr", - "name-txn": "metric_L1I code read misses (includes prefetches) per txn", + "name-txn": "metric_L1I code read misses (includes prefetches) per txn", "expression": "[L2_RQSTS.ALL_CODE_RD] / [instructions]", - "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" + "expression-txn": "[L2_RQSTS.ALL_CODE_RD] / [TXN]" }, { "name": "metric_L2 demand data read hits per instr", - "name-txn": "metric_L2 demand data read hits per txn", + "name-txn": "metric_L2 demand data read hits per txn", "expression": "[MEM_LOAD_RETIRED.L2_HIT] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_HIT] / [TXN]" }, { "name": "metric_L2 MPI (includes code+data+rfo w/ prefetches)", - "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", + "name-txn": "metric_L2 misses per txn (includes code+data+rfo w/ prefetches)", "expression": "[L2_LINES_IN.ALL] / [instructions]", "expression-txn": "[L2_LINES_IN.ALL] / [TXN]" }, { "name": "metric_L2 demand data read MPI", - "name-txn": "metric_L2 demand data read misses per txn", + "name-txn": "metric_L2 demand data read misses per txn", "expression": "[MEM_LOAD_RETIRED.L2_MISS] / [instructions]", - "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" + "expression-txn": "[MEM_LOAD_RETIRED.L2_MISS] / [TXN]" }, { "name": "metric_L2 demand code MPI", - "name-txn": "metric_L2 demand code misses per txn", + "name-txn": "metric_L2 demand code misses per txn", "expression": "[L2_RQSTS.CODE_RD_MISS] / [instructions]", - "expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" + 
"expression-txn": "[L2_RQSTS.CODE_RD_MISS] / [TXN]" }, { "name": "metric_UPI Data transmit BW (MB/sec) (only data)", @@ -142,28 +142,28 @@ }, { "name": "metric_LLC code read MPI (demand+prefetch)", - "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC code read (demand+prefetch) misses per txn", "expression": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [instructions]", - "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" + "expression-txn": "[UNC_CHA_TOR_INSERTS.IA_MISS_CRD] / [TXN]" }, { "name": "metric_LLC data read MPI (demand+prefetch)", - "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", + "name-txn": "metric_LLC data read (demand+prefetch) misses per txn", "expression": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [instructions]", - "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" + "expression-txn": "([UNC_CHA_TOR_INSERTS.IA_MISS_LLCPREFDATA] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD] + [UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PREF]) / [TXN]" }, { "name": "metric_LLC total HITM (per instr) (excludes LLC prefetches)", "name-txn": "metric_LLC total HITM per txn (excludes LLC prefetches)", - "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", + "expression": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [instructions]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HITM] / [TXN]", "origin": "perfspect" }, { "name": "metric_LLC total HIT clean line forwards (per instr) (excludes LLC prefetches)", - "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", + "name-txn": "metric_LLC total HIT clean line forwards per txn (excludes LLC prefetches)", "expression": 
"[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [instructions]", - "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", + "expression-txn": "[OCR.READS_TO_CORE.REMOTE_CACHE.SNOOP_HIT_WITH_FWD] / [TXN]", "origin": "perfspect" }, { @@ -180,27 +180,27 @@ }, { "name": "metric_ITLB (2nd level) MPI", - "name-txn": "metric_ITLB (2nd level) misses per txn", + "name-txn": "metric_ITLB (2nd level) misses per txn", "expression": "[ITLB_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[ITLB_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) load MPI", - "name-txn": "metric_DTLB (2nd level) load misses per txn", + "name-txn": "metric_DTLB (2nd level) load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_DTLB (2nd level) 2MB large page load MPI", - "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", + "name-txn": "metric_DTLB (2nd level) 2MB large page load misses per txn", "expression": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [instructions]", - "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" + "expression-txn": "[DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M] / [TXN]" }, { "name": "metric_DTLB (2nd level) store MPI", "name-txn": "metric_DTLB (2nd level) store misses per txn", "expression": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [instructions]", - "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" + "expression-txn": "[DTLB_STORE_MISSES.WALK_COMPLETED] / [TXN]" }, { "name": "metric_NUMA %_Reads addressed to local DRAM", @@ -384,4 +384,4 @@ "expression": "(1 - [CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE] / [CPU_CLK_UNHALTED.REF_DISTRIBUTED]) if [SOCKET_COUNT] > 1 else 0", "origin": "perfspect" } -] +] \ No newline at end of file diff 
--git a/events/spr.txt b/events/spr.txt index 2a517aac..5702d1dc 100644 --- a/events/spr.txt +++ b/events/spr.txt @@ -3,7 +3,7 @@ # SPDX-License-Identifier: BSD-3-Clause ########################################################################################################### -# SapphireRapids event list (default) +# SapphireRapids event list cpu/event=0x51,umask=0x01,period=100003,name='L1D.REPLACEMENT'/, cpu/event=0x24,umask=0xe4,period=200003,name='L2_RQSTS.ALL_CODE_RD'/, diff --git a/src/prepare_perf_events.py b/src/prepare_perf_events.py index d51e028c..5ac21f75 100644 --- a/src/prepare_perf_events.py +++ b/src/prepare_perf_events.py @@ -63,6 +63,8 @@ def is_cpu_event(line): if ( (len(tmp_list) == 1 or tmp_list[0] == "cpu" or tmp_list[0].startswith("cstate")) and "OCR." not in line + and "uops_retired.ms" not in line + and "int_misc.unknown_branch_cycles" not in line and "power/" not in line ): return True @@ -119,30 +121,43 @@ def filter_events(event_file, cpu_only, PID_CID_mode, TMA_supported): collection_events = [] unsupported_events = [] perf_list = helper.get_perf_list() + seperate_cycles = [] + if cpu_only: + # since most CSPs hide the cycles fixed PMU inside their VMs, we put it in its own group + seperate_cycles = [ + "cpu-cycles,", + "cpu-cycles:k,", + "ref-cycles,", + "instructions;", + ] + + def process(line): + line = line.strip() + if line == "" or line.startswith("#") or (cpu_only and not is_cpu_event(line)): + return + if PID_CID_mode and line.startswith("cstate_"): + return + if not TMA_supported and ( + "name='TOPDOWN.SLOTS'" in line or "name='PERF_METRICS."
in line + ): + return + if not is_collectable_event(line, perf_list): + # not a collectable event + unsupported_events.append(line) + # if this is the last event in the group, mark the previous event as the last (with a ';') + if line.endswith(";") and len(collection_events) > 1: + end_event = collection_events[-1] + collection_events[-1] = end_event[:-1] + ";" + else: + collection_events.append(line) + with open(event_file, "r") as fin: for line in fin: - line = line.strip() - if ( - line == "" - or line.startswith("#") - or (cpu_only and not is_cpu_event(line)) - ): - continue - if PID_CID_mode and line.startswith("cstate_"): + if cpu_only and "cpu-cycles" in line: continue - if not TMA_supported and ( - "name='TOPDOWN.SLOTS'" in line or "name='PERF_METRICS." in line - ): - continue - if not is_collectable_event(line, perf_list): - # not a collectable event - unsupported_events.append(line) - # if this is the last event in the group, mark the previous event as the last (with a ';') - if line.endswith(";") and len(collection_events) > 1: - end_event = collection_events[-1] - collection_events[-1] = end_event[:-1] + ";" - else: - collection_events.append(line) + process(line) + for line in seperate_cycles: + process(line) if len(unsupported_events) > 0: logging.warning( f"Perf unsupported events not counted: {unsupported_events}" From 644e4f600c532f85e7bcb3a3fdfdd5525d128c4a Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Fri, 11 Aug 2023 08:57:35 -0700 Subject: [PATCH 3/4] move acknowledgements and compress metric table --- README.md | 6 ++++-- src/base.html | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 45ceb49e..cb7955b4 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ [Quick Start](#quick-start-requires-perf-installed) | [Output](#output) | [Deploy in Kubernetes](#deploy-in-kubernetes) | [Requirements](#requirements) | [Build from source](#build-from-source) -PerfSpect is a system performance 
characterization tool built on top of linux perf. Most metrics and events come from [perfmon](https://github.com/intel/perfmon) and [TMA v4.5](https://www.intel.com/content/www/us/en/docs/vtune-profiler/cookbook/2023-1/top-down-microarchitecture-analysis-method.html). It contains two parts: +PerfSpect is a system performance characterization tool built on top of linux perf. It contains two parts: perf-collect: Collects hardware events at a 5 second output interval with practically zero overhead since PMU's run in counting mode. @@ -90,4 +90,6 @@ Requires recent python. On successful build, binaries will be created in `dist` ``` pip3 install -r requirements.txt make -``` \ No newline at end of file +``` + +_Note: Most metrics and events come from [perfmon](https://github.com/intel/perfmon) and [TMA v4.5](https://www.intel.com/content/www/us/en/docs/vtune-profiler/cookbook/2023-1/top-down-microarchitecture-analysis-method.html)_ diff --git a/src/base.html b/src/base.html index 6aef7209..4cd99809 100644 --- a/src/base.html +++ b/src/base.html @@ -769,7 +769,7 @@ TMA metrics are a hierarchy where each sub-metric contains more periods "..." to designate its depth in the tree -
    +
    Value @@ -827,4 +827,4 @@ - \ No newline at end of file + From bd2e920cecfdb068ae69b5a72ff46ed972e11f17 Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Wed, 16 Aug 2023 08:29:08 -0700 Subject: [PATCH 4/4] adding mini sierra forest --- Makefile | 1 + events/metric_srf.json | 40 ++++++++++++++++++++++++++++++++++++++++ events/srf.txt | 22 ++++++++++++++++++++++ perf-collect.py | 3 +++ perf-collect.spec | 2 +- perf-postprocess.py | 2 ++ src/perf_helpers.py | 8 ++++++-- 7 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 events/metric_srf.json create mode 100644 events/srf.txt diff --git a/Makefile b/Makefile index 0a1b0e19..2837cba5 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,7 @@ build-public/postprocess: --add-data "./events/metric_bdx.json:." \ --add-data "./events/metric_icx.json:." \ --add-data "./events/metric_spr.json:." \ + --add-data "./events/metric_srf.json:." \ --add-data "./src/base.html:." \ --runtime-tmpdir . \ --exclude-module readline diff --git a/events/metric_srf.json b/events/metric_srf.json new file mode 100644 index 00000000..1f38f6e5 --- /dev/null +++ b/events/metric_srf.json @@ -0,0 +1,40 @@ +[ + { + "name": "metric_CPU operating frequency (in GHz)", + "expression": "(([cpu-cycles] / [ref-cycles] * [SYSTEM_TSC_FREQ]) / 1000000000)" + }, + { + "name": "metric_CPU utilization %", + "expression": "100 * [ref-cycles] / [TSC]" + }, + { + "name": "metric_CPU utilization% in kernel mode", + "expression": "100 * [ref-cycles:k] / [TSC]", + "origin": "perfspect" + }, + { + "name": "metric_CPI", + "name-txn": "metric_cycles per txn", + "expression": "[cpu-cycles] / [instructions]", + "expression-txn": "[cpu-cycles] / [TXN]" + }, + { + "name": "metric_kernel_CPI", + "name-txn": "metric_kernel_cycles per txn", + "expression": "[cpu-cycles:k] / [instructions:k]", + "expression-txn": "[cpu-cycles:k] / [TXN]", + "origin": "perfspect" + }, + { + "name": "metric_IPC", + "name-txn": "metric_txn per cycle", + "expression": 
"[instructions] / [cpu-cycles]", + "expression-txn": "[TXN] / [cpu-cycles]", + "origin": "perfspect" + }, + { + "name": "metric_giga_instructions_per_sec", + "expression": "[instructions] / 1000000000", + "origin": "perfspect" + } +] \ No newline at end of file diff --git a/events/srf.txt b/events/srf.txt new file mode 100644 index 00000000..b57637da --- /dev/null +++ b/events/srf.txt @@ -0,0 +1,22 @@ +########################################################################################################### +# Copyright (C) 2021-2023 Intel Corporation +# SPDX-License-Identifier: BSD-3-Clause +########################################################################################################### + +# SierraForest event list + +cpu-cycles, +ref-cycles, +instructions; + +cpu-cycles:k, +ref-cycles:k, +instructions:k; + +#C6 +cstate_core/c6-residency/; +cstate_pkg/c6-residency/; + +#power +power/energy-pkg/, +power/energy-ram/; \ No newline at end of file diff --git a/perf-collect.py b/perf-collect.py index 3a37f3f0..3a9ee612 100644 --- a/perf-collect.py +++ b/perf-collect.py @@ -27,6 +27,7 @@ "Icelake", "SapphireRapids", "EmeraldRapids", + "SierraForest", ] @@ -318,6 +319,8 @@ def validate_file(fname): elif arch == "emeraldrapids": eventfile = "spr.txt" have_uncore = False + elif arch == "sierraforest": + eventfile = "srf.txt" if eventfile is None: crash(f"failed to match architecture ({arch}) to event file name.") diff --git a/perf-collect.spec b/perf-collect.spec index 4de6f7a0..da16ddee 100644 --- a/perf-collect.spec +++ b/perf-collect.spec @@ -7,7 +7,7 @@ block_cipher = None a = Analysis( ['perf-collect.py'], pathex=[], - datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr.txt', '.')], + datas=[('./src/libtsc.so', '.'), ('./events/bdx.txt', '.'), ('./events/clx_skx.txt', '.'), ('./events/icx.txt', '.'), ('./events/spr.txt', '.'), ('./events/srf.txt', '.')], hiddenimports=[], 
hookspath=[], hooksconfig={}, diff --git a/perf-postprocess.py b/perf-postprocess.py index 49360bf0..2f63880d 100644 --- a/perf-postprocess.py +++ b/perf-postprocess.py @@ -416,6 +416,8 @@ def get_metric_file_name(microarchitecture): metric_file = "metric_icx.json" elif microarchitecture == "sapphirerapids" or microarchitecture == "emeraldrapids": metric_file = "metric_spr.json" + elif microarchitecture == "sierraforest": + metric_file = "metric_srf.json" else: crash("Suitable metric file not found") diff --git a/src/perf_helpers.py b/src/perf_helpers.py index 87eecd35..111b6900 100644 --- a/src/perf_helpers.py +++ b/src/perf_helpers.py @@ -158,7 +158,8 @@ def disable_nmi_watchdog(): logging.info("nmi_watchdog disabled!") return nmi_watchdog_status except subprocess.CalledProcessError as e: - crash(e.output + "\nFailed to disable nmi_watchdog.") + logging.warning(e) + logging.warning("Failed to disable nmi_watchdog.") except ValueError as e: crash(f"Failed to disable watchdog: {e}") @@ -175,7 +176,8 @@ def enable_nmi_watchdog(): else: logging.info("nmi_watchdog enabled!") except subprocess.CalledProcessError as e: - logging.warning(e.output + "\nFailed to re-enable nmi_watchdog!") + logging.warning(e.output) + logging.warning("Failed to re-enable nmi_watchdog!") except ValueError as e: logging.warning(f"Failed to re-enable nmi_watchdog: {e}") @@ -281,6 +283,8 @@ def get_arch_and_name(procinfo): arch = "sapphirerapids" elif model == 207 and cpufamily == 6: arch = "emeraldrapids" + elif model == 175 and cpufamily == 6: + arch = "sierraforest" return arch, modelname