Skip to content

Commit

Permalink
CLX: Release v1.19 event files
Browse files Browse the repository at this point in the history
This commit releases CLX v1.19 events and updates mapfile.csv
accordingly.
  • Loading branch information
edwarddavidbaker committed Jun 12, 2023
1 parent 19ba4e1 commit e4f8353
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 28 deletions.
146 changes: 133 additions & 13 deletions CLX/events/cascadelakex_core.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"Header": {
"Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
"DatePublished": "04/17/2023",
"Version": "1.18",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
"DatePublished": "06/07/2023",
"Version": "1.19",
"Legend": ""
},
"Events": [
Expand Down Expand Up @@ -2579,8 +2579,8 @@
"EventCode": "0x79",
"UMask": "0x18",
"EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
"PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
"PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
Expand All @@ -2603,8 +2603,56 @@
"EventCode": "0x79",
"UMask": "0x18",
"EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
"PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]",
"PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
"MSRIndex": "0",
"MSRValue": "0",
"TakenAlone": "0",
"CounterMask": "1",
"Invert": "0",
"AnyThread": "0",
"EdgeDetect": "0",
"PEBS": "0",
"Data_LA": "0",
"L1_Hit_Indication": "0",
"Errata": "null",
"ELLC": "0",
"Offcore": "0",
"Deprecated": "0"
},
{
"EventCode": "0x79",
"UMask": "0x18",
"EventName": "IDQ.DSB_CYCLES_OK",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
"MSRIndex": "0",
"MSRValue": "0",
"TakenAlone": "0",
"CounterMask": "4",
"Invert": "0",
"AnyThread": "0",
"EdgeDetect": "0",
"PEBS": "0",
"Data_LA": "0",
"L1_Hit_Indication": "0",
"Errata": "null",
"ELLC": "0",
"Offcore": "0",
"Deprecated": "0"
},
{
"EventCode": "0x79",
"UMask": "0x18",
"EventName": "IDQ.DSB_CYCLES_ANY",
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
"PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
Expand Down Expand Up @@ -2843,8 +2891,32 @@
"EventCode": "0x83",
"UMask": "0x04",
"EventName": "ICACHE_64B.IFTAG_STALL",
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"PublicDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"PublicDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "200003",
"MSRIndex": "0",
"MSRValue": "0",
"TakenAlone": "0",
"CounterMask": "0",
"Invert": "0",
"AnyThread": "0",
"EdgeDetect": "0",
"PEBS": "0",
"Data_LA": "0",
"L1_Hit_Indication": "0",
"Errata": "null",
"ELLC": "0",
"Offcore": "0",
"Deprecated": "0"
},
{
"EventCode": "0x83",
"UMask": "0x04",
"EventName": "ICACHE_TAG.STALLS",
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
"PublicDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "200003",
Expand Down Expand Up @@ -3059,8 +3131,32 @@
"EventCode": "0x87",
"UMask": "0x01",
"EventName": "ILD_STALL.LCP",
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
"PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
"MSRIndex": "0",
"MSRValue": "0",
"TakenAlone": "0",
"CounterMask": "0",
"Invert": "0",
"AnyThread": "0",
"EdgeDetect": "0",
"PEBS": "0",
"Data_LA": "0",
"L1_Hit_Indication": "0",
"Errata": "null",
"ELLC": "0",
"Offcore": "0",
"Deprecated": "0"
},
{
"EventCode": "0x87",
"UMask": "0x01",
"EventName": "DECODE.LCP",
"BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
"PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
Expand Down Expand Up @@ -3923,8 +4019,32 @@
"EventCode": "0xA8",
"UMask": "0x01",
"EventName": "LSD.CYCLES_4_UOPS",
"BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
"PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
"BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]",
"PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
"MSRIndex": "0",
"MSRValue": "0x00",
"TakenAlone": "0",
"CounterMask": "4",
"Invert": "0",
"AnyThread": "0",
"EdgeDetect": "0",
"PEBS": "0",
"Data_LA": "0",
"L1_Hit_Indication": "0",
"Errata": "null",
"ELLC": "0",
"Offcore": "0",
"Deprecated": "0"
},
{
"EventCode": "0xA8",
"UMask": "0x01",
"EventName": "LSD.CYCLES_OK",
"BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]",
"PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_4_UOPS]",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
Expand Down
6 changes: 3 additions & 3 deletions CLX/events/cascadelakex_fp_arith_inst.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"Header": {
"Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
"DatePublished": "04/17/2023",
"Version": "1.18",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
"DatePublished": "06/07/2023",
"Version": "1.19",
"Legend": ""
},
"Events": [
Expand Down
8 changes: 4 additions & 4 deletions CLX/events/cascadelakex_uncore.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"Header": {
"Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
"DatePublished": "04/17/2023",
"Version": "1.18",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
"DatePublished": "06/07/2023",
"Version": "1.19",
"Legend": ""
},
"Events": [
Expand Down Expand Up @@ -1726,7 +1726,7 @@
"UMaskExt": "0x00",
"EventName": "UNC_M_WPQ_OCCUPANCY",
"BriefDescription": "Write Pending Queue Occupancy",
"PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts???",
"PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts.",
"Counter": "0,1,2,3",
"MSRValue": "0x00",
"ELLC": "0",
Expand Down
8 changes: 4 additions & 4 deletions CLX/events/cascadelakex_uncore_experimental.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"Header": {
"Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
"DatePublished": "04/17/2023",
"Version": "1.18",
"Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
"DatePublished": "06/07/2023",
"Version": "1.19",
"Legend": ""
},
"Events": [
Expand Down Expand Up @@ -48958,7 +48958,7 @@
"UMaskExt": "0x00",
"EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
"BriefDescription": "Message Held; Parallel Success",
"PublicDescription": "ad and bl messages were actually slotted into the same flit in paralle",
"PublicDescription": "ad and bl messages were actually slotted into the same flit in parallel",
"Counter": "0,1,2",
"MSRValue": "0x00",
"ELLC": "0",
Expand Down
8 changes: 4 additions & 4 deletions mapfile.csv
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_core.json,core,,,
GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_fp_arith_inst.json,fp_arith_inst,,,
GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_uncore.json,uncore,,,
GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_uncore_experimental.json,uncore experimental,,,
GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_core.json,core,,,
GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_fp_arith_inst.json,fp_arith_inst,,,
GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_uncore.json,uncore,,,
GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_uncore_experimental.json,uncore experimental,,,
GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_core.json,core,,,
GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_fp_arith_inst.json,fp_arith_inst,,,
GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_uncore.json,uncore,,,
GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_uncore_experimental.json,uncore experimental,,,
GenuineIntel-6-7A,V1.01,/GLP/events/goldmontplus_core.json,core,,,
GenuineIntel-6-7A,V1.01,/GLP/events/goldmontplus_fp_arith_inst.json,fp_arith_inst,,,
GenuineIntel-6-7A,V1.01,/GLP/events/goldmontplus_matrix.json,offcore,,,
Expand Down

0 comments on commit e4f8353

Please sign in to comment.