CLX: Release v1.19 event files

This commit releases CLX v1.19 events and updates mapfile.csv accordingly.
intel · Jun 12, 2023 · e4f8353 · e4f8353
1 parent 19ba4e1
commit e4f8353
Show file tree

Hide file tree

Showing 5 changed files with 148 additions and 28 deletions.
diff --git a/CLX/events/cascadelakex_core.json b/CLX/events/cascadelakex_core.json
@@ -1,9 +1,9 @@
 {
   "Header": {
     "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
-    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
-    "DatePublished": "04/17/2023",
-    "Version": "1.18",
+    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
+    "DatePublished": "06/07/2023",
+    "Version": "1.19",
     "Legend": ""
   },
   "Events": [
@@ -2579,8 +2579,8 @@
       "EventCode": "0x79",
       "UMask": "0x18",
       "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
-      "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
-      "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+      "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.DSB_CYCLES_OK]",
+      "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_OK]",
       "Counter": "0,1,2,3",
       "CounterHTOff": "0,1,2,3,4,5,6,7",
       "SampleAfterValue": "2000003",
@@ -2603,8 +2603,56 @@
       "EventCode": "0x79",
       "UMask": "0x18",
       "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
-      "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
-      "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+      "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.DSB_CYCLES_ANY]",
+      "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.DSB_CYCLES_ANY]",
+      "Counter": "0,1,2,3",
+      "CounterHTOff": "0,1,2,3,4,5,6,7",
+      "SampleAfterValue": "2000003",
+      "MSRIndex": "0",
+      "MSRValue": "0",
+      "TakenAlone": "0",
+      "CounterMask": "1",
+      "Invert": "0",
+      "AnyThread": "0",
+      "EdgeDetect": "0",
+      "PEBS": "0",
+      "Data_LA": "0",
+      "L1_Hit_Indication": "0",
+      "Errata": "null",
+      "ELLC": "0",
+      "Offcore": "0",
+      "Deprecated": "0"
+    },
+    {
+      "EventCode": "0x79",
+      "UMask": "0x18",
+      "EventName": "IDQ.DSB_CYCLES_OK",
+      "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+      "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_4_UOPS]",
+      "Counter": "0,1,2,3",
+      "CounterHTOff": "0,1,2,3,4,5,6,7",
+      "SampleAfterValue": "2000003",
+      "MSRIndex": "0",
+      "MSRValue": "0",
+      "TakenAlone": "0",
+      "CounterMask": "4",
+      "Invert": "0",
+      "AnyThread": "0",
+      "EdgeDetect": "0",
+      "PEBS": "0",
+      "Data_LA": "0",
+      "L1_Hit_Indication": "0",
+      "Errata": "null",
+      "ELLC": "0",
+      "Offcore": "0",
+      "Deprecated": "0"
+    },
+    {
+      "EventCode": "0x79",
+      "UMask": "0x18",
+      "EventName": "IDQ.DSB_CYCLES_ANY",
+      "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
+      "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ. [This event is alias to IDQ.ALL_DSB_CYCLES_ANY_UOPS]",
       "Counter": "0,1,2,3",
       "CounterHTOff": "0,1,2,3,4,5,6,7",
       "SampleAfterValue": "2000003",
@@ -2843,8 +2891,32 @@
       "EventCode": "0x83",
       "UMask": "0x04",
       "EventName": "ICACHE_64B.IFTAG_STALL",
-      "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
-      "PublicDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+      "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+      "PublicDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_TAG.STALLS]",
+      "Counter": "0,1,2,3",
+      "CounterHTOff": "0,1,2,3,4,5,6,7",
+      "SampleAfterValue": "200003",
+      "MSRIndex": "0",
+      "MSRValue": "0",
+      "TakenAlone": "0",
+      "CounterMask": "0",
+      "Invert": "0",
+      "AnyThread": "0",
+      "EdgeDetect": "0",
+      "PEBS": "0",
+      "Data_LA": "0",
+      "L1_Hit_Indication": "0",
+      "Errata": "null",
+      "ELLC": "0",
+      "Offcore": "0",
+      "Deprecated": "0"
+    },
+    {
+      "EventCode": "0x83",
+      "UMask": "0x04",
+      "EventName": "ICACHE_TAG.STALLS",
+      "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
+      "PublicDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss. [This event is alias to ICACHE_64B.IFTAG_STALL]",
       "Counter": "0,1,2,3",
       "CounterHTOff": "0,1,2,3,4,5,6,7",
       "SampleAfterValue": "200003",
@@ -3059,8 +3131,32 @@
       "EventCode": "0x87",
       "UMask": "0x01",
       "EventName": "ILD_STALL.LCP",
-      "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
-      "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+      "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to DECODE.LCP]",
+      "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to DECODE.LCP]",
+      "Counter": "0,1,2,3",
+      "CounterHTOff": "0,1,2,3,4,5,6,7",
+      "SampleAfterValue": "2000003",
+      "MSRIndex": "0",
+      "MSRValue": "0",
+      "TakenAlone": "0",
+      "CounterMask": "0",
+      "Invert": "0",
+      "AnyThread": "0",
+      "EdgeDetect": "0",
+      "PEBS": "0",
+      "Data_LA": "0",
+      "L1_Hit_Indication": "0",
+      "Errata": "null",
+      "ELLC": "0",
+      "Offcore": "0",
+      "Deprecated": "0"
+    },
+    {
+      "EventCode": "0x87",
+      "UMask": "0x01",
+      "EventName": "DECODE.LCP",
+      "BriefDescription": "Stalls caused by changing prefix length of the instruction. [This event is alias to ILD_STALL.LCP]",
+      "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. [This event is alias to ILD_STALL.LCP]",
       "Counter": "0,1,2,3",
       "CounterHTOff": "0,1,2,3,4,5,6,7",
       "SampleAfterValue": "2000003",
@@ -3923,8 +4019,32 @@
       "EventCode": "0xA8",
       "UMask": "0x01",
       "EventName": "LSD.CYCLES_4_UOPS",
-      "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
-      "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+      "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_OK]",
+      "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_OK]",
+      "Counter": "0,1,2,3",
+      "CounterHTOff": "0,1,2,3,4,5,6,7",
+      "SampleAfterValue": "2000003",
+      "MSRIndex": "0",
+      "MSRValue": "0x00",
+      "TakenAlone": "0",
+      "CounterMask": "4",
+      "Invert": "0",
+      "AnyThread": "0",
+      "EdgeDetect": "0",
+      "PEBS": "0",
+      "Data_LA": "0",
+      "L1_Hit_Indication": "0",
+      "Errata": "null",
+      "ELLC": "0",
+      "Offcore": "0",
+      "Deprecated": "0"
+    },
+    {
+      "EventCode": "0xA8",
+      "UMask": "0x01",
+      "EventName": "LSD.CYCLES_OK",
+      "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder. [This event is alias to LSD.CYCLES_4_UOPS]",
+      "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector). [This event is alias to LSD.CYCLES_4_UOPS]",
       "Counter": "0,1,2,3",
       "CounterHTOff": "0,1,2,3,4,5,6,7",
       "SampleAfterValue": "2000003",

diff --git a/CLX/events/cascadelakex_fp_arith_inst.json b/CLX/events/cascadelakex_fp_arith_inst.json
@@ -1,9 +1,9 @@
 {
   "Header": {
     "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
-    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
-    "DatePublished": "04/17/2023",
-    "Version": "1.18",
+    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
+    "DatePublished": "06/07/2023",
+    "Version": "1.19",
     "Legend": ""
   },
   "Events": [

diff --git a/CLX/events/cascadelakex_uncore.json b/CLX/events/cascadelakex_uncore.json
@@ -1,9 +1,9 @@
 {
   "Header": {
     "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
-    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
-    "DatePublished": "04/17/2023",
-    "Version": "1.18",
+    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
+    "DatePublished": "06/07/2023",
+    "Version": "1.19",
     "Legend": ""
   },
   "Events": [
@@ -1726,7 +1726,7 @@
       "UMaskExt": "0x00",
       "EventName": "UNC_M_WPQ_OCCUPANCY",
       "BriefDescription": "Write Pending Queue Occupancy",
-      "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle.  This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations).  The WPQ is used to schedule writes out to the memory controller and to track the requests.  Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller).  They deallocate after being issued to DRAM.  Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC.  This is not to be confused with actually performing the write to DRAM.  Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies.  So, we provide filtering based on if the request has posted or not.  By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA.  The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory.  High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts???",
+      "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle.  This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations).  The WPQ is used to schedule writes out to the memory controller and to track the requests.  Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller).  They deallocate after being issued to DRAM.  Write requests themselves are able to complete (from the perspective of the rest of the system) as soon as they have 'posted' to the iMC.  This is not to be confused with actually performing the write to DRAM.  Therefore, the average latency for this queue is actually not useful for deconstructing intermediate write latencies.  So, we provide filtering based on if the request has posted or not.  By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA.  The 'posted' filter, on the other hand, provides information about how much queueing is actually happening in the iMC for writes before they are actually issued to memory.  High average occupancies will generally coincide with high write major mode counts.",
       "Counter": "0,1,2,3",
       "MSRValue": "0x00",
       "ELLC": "0",

diff --git a/CLX/events/cascadelakex_uncore_experimental.json b/CLX/events/cascadelakex_uncore_experimental.json
@@ -1,9 +1,9 @@
 {
   "Header": {
     "Copyright": "Copyright (c) 2001 - 2023 Intel Corporation. All rights reserved.",
-    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.18",
-    "DatePublished": "04/17/2023",
-    "Version": "1.18",
+    "Info": "Performance Monitoring Events for 2nd Generation Intel(R) Xeon(R) Processor Scalable Family based on Cascade Lake product - V1.19",
+    "DatePublished": "06/07/2023",
+    "Version": "1.19",
     "Legend": ""
   },
   "Events": [
@@ -48958,7 +48958,7 @@
       "UMaskExt": "0x00",
       "EventName": "UNC_M3UPI_RxC_HELD.PARALLEL_SUCCESS",
       "BriefDescription": "Message Held; Parallel Success",
-      "PublicDescription": "ad and bl messages were actually slotted into the same flit in paralle",
+      "PublicDescription": "ad and bl messages were actually slotted into the same flit in parallel",
       "Counter": "0,1,2",
       "MSRValue": "0x00",
       "ELLC": "0",

diff --git a/mapfile.csv b/mapfile.csv
@@ -90,10 +90,10 @@ GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_core.json,core,,,
 GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_fp_arith_inst.json,fp_arith_inst,,,
 GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_uncore.json,uncore,,,
 GenuineIntel-6-55-[01234],V1.30,/SKX/events/skylakex_uncore_experimental.json,uncore experimental,,,
-GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_core.json,core,,,
-GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_fp_arith_inst.json,fp_arith_inst,,,
-GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_uncore.json,uncore,,,
-GenuineIntel-6-55-[56789ABCDEF],V1.18,/CLX/events/cascadelakex_uncore_experimental.json,uncore experimental,,,
+GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_core.json,core,,,
+GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_fp_arith_inst.json,fp_arith_inst,,,
+GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_uncore.json,uncore,,,
+GenuineIntel-6-55-[56789ABCDEF],V1.19,/CLX/events/cascadelakex_uncore_experimental.json,uncore experimental,,,
 GenuineIntel-6-7A,V1.01,/GLP/events/goldmontplus_core.json,core,,,
 GenuineIntel-6-7A,V1.01,/GLP/events/goldmontplus_fp_arith_inst.json,fp_arith_inst,,,
 GenuineIntel-6-7A,V1.01,/GLP/events/goldmontplus_matrix.json,offcore,,,