Enable Arrow Lake and Lunar Lake platforms support

.
intel · Feb 20, 2024 · 4147c5d · 4147c5d
1 parent c3718d7
commit 4147c5d
Show file tree

Hide file tree

Showing 15 changed files with 752 additions and 41 deletions.
diff --git a/clang/compute_sdk/docs/cmcuserguide/cmcuserguide.rst b/clang/compute_sdk/docs/cmcuserguide/cmcuserguide.rst
@@ -1,6 +1,6 @@
 .. ========================= begin_copyright_notice ============================
 
-  Copyright (C) 2021-2023 Intel Corporation
+  Copyright (C) 2021-2024 Intel Corporation
 
   SPDX-License-Identifier: MIT
 
@@ -60,29 +60,31 @@ For Gen targets only, macro of the form CM_GEN\ *n* is defined (without a value)
 For Xe targets special macros are predefined with Xe name, like CM_XEHP.
 The targets supported by cmc, and the corresponding macros, are given in the table below.
 
-========= ======== =========== ============= ===================
-Gen/Xe    Name     Macro       CM_GENX value CM_GENX_REVID value
-========= ======== =========== ============= ===================
-GEN7_5    HSW      CM_GEN7_5   750           0
-GEN8      BDW      CM_GEN8     800           0
-GEN8_5    CHV      CM_GEN8_5   850           0
-GEN9      SKL      CM_GEN9     900           0
-..        BXT      CM_GEN9     920           0
-GEN9_5    KBL      CM_GEN9_5   950           0
-..        GLK      CM_GEN9_5   970           0
-GEN11     ICLLP    CM_GEN11    1150          0
-GEN12     TGLLP    CM_GEN12    1200          0
-...       RKL      CM_GEN12    1201          0
-...       DG1      CM_GEN12    1210          0
-...       ADLP     CM_GEN12    1220          0
-...       ADLS     CM_GEN12    1230          0
-...       ADLN     CM_GEN12    1240          0
-XEHP_SDV  XEHP_SDV CM_XEHP     1270          0
-XeHPG     DG2      CM_XEHPG    1271          0
-XeLPG     MTL      CM_XELPG    1275          0
-XeHPC     PVC      CM_XEHPC    1280          0
-...       PVCXT    CM_XEHPC    1280          5
-========= ======== =========== ============= ===================
+========= ======== ============ ============= ===================
+Gen/Xe    Name     Macro        CM_GENX value CM_GENX_REVID value
+========= ======== ============ ============= ===================
+GEN7_5    HSW      CM_GEN7_5    750           0
+GEN8      BDW      CM_GEN8      800           0
+GEN8_5    CHV      CM_GEN8_5    850           0
+GEN9      SKL      CM_GEN9      900           0
+..        BXT      CM_GEN9      920           0
+GEN9_5    KBL      CM_GEN9_5    950           0
+..        GLK      CM_GEN9_5    970           0
+GEN11     ICLLP    CM_GEN11     1150          0
+GEN12     TGLLP    CM_GEN12     1200          0
+...       RKL      CM_GEN12     1201          0
+...       DG1      CM_GEN12     1210          0
+...       ADLP     CM_GEN12     1220          0
+...       ADLS     CM_GEN12     1230          0
+...       ADLN     CM_GEN12     1240          0
+XEHP_SDV  XEHP_SDV CM_XEHP      1270          0
+XeHPG     DG2      CM_XEHPG     1271          0
+XeLPG     MTL      CM_XELPG     1275          0
+XeLPG+    ARL-H    CM_XELPGPLUS 1276          0
+XeHPC     PVC      CM_XEHPC     1280          0
+...       PVCXT    CM_XEHPC     1280          5
+Xe2LPG    LNL      CM_XE2_LPG   1295          0
+========= ======== ============ ============= ===================
 
 
 Also you may use CM_GENX_REVID to query revision id for given platform if

diff --git a/clang/compute_sdk/docs/cmlangspec/cmlangspec.rst b/clang/compute_sdk/docs/cmlangspec/cmlangspec.rst
@@ -2152,6 +2152,213 @@ ATOMIC_PREDEC           old_dst - 1                             new_dst
 ----------------------
 
 
+Typed LSC Surface load/store/prefetch {Xe2+}
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+cm_load4_typed
+""""""""""""""
+
+.. code-block:: c++
+
+  template <ChannelMaskType ChannelMask, CacheHint L1Hint = Default,
+            CacheHint L2Hint = Default>
+  void cm_load4_typed(matrix_ref<T, N, M> Data, SurfaceIndex Surface,
+                      vector<unsigned, M> U, vector<unsigned, M> V = 0,
+                      vector<unsigned, M> R = 0, vector<unsigned, M> LOD = 0);
+
+  template <ChannelMaskType ChannelMask, CacheHint L1Hint = Default,
+            CacheHint L2Hint = Default>
+  void cm_load4_typed(matrix_ref<T, N, M> Data, vector<uint16_t, M> Pred,
+                      SurfaceIndex Surface, vector<unsigned, M> U,
+                      vector<unsigned, M> V = 0, vector<unsigned, M> R = 0,
+                      vector<unsigned, M> LOD = 0);
+
+The compiler generates code for the hardware to perform gathering read from
+the given offsets. The results are returned in ``Data`` matrix with each
+enabled channel returned in the next row of the matrix. The enabled channels
+are returned in RGBA order with no gap for disabled channels. Out-of-bound
+load operations return zero.
+
+=============== ============================================================
+Parameter       Description
+=============== ============================================================
+ChannelMask
+                Mask for enabled channels.
+
+L1Hint, L2Hint
+                Load cache hints. Not all the combinations are supported.
+                The compiler will emit an error when invalid hints are set.
+                Optional arguments. Should be both present or both omitted.
+
+Data
+                The matrix_ref object to store return values. The ``T`` type
+                must be of size of DWord (i.e., int, unsigned or float).
+
+                The size ``N`` must be equal to the number of enabled
+                channels.
+
+                The size ``M`` may have any value. HW supports native SIMD
+                sizes of 16 or 32. All other sizes will be split and/or
+                extended by the compiler into a sequence of SIMD16 and
+                SIMD32 load operations.
+
+Pred (optional)
+                The execution predicate for conditional load.
+
+Surface
+                Surface index corresponding to 1D, 2D or 3D image surface.
+
+U
+                The x coordinates of the data elements (in unit of texels)
+                to be loaded from the surface.
+
+V
+                The y coordinates of the data elements to be loaded from
+                non-1D images. Ignored otherwise. Optional, default value
+                is zero.
+
+R
+                The z coordinates of the data elements to be loaded from
+                3D images. Ignored otherwise. Optional, default value is
+                zero.
+
+LOD
+                The w coordinates of the data elements to be loaded from
+                images. Ignored otherwise. Optional, default value is zero.
+=============== ============================================================
+
+cm_store4_typed
+"""""""""""""""
+
+.. code-block:: c++
+
+  template <ChannelMaskType ChannelMask, CacheHint L1Hint = Default,
+            CacheHint L2Hint = Default>
+  void cm_store4_typed(matrix<T, N, M> Data, SurfaceIndex Surface,
+                       vector<unsigned, M> U, vector<unsigned, M> V = 0,
+                       vector<unsigned, M> R = 0, vector<unsigned, M> LOD = 0);
+
+  template <ChannelMaskType ChannelMask, CacheHint L1Hint = Default,
+            CacheHint L2Hint = Default>
+  void cm_store4_typed(matrix<T, N, M> Data, vector<uint16_t, M> Pred,
+                       SurfaceIndex Surface, vector<unsigned, M> U,
+                       vector<unsigned, M> V = 0, vector<unsigned, M> R = 0,
+                       vector<unsigned, M> LOD = 0);
+
+The compiler generates code for the hardware to perform scattered write to
+the given offsets. If a required pixel channel (as per Surface State pixel
+format) is missing from the channel mask setting, then the memory write
+data will be undefined. In other words, the channel mask setting must have
+all channels that are present in the Surface's pixel format in memory.
+Out-of-bound write operations are ignored by hardware and have no side
+effects.
+
+=============== ============================================================
+Parameter       Description
+=============== ============================================================
+ChannelMask
+                Mask for enabled channels. Only contiguous channel masks are
+                supported (R, GR, BGR and ABGR).
+
+L1Hint, L2Hint
+                Store cache hints. Not all the combinations are supported.
+                The compiler will emit an error when invalid hints are set.
+                Optional arguments. Should be both present or both omitted.
+
+Data
+                The matrix holding data to be written. The ``T`` type must
+                be of size of DWord (i.e., int, unsigned or float).
+
+                The size ``N`` must be equal to the number of enabled
+                channels.
+
+                The size ``M`` may have any value. HW supports native SIMD
+                sizes of 16 or 32. All other sizes will be split and/or
+                extended by the compiler into a sequence of SIMD16 and
+                SIMD32 store operations.
+
+Pred (optional)
+                The execution predicate for conditional store.
+
+Surface
+                Surface index corresponding to 1D, 2D or 3D image surface.
+
+U
+                The x coordinates of the data elements (in unit of texels)
+                to be stored into the surface.
+
+V
+                The y coordinates of the data elements to be stored into
+                non-1D images. Ignored otherwise. Optional, default value
+                is zero.
+
+R
+                The z coordinates of the data elements to be stored into
+                3D images. Ignored otherwise. Optional, default value is
+                zero.
+
+LOD
+                The w coordinates of the data elements to be stored into
+                images. Ignored otherwise. Optional, default value is zero.
+=============== ============================================================
+
+cm_prefetch4_typed
+""""""""""""""""""
+
+.. code-block:: c++
+
+  template <ChannelMaskType ChannelMask, CacheHint L1Hint = Default,
+            CacheHint L2Hint = Default>
+  void cm_prefetch4_typed(SurfaceIndex Surface,
+                          vector<unsigned, M> U, vector<unsigned, M> V = 0,
+                          vector<unsigned, M> R = 0, vector<unsigned, M> LOD = 0);
+
+  template <ChannelMaskType ChannelMask, CacheHint L1Hint = Default,
+            CacheHint L2Hint = Default>
+  void cm_prefetch4_typed(vector<uint16_t, M> Pred, SurfaceIndex Surface,
+                          vector<unsigned, M> U, vector<unsigned, M> V = 0,
+                          vector<unsigned, M> R = 0, vector<unsigned, M> LOD = 0);
+
+The compiler generates code for the hardware to perform a gathering prefetch
+from the given offsets into L1 and/or L2 cache. No results are returned. This
+operation has no side effects other than on the latency of subsequent loads.
+
+=============== ============================================================
+Parameter       Description
+=============== ============================================================
+ChannelMask
+                Mask for enabled channels.
+
+L1Hint, L2Hint
+                Prefetch cache hints. Not all the combinations are
+                supported. The compiler will emit an error when invalid
+                hints are set. Must be present.
+
+Pred (optional)
+                The execution predicate for conditional prefetch.
+
+Surface
+                Surface index corresponding to 1D, 2D or 3D image surface.
+
+U
+                The x coordinates of the data elements (in unit of texels)
+                to be fetched from the surface.
+
+V
+                The y coordinates of the data elements to be fetched from
+                non-1D images. Ignored otherwise. Optional, default value
+                is zero.
+
+R
+                The z coordinates of the data elements to be fetched from
+                3D images. Ignored otherwise. Optional, default value is
+                zero.
+
+LOD
+                The w coordinates of the data elements to be fetched from
+                images. Ignored otherwise. Optional, default value is zero.
+=============== ============================================================
+
 Media Block Read/Write
 ^^^^^^^^^^^^^^^^^^^^^^
 
@@ -3840,12 +4047,12 @@ in the order they are expected.
    ============================================ =======================================================
    CM_3D_LOAD                                   u, v, lod, r
    CM_3D_LOAD_LZ                                u, v, r
+   CM_3D_LOAD_L {Xe2+}                          u, v, r, lod
    CM_3D_LOAD_2DMS_W (type uint)                si, mcsl, mcsh, u, v, r, lod
    CM_3D_LOAD_2DMS_W (type ushort) {ICL+}       si, mcs0, mcs1, mcs2, mcs3, u, v, r, lod
    CM_3D_LOAD_MCS                               u, v, r, lod
    ============================================ =======================================================
 
-
 Each of the values above can be composed with CM_3D_LOAD_NULLMASK_ENABLE
 using the | operation to enable a variant of the operation that also
 returns pixel null mask information.
@@ -3860,11 +4067,11 @@ The following table lists surface types supported for each operation:
    ==================== =======================
    CM_3D_LOAD           1D, 2D, 3D, BUFFER
    CM_3D_LOAD_LZ        1D, 2D, 3D, BUFFER
+   CM_3D_LOAD_L         1D, 2D, 3D, BUFFER
    CM_3D_LOAD_2DMS_W    2D
    CM_3D_LOAD_MCS       2D
    ==================== =======================
 
-
 The following table lists how the common "u", "v", "r", and "ai" arguments are interpreted
 against different surface types:
 

diff --git a/clang/lib/Basic/Targets/GenXPlatforms.cpp b/clang/lib/Basic/Targets/GenXPlatforms.cpp
@@ -34,6 +34,66 @@ struct TargetProperties {
 
 // clang-format off
 static const std::unordered_map<uint32_t, TargetProperties> TargetProps = {
+  { encodeGmdId(20, 4, 4),
+    { /*.HasFP64 =*/ true,
+      /*.HasBFloat16 =*/ true,
+      /*.HasSLMCasInt64 =*/ true,
+      /*.HasDpas =*/ true,
+      /*.HasDpasw =*/ false,
+      /*.HasDpasFp16 =*/ true,
+      /*.HasDpasBf16 =*/ true,
+      /*.HasDpasTf32 =*/ true,
+      /*.GrfWidth =*/ 512,
+      /*.SupportedGrfNums =*/ {128,256},
+      /*.MaxSLMSize =*/ 128, }, },
+  { encodeGmdId(20, 4, 1),
+    { /*.HasFP64 =*/ true,
+      /*.HasBFloat16 =*/ true,
+      /*.HasSLMCasInt64 =*/ true,
+      /*.HasDpas =*/ true,
+      /*.HasDpasw =*/ false,
+      /*.HasDpasFp16 =*/ true,
+      /*.HasDpasBf16 =*/ true,
+      /*.HasDpasTf32 =*/ true,
+      /*.GrfWidth =*/ 512,
+      /*.SupportedGrfNums =*/ {128,256},
+      /*.MaxSLMSize =*/ 128, }, },
+  { encodeGmdId(20, 4, 0),
+    { /*.HasFP64 =*/ true,
+      /*.HasBFloat16 =*/ true,
+      /*.HasSLMCasInt64 =*/ true,
+      /*.HasDpas =*/ true,
+      /*.HasDpasw =*/ false,
+      /*.HasDpasFp16 =*/ true,
+      /*.HasDpasBf16 =*/ true,
+      /*.HasDpasTf32 =*/ true,
+      /*.GrfWidth =*/ 512,
+      /*.SupportedGrfNums =*/ {128,256},
+      /*.MaxSLMSize =*/ 128, }, },
+  { encodeGmdId(12, 74, 4),
+    { /*.HasFP64 =*/ true,
+      /*.HasBFloat16 =*/ true,
+      /*.HasSLMCasInt64 =*/ false,
+      /*.HasDpas =*/ true,
+      /*.HasDpasw =*/ true,
+      /*.HasDpasFp16 =*/ true,
+      /*.HasDpasBf16 =*/ true,
+      /*.HasDpasTf32 =*/ false,
+      /*.GrfWidth =*/ 256,
+      /*.SupportedGrfNums =*/ {128,256},
+      /*.MaxSLMSize =*/ 64, }, },
+  { encodeGmdId(12, 74, 0),
+    { /*.HasFP64 =*/ true,
+      /*.HasBFloat16 =*/ true,
+      /*.HasSLMCasInt64 =*/ false,
+      /*.HasDpas =*/ true,
+      /*.HasDpasw =*/ true,
+      /*.HasDpasFp16 =*/ true,
+      /*.HasDpasBf16 =*/ true,
+      /*.HasDpasTf32 =*/ false,
+      /*.GrfWidth =*/ 256,
+      /*.SupportedGrfNums =*/ {128,256},
+      /*.MaxSLMSize =*/ 64, }, },
   { encodeGmdId(12, 71, 4),
     { /*.HasFP64 =*/ true,
       /*.HasBFloat16 =*/ false,

diff --git a/clang/lib/CodeGen/CGCM.h b/clang/lib/CodeGen/CGCM.h
@@ -645,6 +645,13 @@ class CGCMRuntime {
                                         CMBuiltinKind Kind);
   /// \brief Postprocess block 2d builtins load/store/prefetch.
   llvm::Value *HandleBuiltinLSC2dImpl(CMCallInfo &CallInfo, CMBuiltinKind Kind);
+
+  llvm::Value *HandleBuiltinLSCTypedImpl(CMCallInfo &CallInfo,
+                                         CMBuiltinKind Kind);
+  /// \brief Postprocess Xe2+ typed 2d load/store
+  llvm::Value *HandleBuiltinLSCTyped2DImpl(CMCallInfo &CallInfo,
+                                           CMBuiltinKind Kind);
+
   /// \brief Postprocess BTI-based load/store/prefetch
   llvm::Value *HandleBuiltinLSCImpl(CMCallInfo &CallInfo, CMBuiltinKind Kind);