Skip to content

Commit 30603b5

Browse files
hkalvala authored and rodrigovivi committed
drm/xe/xe2: Update MOCS fields in blitter instructions
Xe2 changes or adds bits for mocs in a few BLT instructions: XY_CTRL_SURF_COPY_BLT, XY_FAST_COLOR_BLT, XY_FAST_COPY_BLT, and MEM_SET. Modify the code to deal with the new location.

Unlike Xe1, the MOCS field in those instructions is only the MOCS index and not the Structure_MEMORY_OBJECT_CONTROL_STATE anymore. The pxp bit is now explicitly documented separately.

Bspec: 57567,57566,57565,57562
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Haridhar Kalvala <haridhar.kalvala@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://lore.kernel.org/r/20230929213640.3189912-5-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 4bdd8c2 commit 30603b5

File tree

2 files changed

+35
-17
lines changed

2 files changed

+35
-17
lines changed

drivers/gpu/drm/xe/regs/xe_gpu_commands.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#define CCS_SIZE_MASK 0x3FF
4646
#define CCS_SIZE_SHIFT 8
4747
#define XY_CTRL_SURF_MOCS_MASK GENMASK(31, 26)
48+
#define XE2_XY_CTRL_SURF_MOCS_INDEX_MASK GENMASK(31, 28)
4849
#define NUM_CCS_BYTES_PER_BLOCK 256
4950
#define NUM_BYTES_PER_CCS_BYTE 256
5051
#define NUM_CCS_BLKS_PER_XFER 1024
@@ -53,19 +54,22 @@
5354
#define XY_FAST_COLOR_BLT_DEPTH_32 (2 << 19)
5455
#define XY_FAST_COLOR_BLT_DW 16
5556
#define XY_FAST_COLOR_BLT_MOCS_MASK GENMASK(27, 22)
57+
#define XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK GENMASK(27, 24)
5658
#define XY_FAST_COLOR_BLT_MEM_TYPE_SHIFT 31
5759

5860
#define XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
5961
#define XY_FAST_COPY_BLT_DEPTH_32 (3<<24)
6062
#define XY_FAST_COPY_BLT_D1_SRC_TILE4 REG_BIT(31)
6163
#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30)
64+
#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20)
6265

6366
#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
6467
#define PVC_MEM_SET_CMD_LEN_DW 7
6568
#define PVC_MEM_SET_MATRIX REG_BIT(17)
6669
#define PVC_MEM_SET_DATA_FIELD GENMASK(31, 24)
6770
/* Bspec lists field as [6:0], but index alone is from [6:1] */
6871
#define PVC_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 1)
72+
#define XE2_MEM_SET_MOCS_INDEX_MASK GENMASK(6, 3)
6973

7074
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
7175

drivers/gpu/drm/xe/xe_migrate.c

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -517,23 +517,28 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
517517
u64 src_ofs, bool src_is_indirect,
518518
u32 size)
519519
{
520+
struct xe_device *xe = gt_to_xe(gt);
520521
u32 *cs = bb->cs + bb->len;
521522
u32 num_ccs_blks;
522-
u32 mocs = gt->mocs.uc_index;
523+
u32 mocs;
523524

524525
num_ccs_blks = DIV_ROUND_UP(xe_device_ccs_bytes(gt_to_xe(gt), size),
525526
NUM_CCS_BYTES_PER_BLOCK);
526527
xe_gt_assert(gt, num_ccs_blks <= NUM_CCS_BLKS_PER_XFER);
528+
529+
if (GRAPHICS_VERx100(xe) >= 2000)
530+
mocs = FIELD_PREP(XE2_XY_CTRL_SURF_MOCS_INDEX_MASK, gt->mocs.uc_index);
531+
else
532+
mocs = FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, gt->mocs.uc_index);
533+
527534
*cs++ = XY_CTRL_SURF_COPY_BLT |
528535
(src_is_indirect ? 0x0 : 0x1) << SRC_ACCESS_TYPE_SHIFT |
529536
(dst_is_indirect ? 0x0 : 0x1) << DST_ACCESS_TYPE_SHIFT |
530537
((num_ccs_blks - 1) & CCS_SIZE_MASK) << CCS_SIZE_SHIFT;
531538
*cs++ = lower_32_bits(src_ofs);
532-
*cs++ = upper_32_bits(src_ofs) |
533-
FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
539+
*cs++ = upper_32_bits(src_ofs) | mocs;
534540
*cs++ = lower_32_bits(dst_ofs);
535-
*cs++ = upper_32_bits(dst_ofs) |
536-
FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
541+
*cs++ = upper_32_bits(dst_ofs) | mocs;
537542

538543
bb->len = cs - bb->cs;
539544
}
@@ -544,24 +549,27 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
544549
unsigned int pitch)
545550
{
546551
struct xe_device *xe = gt_to_xe(gt);
552+
u32 mocs = 0;
553+
u32 tile_y = 0;
547554

548555
xe_gt_assert(gt, size / pitch <= S16_MAX);
549556
xe_gt_assert(gt, pitch / 4 <= S16_MAX);
550557
xe_gt_assert(gt, pitch <= U16_MAX);
551558

552-
bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
559+
if (GRAPHICS_VER(xe) >= 20)
560+
mocs = FIELD_PREP(XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index);
561+
553562
if (GRAPHICS_VERx100(xe) >= 1250)
554-
bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch |
555-
XY_FAST_COPY_BLT_D1_SRC_TILE4 |
556-
XY_FAST_COPY_BLT_D1_DST_TILE4;
557-
else
558-
bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch;
563+
tile_y = XY_FAST_COPY_BLT_D1_SRC_TILE4 | XY_FAST_COPY_BLT_D1_DST_TILE4;
564+
565+
bb->cs[bb->len++] = XY_FAST_COPY_BLT_CMD | (10 - 2);
566+
bb->cs[bb->len++] = XY_FAST_COPY_BLT_DEPTH_32 | pitch | tile_y | mocs;
559567
bb->cs[bb->len++] = 0;
560568
bb->cs[bb->len++] = (size / pitch) << 16 | pitch / 4;
561569
bb->cs[bb->len++] = lower_32_bits(dst_ofs);
562570
bb->cs[bb->len++] = upper_32_bits(dst_ofs);
563571
bb->cs[bb->len++] = 0;
564-
bb->cs[bb->len++] = pitch;
572+
bb->cs[bb->len++] = pitch | mocs;
565573
bb->cs[bb->len++] = lower_32_bits(src_ofs);
566574
bb->cs[bb->len++] = upper_32_bits(src_ofs);
567575
}
@@ -812,8 +820,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
812820
static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
813821
u32 size, u32 pitch)
814822
{
823+
struct xe_device *xe = gt_to_xe(gt);
815824
u32 *cs = bb->cs + bb->len;
816-
u32 mocs = gt->mocs.uc_index;
817825
u32 len = PVC_MEM_SET_CMD_LEN_DW;
818826

819827
*cs++ = PVC_MEM_SET_CMD | PVC_MEM_SET_MATRIX | (len - 2);
@@ -822,7 +830,10 @@ static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs
822830
*cs++ = pitch - 1;
823831
*cs++ = lower_32_bits(src_ofs);
824832
*cs++ = upper_32_bits(src_ofs);
825-
*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, mocs);
833+
if (GRAPHICS_VERx100(xe) >= 2000)
834+
*cs++ = FIELD_PREP(XE2_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
835+
else
836+
*cs++ = FIELD_PREP(PVC_MEM_SET_MOCS_INDEX_MASK, gt->mocs.uc_index);
826837

827838
xe_gt_assert(gt, cs - bb->cs == len + bb->len);
828839

@@ -835,15 +846,18 @@ static void emit_clear_main_copy(struct xe_gt *gt, struct xe_bb *bb,
835846
struct xe_device *xe = gt_to_xe(gt);
836847
u32 *cs = bb->cs + bb->len;
837848
u32 len = XY_FAST_COLOR_BLT_DW;
838-
u32 mocs = gt->mocs.uc_index;
839849

840850
if (GRAPHICS_VERx100(xe) < 1250)
841851
len = 11;
842852

843853
*cs++ = XY_FAST_COLOR_BLT_CMD | XY_FAST_COLOR_BLT_DEPTH_32 |
844854
(len - 2);
845-
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) |
846-
(pitch - 1);
855+
if (GRAPHICS_VERx100(xe) >= 2000)
856+
*cs++ = FIELD_PREP(XE2_XY_FAST_COLOR_BLT_MOCS_INDEX_MASK, gt->mocs.uc_index) |
857+
(pitch - 1);
858+
else
859+
*cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, gt->mocs.uc_index) |
860+
(pitch - 1);
847861
*cs++ = 0;
848862
*cs++ = (size / pitch) << 16 | pitch / 4;
849863
*cs++ = lower_32_bits(src_ofs);

0 commit comments

Comments
 (0)