Skip to content

Commit d115b34

Browse files
mattrope authored and Sasha Levin committed
drm/xe/wa: Steer RMW of MCR registers while building default LRC
[ Upstream commit 43d37df ] When generating the default LRC, if a register is not masked, we apply any save-restore programming necessary via a read-modify-write sequence that will ensure we only update the relevant bits/fields without clobbering the rest of the register. However some of the registers that need to be updated might be MCR registers which require steering to a non-terminated instance to ensure we can read back a valid, non-zero value. The steering of reads originating from a command streamer is controlled by register CS_MMIO_GROUP_INSTANCE_SELECT. Emit additional MI_LRI commands to update the steering before any RMW of an MCR register to ensure the reads are performed properly. Note that needing to perform a RMW of an MCR register while building the default LRC is pretty rare. Most of the MCR registers that are part of an engine's LRCs are also masked registers, so no RMW is necessary. Fixes: f2f9098 ("drm/xe: Avoid reading RMW registers in emit_wa_job") Cc: Michal Wajdeczko <michal.wajdeczko@intel.com> Reviewed-by: Balasubramani Vivekanandan <balasubramani.vivekanandan@intel.com> Link: https://patch.msgid.link/20260206223058.387014-2-matthew.d.roper@intel.com Signed-off-by: Matt Roper <matthew.d.roper@intel.com> (cherry picked from commit 6c2e331c915ba9e774aa847921262805feb00863) Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 3ed2ae6 commit d115b34

File tree

2 files changed

+60
-12
lines changed

2 files changed

+60
-12
lines changed

drivers/gpu/drm/xe/regs/xe_engine_regs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@
9696
#define ENABLE_SEMAPHORE_POLL_BIT REG_BIT(13)
9797

9898
#define RING_CMD_CCTL(base) XE_REG((base) + 0xc4, XE_REG_OPTION_MASKED)
99+
100+
#define CS_MMIO_GROUP_INSTANCE_SELECT(base) XE_REG((base) + 0xcc)
101+
#define SELECTIVE_READ_ADDRESSING REG_BIT(30)
102+
#define SELECTIVE_READ_GROUP REG_GENMASK(29, 23)
103+
#define SELECTIVE_READ_INSTANCE REG_GENMASK(22, 16)
104+
99105
/*
100106
* CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
101107
* The lsb of each can be considered a separate enabling bit for encryption.

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 54 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -187,11 +187,15 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
187187
return ret;
188188
}
189189

190+
/* Dwords required to emit a RMW of a register */
191+
#define EMIT_RMW_DW 20
192+
190193
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
191194
{
192-
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
195+
struct xe_hw_engine *hwe = q->hwe;
196+
struct xe_reg_sr *sr = &hwe->reg_lrc;
193197
struct xe_reg_sr_entry *entry;
194-
int count_rmw = 0, count = 0, ret;
198+
int count_rmw = 0, count_rmw_mcr = 0, count = 0, ret;
195199
unsigned long idx;
196200
struct xe_bb *bb;
197201
size_t bb_len = 0;
@@ -201,24 +205,44 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
201205
xa_for_each(&sr->xa, idx, entry) {
202206
if (entry->reg.masked || entry->clr_bits == ~0)
203207
++count;
208+
else if (entry->reg.mcr)
209+
++count_rmw_mcr;
204210
else
205211
++count_rmw;
206212
}
207213

208214
if (count)
209215
bb_len += count * 2 + 1;
210216

211-
if (count_rmw)
212-
bb_len += count_rmw * 20 + 7;
217+
/*
218+
* RMW of MCR registers is the same as a normal RMW, except an
219+
* additional LRI (3 dwords) is required per register to steer the read
220+
* to a non-terminated instance.
221+
*
222+
* We could probably shorten the batch slightly by eliding the
223+
* steering for consecutive MCR registers that have the same
224+
* group/instance target, but it's not worth the extra complexity to do
225+
* so.
226+
*/
227+
bb_len += count_rmw * EMIT_RMW_DW;
228+
bb_len += count_rmw_mcr * (EMIT_RMW_DW + 3);
229+
230+
/*
231+
* After doing all RMW, we need 7 trailing dwords to clean up,
232+
* plus an additional 3 dwords to reset steering if any of the
233+
* registers were MCR.
234+
*/
235+
if (count_rmw || count_rmw_mcr)
236+
bb_len += 7 + (count_rmw_mcr ? 3 : 0);
213237

214-
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
238+
if (hwe->class == XE_ENGINE_CLASS_RENDER)
215239
/*
216240
* Big enough to emit all of the context's 3DSTATE via
217241
* xe_lrc_emit_hwe_state_instructions()
218242
*/
219-
bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
243+
bb_len += xe_gt_lrc_size(gt, hwe->class) / sizeof(u32);
220244

221-
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
245+
xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", hwe->name, bb_len);
222246

223247
bb = xe_bb_new(gt, bb_len, false);
224248
if (IS_ERR(bb))
@@ -253,13 +277,23 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
253277
}
254278
}
255279

256-
if (count_rmw) {
257-
/* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
258-
280+
if (count_rmw || count_rmw_mcr) {
259281
xa_for_each(&sr->xa, idx, entry) {
260282
if (entry->reg.masked || entry->clr_bits == ~0)
261283
continue;
262284

285+
if (entry->reg.mcr) {
286+
struct xe_reg_mcr reg = { .__reg.raw = entry->reg.raw };
287+
u8 group, instance;
288+
289+
xe_gt_mcr_get_nonterminated_steering(gt, reg, &group, &instance);
290+
*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
291+
*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(hwe->mmio_base).addr;
292+
*cs++ = SELECTIVE_READ_ADDRESSING |
293+
REG_FIELD_PREP(SELECTIVE_READ_GROUP, group) |
294+
REG_FIELD_PREP(SELECTIVE_READ_INSTANCE, instance);
295+
}
296+
263297
*cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
264298
*cs++ = entry->reg.addr;
265299
*cs++ = CS_GPR_REG(0, 0).addr;
@@ -285,8 +319,9 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
285319
*cs++ = CS_GPR_REG(0, 0).addr;
286320
*cs++ = entry->reg.addr;
287321

288-
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
289-
entry->reg.addr, entry->clr_bits, entry->set_bits);
322+
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x%s\n",
323+
entry->reg.addr, entry->clr_bits, entry->set_bits,
324+
entry->reg.mcr ? " (MCR)" : "");
290325
}
291326

292327
/* reset used GPR */
@@ -298,6 +333,13 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
298333
*cs++ = 0;
299334
*cs++ = CS_GPR_REG(0, 2).addr;
300335
*cs++ = 0;
336+
337+
/* reset steering */
338+
if (count_rmw_mcr) {
339+
*cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
340+
*cs++ = CS_MMIO_GROUP_INSTANCE_SELECT(q->hwe->mmio_base).addr;
341+
*cs++ = 0;
342+
}
301343
}
302344

303345
cs = xe_lrc_emit_hwe_state_instructions(q, cs);

0 commit comments

Comments
 (0)