Skip to content

Commit f2f9098

Browse files
committed
drm/xe: Avoid reading RMW registers in emit_wa_job
To allow VFs to properly handle LRC WAs, we should postpone all RMW register operations and let them be run by the engine itself, since any attempt to read registers from within the driver will fail on the VF. Use MI_MATH and the ALU for that. Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> Cc: Michał Winiarski <michal.winiarski@intel.com> Cc: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Matt Roper <matthew.d.roper@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20250303173522.1822-4-michal.wajdeczko@intel.com
1 parent b823f80 commit f2f9098

File tree

1 file changed

+63
-21
lines changed

1 file changed

+63
-21
lines changed

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212

1313
#include <generated/xe_wa_oob.h>
1414

15+
#include "instructions/xe_alu_commands.h"
1516
#include "instructions/xe_gfxpipe_commands.h"
1617
#include "instructions/xe_mi_commands.h"
18+
#include "regs/xe_engine_regs.h"
1719
#include "regs/xe_gt_regs.h"
1820
#include "xe_assert.h"
1921
#include "xe_bb.h"
@@ -176,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
176178
return 0;
177179
}
178180

179-
/*
180-
* Convert back from encoded value to type-safe, only to be used when reg.mcr
181-
* is true
182-
*/
183-
static struct xe_reg_mcr to_xe_reg_mcr(const struct xe_reg reg)
184-
{
185-
return (const struct xe_reg_mcr){.__reg.raw = reg.raw };
186-
}
187-
188181
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
189182
{
190183
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
@@ -194,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
194187
struct xe_bb *bb;
195188
struct dma_fence *fence;
196189
long timeout;
190+
int count_rmw = 0;
197191
int count = 0;
198192

199193
if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
@@ -206,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
206200
if (IS_ERR(bb))
207201
return PTR_ERR(bb);
208202

209-
xa_for_each(&sr->xa, idx, entry)
210-
++count;
203+
/* count RMW registers as those will be handled separately */
204+
xa_for_each(&sr->xa, idx, entry) {
205+
if (entry->reg.masked || entry->clr_bits == ~0)
206+
++count;
207+
else
208+
++count_rmw;
209+
}
211210

212-
if (count) {
211+
if (count || count_rmw)
213212
xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
214213

214+
if (count) {
215+
/* emit single LRI with all non RMW regs */
216+
215217
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
216218

217219
xa_for_each(&sr->xa, idx, entry) {
218220
struct xe_reg reg = entry->reg;
219-
struct xe_reg_mcr reg_mcr = to_xe_reg_mcr(reg);
220221
u32 val;
221222

222-
/*
223-
* Skip reading the register if it's not really needed
224-
*/
225223
if (reg.masked)
226224
val = entry->clr_bits << 16;
227-
else if (entry->clr_bits + 1)
228-
val = (reg.mcr ?
229-
xe_gt_mcr_unicast_read_any(gt, reg_mcr) :
230-
xe_mmio_read32(&gt->mmio, reg)) & (~entry->clr_bits);
231-
else
225+
else if (entry->clr_bits == ~0)
232226
val = 0;
227+
else
228+
continue;
233229

234230
val |= entry->set_bits;
235231

@@ -239,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
239235
}
240236
}
241237

238+
if (count_rmw) {
239+
/* emit MI_MATH for each RMW reg */
240+
241+
xa_for_each(&sr->xa, idx, entry) {
242+
if (entry->reg.masked || entry->clr_bits == ~0)
243+
continue;
244+
245+
bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
246+
bb->cs[bb->len++] = entry->reg.addr;
247+
bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
248+
249+
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
250+
MI_LRI_LRM_CS_MMIO;
251+
bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
252+
bb->cs[bb->len++] = entry->clr_bits;
253+
bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
254+
bb->cs[bb->len++] = entry->set_bits;
255+
256+
bb->cs[bb->len++] = MI_MATH(8);
257+
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
258+
bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1);
259+
bb->cs[bb->len++] = CS_ALU_INSTR_AND;
260+
bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
261+
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
262+
bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);
263+
bb->cs[bb->len++] = CS_ALU_INSTR_OR;
264+
bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
265+
266+
bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
267+
bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
268+
bb->cs[bb->len++] = entry->reg.addr;
269+
270+
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
271+
entry->reg.addr, entry->clr_bits, entry->set_bits);
272+
}
273+
274+
/* reset used GPR */
275+
bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO;
276+
bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
277+
bb->cs[bb->len++] = 0;
278+
bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
279+
bb->cs[bb->len++] = 0;
280+
bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
281+
bb->cs[bb->len++] = 0;
282+
}
283+
242284
xe_lrc_emit_hwe_state_instructions(q, bb);
243285

244286
job = xe_bb_create_job(q, bb);

0 commit comments

Comments
 (0)