1212
1313#include <generated/xe_wa_oob.h>
1414
15+ #include "instructions/xe_alu_commands.h"
1516#include "instructions/xe_gfxpipe_commands.h"
1617#include "instructions/xe_mi_commands.h"
18+ #include "regs/xe_engine_regs.h"
1719#include "regs/xe_gt_regs.h"
1820#include "xe_assert.h"
1921#include "xe_bb.h"
@@ -176,15 +178,6 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
176178 return 0 ;
177179}
178180
179- /*
180- * Rebuild the type-safe struct xe_reg_mcr from a plain struct xe_reg by copying
181- * its raw encoded value into __reg.raw; only to be used when reg.mcr is true
182- */
183- static struct xe_reg_mcr to_xe_reg_mcr (const struct xe_reg reg )
184- {
185- return (const struct xe_reg_mcr ){.__reg .raw = reg .raw };
186- }
187-
188181static int emit_wa_job (struct xe_gt * gt , struct xe_exec_queue * q )
189182{
190183 struct xe_reg_sr * sr = & q -> hwe -> reg_lrc ;
@@ -194,6 +187,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
194187 struct xe_bb * bb ;
195188 struct dma_fence * fence ;
196189 long timeout ;
190+ int count_rmw = 0 ;
197191 int count = 0 ;
198192
199193 if (q -> hwe -> class == XE_ENGINE_CLASS_RENDER )
@@ -206,30 +200,32 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
206200 if (IS_ERR (bb ))
207201 return PTR_ERR (bb );
208202
209- xa_for_each (& sr -> xa , idx , entry )
210- ++ count ;
203+ /* count RMW registers as those will be handled separately */
204+ xa_for_each (& sr -> xa , idx , entry ) {
205+ if (entry -> reg .masked || entry -> clr_bits == ~0 )
206+ ++ count ;
207+ else
208+ ++ count_rmw ;
209+ }
211210
212- if (count ) {
211+ if (count || count_rmw )
213212 xe_gt_dbg (gt , "LRC WA %s save-restore batch\n" , sr -> name );
214213
214+ if (count ) {
215+ /* emit single LRI with all non RMW regs */
216+
215217 bb -> cs [bb -> len ++ ] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS (count );
216218
217219 xa_for_each (& sr -> xa , idx , entry ) {
218220 struct xe_reg reg = entry -> reg ;
219- struct xe_reg_mcr reg_mcr = to_xe_reg_mcr (reg );
220221 u32 val ;
221222
222- /*
223- * Skip reading the register if it's not really needed
224- */
225223 if (reg .masked )
226224 val = entry -> clr_bits << 16 ;
227- else if (entry -> clr_bits + 1 )
228- val = (reg .mcr ?
229- xe_gt_mcr_unicast_read_any (gt , reg_mcr ) :
230- xe_mmio_read32 (& gt -> mmio , reg )) & (~entry -> clr_bits );
231- else
225+ else if (entry -> clr_bits == ~0 )
232226 val = 0 ;
227+ else
228+ continue ;
233229
234230 val |= entry -> set_bits ;
235231
@@ -239,6 +235,52 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
239235 }
240236 }
241237
238+ if (count_rmw ) {
239+ /* emit MI_MATH for each RMW reg */
240+
241+ xa_for_each (& sr -> xa , idx , entry ) {
242+ if (entry -> reg .masked || entry -> clr_bits == ~0 )
243+ continue ;
244+
245+ bb -> cs [bb -> len ++ ] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO ;
246+ bb -> cs [bb -> len ++ ] = entry -> reg .addr ;
247+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 0 ).addr ;
248+
249+ bb -> cs [bb -> len ++ ] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS (2 ) |
250+ MI_LRI_LRM_CS_MMIO ;
251+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 1 ).addr ;
252+ bb -> cs [bb -> len ++ ] = entry -> clr_bits ;
253+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 2 ).addr ;
254+ bb -> cs [bb -> len ++ ] = entry -> set_bits ;
255+
256+ bb -> cs [bb -> len ++ ] = MI_MATH (8 );
257+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_LOAD (SRCA , REG0 );
258+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_LOADINV (SRCB , REG1 );
259+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_AND ;
260+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_STORE (REG0 , ACCU );
261+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_LOAD (SRCA , REG0 );
262+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_LOAD (SRCB , REG2 );
263+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_OR ;
264+ bb -> cs [bb -> len ++ ] = CS_ALU_INSTR_STORE (REG0 , ACCU );
265+
266+ bb -> cs [bb -> len ++ ] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO ;
267+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 0 ).addr ;
268+ bb -> cs [bb -> len ++ ] = entry -> reg .addr ;
269+
270+ xe_gt_dbg (gt , "REG[%#x] = ~%#x|%#x\n" ,
271+ entry -> reg .addr , entry -> clr_bits , entry -> set_bits );
272+ }
273+
274+ /* reset used GPR */
275+ bb -> cs [bb -> len ++ ] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS (3 ) | MI_LRI_LRM_CS_MMIO ;
276+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 0 ).addr ;
277+ bb -> cs [bb -> len ++ ] = 0 ;
278+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 1 ).addr ;
279+ bb -> cs [bb -> len ++ ] = 0 ;
280+ bb -> cs [bb -> len ++ ] = CS_GPR_REG (0 , 2 ).addr ;
281+ bb -> cs [bb -> len ++ ] = 0 ;
282+ }
283+
242284 xe_lrc_emit_hwe_state_instructions (q , bb );
243285
244286 job = xe_bb_create_job (q , bb );
0 commit comments