Commit 26caeae

dceraolo authored and rodrigovivi committed
drm/xe/guc: Set RCS/CCS yield policy
All recent platforms (including all the ones officially supported by the
Xe driver) do not allow concurrent execution of RCS and CCS workloads
from different address spaces, with the HW blocking the context switch
when it detects such a scenario. The DUAL_QUEUE flag helps with this,
by causing the GuC to not submit a context it knows will not be able to
execute. This, however, causes a new problem: if RCS and CCS queues
have pending workloads from different address spaces, the GuC needs to
choose from which of the 2 queues to pick the next workload to execute.

By default, the GuC prioritizes RCS submissions over CCS ones, which
can lead to CCS workloads being significantly (or completely) starved
of execution time. The driver can tune this by setting a dedicated
scheduling policy KLV; this KLV allows the driver to specify a quantum
(in ms) and a ratio (percentage value between 0 and 100), and the GuC
will prioritize the CCS for that percentage of each quantum.

Given that we want to guarantee enough RCS throughput to avoid missing
frames, we set the yield policy to 20% of each 80ms interval.

v2: updated quantum and ratio, improved comment, use
    xe_guc_submit_disable in gt_sanitize

Fixes: d9a1ae0 ("drm/xe/guc: Enable WA_DUAL_QUEUE for newer platforms")
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Tested-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Link: https://lore.kernel.org/r/20250905235632.3333247-2-daniele.ceraolospurio@intel.com
(cherry picked from commit 8843444)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo added #include "xe_guc_submit.h" while backporting]
1 parent ae5fbbd commit 26caeae
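Purely as an illustration (not part of the commit): with the values chosen above, the new scheduling-policy path ends up sending a short 4-dword H2G message to the GuC. The sketch below recomputes that payload in userspace, reusing the constants introduced by the diff and assuming the usual GuC KLV header packing (key in bits 31:16, length in bits 15:0) documented in guc_klvs_abi.h.

#include <stdint.h>
#include <stdio.h>

/* Constants copied from the diff below; the header packing in dw1 is an
 * assumption based on the KLV layout documented in guc_klvs_abi.h. */
#define XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV          0x509
#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY  0x1001
#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN  2u
#define RC_YIELD_DURATION 80 /* in ms */
#define RC_YIELD_RATIO    20 /* in percent */

int main(void)
{
        /* 4 dwords: action, KLV header, then the two KLV values. */
        uint32_t data[4];
        int i;

        data[0] = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
        data[1] = (GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY << 16) |
                  GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN;
        data[2] = RC_YIELD_DURATION;
        data[3] = RC_YIELD_RATIO;

        for (i = 0; i < 4; i++)
                printf("dw%d: 0x%08x\n", i, data[i]);

        return 0;
}

With these values the GuC would favor the CCS for 20ms out of every 80ms, and only when both RCS and CCS are busy with workloads from incompatible address spaces.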

File tree

6 files changed: 98 additions, 5 deletions

drivers/gpu/drm/xe/abi/guc_actions_abi.h

Lines changed: 1 addition & 0 deletions
@@ -117,6 +117,7 @@ enum xe_guc_action {
 	XE_GUC_ACTION_ENTER_S_STATE = 0x501,
 	XE_GUC_ACTION_EXIT_S_STATE = 0x502,
 	XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+	XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
 	XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
 	XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
 	XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,

drivers/gpu/drm/xe/abi/guc_klvs_abi.h

Lines changed: 25 additions & 0 deletions
@@ -17,6 +17,7 @@
  * | 0 | 31:16 | **KEY** - KLV key identifier |
  * | | | - `GuC Self Config KLVs`_ |
  * | | | - `GuC Opt In Feature KLVs`_ |
+ * | | | - `GuC Scheduling Policies KLVs`_ |
  * | | | - `GuC VGT Policy KLVs`_ |
  * | | | - `GuC VF Configuration KLVs`_ |
  * | | | |
@@ -152,6 +153,30 @@ enum {
 #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003
 #define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u
 
+/**
+ * DOC: GuC Scheduling Policies KLVs
+ *
+ * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV.
+ *
+ * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001
+ *      Some platforms do not allow concurrent execution of RCS and CCS
+ *      workloads from different address spaces. By default, the GuC prioritizes
+ *      RCS submissions over CCS ones, which can lead to CCS workloads being
+ *      significantly (or completely) starved of execution time. This KLV allows
+ *      the driver to specify a quantum (in ms) and a ratio (percentage value
+ *      between 0 and 100), and the GuC will prioritize the CCS for that
+ *      percentage of each quantum. For example, specifying 100ms and 30% will
+ *      make the GuC prioritize the CCS for 30ms of every 100ms.
+ *      Note that this does not necessarly mean that RCS and CCS engines will
+ *      only be active for their percentage of the quantum, as the restriction
+ *      only kicks in if both classes are fully busy with non-compatible address
+ *      spaces; i.e., if one engine is idle or running the same address space,
+ *      a pending job on the other engine will still be submitted to the HW no
+ *      matter what the ratio is
+ */
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u
+
 /**
  * DOC: GuC VGT Policy KLVs
  *
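As a rough companion to the 100ms/30% example in the new DOC block above: the KLV itself is three dwords, with the key and length packed into dword 0 as per the table quoted at the top of the file (key in bits 31:16; the length is assumed to sit in bits 15:0). pack_yield_klv() below is a hypothetical helper for illustration only, not code from this commit.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: packs the RENDER_COMPUTE_YIELD KLV (key 0x1001, 2 data dwords). */
static void pack_yield_klv(uint32_t klv[3], uint32_t quantum_ms, uint32_t ratio_pct)
{
        klv[0] = (0x1001u << 16) | 2u; /* dword 0: key in 31:16, length in 15:0 */
        klv[1] = quantum_ms;           /* dword 1: quantum, in ms */
        klv[2] = ratio_pct;            /* dword 2: CCS ratio, in percent */
}

int main(void)
{
        uint32_t klv[3];

        pack_yield_klv(klv, 100, 30); /* the example from the DOC block */
        printf("0x%08x %u %u\n", klv[0], klv[1], klv[2]); /* 0x10010002 100 30 */
        return 0;
}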

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 2 additions & 1 deletion
@@ -41,6 +41,7 @@
 #include "xe_gt_topology.h"
 #include "xe_guc_exec_queue_types.h"
 #include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
 #include "xe_hw_fence.h"
 #include "xe_hw_engine_class_sysfs.h"
 #include "xe_irq.h"
@@ -97,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt)
 	 * FIXME: if xe_uc_sanitize is called here, on TGL driver will not
 	 * reload
 	 */
-	gt->uc.guc.submission_state.enabled = false;
+	xe_guc_submit_disable(&gt->uc.guc);
 }
 
 static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)

drivers/gpu/drm/xe/xe_guc.c

Lines changed: 2 additions & 4 deletions
@@ -880,9 +880,7 @@ int xe_guc_post_load_init(struct xe_guc *guc)
 		return ret;
 	}
 
-	guc->submission_state.enabled = true;
-
-	return 0;
+	return xe_guc_submit_enable(guc);
 }
 
 int xe_guc_reset(struct xe_guc *guc)
@@ -1579,7 +1577,7 @@ void xe_guc_sanitize(struct xe_guc *guc)
 {
 	xe_uc_fw_sanitize(&guc->fw);
 	xe_guc_ct_disable(&guc->ct);
-	guc->submission_state.enabled = false;
+	xe_guc_submit_disable(guc);
 }
 
 int xe_guc_reset_prepare(struct xe_guc *guc)

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 66 additions & 0 deletions
@@ -32,6 +32,7 @@
 #include "xe_guc_ct.h"
 #include "xe_guc_exec_queue_types.h"
 #include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
 #include "xe_guc_submit_types.h"
 #include "xe_hw_engine.h"
 #include "xe_hw_fence.h"
@@ -316,6 +317,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
 	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
 }
 
+/*
+ * Given that we want to guarantee enough RCS throughput to avoid missing
+ * frames, we set the yield policy to 20% of each 80ms interval.
+ */
+#define RC_YIELD_DURATION 80 /* in ms */
+#define RC_YIELD_RATIO    20 /* in percent */
+static u32 *emit_render_compute_yield_klv(u32 *emit)
+{
+	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
+	*emit++ = RC_YIELD_DURATION;
+	*emit++ = RC_YIELD_RATIO;
+
+	return emit;
+}
+
+#define SCHEDULING_POLICY_MAX_DWORDS 16
+static int guc_init_global_schedule_policy(struct xe_guc *guc)
+{
+	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
+	u32 *emit = data;
+	u32 count = 0;
+	int ret;
+
+	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
+		return 0;
+
+	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+
+	if (CCS_MASK(guc_to_gt(guc)))
+		emit = emit_render_compute_yield_klv(emit);
+
+	count = emit - data;
+	if (count > 1) {
+		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
+
+		ret = xe_guc_ct_send_block(&guc->ct, data, count);
+		if (ret < 0) {
+			xe_gt_err(guc_to_gt(guc),
+				  "failed to enable GuC sheduling policies: %pe\n",
+				  ERR_PTR(ret));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+int xe_guc_submit_enable(struct xe_guc *guc)
+{
+	int ret;
+
+	ret = guc_init_global_schedule_policy(guc);
+	if (ret)
+		return ret;
+
+	guc->submission_state.enabled = true;
+
+	return 0;
+}
+
+void xe_guc_submit_disable(struct xe_guc *guc)
+{
+	guc->submission_state.enabled = false;
+}
+
 static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
 {
 	int i;

drivers/gpu/drm/xe/xe_guc_submit.h

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@ struct xe_exec_queue;
 struct xe_guc;
 
 int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids);
+int xe_guc_submit_enable(struct xe_guc *guc);
+void xe_guc_submit_disable(struct xe_guc *guc);
 
 int xe_guc_submit_reset_prepare(struct xe_guc *guc);
 void xe_guc_submit_reset_wait(struct xe_guc *guc);
