Skip to content

Commit 0417a5f

Browse files
committed
drm/xe: Always capture exec queues on snapshot
Always capture exec queues on snapshot, regardless of whether the exec queue has pending jobs or not. Having jobs or not does not indicate whether the exec queue capture is useful. Examples of bugs that would not be easily detected if capture were skipped when the pending job list is empty: - Jobs pending on exec queue have dependencies - Leaking exec queue refs - GuC protocol issues (e.g. losing G2H) In addition to the above bugs, in general it is just useful to see every exec queue registered with the GuC and its state. Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240405211632.223568-2-matthew.brost@intel.com
1 parent 31ced03 commit 0417a5f

File tree

3 files changed

+6
-25
lines changed

3 files changed

+6
-25
lines changed

drivers/gpu/drm/xe/xe_devcoredump.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
188188
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
189189

190190
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
191-
coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job);
191+
coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
192192
coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
193193
coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
194194

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,7 +1775,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
17751775

17761776
/**
17771777
* xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
1778-
* @job: faulty Xe scheduled job.
1778+
* @q: faulty exec queue
17791779
*
17801780
* This can be printed out in a later stage like during dev_coredump
17811781
* analysis.
@@ -1784,9 +1784,8 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
17841784
* caller, using `xe_guc_exec_queue_snapshot_free`.
17851785
*/
17861786
struct xe_guc_submit_exec_queue_snapshot *
1787-
xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job)
1787+
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
17881788
{
1789-
struct xe_exec_queue *q = job->q;
17901789
struct xe_gpu_scheduler *sched = &q->guc->sched;
17911790
struct xe_guc_submit_exec_queue_snapshot *snapshot;
17921791
int i;
@@ -1942,28 +1941,10 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
19421941
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
19431942
{
19441943
struct xe_guc_submit_exec_queue_snapshot *snapshot;
1945-
struct xe_gpu_scheduler *sched = &q->guc->sched;
1946-
struct xe_sched_job *job;
1947-
bool found = false;
19481944

1949-
spin_lock(&sched->base.job_list_lock);
1950-
list_for_each_entry(job, &sched->base.pending_list, drm.list) {
1951-
if (job->q == q) {
1952-
xe_sched_job_get(job);
1953-
found = true;
1954-
break;
1955-
}
1956-
}
1957-
spin_unlock(&sched->base.job_list_lock);
1958-
1959-
if (!found)
1960-
return;
1961-
1962-
snapshot = xe_guc_exec_queue_snapshot_capture(job);
1945+
snapshot = xe_guc_exec_queue_snapshot_capture(q);
19631946
xe_guc_exec_queue_snapshot_print(snapshot, p);
19641947
xe_guc_exec_queue_snapshot_free(snapshot);
1965-
1966-
xe_sched_job_put(job);
19671948
}
19681949

19691950
/**

drivers/gpu/drm/xe/xe_guc_submit.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
#include <linux/types.h>
1010

1111
struct drm_printer;
12+
struct xe_exec_queue;
1213
struct xe_guc;
13-
struct xe_sched_job;
1414

1515
int xe_guc_submit_init(struct xe_guc *guc);
1616

@@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
2727
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
2828

2929
struct xe_guc_submit_exec_queue_snapshot *
30-
xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job);
30+
xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
3131
void
3232
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot);
3333
void

0 commit comments

Comments
 (0)