Skip to content

Commit 264eecd

Browse files
nvishwa1 authored and mbrost05 committed
drm/xe: Decouple xe_exec_queue and xe_lrc
Decouple xe_lrc from xe_exec_queue and reference count xe_lrc. Removing the hard coupling between xe_exec_queue and xe_lrc allows a flexible design where the user interface xe_exec_queue can be destroyed independently of the hardware/firmware interface xe_lrc.

v2: Fix lrc indexing in wq_item_append()

Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240530032211.29299-1-niranjana.vishwanathapura@intel.com
1 parent 0568a40 commit 264eecd

File tree

12 files changed

+109
-59
lines changed

12 files changed

+109
-59
lines changed

drivers/gpu/drm/xe/xe_exec_queue.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
8686

8787
if (extensions) {
8888
/*
89-
* may set q->usm, must come before xe_lrc_init(),
89+
* may set q->usm, must come before xe_lrc_create(),
9090
* may overwrite q->sched_props, must come before q->ops->init()
9191
*/
9292
err = exec_queue_user_extensions(xe, q, extensions, 0);
@@ -104,9 +104,11 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
104104
int i, err;
105105

106106
for (i = 0; i < q->width; ++i) {
107-
err = xe_lrc_init(q->lrc + i, q->hwe, q, q->vm, SZ_16K);
108-
if (err)
107+
q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
108+
if (IS_ERR(q->lrc[i])) {
109+
err = PTR_ERR(q->lrc[i]);
109110
goto err_lrc;
111+
}
110112
}
111113

112114
err = q->ops->init(q);
@@ -117,7 +119,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
117119

118120
err_lrc:
119121
for (i = i - 1; i >= 0; --i)
120-
xe_lrc_finish(q->lrc + i);
122+
xe_lrc_put(q->lrc[i]);
121123
return err;
122124
}
123125

@@ -198,7 +200,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
198200
int i;
199201

200202
for (i = 0; i < q->width; ++i)
201-
xe_lrc_finish(q->lrc + i);
203+
xe_lrc_put(q->lrc[i]);
202204
__xe_exec_queue_free(q);
203205
}
204206

@@ -701,7 +703,7 @@ bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
701703

702704
static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
703705
{
704-
return q->lrc->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc) - 1;
706+
return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
705707
}
706708

707709
/**
@@ -712,7 +714,7 @@ static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
712714
*/
713715
bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
714716
{
715-
struct xe_lrc *lrc = q->lrc;
717+
struct xe_lrc *lrc = q->lrc[0];
716718
s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
717719

718720
return xe_exec_queue_num_job_inflight(q) >= max_job;
@@ -738,16 +740,16 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
738740
int i;
739741

740742
for (i = 0; i < q->width; ++i) {
741-
if (xe_lrc_seqno(&q->lrc[i]) !=
742-
q->lrc[i].fence_ctx.next_seqno - 1)
743+
if (xe_lrc_seqno(q->lrc[i]) !=
744+
q->lrc[i]->fence_ctx.next_seqno - 1)
743745
return false;
744746
}
745747

746748
return true;
747749
}
748750

749-
return xe_lrc_seqno(&q->lrc[0]) ==
750-
q->lrc[0].fence_ctx.next_seqno - 1;
751+
return xe_lrc_seqno(q->lrc[0]) ==
752+
q->lrc[0]->fence_ctx.next_seqno - 1;
751753
}
752754

753755
/**
@@ -779,7 +781,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
779781
* the LRCs and reading them in different time could also introduce
780782
* errors.
781783
*/
782-
lrc = &q->lrc[0];
784+
lrc = q->lrc[0];
783785
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
784786
q->run_ticks += (new_ts - old_ts) * q->width;
785787
}

drivers/gpu/drm/xe/xe_exec_queue_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ struct xe_exec_queue {
146146
/** @run_ticks: hw engine class run time in ticks for this exec queue */
147147
u64 run_ticks;
148148
/** @lrc: logical ring context for this exec queue */
149-
struct xe_lrc lrc[];
149+
struct xe_lrc *lrc[];
150150
};
151151

152152
/**

drivers/gpu/drm/xe/xe_execlist.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ static void __xe_execlist_port_start(struct xe_execlist_port *port,
109109
port->last_ctx_id = 1;
110110
}
111111

112-
__start_lrc(port->hwe, exl->q->lrc, port->last_ctx_id);
112+
__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
113113
port->running_exl = exl;
114114
exl->has_run = true;
115115
}
@@ -123,14 +123,14 @@ static void __xe_execlist_port_idle(struct xe_execlist_port *port)
123123
if (!port->running_exl)
124124
return;
125125

126-
xe_lrc_write_ring(&port->hwe->kernel_lrc, noop, sizeof(noop));
127-
__start_lrc(port->hwe, &port->hwe->kernel_lrc, 0);
126+
xe_lrc_write_ring(port->hwe->kernel_lrc, noop, sizeof(noop));
127+
__start_lrc(port->hwe, port->hwe->kernel_lrc, 0);
128128
port->running_exl = NULL;
129129
}
130130

131131
static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
132132
{
133-
struct xe_lrc *lrc = exl->q->lrc;
133+
struct xe_lrc *lrc = exl->q->lrc[0];
134134

135135
return lrc->ring.tail == lrc->ring.old_tail;
136136
}
@@ -333,7 +333,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q)
333333
exl->q = q;
334334

335335
err = drm_sched_init(&exl->sched, &drm_sched_ops, NULL, 1,
336-
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES,
336+
q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
337337
XE_SCHED_HANG_LIMIT, XE_SCHED_JOB_TIMEOUT,
338338
NULL, NULL, q->hwe->name,
339339
gt_to_xe(q->gt)->drm.dev);

drivers/gpu/drm/xe/xe_gt.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,8 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
297297
}
298298

299299
xe_map_memcpy_from(xe, default_lrc,
300-
&q->lrc[0].bo->vmap,
301-
xe_lrc_pphwsp_offset(&q->lrc[0]),
300+
&q->lrc[0]->bo->vmap,
301+
xe_lrc_pphwsp_offset(q->lrc[0]),
302302
xe_gt_lrc_size(gt, hwe->class));
303303

304304
gt->default_lrc[hwe->class] = default_lrc;

drivers/gpu/drm/xe/xe_guc_submit.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
490490
action[len++] = info->hwlrca_hi;
491491

492492
for (i = 1; i < q->width; ++i) {
493-
struct xe_lrc *lrc = q->lrc + i;
493+
struct xe_lrc *lrc = q->lrc[i];
494494

495495
action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
496496
action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
@@ -527,7 +527,7 @@ static void register_exec_queue(struct xe_exec_queue *q)
527527
{
528528
struct xe_guc *guc = exec_queue_to_guc(q);
529529
struct xe_device *xe = guc_to_xe(guc);
530-
struct xe_lrc *lrc = q->lrc;
530+
struct xe_lrc *lrc = q->lrc[0];
531531
struct guc_ctxt_registration_info info;
532532

533533
xe_assert(xe, !exec_queue_registered(q));
@@ -586,7 +586,7 @@ static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
586586
{
587587
struct xe_guc *guc = exec_queue_to_guc(q);
588588
struct xe_device *xe = guc_to_xe(guc);
589-
struct iosys_map map = xe_lrc_parallel_map(q->lrc);
589+
struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
590590
unsigned int sleep_period_ms = 1;
591591

592592
#define AVAILABLE_SPACE \
@@ -614,7 +614,7 @@ static int wq_noop_append(struct xe_exec_queue *q)
614614
{
615615
struct xe_guc *guc = exec_queue_to_guc(q);
616616
struct xe_device *xe = guc_to_xe(guc);
617-
struct iosys_map map = xe_lrc_parallel_map(q->lrc);
617+
struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
618618
u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;
619619

620620
if (wq_wait_for_space(q, wq_space_until_wrap(q)))
@@ -634,7 +634,7 @@ static void wq_item_append(struct xe_exec_queue *q)
634634
{
635635
struct xe_guc *guc = exec_queue_to_guc(q);
636636
struct xe_device *xe = guc_to_xe(guc);
637-
struct iosys_map map = xe_lrc_parallel_map(q->lrc);
637+
struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
638638
#define WQ_HEADER_SIZE 4 /* Includes 1 LRC address too */
639639
u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
640640
u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
@@ -650,12 +650,12 @@ static void wq_item_append(struct xe_exec_queue *q)
650650

651651
wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
652652
FIELD_PREP(WQ_LEN_MASK, len_dw);
653-
wqi[i++] = xe_lrc_descriptor(q->lrc);
653+
wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
654654
wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
655-
FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc->ring.tail / sizeof(u64));
655+
FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
656656
wqi[i++] = 0;
657657
for (j = 1; j < q->width; ++j) {
658-
struct xe_lrc *lrc = q->lrc + j;
658+
struct xe_lrc *lrc = q->lrc[j];
659659

660660
wqi[i++] = lrc->ring.tail / sizeof(u64);
661661
}
@@ -670,7 +670,7 @@ static void wq_item_append(struct xe_exec_queue *q)
670670

671671
xe_device_wmb(xe);
672672

673-
map = xe_lrc_parallel_map(q->lrc);
673+
map = xe_lrc_parallel_map(q->lrc[0]);
674674
parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
675675
}
676676

@@ -679,7 +679,7 @@ static void submit_exec_queue(struct xe_exec_queue *q)
679679
{
680680
struct xe_guc *guc = exec_queue_to_guc(q);
681681
struct xe_device *xe = guc_to_xe(guc);
682-
struct xe_lrc *lrc = q->lrc;
682+
struct xe_lrc *lrc = q->lrc[0];
683683
u32 action[3];
684684
u32 g2h_len = 0;
685685
u32 num_g2h = 0;
@@ -1236,7 +1236,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
12361236
msecs_to_jiffies(q->sched_props.job_timeout_ms);
12371237
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
12381238
get_submit_wq(guc),
1239-
q->lrc[0].ring.size / MAX_JOB_SIZE_BYTES, 64,
1239+
q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
12401240
timeout, guc_to_gt(guc)->ordered_wq, NULL,
12411241
q->name, gt_to_xe(q->gt)->drm.dev);
12421242
if (err)
@@ -1464,7 +1464,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
14641464
ban = true;
14651465
}
14661466
} else if (xe_exec_queue_is_lr(q) &&
1467-
(xe_lrc_ring_head(q->lrc) != xe_lrc_ring_tail(q->lrc))) {
1467+
(xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) {
14681468
ban = true;
14691469
}
14701470

@@ -1529,7 +1529,7 @@ static void guc_exec_queue_start(struct xe_exec_queue *q)
15291529

15301530
trace_xe_exec_queue_resubmit(q);
15311531
for (i = 0; i < q->width; ++i)
1532-
xe_lrc_set_ring_head(q->lrc + i, q->lrc[i].ring.tail);
1532+
xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
15331533
xe_sched_resubmit_jobs(sched);
15341534
}
15351535

@@ -1775,7 +1775,7 @@ guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q,
17751775
{
17761776
struct xe_guc *guc = exec_queue_to_guc(q);
17771777
struct xe_device *xe = guc_to_xe(guc);
1778-
struct iosys_map map = xe_lrc_parallel_map(q->lrc);
1778+
struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
17791779
int i;
17801780

17811781
snapshot->guc.wqi_head = q->guc->wqi_head;
@@ -1855,7 +1855,7 @@ xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
18551855

18561856
if (snapshot->lrc) {
18571857
for (i = 0; i < q->width; ++i) {
1858-
struct xe_lrc *lrc = q->lrc + i;
1858+
struct xe_lrc *lrc = q->lrc[i];
18591859

18601860
snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc);
18611861
}

drivers/gpu/drm/xe/xe_hw_engine.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ static void hw_engine_fini(struct drm_device *drm, void *arg)
268268

269269
if (hwe->exl_port)
270270
xe_execlist_port_destroy(hwe->exl_port);
271-
xe_lrc_finish(&hwe->kernel_lrc);
271+
xe_lrc_put(hwe->kernel_lrc);
272272

273273
hwe->gt = NULL;
274274
}
@@ -527,9 +527,11 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
527527
goto err_name;
528528
}
529529

530-
err = xe_lrc_init(&hwe->kernel_lrc, hwe, NULL, NULL, SZ_16K);
531-
if (err)
530+
hwe->kernel_lrc = xe_lrc_create(hwe, NULL, SZ_16K);
531+
if (IS_ERR(hwe->kernel_lrc)) {
532+
err = PTR_ERR(hwe->kernel_lrc);
532533
goto err_hwsp;
534+
}
533535

534536
if (!xe_device_uc_enabled(xe)) {
535537
hwe->exl_port = xe_execlist_port_create(xe, hwe);
@@ -554,7 +556,7 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe,
554556
return drmm_add_action_or_reset(&xe->drm, hw_engine_fini, hwe);
555557

556558
err_kernel_lrc:
557-
xe_lrc_finish(&hwe->kernel_lrc);
559+
xe_lrc_put(hwe->kernel_lrc);
558560
err_hwsp:
559561
xe_bo_unpin_map_no_vm(hwe->hwsp);
560562
err_name:

drivers/gpu/drm/xe/xe_hw_engine_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ struct xe_hw_engine {
137137
/** @hwsp: hardware status page buffer object */
138138
struct xe_bo *hwsp;
139139
/** @kernel_lrc: Kernel LRC (should be replaced /w an xe_engine) */
140-
struct xe_lrc kernel_lrc;
140+
struct xe_lrc *kernel_lrc;
141141
/** @exl_port: execlists port */
142142
struct xe_execlist_port *exl_port;
143143
/** @fence_irq: fence IRQ to run when a hw engine IRQ is received */

drivers/gpu/drm/xe/xe_lrc.c

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -808,11 +808,20 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
808808
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
809809
}
810810

811+
static void xe_lrc_finish(struct xe_lrc *lrc)
812+
{
813+
xe_hw_fence_ctx_finish(&lrc->fence_ctx);
814+
xe_bo_lock(lrc->bo, false);
815+
xe_bo_unpin(lrc->bo);
816+
xe_bo_unlock(lrc->bo);
817+
xe_bo_put(lrc->bo);
818+
}
819+
811820
#define PVC_CTX_ASID (0x2e + 1)
812821
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
813822

814-
int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
815-
struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
823+
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
824+
struct xe_vm *vm, u32 ring_size)
816825
{
817826
struct xe_gt *gt = hwe->gt;
818827
struct xe_tile *tile = gt_to_tile(gt);
@@ -823,6 +832,7 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
823832
u32 lrc_size;
824833
int err;
825834

835+
kref_init(&lrc->refcount);
826836
lrc->flags = 0;
827837
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
828838
if (xe_gt_has_indirect_ring_state(gt))
@@ -935,13 +945,31 @@ int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
935945
return err;
936946
}
937947

938-
void xe_lrc_finish(struct xe_lrc *lrc)
948+
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
949+
u32 ring_size)
939950
{
940-
xe_hw_fence_ctx_finish(&lrc->fence_ctx);
941-
xe_bo_lock(lrc->bo, false);
942-
xe_bo_unpin(lrc->bo);
943-
xe_bo_unlock(lrc->bo);
944-
xe_bo_put(lrc->bo);
951+
struct xe_lrc *lrc;
952+
int err;
953+
954+
lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
955+
if (!lrc)
956+
return ERR_PTR(-ENOMEM);
957+
958+
err = xe_lrc_init(lrc, hwe, vm, ring_size);
959+
if (err) {
960+
kfree(lrc);
961+
return ERR_PTR(err);
962+
}
963+
964+
return lrc;
965+
}
966+
967+
void xe_lrc_destroy(struct kref *ref)
968+
{
969+
struct xe_lrc *lrc = container_of(ref, struct xe_lrc, refcount);
970+
971+
xe_lrc_finish(lrc);
972+
kfree(lrc);
945973
}
946974

947975
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)

0 commit comments

Comments
 (0)