
Commit a6149f0

mbrost05 authored and Luben Tuikov committed
drm/sched: Convert drm scheduler to use a work queue rather than kthread
In Xe, the new Intel GPU driver, a choice was made to have a 1 to 1 mapping between a drm_gpu_scheduler and drm_sched_entity. At first this seems a bit odd, but the reasoning is as follows.

1. In Xe the submission order from multiple drm_sched_entity is not guaranteed to match the completion order, even when targeting the same hardware engine. This is because Xe has a firmware scheduler, the GuC, which is allowed to reorder, timeslice, and preempt submissions. If a drm_gpu_scheduler is shared across multiple drm_sched_entity, the TDR falls apart, as the TDR expects submission order == completion order. Using a dedicated drm_gpu_scheduler per drm_sched_entity solves this problem.

2. In Xe, submissions are done by programming a ring buffer (circular buffer), and a drm_gpu_scheduler enforces a limit on the number of in-flight jobs. If that limit is set to RING_SIZE / MAX_SIZE_PER_JOB, we get flow control on the ring for free.

A problem with this design is that a drm_gpu_scheduler currently uses a kthread for submission / job cleanup. This doesn't scale if a large number of drm_gpu_scheduler instances are used. To work around the scaling issue, use a worker rather than a kthread for submission / job cleanup.

v2:
- (Rob Clark) Fix msm build
- Pass in run work queue
v3:
- (Boris) don't have loop in worker
v4:
- (Tvrtko) break out submit ready, stop, start helpers into own patch
v5:
- (Boris) default to ordered work queue
v6:
- (Luben / checkpatch) fix alignment in msm_ringbuffer.c
- (Luben) s/drm_sched_submit_queue/drm_sched_wqueue_enqueue
- (Luben) Update comment for drm_sched_wqueue_enqueue
- (Luben) Positive check for submit_wq in drm_sched_init
- (Luben) s/alloc_submit_wq/own_submit_wq
v7:
- (Luben) s/drm_sched_wqueue_enqueue/drm_sched_run_job_queue
v8:
- (Luben) Adjust var names / comments

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20231031032439.1558703-3-matthew.brost@intel.com
Signed-off-by: Luben Tuikov <ltuikov89@gmail.com>
1 parent 35963cf commit a6149f0

9 files changed, +86 -81 lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 1 addition & 1 deletion
@@ -2279,7 +2279,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
 			break;
 		}
 
-		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
+		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL,
 				   DRM_SCHED_PRIORITY_COUNT,
 				   ring->num_hw_submission, 0,
 				   timeout, adev->reset_domain->wq,

drivers/gpu/drm/etnaviv/etnaviv_sched.c

Lines changed: 1 addition & 1 deletion
@@ -134,7 +134,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
 {
 	int ret;
 
-	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
+	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL,
			     DRM_SCHED_PRIORITY_COUNT,
			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
			     msecs_to_jiffies(500), NULL, NULL,

drivers/gpu/drm/lima/lima_sched.c

Lines changed: 1 addition & 1 deletion
@@ -488,7 +488,7 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
 
 	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
 
-	return drm_sched_init(&pipe->base, &lima_sched_ops,
+	return drm_sched_init(&pipe->base, &lima_sched_ops, NULL,
			      DRM_SCHED_PRIORITY_COUNT,
			      1,
			      lima_job_hang_limit,

drivers/gpu/drm/msm/msm_ringbuffer.c

Lines changed: 1 addition & 1 deletion
@@ -94,7 +94,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 	/* currently managing hangcheck ourselves: */
 	sched_timeout = MAX_SCHEDULE_TIMEOUT;
 
-	ret = drm_sched_init(&ring->sched, &msm_sched_ops,
+	ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL,
			DRM_SCHED_PRIORITY_COUNT,
			num_hw_submissions, 0, sched_timeout,
			NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev);

drivers/gpu/drm/nouveau/nouveau_sched.c

Lines changed: 1 addition & 1 deletion
@@ -429,7 +429,7 @@ int nouveau_sched_init(struct nouveau_drm *drm)
 	if (!drm->sched_wq)
 		return -ENOMEM;
 
-	return drm_sched_init(sched, &nouveau_sched_ops,
+	return drm_sched_init(sched, &nouveau_sched_ops, NULL,
			      DRM_SCHED_PRIORITY_COUNT,
			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
			      NULL, NULL, "nouveau_sched", drm->dev->dev);

drivers/gpu/drm/panfrost/panfrost_job.c

Lines changed: 1 addition & 1 deletion
@@ -852,7 +852,7 @@ int panfrost_job_init(struct panfrost_device *pfdev)
 		js->queue[j].fence_context = dma_fence_context_alloc(1);
 
 		ret = drm_sched_init(&js->queue[j].sched,
-				     &panfrost_sched_ops,
+				     &panfrost_sched_ops, NULL,
				     DRM_SCHED_PRIORITY_COUNT,
				     nentries, 0,
				     msecs_to_jiffies(JOB_TIMEOUT_MS),

drivers/gpu/drm/scheduler/sched_main.c

Lines changed: 66 additions & 65 deletions
@@ -48,7 +48,6 @@
  * through the jobs entity pointer.
  */
 
-#include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
 #include <linux/completion.h>
@@ -256,6 +255,16 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
 	return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL;
 }
 
+/**
+ * drm_sched_run_job_queue - enqueue run-job work
+ * @sched: scheduler instance
+ */
+static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
+{
+	if (!READ_ONCE(sched->pause_submit))
+		queue_work(sched->submit_wq, &sched->work_run_job);
+}
+
 /**
  * drm_sched_job_done - complete a job
  * @s_job: pointer to the job which is done
@@ -275,7 +284,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
 	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence, result);
 	dma_fence_put(&s_fence->finished);
-	wake_up_interruptible(&sched->wake_up_worker);
+	drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -874,7 +883,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
 void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
 {
 	if (drm_sched_can_queue(sched))
-		wake_up_interruptible(&sched->wake_up_worker);
+		drm_sched_run_job_queue(sched);
 }
 
 /**
@@ -985,60 +994,41 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
 EXPORT_SYMBOL(drm_sched_pick_best);
 
 /**
- * drm_sched_blocked - check if the scheduler is blocked
+ * drm_sched_run_job_work - main scheduler thread
  *
- * @sched: scheduler instance
- *
- * Returns true if blocked, otherwise false.
+ * @w: run job work
  */
-static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
+static void drm_sched_run_job_work(struct work_struct *w)
 {
-	if (kthread_should_park()) {
-		kthread_parkme();
-		return true;
-	}
-
-	return false;
-}
-
-/**
- * drm_sched_main - main scheduler thread
- *
- * @param: scheduler instance
- *
- * Returns 0.
- */
-static int drm_sched_main(void *param)
-{
-	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
+	struct drm_gpu_scheduler *sched =
+		container_of(w, struct drm_gpu_scheduler, work_run_job);
+	struct drm_sched_entity *entity;
+	struct drm_sched_job *cleanup_job;
 	int r;
 
-	sched_set_fifo_low(current);
+	if (READ_ONCE(sched->pause_submit))
+		return;
 
-	while (!kthread_should_stop()) {
-		struct drm_sched_entity *entity = NULL;
-		struct drm_sched_fence *s_fence;
-		struct drm_sched_job *sched_job;
-		struct dma_fence *fence;
-		struct drm_sched_job *cleanup_job = NULL;
+	cleanup_job = drm_sched_get_cleanup_job(sched);
+	entity = drm_sched_select_entity(sched);
 
-		wait_event_interruptible(sched->wake_up_worker,
-					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
-					 (!drm_sched_blocked(sched) &&
-					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop());
+	if (!entity && !cleanup_job)
+		return;	/* No more work */
 
-		if (cleanup_job)
-			sched->ops->free_job(cleanup_job);
+	if (cleanup_job)
+		sched->ops->free_job(cleanup_job);
 
-		if (!entity)
-			continue;
+	if (entity) {
+		struct dma_fence *fence;
+		struct drm_sched_fence *s_fence;
+		struct drm_sched_job *sched_job;
 
 		sched_job = drm_sched_entity_pop_job(entity);
-
 		if (!sched_job) {
 			complete_all(&entity->entity_idle);
-			continue;
+			if (!cleanup_job)
+				return;	/* No more work */
+			goto again;
 		}
 
 		s_fence = sched_job->s_fence;
@@ -1069,14 +1059,18 @@ static int drm_sched_main(void *param)
 
 		wake_up(&sched->job_scheduled);
 	}
-	return 0;
+
+again:
+	drm_sched_run_job_queue(sched);
 }
 
 /**
  * drm_sched_init - Init a gpu scheduler instance
  *
  * @sched: scheduler instance
  * @ops: backend operations for this scheduler
+ * @submit_wq: workqueue to use for submission. If NULL, an ordered wq is
+ *	       allocated and used
  * @num_rqs: number of runqueues, one for each priority, up to DRM_SCHED_PRIORITY_COUNT
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
@@ -1091,6 +1085,7 @@ static int drm_sched_main(void *param)
 */
 int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
+		   struct workqueue_struct *submit_wq,
		   u32 num_rqs, uint32_t hw_submission, unsigned int hang_limit,
		   long timeout, struct workqueue_struct *timeout_wq,
		   atomic_t *score, const char *name, struct device *dev)
@@ -1121,46 +1116,49 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		return 0;
 	}
 
+	if (submit_wq) {
+		sched->submit_wq = submit_wq;
+		sched->own_submit_wq = false;
+	} else {
+		sched->submit_wq = alloc_ordered_workqueue(name, 0);
+		if (!sched->submit_wq)
+			return -ENOMEM;
+
+		sched->own_submit_wq = true;
+	}
+	ret = -ENOMEM;
 	sched->sched_rq = kmalloc_array(num_rqs, sizeof(*sched->sched_rq),
 					GFP_KERNEL | __GFP_ZERO);
-	if (!sched->sched_rq) {
-		drm_err(sched, "%s: out of memory for sched_rq\n", __func__);
-		return -ENOMEM;
-	}
+	if (!sched->sched_rq)
+		goto Out_free;
 	sched->num_rqs = num_rqs;
-	ret = -ENOMEM;
 	for (i = DRM_SCHED_PRIORITY_MIN; i < sched->num_rqs; i++) {
 		sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL);
 		if (!sched->sched_rq[i])
 			goto Out_unroll;
 		drm_sched_rq_init(sched, sched->sched_rq[i]);
 	}
 
-	init_waitqueue_head(&sched->wake_up_worker);
 	init_waitqueue_head(&sched->job_scheduled);
 	INIT_LIST_HEAD(&sched->pending_list);
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
+	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
 	atomic_set(&sched->_score, 0);
 	atomic64_set(&sched->job_id_count, 0);
-
-	/* Each scheduler will run on a seperate kernel thread */
-	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
-	if (IS_ERR(sched->thread)) {
-		ret = PTR_ERR(sched->thread);
-		sched->thread = NULL;
-		DRM_DEV_ERROR(sched->dev, "Failed to create scheduler for %s.\n", name);
-		goto Out_unroll;
-	}
+	sched->pause_submit = false;
 
 	sched->ready = true;
 	return 0;
 Out_unroll:
 	for (--i ; i >= DRM_SCHED_PRIORITY_MIN; i--)
 		kfree(sched->sched_rq[i]);
+Out_free:
 	kfree(sched->sched_rq);
 	sched->sched_rq = NULL;
+	if (sched->own_submit_wq)
+		destroy_workqueue(sched->submit_wq);
 	drm_err(sched, "%s: Failed to setup GPU scheduler--out of memory\n", __func__);
 	return ret;
 }
@@ -1178,8 +1176,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	struct drm_sched_entity *s_entity;
 	int i;
 
-	if (sched->thread)
-		kthread_stop(sched->thread);
+	drm_sched_wqueue_stop(sched);
 
 	for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
 		struct drm_sched_rq *rq = sched->sched_rq[i];
@@ -1202,6 +1199,8 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	/* Confirm no work left behind accessing device structures */
 	cancel_delayed_work_sync(&sched->work_tdr);
 
+	if (sched->own_submit_wq)
+		destroy_workqueue(sched->submit_wq);
 	sched->ready = false;
 	kfree(sched->sched_rq);
 	sched->sched_rq = NULL;
@@ -1262,7 +1261,7 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
 */
 bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched)
 {
-	return !!sched->thread;
+	return sched->ready;
 }
 EXPORT_SYMBOL(drm_sched_wqueue_ready);
 
@@ -1273,7 +1272,8 @@ EXPORT_SYMBOL(drm_sched_wqueue_ready);
 */
 void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched)
 {
-	kthread_park(sched->thread);
+	WRITE_ONCE(sched->pause_submit, true);
+	cancel_work_sync(&sched->work_run_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_stop);
 
@@ -1284,6 +1284,7 @@ EXPORT_SYMBOL(drm_sched_wqueue_stop);
 */
 void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched)
 {
-	kthread_unpark(sched->thread);
+	WRITE_ONCE(sched->pause_submit, false);
+	queue_work(sched->submit_wq, &sched->work_run_job);
 }
 EXPORT_SYMBOL(drm_sched_wqueue_start);
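
With submission now driven by a cancellable work item gated on pause_submit, parking and unparking a kthread is replaced by the drm_sched_wqueue_stop()/drm_sched_wqueue_start() pair above. A hedged usage sketch follows, assuming a hypothetical my_device holding one scheduler and a made-up my_device_reset(); only the drm_sched_* calls are from this patch.

/* Hypothetical device wrapper; only the drm_sched_* calls are real. */
struct my_device {
	struct drm_gpu_scheduler sched;
};

static void my_device_reset(struct my_device *mdev); /* made-up HW reset */

static void my_gpu_recover(struct my_device *mdev)
{
	/*
	 * Sets pause_submit and cancels any queued run-job work, so
	 * drm_sched_run_job_work() can no longer touch the hardware
	 * (it also bails out early if it races with the flag).
	 */
	drm_sched_wqueue_stop(&mdev->sched);

	my_device_reset(mdev);

	/*
	 * Clears pause_submit and re-queues the run-job work so jobs
	 * that became runnable while paused get picked up.
	 */
	drm_sched_wqueue_start(&mdev->sched);
}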

drivers/gpu/drm/v3d/v3d_sched.c

Lines changed: 5 additions & 5 deletions
@@ -388,7 +388,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_bin_sched_ops,
+			     &v3d_bin_sched_ops, NULL,
			     DRM_SCHED_PRIORITY_COUNT,
			     hw_jobs_limit, job_hang_limit,
			     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -397,7 +397,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 		return ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_render_sched_ops,
+			     &v3d_render_sched_ops, NULL,
			     DRM_SCHED_PRIORITY_COUNT,
			     hw_jobs_limit, job_hang_limit,
			     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -406,7 +406,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 		goto fail;
 
 	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
-			     &v3d_tfu_sched_ops,
+			     &v3d_tfu_sched_ops, NULL,
			     DRM_SCHED_PRIORITY_COUNT,
			     hw_jobs_limit, job_hang_limit,
			     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -416,7 +416,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 
 	if (v3d_has_csd(v3d)) {
 		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
-				     &v3d_csd_sched_ops,
+				     &v3d_csd_sched_ops, NULL,
				     DRM_SCHED_PRIORITY_COUNT,
				     hw_jobs_limit, job_hang_limit,
				     msecs_to_jiffies(hang_limit_ms), NULL,
@@ -425,7 +425,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 			goto fail;
 
 		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
-				     &v3d_cache_clean_sched_ops,
+				     &v3d_cache_clean_sched_ops, NULL,
				     DRM_SCHED_PRIORITY_COUNT,
				     hw_jobs_limit, job_hang_limit,
				     msecs_to_jiffies(hang_limit_ms), NULL,
