Skip to content

Commit d506703

Browse files
Philipp StannerDanilo Krummrich
authored andcommitted
Revert "drm/nouveau: Remove waitque for sched teardown"
This reverts: commit bead880 ("drm/nouveau: Remove waitque for sched teardown") commit 5f46f5c ("drm/nouveau: Add new callback for scheduler teardown") from the drm/sched teardown leak fix series: https://lore.kernel.org/dri-devel/20250710125412.128476-2-phasta@kernel.org/ The aforementioned series removed a blocking waitqueue from nouveau_sched_fini(). It was mistakenly assumed that this waitqueue only prevents jobs from leaking, which the series fixed. The waitqueue, however, also guarantees that all VM_BIND related jobs are finished in order, cleaning up mappings in the GPU's MMU. These jobs must be executed sequentially. Without the waitqueue, this is no longer guaranteed, because entity and scheduler teardown can race with each other. Revert all patches related to the waitqueue removal. Fixes: bead880 ("drm/nouveau: Remove waitque for sched teardown") Suggested-by: Danilo Krummrich <dakr@kernel.org> Signed-off-by: Philipp Stanner <phasta@kernel.org> Link: https://lore.kernel.org/r/20250901083107.10206-2-phasta@kernel.org Signed-off-by: Danilo Krummrich <dakr@kernel.org>
1 parent bdd5a14 commit d506703

File tree

5 files changed

+24
-44
lines changed

5 files changed

+24
-44
lines changed

drivers/gpu/drm/nouveau/nouveau_fence.c

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -240,21 +240,6 @@ nouveau_fence_emit(struct nouveau_fence *fence)
240240
return ret;
241241
}
242242

243-
void
244-
nouveau_fence_cancel(struct nouveau_fence *fence)
245-
{
246-
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
247-
unsigned long flags;
248-
249-
spin_lock_irqsave(&fctx->lock, flags);
250-
if (!dma_fence_is_signaled_locked(&fence->base)) {
251-
dma_fence_set_error(&fence->base, -ECANCELED);
252-
if (nouveau_fence_signal(fence))
253-
nvif_event_block(&fctx->event);
254-
}
255-
spin_unlock_irqrestore(&fctx->lock, flags);
256-
}
257-
258243
bool
259244
nouveau_fence_done(struct nouveau_fence *fence)
260245
{

drivers/gpu/drm/nouveau/nouveau_fence.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ void nouveau_fence_unref(struct nouveau_fence **);
2929

3030
int nouveau_fence_emit(struct nouveau_fence *);
3131
bool nouveau_fence_done(struct nouveau_fence *);
32-
void nouveau_fence_cancel(struct nouveau_fence *fence);
3332
int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
3433
int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
3534

drivers/gpu/drm/nouveau/nouveau_sched.c

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include "nouveau_exec.h"
1212
#include "nouveau_abi16.h"
1313
#include "nouveau_sched.h"
14-
#include "nouveau_chan.h"
1514

1615
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000
1716

@@ -122,9 +121,11 @@ nouveau_job_done(struct nouveau_job *job)
122121
{
123122
struct nouveau_sched *sched = job->sched;
124123

125-
spin_lock(&sched->job_list.lock);
124+
spin_lock(&sched->job.list.lock);
126125
list_del(&job->entry);
127-
spin_unlock(&sched->job_list.lock);
126+
spin_unlock(&sched->job.list.lock);
127+
128+
wake_up(&sched->job.wq);
128129
}
129130

130131
void
@@ -305,9 +306,9 @@ nouveau_job_submit(struct nouveau_job *job)
305306
}
306307

307308
/* Submit was successful; add the job to the schedulers job list. */
308-
spin_lock(&sched->job_list.lock);
309-
list_add(&job->entry, &sched->job_list.head);
310-
spin_unlock(&sched->job_list.lock);
309+
spin_lock(&sched->job.list.lock);
310+
list_add(&job->entry, &sched->job.list.head);
311+
spin_unlock(&sched->job.list.lock);
311312

312313
drm_sched_job_arm(&job->base);
313314
job->done_fence = dma_fence_get(&job->base.s_fence->finished);
@@ -392,23 +393,10 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
392393
nouveau_job_fini(job);
393394
}
394395

395-
static void
396-
nouveau_sched_cancel_job(struct drm_sched_job *sched_job)
397-
{
398-
struct nouveau_fence *fence;
399-
struct nouveau_job *job;
400-
401-
job = to_nouveau_job(sched_job);
402-
fence = to_nouveau_fence(job->done_fence);
403-
404-
nouveau_fence_cancel(fence);
405-
}
406-
407396
static const struct drm_sched_backend_ops nouveau_sched_ops = {
408397
.run_job = nouveau_sched_run_job,
409398
.timedout_job = nouveau_sched_timedout_job,
410399
.free_job = nouveau_sched_free_job,
411-
.cancel_job = nouveau_sched_cancel_job,
412400
};
413401

414402
static int
@@ -458,8 +446,9 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
458446
goto fail_sched;
459447

460448
mutex_init(&sched->mutex);
461-
spin_lock_init(&sched->job_list.lock);
462-
INIT_LIST_HEAD(&sched->job_list.head);
449+
spin_lock_init(&sched->job.list.lock);
450+
INIT_LIST_HEAD(&sched->job.list.head);
451+
init_waitqueue_head(&sched->job.wq);
463452

464453
return 0;
465454

@@ -493,12 +482,16 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,
493482
return 0;
494483
}
495484

485+
496486
static void
497487
nouveau_sched_fini(struct nouveau_sched *sched)
498488
{
499489
struct drm_gpu_scheduler *drm_sched = &sched->base;
500490
struct drm_sched_entity *entity = &sched->entity;
501491

492+
rmb(); /* for list_empty to work without lock */
493+
wait_event(sched->job.wq, list_empty(&sched->job.list.head));
494+
502495
drm_sched_entity_fini(entity);
503496
drm_sched_fini(drm_sched);
504497

drivers/gpu/drm/nouveau/nouveau_sched.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,12 @@ struct nouveau_sched {
103103
struct mutex mutex;
104104

105105
struct {
106-
struct list_head head;
107-
spinlock_t lock;
108-
} job_list;
106+
struct {
107+
struct list_head head;
108+
spinlock_t lock;
109+
} list;
110+
struct wait_queue_head wq;
111+
} job;
109112
};
110113

111114
int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm,

drivers/gpu/drm/nouveau/nouveau_uvmm.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
10191019
u64 end = addr + range;
10201020

10211021
again:
1022-
spin_lock(&sched->job_list.lock);
1023-
list_for_each_entry(__job, &sched->job_list.head, entry) {
1022+
spin_lock(&sched->job.list.lock);
1023+
list_for_each_entry(__job, &sched->job.list.head, entry) {
10241024
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
10251025

10261026
list_for_each_op(op, &bind_job->ops) {
@@ -1030,15 +1030,15 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
10301030

10311031
if (!(end <= op_addr || addr >= op_end)) {
10321032
nouveau_uvmm_bind_job_get(bind_job);
1033-
spin_unlock(&sched->job_list.lock);
1033+
spin_unlock(&sched->job.list.lock);
10341034
wait_for_completion(&bind_job->complete);
10351035
nouveau_uvmm_bind_job_put(bind_job);
10361036
goto again;
10371037
}
10381038
}
10391039
}
10401040
}
1041-
spin_unlock(&sched->job_list.lock);
1041+
spin_unlock(&sched->job.list.lock);
10421042
}
10431043

10441044
static int

0 commit comments

Comments
 (0)