Skip to content

Commit 9bff18d

Browse files
drm/ttm: use per BO cleanup workers
Instead of a single worker going over the list of delete BOs in regular intervals use a per BO worker which blocks for the resv object and locking of the BO. This not only simplifies the handling massively, but also results in much better response time when cleaning up buffers.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221125102137.1801-3-christian.koenig@amd.com
1 parent cd3a8a5 commit 9bff18d

File tree

8 files changed

+57
-111
lines changed

8 files changed

+57
-111
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3984,7 +3984,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)
39843984
amdgpu_fence_driver_hw_fini(adev);
39853985

39863986
if (adev->mman.initialized)
3987-
flush_delayed_work(&adev->mman.bdev.wq);
3987+
drain_workqueue(adev->mman.bdev.wq);
39883988

39893989
if (adev->pm_sysfs_en)
39903990
amdgpu_pm_sysfs_fini(adev);

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,7 @@ void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
10991099
{
11001100
while (atomic_read(&i915->mm.free_count)) {
11011101
flush_work(&i915->mm.free_work);
1102-
flush_delayed_work(&i915->bdev.wq);
1102+
drain_workqueue(i915->bdev.wq);
11031103
rcu_barrier();
11041104
}
11051105
}

drivers/gpu/drm/i915/intel_region_ttm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem)
132132
break;
133133

134134
msleep(20);
135-
flush_delayed_work(&mem->i915->bdev.wq);
135+
drain_workqueue(mem->i915->bdev.wq);
136136
}
137137

138138
/* If we leaked objects, Don't free the region causing use after free */

drivers/gpu/drm/ttm/ttm_bo.c

Lines changed: 40 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -280,14 +280,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
280280
ret = 0;
281281
}
282282

283-
if (ret || unlikely(list_empty(&bo->ddestroy))) {
283+
if (ret) {
284284
if (unlock_resv)
285285
dma_resv_unlock(bo->base.resv);
286286
spin_unlock(&bo->bdev->lru_lock);
287287
return ret;
288288
}
289289

290-
list_del_init(&bo->ddestroy);
291290
spin_unlock(&bo->bdev->lru_lock);
292291
ttm_bo_cleanup_memtype_use(bo);
293292

@@ -300,47 +299,21 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
300299
}
301300

302301
/*
303-
* Traverse the delayed list, and call ttm_bo_cleanup_refs on all
304-
* encountered buffers.
302+
* Block for the dma_resv object to become idle, lock the buffer and clean up
303+
* the resource and tt object.
305304
*/
306-
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
305+
static void ttm_bo_delayed_delete(struct work_struct *work)
307306
{
308-
struct list_head removed;
309-
bool empty;
310-
311-
INIT_LIST_HEAD(&removed);
312-
313-
spin_lock(&bdev->lru_lock);
314-
while (!list_empty(&bdev->ddestroy)) {
315-
struct ttm_buffer_object *bo;
316-
317-
bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object,
318-
ddestroy);
319-
list_move_tail(&bo->ddestroy, &removed);
320-
if (!ttm_bo_get_unless_zero(bo))
321-
continue;
322-
323-
if (remove_all || bo->base.resv != &bo->base._resv) {
324-
spin_unlock(&bdev->lru_lock);
325-
dma_resv_lock(bo->base.resv, NULL);
326-
327-
spin_lock(&bdev->lru_lock);
328-
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
329-
330-
} else if (dma_resv_trylock(bo->base.resv)) {
331-
ttm_bo_cleanup_refs(bo, false, !remove_all, true);
332-
} else {
333-
spin_unlock(&bdev->lru_lock);
334-
}
307+
struct ttm_buffer_object *bo;
335308

336-
ttm_bo_put(bo);
337-
spin_lock(&bdev->lru_lock);
338-
}
339-
list_splice_tail(&removed, &bdev->ddestroy);
340-
empty = list_empty(&bdev->ddestroy);
341-
spin_unlock(&bdev->lru_lock);
309+
bo = container_of(work, typeof(*bo), delayed_delete);
342310

343-
return empty;
311+
dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
312+
MAX_SCHEDULE_TIMEOUT);
313+
dma_resv_lock(bo->base.resv, NULL);
314+
ttm_bo_cleanup_memtype_use(bo);
315+
dma_resv_unlock(bo->base.resv);
316+
ttm_bo_put(bo);
344317
}
345318

346319
static void ttm_bo_release(struct kref *kref)
@@ -369,44 +342,40 @@ static void ttm_bo_release(struct kref *kref)
369342

370343
drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node);
371344
ttm_mem_io_free(bdev, bo->resource);
372-
}
373-
374-
if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) ||
375-
!dma_resv_trylock(bo->base.resv)) {
376-
/* The BO is not idle, resurrect it for delayed destroy */
377-
ttm_bo_flush_all_fences(bo);
378-
bo->deleted = true;
379345

380-
spin_lock(&bo->bdev->lru_lock);
346+
if (!dma_resv_test_signaled(bo->base.resv,
347+
DMA_RESV_USAGE_BOOKKEEP) ||
348+
!dma_resv_trylock(bo->base.resv)) {
349+
/* The BO is not idle, resurrect it for delayed destroy */
350+
ttm_bo_flush_all_fences(bo);
351+
bo->deleted = true;
381352

382-
/*
383-
* Make pinned bos immediately available to
384-
* shrinkers, now that they are queued for
385-
* destruction.
386-
*
387-
* FIXME: QXL is triggering this. Can be removed when the
388-
* driver is fixed.
389-
*/
390-
if (bo->pin_count) {
391-
bo->pin_count = 0;
392-
ttm_resource_move_to_lru_tail(bo->resource);
393-
}
353+
spin_lock(&bo->bdev->lru_lock);
394354

395-
kref_init(&bo->kref);
396-
list_add_tail(&bo->ddestroy, &bdev->ddestroy);
397-
spin_unlock(&bo->bdev->lru_lock);
355+
/*
356+
* Make pinned bos immediately available to
357+
* shrinkers, now that they are queued for
358+
* destruction.
359+
*
360+
* FIXME: QXL is triggering this. Can be removed when the
361+
* driver is fixed.
362+
*/
363+
if (bo->pin_count) {
364+
bo->pin_count = 0;
365+
ttm_resource_move_to_lru_tail(bo->resource);
366+
}
398367

399-
schedule_delayed_work(&bdev->wq,
400-
((HZ / 100) < 1) ? 1 : HZ / 100);
401-
return;
402-
}
368+
kref_init(&bo->kref);
369+
spin_unlock(&bo->bdev->lru_lock);
403370

404-
spin_lock(&bo->bdev->lru_lock);
405-
list_del(&bo->ddestroy);
406-
spin_unlock(&bo->bdev->lru_lock);
371+
INIT_WORK(&bo->delayed_delete, ttm_bo_delayed_delete);
372+
queue_work(bdev->wq, &bo->delayed_delete);
373+
return;
374+
}
407375

408-
ttm_bo_cleanup_memtype_use(bo);
409-
dma_resv_unlock(bo->base.resv);
376+
ttm_bo_cleanup_memtype_use(bo);
377+
dma_resv_unlock(bo->base.resv);
378+
}
410379

411380
atomic_dec(&ttm_glob.bo_count);
412381
bo->destroy(bo);
@@ -946,7 +915,6 @@ int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo,
946915
int ret;
947916

948917
kref_init(&bo->kref);
949-
INIT_LIST_HEAD(&bo->ddestroy);
950918
bo->bdev = bdev;
951919
bo->type = type;
952920
bo->page_alignment = alignment;

drivers/gpu/drm/ttm/ttm_bo_util.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
230230
*/
231231

232232
atomic_inc(&ttm_glob.bo_count);
233-
INIT_LIST_HEAD(&fbo->base.ddestroy);
234233
drm_vma_node_reset(&fbo->base.base.vma_node);
235234

236235
kref_init(&fbo->base.kref);

drivers/gpu/drm/ttm/ttm_device.c

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -175,16 +175,6 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
175175
}
176176
EXPORT_SYMBOL(ttm_device_swapout);
177177

178-
static void ttm_device_delayed_workqueue(struct work_struct *work)
179-
{
180-
struct ttm_device *bdev =
181-
container_of(work, struct ttm_device, wq.work);
182-
183-
if (!ttm_bo_delayed_delete(bdev, false))
184-
schedule_delayed_work(&bdev->wq,
185-
((HZ / 100) < 1) ? 1 : HZ / 100);
186-
}
187-
188178
/**
189179
* ttm_device_init
190180
*
@@ -215,15 +205,19 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,
215205
if (ret)
216206
return ret;
217207

208+
bdev->wq = alloc_workqueue("ttm", WQ_MEM_RECLAIM | WQ_HIGHPRI, 16);
209+
if (!bdev->wq) {
210+
ttm_global_release();
211+
return -ENOMEM;
212+
}
213+
218214
bdev->funcs = funcs;
219215

220216
ttm_sys_man_init(bdev);
221217
ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32);
222218

223219
bdev->vma_manager = vma_manager;
224-
INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
225220
spin_lock_init(&bdev->lru_lock);
226-
INIT_LIST_HEAD(&bdev->ddestroy);
227221
INIT_LIST_HEAD(&bdev->pinned);
228222
bdev->dev_mapping = mapping;
229223
mutex_lock(&ttm_global_mutex);
@@ -247,10 +241,8 @@ void ttm_device_fini(struct ttm_device *bdev)
247241
list_del(&bdev->device_list);
248242
mutex_unlock(&ttm_global_mutex);
249243

250-
cancel_delayed_work_sync(&bdev->wq);
251-
252-
if (ttm_bo_delayed_delete(bdev, true))
253-
pr_debug("Delayed destroy list was clean\n");
244+
drain_workqueue(bdev->wq);
245+
destroy_workqueue(bdev->wq);
254246

255247
spin_lock(&bdev->lru_lock);
256248
for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)

include/drm/ttm/ttm_bo_api.h

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,6 @@ struct ttm_tt;
9292
* @ttm: TTM structure holding system pages.
9393
* @evicted: Whether the object was evicted without user-space knowing.
9494
* @deleted: True if the object is only a zombie and already deleted.
95-
* @ddestroy: List head for the delayed destroy list.
9695
* @swap: List head for swap LRU list.
9796
* @offset: The current GPU offset, which can have different meanings
9897
* depending on the memory type. For SYSTEM type memory, it should be 0.
@@ -135,19 +134,14 @@ struct ttm_buffer_object {
135134
struct ttm_tt *ttm;
136135
bool deleted;
137136
struct ttm_lru_bulk_move *bulk_move;
137+
unsigned priority;
138+
unsigned pin_count;
138139

139140
/**
140-
* Members protected by the bdev::lru_lock.
141-
*/
142-
143-
struct list_head ddestroy;
144-
145-
/**
146-
* Members protected by a bo reservation.
141+
* @delayed_delete: Work item used when we can't delete the BO
142+
* immediately
147143
*/
148-
149-
unsigned priority;
150-
unsigned pin_count;
144+
struct work_struct delayed_delete;
151145

152146
/**
153147
* Special members that are protected by the reserve lock
@@ -448,8 +442,6 @@ void ttm_bo_vm_close(struct vm_area_struct *vma);
448442

449443
int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
450444
void *buf, int len, int write);
451-
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
452-
453445
vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
454446

455447
#endif

include/drm/ttm/ttm_device.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,6 @@ struct ttm_device {
251251
*/
252252
spinlock_t lru_lock;
253253

254-
/**
255-
* @ddestroy: Destroyed but not yet cleaned up buffer objects.
256-
*/
257-
struct list_head ddestroy;
258-
259254
/**
260255
* @pinned: Buffer objects which are pinned and so not on any LRU list.
261256
*/
@@ -270,7 +265,7 @@ struct ttm_device {
270265
/**
271266
* @wq: Work queue structure for the delayed delete workqueue.
272267
*/
273-
struct delayed_work wq;
268+
struct workqueue_struct *wq;
274269
};
275270

276271
int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags);

0 commit comments

Comments (0)