drm/nouveau: implement new VM_BIND uAPI
This commit provides the implementation of the new uAPI motivated by the
Vulkan API. It allows user mode drivers (UMDs) to:

1) Initialize a GPU virtual address (VA) space via the new
   DRM_IOCTL_NOUVEAU_VM_INIT ioctl, specifying the portions of the VA
   space managed by the kernel and userspace, respectively.

2) Allocate and free VA space regions as well as bind and unbind memory
   to the GPU's VA space via the new DRM_IOCTL_NOUVEAU_VM_BIND ioctl.
   UMDs can request these operations to be processed either
   synchronously or asynchronously. DRM syncobjs (incl. timelines) are
   supported as a synchronization mechanism. The GPU VA mappings are
   managed with the DRM GPU VA manager.

3) Execute push buffers via the new DRM_IOCTL_NOUVEAU_EXEC ioctl. The
   execution happens asynchronously. DRM syncobjs (incl. timelines) are
   supported as a synchronization mechanism. DRM GEM object locking is
   handled with drm_exec (a rough sketch of this locking pattern follows
   below).
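
As a rough illustration of the drm_exec locking pattern referred to
above (the helper below is hypothetical and not part of this series;
the drm_exec entry points are shown as originally introduced, and later
kernels add an extra argument to drm_exec_init()):

	#include <drm/drm_exec.h>
	#include <drm/drm_gem.h>

	/* Hypothetical helper: lock all GEM objects a job depends on. */
	static int job_lock_objects(struct drm_exec *exec,
				    struct drm_gem_object **objs,
				    unsigned int count)
	{
		unsigned int i;
		int ret;

		drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
		drm_exec_until_all_locked(exec) {
			for (i = 0; i < count; i++) {
				/* Reserve one dma-fence slot per object. */
				ret = drm_exec_prepare_obj(exec, objs[i], 1);
				drm_exec_retry_on_contention(exec);
				if (ret)
					goto err;
			}
		}
		return 0;

	err:
		drm_exec_fini(exec);
		return ret;
	}

The drm_exec_until_all_locked() loop transparently drops all locks and
restarts on contention, which is what makes locking an arbitrary set of
GEM objects deadlock-free; the caller releases the locks with
drm_exec_fini() once the job's fences are installed.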

Both DRM_IOCTL_NOUVEAU_VM_BIND and DRM_IOCTL_NOUVEAU_EXEC use the DRM
GPU scheduler for the asynchronous paths.
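
For orientation, a minimal userspace flow through the three new ioctls
could look roughly like the sketch below. Struct, field, and flag names
are meant to follow include/uapi/drm/nouveau_drm.h as added by this
series but are reproduced from memory here, so treat the exact layouts
as illustrative; drm_fd, channel, bo_handle, gpu_va, bo_size,
pushbuf_va, and pushbuf_len are placeholders the caller provides.

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <drm/nouveau_drm.h>

	static int bind_and_exec(int drm_fd, uint32_t channel,
				 uint32_t bo_handle, uint64_t gpu_va,
				 uint64_t bo_size, uint64_t pushbuf_va,
				 uint32_t pushbuf_len)
	{
		/* Example split: reserve the low 4 GiB for kernel-managed
		 * mappings, leave the rest to userspace. */
		struct drm_nouveau_vm_init init = {
			.kernel_managed_addr = 0,
			.kernel_managed_size = 1ull << 32,
		};
		/* Map the BO at gpu_va in the userspace-managed range. */
		struct drm_nouveau_vm_bind_op op = {
			.op        = DRM_NOUVEAU_VM_BIND_OP_MAP,
			.handle    = bo_handle,
			.addr      = gpu_va,
			.bo_offset = 0,
			.range     = bo_size,
		};
		/* Synchronous bind; DRM_NOUVEAU_VM_BIND_RUN_ASYNC plus
		 * drm_nouveau_sync entries would make it asynchronous. */
		struct drm_nouveau_vm_bind bind = {
			.op_count = 1,
			.op_ptr   = (uintptr_t)&op,
		};
		struct drm_nouveau_exec_push push = {
			.va     = pushbuf_va,
			.va_len = pushbuf_len,
		};
		struct drm_nouveau_exec exec = {
			.channel    = channel,
			.push_count = 1,
			.push_ptr   = (uintptr_t)&push,
		};

		if (ioctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_INIT, &init))
			return -1;
		if (ioctl(drm_fd, DRM_IOCTL_NOUVEAU_VM_BIND, &bind))
			return -1;
		return ioctl(drm_fd, DRM_IOCTL_NOUVEAU_EXEC, &exec);
	}

Since DRM_IOCTL_NOUVEAU_EXEC always completes asynchronously, a real
client would additionally pass drm_nouveau_sync signal entries and wait
on them before reusing the push buffer.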

Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Danilo Krummrich <dakr@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230804182406.5222-12-dakr@redhat.com
dakr committed Aug 4, 2023
1 parent 6b252cf commit b88baab
Showing 19 changed files with 3,321 additions and 69 deletions.
3 changes: 3 additions & 0 deletions Documentation/gpu/driver-uapi.rst
@@ -13,4 +13,7 @@ drm/nouveau uAPI
VM_BIND / EXEC uAPI
-------------------

.. kernel-doc:: drivers/gpu/drm/nouveau/nouveau_exec.c
:doc: Overview

.. kernel-doc:: include/uapi/drm/nouveau_drm.h
3 changes: 3 additions & 0 deletions drivers/gpu/drm/nouveau/Kbuild
@@ -47,6 +47,9 @@ nouveau-y += nouveau_prime.o
nouveau-y += nouveau_sgdma.o
nouveau-y += nouveau_ttm.o
nouveau-y += nouveau_vmm.o
nouveau-y += nouveau_exec.o
nouveau-y += nouveau_sched.o
nouveau-y += nouveau_uvmm.o

# DRM - modesetting
nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o
2 changes: 2 additions & 0 deletions drivers/gpu/drm/nouveau/Kconfig
@@ -10,6 +10,8 @@ config DRM_NOUVEAU
select DRM_KMS_HELPER
select DRM_TTM
select DRM_TTM_HELPER
select DRM_EXEC
select DRM_SCHED
select I2C
select I2C_ALGOBIT
select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT
24 changes: 24 additions & 0 deletions drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -35,6 +35,7 @@
#include "nouveau_chan.h"
#include "nouveau_abi16.h"
#include "nouveau_vmm.h"
#include "nouveau_sched.h"

static struct nouveau_abi16 *
nouveau_abi16(struct drm_file *file_priv)
@@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
{
struct nouveau_abi16_ntfy *ntfy, *temp;

/* When a client exits without waiting for its queued up jobs to
* finish it might happen that we fault the channel. This is due to
* drm_file_free() calling drm_gem_release() before the postclose()
* callback. Hence, we can't tear down this scheduler entity before
* uvmm mappings are unmapped. Currently, we can't detect this case.
*
* However, this should be rare and harmless, since the channel isn't
* needed anymore.
*/
nouveau_sched_entity_fini(&chan->sched_entity);

/* wait for all activity to stop before cleaning up */
if (chan->chan)
nouveau_channel_idle(chan->chan);
@@ -261,6 +273,13 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (!drm->channel)
return nouveau_abi16_put(abi16, -ENODEV);

/* If uvmm wasn't initialized until now, disable it completely to prevent
* userspace from mixing up UAPIs.
*
* The client lock is already acquired by nouveau_abi16_get().
*/
__nouveau_cli_disable_uvmm_noinit(cli);

device = &abi16->device;
engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR;

@@ -304,6 +323,11 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;

ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched,
drm->sched_wq);
if (ret)
goto done;

init->channel = chan->chan->chid;

if (device->info.family >= NV_DEVICE_INFO_V0_TESLA)
1 change: 1 addition & 0 deletions drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,6 +26,7 @@ struct nouveau_abi16_chan {
struct nouveau_bo *ntfy;
struct nouveau_vma *ntfy_vma;
struct nvkm_mm heap;
struct nouveau_sched_entity sched_entity;
};

struct nouveau_abi16 {
159 changes: 102 additions & 57 deletions drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -199,7 +199,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, u64 *size)

struct nouveau_bo *
nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
u32 tile_mode, u32 tile_flags)
u32 tile_mode, u32 tile_flags, bool internal)
{
struct nouveau_drm *drm = cli->drm;
struct nouveau_bo *nvbo;
@@ -233,68 +233,103 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
nvbo->force_coherent = true;
}

if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
if (!nouveau_cli_uvmm(cli) || internal) {
/* for BO noVM allocs, don't assign kinds */
if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
}

nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
} else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
nvbo->kind = (tile_flags & 0x00007f00) >> 8;
nvbo->comp = (tile_flags & 0x00030000) >> 16;
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
}
} else {
nvbo->zeta = (tile_flags & 0x00000007);
}
nvbo->mode = tile_mode;

/* Determine the desirable target GPU page size for the buffer. */
for (i = 0; i < vmm->page_nr; i++) {
/* Because we cannot currently allow VMM maps to fail
* during buffer migration, we need to determine page
* size for the buffer up-front, and pre-allocate its
* page tables.
*
* Skip page sizes that can't support needed domains.
*/
if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
(domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
continue;
if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
(!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
continue;

nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
} else
if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
nvbo->kind = (tile_flags & 0x00007f00) >> 8;
nvbo->comp = (tile_flags & 0x00030000) >> 16;
if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
/* Select this page size if it's the first that supports
* the potential memory domains, or when it's compatible
* with the requested compression settings.
*/
if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
pi = i;

/* Stop once the buffer is larger than the current page size. */
if (*size >= 1ULL << vmm->page[i].shift)
break;
}

if (WARN_ON(pi < 0)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
}
} else {
nvbo->zeta = (tile_flags & 0x00000007);
}
nvbo->mode = tile_mode;
nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);

/* Determine the desirable target GPU page size for the buffer. */
for (i = 0; i < vmm->page_nr; i++) {
/* Because we cannot currently allow VMM maps to fail
* during buffer migration, we need to determine page
* size for the buffer up-front, and pre-allocate its
* page tables.
*
* Skip page sizes that can't support needed domains.
*/
if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
(domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
continue;
if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
(!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
continue;

/* Select this page size if it's the first that supports
* the potential memory domains, or when it's compatible
* with the requested compression settings.
*/
if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
pi = i;

/* Stop once the buffer is larger than the current page size. */
if (*size >= 1ULL << vmm->page[i].shift)
break;
}
/* Disable compression if suitable settings couldn't be found. */
if (nvbo->comp && !vmm->page[pi].comp) {
if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
nvbo->kind = mmu->kind[nvbo->kind];
nvbo->comp = 0;
}
nvbo->page = vmm->page[pi].shift;
} else {
/* reject other tile flags when in VM mode. */
if (tile_mode)
return ERR_PTR(-EINVAL);
if (tile_flags & ~NOUVEAU_GEM_TILE_NONCONTIG)
return ERR_PTR(-EINVAL);

if (WARN_ON(pi < 0)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
}
/* Determine the desirable target GPU page size for the buffer. */
for (i = 0; i < vmm->page_nr; i++) {
/* Because we cannot currently allow VMM maps to fail
* during buffer migration, we need to determine page
* size for the buffer up-front, and pre-allocate its
* page tables.
*
* Skip page sizes that can't support needed domains.
*/
if ((domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
continue;
if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
(!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
continue;

/* Disable compression if suitable settings couldn't be found. */
if (nvbo->comp && !vmm->page[pi].comp) {
if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
nvbo->kind = mmu->kind[nvbo->kind];
nvbo->comp = 0;
if (pi < 0)
pi = i;
/* Stop once the buffer is larger than the current page size. */
if (*size >= 1ULL << vmm->page[i].shift)
break;
}
if (WARN_ON(pi < 0)) {
kfree(nvbo);
return ERR_PTR(-EINVAL);
}
nvbo->page = vmm->page[pi].shift;
}
nvbo->page = vmm->page[pi].shift;

nouveau_bo_fixup_align(nvbo, align, size);

@@ -307,18 +342,26 @@ nouveau_bo_init(struct nouveau_bo *nvbo, u64 size, int align, u32 domain,
{
int type = sg ? ttm_bo_type_sg : ttm_bo_type_device;
int ret;
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
.resv = robj,
};

nouveau_bo_placement_set(nvbo, domain, 0);
INIT_LIST_HEAD(&nvbo->io_reserve_lru);

ret = ttm_bo_init_validate(nvbo->bo.bdev, &nvbo->bo, type,
&nvbo->placement, align >> PAGE_SHIFT, false,
ret = ttm_bo_init_reserved(nvbo->bo.bdev, &nvbo->bo, type,
&nvbo->placement, align >> PAGE_SHIFT, &ctx,
sg, robj, nouveau_bo_del_ttm);
if (ret) {
/* ttm will call nouveau_bo_del_ttm if it fails.. */
return ret;
}

if (!robj)
ttm_bo_unreserve(&nvbo->bo);

return 0;
}

@@ -332,7 +375,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
int ret;

nvbo = nouveau_bo_alloc(cli, &size, &align, domain, tile_mode,
tile_flags);
tile_flags, true);
if (IS_ERR(nvbo))
return PTR_ERR(nvbo);

@@ -951,6 +994,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
list_for_each_entry(vma, &nvbo->vma_list, head) {
nouveau_vma_map(vma, mem);
}
nouveau_uvmm_bo_map_all(nvbo, mem);
} else {
list_for_each_entry(vma, &nvbo->vma_list, head) {
ret = dma_resv_wait_timeout(bo->base.resv,
@@ -959,6 +1003,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
WARN_ON(ret <= 0);
nouveau_vma_unmap(vma);
}
nouveau_uvmm_bo_unmap_all(nvbo);
}

if (new_reg)
3 changes: 2 additions & 1 deletion drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,7 @@ struct nouveau_bo {
struct list_head entry;
int pbbo_index;
bool validate_mapped;
bool no_share;

/* GPU address space is independent of CPU word size */
uint64_t offset;
@@ -73,7 +74,7 @@ extern struct ttm_device_funcs nouveau_bo_driver;

void nouveau_bo_move_init(struct nouveau_drm *);
struct nouveau_bo *nouveau_bo_alloc(struct nouveau_cli *, u64 *size, int *align,
u32 domain, u32 tile_mode, u32 tile_flags);
u32 domain, u32 tile_mode, u32 tile_flags, bool internal);
int nouveau_bo_init(struct nouveau_bo *, u64 size, int align, u32 domain,
struct sg_table *sg, struct dma_resv *robj);
int nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 domain,
