diff --git a/MAINTAINERS b/MAINTAINERS index ec57c42ed5440d..4720b97035ac30 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21955,6 +21955,12 @@ L: kvm@vger.kernel.org S: Maintained F: drivers/vfio/pci/mlx5/ +VFIO NVIDIA PCI DRIVER +M: Ankit Agrawal +L: kvm@vger.kernel.org +S: Maintained +F: drivers/vfio/pci/nvgpu/ + VGA_SWITCHEROO R: Lukas Wunner S: Maintained diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index f9d0c908e738c3..ade18b0ffb7b12 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -59,4 +59,6 @@ source "drivers/vfio/pci/mlx5/Kconfig" source "drivers/vfio/pci/hisilicon/Kconfig" +source "drivers/vfio/pci/nvgpu/Kconfig" + endif diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 24c524224da5a3..0c93d452d0daa0 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -11,3 +11,5 @@ obj-$(CONFIG_VFIO_PCI) += vfio-pci.o obj-$(CONFIG_MLX5_VFIO_PCI) += mlx5/ obj-$(CONFIG_HISI_ACC_VFIO_PCI) += hisilicon/ + +obj-$(CONFIG_NVGPU_VFIO_PCI) += nvgpu/ diff --git a/drivers/vfio/pci/nvgpu/Kconfig b/drivers/vfio/pci/nvgpu/Kconfig new file mode 100644 index 00000000000000..066f764f7c5f2b --- /dev/null +++ b/drivers/vfio/pci/nvgpu/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config NVGPU_VFIO_PCI + tristate "VFIO support for the GPU in the NVIDIA Grace Hopper Superchip" + depends on ARM64 || (COMPILE_TEST && 64BIT) + select VFIO_PCI_CORE + help + VFIO support for the GPU in the NVIDIA Grace Hopper Superchip is + required to assign the GPU device to a VM using KVM/qemu/etc. + + If you don't know what to do here, say N. diff --git a/drivers/vfio/pci/nvgpu/Makefile b/drivers/vfio/pci/nvgpu/Makefile new file mode 100644 index 00000000000000..00fd3a078218bd --- /dev/null +++ b/drivers/vfio/pci/nvgpu/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_NVGPU_VFIO_PCI) += nvgpu-vfio-pci.o +nvgpu-vfio-pci-y := main.o diff --git a/drivers/vfio/pci/nvgpu/main.c b/drivers/vfio/pci/nvgpu/main.c new file mode 100644 index 00000000000000..2dd8cc6e014548 --- /dev/null +++ b/drivers/vfio/pci/nvgpu/main.c @@ -0,0 +1,255 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#include +#include + +#define DUMMY_PFN \ + (((nvdev->mem_prop.hpa + nvdev->mem_prop.mem_length) >> PAGE_SHIFT) - 1) + +struct dev_mem_properties { + uint64_t hpa; + uint64_t mem_length; + int bar1_start_offset; +}; + +struct nvgpu_vfio_pci_core_device { + struct vfio_pci_core_device core_device; + struct dev_mem_properties mem_prop; +}; + +static int vfio_get_bar1_start_offset(struct vfio_pci_core_device *vdev) +{ + u8 val = 0; + + pci_read_config_byte(vdev->pdev, 0x10, &val); + /* + * The BAR1 start offset in the PCI config space depends on the BAR0size. + * Check if the BAR0 is 64b and return the approproiate BAR1 offset. + */ + if (val & PCI_BASE_ADDRESS_MEM_TYPE_64) + return VFIO_PCI_BAR2_REGION_INDEX; + + return VFIO_PCI_BAR1_REGION_INDEX; +} + +static int nvgpu_vfio_pci_open_device(struct vfio_device *core_vdev) +{ + struct nvgpu_vfio_pci_core_device *nvdev = container_of( + core_vdev, struct nvgpu_vfio_pci_core_device, core_device.vdev); + struct vfio_pci_core_device *vdev = + container_of(core_vdev, struct vfio_pci_core_device, vdev); + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + vfio_pci_core_finish_enable(vdev); + + nvdev->mem_prop.bar1_start_offset = vfio_get_bar1_start_offset(vdev); + + return ret; +} + +int nvgpu_vfio_pci_mmap(struct vfio_device *core_vdev, + struct vm_area_struct *vma) +{ + struct nvgpu_vfio_pci_core_device *nvdev = container_of( + core_vdev, struct nvgpu_vfio_pci_core_device, core_device.vdev); + + unsigned long start_pfn; + unsigned int index; + u64 req_len, pgoff; + int ret = 0; + + index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + if (index != nvdev->mem_prop.bar1_start_offset) + return vfio_pci_core_mmap(core_vdev, vma); + + /* + * Request to mmap the BAR1. Map to the CPU accessible memory on the + * GPU using the memory information gathered from the system ACPI + * tables. + */ + start_pfn = nvdev->mem_prop.hpa >> PAGE_SHIFT; + req_len = vma->vm_end - vma->vm_start; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + if (pgoff >= (nvdev->mem_prop.mem_length >> PAGE_SHIFT)) + return -EINVAL; + + /* + * Perform a PFN map to the memory. The device BAR1 is backed by the + * GPU memory now. Check that the mapping does not overflow out of + * the GPU memory size. + */ + ret = remap_pfn_range(vma, vma->vm_start, start_pfn + pgoff, + min(req_len, nvdev->mem_prop.mem_length - pgoff), + vma->vm_page_prot); + if (ret) + return ret; + + vma->vm_pgoff = start_pfn + pgoff; + + return 0; +} + +long nvgpu_vfio_pci_ioctl(struct vfio_device *core_vdev, unsigned int cmd, + unsigned long arg) +{ + struct nvgpu_vfio_pci_core_device *nvdev = container_of( + core_vdev, struct nvgpu_vfio_pci_core_device, core_device.vdev); + + unsigned long minsz = offsetofend(struct vfio_region_info, offset); + struct vfio_region_info info; + + switch (cmd) { + case VFIO_DEVICE_GET_REGION_INFO: + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index == nvdev->mem_prop.bar1_start_offset) { + /* + * Request to determine the BAR1 region information. Send the + * GPU memory information. + */ + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = nvdev->mem_prop.mem_length; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP; + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; + } + + if (info.index == nvdev->mem_prop.bar1_start_offset + 1) { + /* + * The BAR1 region is 64b. Ignore this access. + */ + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0; + info.flags = 0; + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; + } + + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + + default: + return vfio_pci_core_ioctl(core_vdev, cmd, arg); + } +} + +static const struct vfio_device_ops nvgpu_vfio_pci_ops = { + .name = "nvgpu-vfio-pci", + .init = vfio_pci_core_init_dev, + .release = vfio_pci_core_release_dev, + .open_device = nvgpu_vfio_pci_open_device, + .close_device = vfio_pci_core_close_device, + .ioctl = nvgpu_vfio_pci_ioctl, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = nvgpu_vfio_pci_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, +}; + +static struct nvgpu_vfio_pci_core_device *nvgpu_drvdata(struct pci_dev *pdev) +{ + struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev); + + return container_of(core_device, struct nvgpu_vfio_pci_core_device, + core_device); +} + +static int +nvgpu_vfio_pci_fetch_memory_property(struct pci_dev *pdev, + struct nvgpu_vfio_pci_core_device *nvdev) +{ + int ret = 0; + + /* + * The memory information is present in the system ACPI tables as DSD + * properties nvidia,gpu-mem-base-pa and nvidia,gpu-mem-size. + */ + ret = device_property_read_u64(&(pdev->dev), "nvidia,gpu-mem-base-pa", + &(nvdev->mem_prop.hpa)); + if (ret) + return ret; + + ret = device_property_read_u64(&(pdev->dev), "nvidia,gpu-mem-size", + &(nvdev->mem_prop.mem_length)); + return ret; +} + +static int nvgpu_vfio_pci_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + struct nvgpu_vfio_pci_core_device *nvdev; + int ret; + + nvdev = vfio_alloc_device(nvgpu_vfio_pci_core_device, core_device.vdev, + &pdev->dev, &nvgpu_vfio_pci_ops); + if (IS_ERR(nvdev)) + return PTR_ERR(nvdev); + + dev_set_drvdata(&pdev->dev, nvdev); + + ret = nvgpu_vfio_pci_fetch_memory_property(pdev, nvdev); + if (ret) + goto out_put_vdev; + + ret = vfio_pci_core_register_device(&nvdev->core_device); + if (ret) + goto out_put_vdev; + + return ret; + +out_put_vdev: + vfio_put_device(&nvdev->core_device.vdev); + return ret; +} + +static void nvgpu_vfio_pci_remove(struct pci_dev *pdev) +{ + struct nvgpu_vfio_pci_core_device *nvdev = nvgpu_drvdata(pdev); + struct vfio_pci_core_device *vdev = &nvdev->core_device; + + vfio_pci_core_unregister_device(vdev); + vfio_put_device(&vdev->vdev); +} + +static const struct pci_device_id nvgpu_vfio_pci_table[] = { + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2342) }, + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2343) }, + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_NVIDIA, 0x2345) }, + {} +}; + +MODULE_DEVICE_TABLE(pci, nvgpu_vfio_pci_table); + +static struct pci_driver nvgpu_vfio_pci_driver = { + .name = KBUILD_MODNAME, + .id_table = nvgpu_vfio_pci_table, + .probe = nvgpu_vfio_pci_probe, + .remove = nvgpu_vfio_pci_remove, + .err_handler = &vfio_pci_core_err_handlers, + .driver_managed_dma = true, +}; + +module_pci_driver(nvgpu_vfio_pci_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Ankit Agrawal "); +MODULE_AUTHOR("Aniket Agashe "); +MODULE_DESCRIPTION( + "VFIO NVGPU PF - User Level driver for NVIDIA devices with CPU coherently accessible device memory");