@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/eventfd.h>
+#include <linux/file.h>
 #include <linux/interrupt.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
@@ -227,6 +228,110 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 	return 0;
 }
 
+static int vfio_pci_count_devs(struct pci_dev *pdev, void *data)
+{
+	(*(int *)data)++;
+	return 0;
+}
+
+struct vfio_pci_fill_info {
+	int max;
+	int cur;
+	struct vfio_pci_dependent_device *devices;
+};
+
+static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_fill_info *fill = data;
+	struct iommu_group *iommu_group;
+
+	if (fill->cur == fill->max)
+		return -EAGAIN; /* Something changed, try again */
+
+	iommu_group = iommu_group_get(&pdev->dev);
+	if (!iommu_group)
+		return -EPERM; /* Cannot reset non-isolated devices */
+
+	fill->devices[fill->cur].group_id = iommu_group_id(iommu_group);
+	fill->devices[fill->cur].segment = pci_domain_nr(pdev->bus);
+	fill->devices[fill->cur].bus = pdev->bus->number;
+	fill->devices[fill->cur].devfn = pdev->devfn;
+	fill->cur++;
+	iommu_group_put(iommu_group);
+	return 0;
+}
+
+struct vfio_pci_group_entry {
+	struct vfio_group *group;
+	int id;
+};
+
+struct vfio_pci_group_info {
+	int count;
+	struct vfio_pci_group_entry *groups;
+};
+
+static int vfio_pci_validate_devs(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_group_info *info = data;
+	struct iommu_group *group;
+	int id, i;
+
+	group = iommu_group_get(&pdev->dev);
+	if (!group)
+		return -EPERM;
+
+	id = iommu_group_id(group);
+
+	for (i = 0; i < info->count; i++)
+		if (info->groups[i].id == id)
+			break;
+
+	iommu_group_put(group);
+
+	return (i == info->count) ? -EINVAL : 0;
+}
+
+static bool vfio_pci_dev_below_slot(struct pci_dev *pdev, struct pci_slot *slot)
+{
+	for (; pdev; pdev = pdev->bus->self)
+		if (pdev->bus == slot->bus)
+			return (pdev->slot == slot);
+	return false;
+}
+
+struct vfio_pci_walk_info {
+	int (*fn)(struct pci_dev *, void *data);
+	void *data;
+	struct pci_dev *pdev;
+	bool slot;
+	int ret;
+};
+
+static int vfio_pci_walk_wrapper(struct pci_dev *pdev, void *data)
+{
+	struct vfio_pci_walk_info *walk = data;
+
+	if (!walk->slot || vfio_pci_dev_below_slot(pdev, walk->pdev->slot))
+		walk->ret = walk->fn(pdev, walk->data);
+
+	return walk->ret;
+}
+
+static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
+					 int (*fn)(struct pci_dev *,
+						   void *data), void *data,
+					 bool slot)
+{
+	struct vfio_pci_walk_info walk = {
+		.fn = fn, .data = data, .pdev = pdev, .slot = slot, .ret = 0,
+	};
+
+	pci_walk_bus(pdev->bus, vfio_pci_walk_wrapper, &walk);
+
+	return walk.ret;
+}
+
 static long vfio_pci_ioctl(void *device_data,
 			   unsigned int cmd, unsigned long arg)
 {
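For orientation (not part of the commit): vfio_pci_for_each_slot_or_bus() simply adapts pci_walk_bus() to the requested slot or bus scope, and pci_walk_bus() stops as soon as a callback returns non-zero, which is why vfio_pci_walk_wrapper() latches the first error into walk->ret. Any callback with the int (*)(struct pci_dev *, void *) signature can be plugged in; a minimal hypothetical example follows (vfio_pci_log_dev is invented for illustration and assumes the same file context as the hunk above):

/*
 * Hypothetical callback, not part of the patch: log each device a
 * slot/bus reset of vdev->pdev would affect, reusing the walker above.
 */
static int vfio_pci_log_dev(struct pci_dev *pdev, void *data)
{
	pr_info("vfio-pci: reset would affect %s\n", pci_name(pdev));
	return 0;	/* a non-zero return would stop pci_walk_bus() */
}

/* e.g., called with 'slot' chosen as in the ioctl hunk below: */
/*	ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_log_dev,
 *					    NULL, slot);
 */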
@@ -407,10 +512,189 @@ static long vfio_pci_ioctl(void *device_data,
 
 		return ret;
 
-	} else if (cmd == VFIO_DEVICE_RESET)
+	} else if (cmd == VFIO_DEVICE_RESET) {
 		return vdev->reset_works ?
 			pci_reset_function(vdev->pdev) : -EINVAL;
 
+	} else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
+		struct vfio_pci_hot_reset_info hdr;
+		struct vfio_pci_fill_info fill = { 0 };
+		struct vfio_pci_dependent_device *devices = NULL;
+		bool slot = false;
+		int ret = 0;
+
+		minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz)
+			return -EINVAL;
+
+		hdr.flags = 0;
+
+		/* Can we do a slot or bus reset or neither? */
+		if (!pci_probe_reset_slot(vdev->pdev->slot))
+			slot = true;
+		else if (pci_probe_reset_bus(vdev->pdev->bus))
+			return -ENODEV;
+
+		/* How many devices are affected? */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_count_devs,
+						    &fill.max, slot);
+		if (ret)
+			return ret;
+
+		WARN_ON(!fill.max); /* Should always be at least one */
+
+		/*
+		 * If there's enough space, fill it now, otherwise return
+		 * -ENOSPC and the number of devices affected.
+		 */
+		if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
+			ret = -ENOSPC;
+			hdr.count = fill.max;
+			goto reset_info_exit;
+		}
+
+		devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
+		if (!devices)
+			return -ENOMEM;
+
+		fill.devices = devices;
+
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_fill_devs,
+						    &fill, slot);
+
+		/*
+		 * If a device was removed between counting and filling,
+		 * we may come up short of fill.max.  If a device was
+		 * added, we'll have a return of -EAGAIN above.
+		 */
+		if (!ret)
+			hdr.count = fill.cur;
+
+reset_info_exit:
+		if (copy_to_user((void __user *)arg, &hdr, minsz))
+			ret = -EFAULT;
+
+		if (!ret) {
+			if (copy_to_user((void __user *)(arg + minsz), devices,
+					 hdr.count * sizeof(*devices)))
+				ret = -EFAULT;
+		}
+
+		kfree(devices);
+		return ret;
+
+	} else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
+		struct vfio_pci_hot_reset hdr;
+		int32_t *group_fds;
+		struct vfio_pci_group_entry *groups;
+		struct vfio_pci_group_info info;
+		bool slot = false;
+		int i, count = 0, ret = 0;
+
+		minsz = offsetofend(struct vfio_pci_hot_reset, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (hdr.argsz < minsz || hdr.flags)
+			return -EINVAL;
+
+		/* Can we do a slot or bus reset or neither? */
+		if (!pci_probe_reset_slot(vdev->pdev->slot))
+			slot = true;
+		else if (pci_probe_reset_bus(vdev->pdev->bus))
+			return -ENODEV;
+
+		/*
+		 * We can't let userspace give us an arbitrarily large
+		 * buffer to copy, so verify how many we think there
+		 * could be.  Note groups can have multiple devices so
+		 * one group per device is the max.
+		 */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_count_devs,
+						    &count, slot);
+		if (ret)
+			return ret;
+
+		/* Somewhere between 1 and count is OK */
+		if (!hdr.count || hdr.count > count)
+			return -EINVAL;
+
+		group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
+		groups = kcalloc(hdr.count, sizeof(*groups), GFP_KERNEL);
+		if (!group_fds || !groups) {
+			kfree(group_fds);
+			kfree(groups);
+			return -ENOMEM;
+		}
+
+		if (copy_from_user(group_fds, (void __user *)(arg + minsz),
+				   hdr.count * sizeof(*group_fds))) {
+			kfree(group_fds);
+			kfree(groups);
+			return -EFAULT;
+		}
+
+		/*
+		 * For each group_fd, get the group through the vfio external
+		 * user interface and store the group and iommu ID.  This
+		 * ensures the group is held across the reset.
+		 */
+		for (i = 0; i < hdr.count; i++) {
+			struct vfio_group *group;
+			struct fd f = fdget(group_fds[i]);
+			if (!f.file) {
+				ret = -EBADF;
+				break;
+			}
+
+			group = vfio_group_get_external_user(f.file);
+			fdput(f);
+			if (IS_ERR(group)) {
+				ret = PTR_ERR(group);
+				break;
+			}
+
+			groups[i].group = group;
+			groups[i].id = vfio_external_user_iommu_id(group);
+		}
+
+		kfree(group_fds);
+
+		/* release reference to groups on error */
+		if (ret)
+			goto hot_reset_release;
+
+		info.count = hdr.count;
+		info.groups = groups;
+
+		/*
+		 * Test whether all the affected devices are contained
+		 * by the set of groups provided by the user.
+		 */
+		ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
+						    vfio_pci_validate_devs,
+						    &info, slot);
+		if (!ret)
+			/* User has access, do the reset */
+			ret = slot ? pci_reset_slot(vdev->pdev->slot) :
+				     pci_reset_bus(vdev->pdev->bus);
+
+hot_reset_release:
+		for (i--; i >= 0; i--)
+			vfio_group_put_external_user(groups[i].group);
+
+		kfree(groups);
+		return ret;
+	}
+
 	return -ENOTTY;
 }
 
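For context, and not part of the commit itself: userspace is expected to drive the two new ioctls in a query-then-reset sequence, first sizing and reading the dependent-device list, then proving ownership by passing the file descriptors of the VFIO groups containing those devices. The sketch below is a rough illustration against the UAPI structures added alongside this patch in include/uapi/linux/vfio.h (struct vfio_pci_hot_reset_info, struct vfio_pci_dependent_device, struct vfio_pci_hot_reset); device_fd and group_fd are assumed to be already-open VFIO device and group descriptors, and only a single group fd is passed, so it succeeds only when every affected device belongs to that one group.

/*
 * Hedged userspace sketch: query which devices a hot reset would affect,
 * then attempt the reset.  device_fd/group_fd are assumed to be open VFIO
 * device and group file descriptors; only one group fd is passed here.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int vfio_pci_try_hot_reset(int device_fd, int group_fd)
{
	struct vfio_pci_hot_reset_info *info, *tmp;
	struct vfio_pci_hot_reset *reset;
	size_t sz;
	unsigned int i;
	int ret;

	/* First call with only the header: expect -ENOSPC plus a count. */
	info = calloc(1, sizeof(*info));
	if (!info)
		return -ENOMEM;
	info->argsz = sizeof(*info);

	if (ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info) &&
	    errno != ENOSPC) {
		ret = -errno;
		goto out_info;
	}

	/* Second call with room for the reported dependent devices. */
	sz = sizeof(*info) +
	     info->count * sizeof(struct vfio_pci_dependent_device);
	tmp = realloc(info, sz);
	if (!tmp) {
		ret = -ENOMEM;
		goto out_info;
	}
	info = tmp;
	info->argsz = sz;

	if (ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info)) {
		ret = -errno;
		goto out_info;
	}

	for (i = 0; i < info->count; i++)
		printf("affected: %04x:%02x:%02x.%x (iommu group %u)\n",
		       info->devices[i].segment, info->devices[i].bus,
		       info->devices[i].devfn >> 3, info->devices[i].devfn & 7,
		       info->devices[i].group_id);
	free(info);

	/* Prove ownership of the affected group(s), then request the reset. */
	reset = calloc(1, sizeof(*reset) + sizeof(int32_t));
	if (!reset)
		return -ENOMEM;
	reset->argsz = sizeof(*reset) + sizeof(int32_t);
	reset->count = 1;		/* one group fd follows the header */
	reset->group_fds[0] = group_fd;

	ret = ioctl(device_fd, VFIO_DEVICE_PCI_HOT_RESET, reset) ? -errno : 0;
	free(reset);
	return ret;

out_info:
	free(info);
	return ret;
}

Requiring group fds rather than letting the device fd alone trigger the reset is what allows the kernel, via vfio_pci_validate_devs(), to verify that the caller owns every device the slot or bus reset will touch.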