|
5 | 5 | #include <linux/spinlock.h> |
6 | 6 |
|
7 | 7 | #include <linux/mm.h> |
| 8 | +#include <linux/memfd.h> |
8 | 9 | #include <linux/memremap.h> |
9 | 10 | #include <linux/pagemap.h> |
10 | 11 | #include <linux/rmap.h> |
|
17 | 18 | #include <linux/hugetlb.h> |
18 | 19 | #include <linux/migrate.h> |
19 | 20 | #include <linux/mm_inline.h> |
| 21 | +#include <linux/pagevec.h> |
20 | 22 | #include <linux/sched/mm.h> |
21 | 23 | #include <linux/shmem_fs.h> |
22 | 24 |
|
@@ -3764,3 +3766,140 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, |
3764 | 3766 | &locked, gup_flags); |
3765 | 3767 | } |
3766 | 3768 | EXPORT_SYMBOL(pin_user_pages_unlocked); |
| 3769 | + |
| 3770 | +/** |
| 3771 | + * memfd_pin_folios() - pin folios associated with a memfd |
| 3772 | + * @memfd: the memfd whose folios are to be pinned |
| 3773 | + * @start: the first memfd offset |
| 3774 | + * @end: the last memfd offset (inclusive) |
| 3775 | + * @folios: array that receives pointers to the folios pinned |
| 3776 | + * @max_folios: maximum number of entries in @folios |
| 3777 | + * @offset: the offset into the first folio |
| 3778 | + * |
| 3779 | + * Attempt to pin folios associated with a memfd in the contiguous range |
| 3780 | + * [start, end]. Given that a memfd is either backed by shmem or hugetlb, |
| 3781 | + * the folios can either be found in the page cache or need to be allocated |
| 3782 | + * if necessary. Once the folios are located, they are all pinned via |
| 3783 | + * FOLL_PIN and @offset is populatedwith the offset into the first folio. |
| 3784 | + * And, eventually, these pinned folios must be released either using |
| 3785 | + * unpin_folios() or unpin_folio(). |
| 3786 | + * |
| 3787 | + * It must be noted that the folios may be pinned for an indefinite amount |
| 3788 | + * of time. And, in most cases, the duration of time they may stay pinned |
| 3789 | + * would be controlled by the userspace. This behavior is effectively the |
| 3790 | + * same as using FOLL_LONGTERM with other GUP APIs. |
| 3791 | + * |
| 3792 | + * Returns number of folios pinned, which could be less than @max_folios |
| 3793 | + * as it depends on the folio sizes that cover the range [start, end]. |
| 3794 | + * If no folios were pinned, it returns -errno. |
| 3795 | + */ |
| 3796 | +long memfd_pin_folios(struct file *memfd, loff_t start, loff_t end, |
| 3797 | + struct folio **folios, unsigned int max_folios, |
| 3798 | + pgoff_t *offset) |
| 3799 | +{ |
| 3800 | + unsigned int flags, nr_folios, nr_found; |
| 3801 | + unsigned int i, pgshift = PAGE_SHIFT; |
| 3802 | + pgoff_t start_idx, end_idx, next_idx; |
| 3803 | + struct folio *folio = NULL; |
| 3804 | + struct folio_batch fbatch; |
| 3805 | + struct hstate *h; |
| 3806 | + long ret = -EINVAL; |
| 3807 | + |
| 3808 | + if (start < 0 || start > end || !max_folios) |
| 3809 | + return -EINVAL; |
| 3810 | + |
| 3811 | + if (!memfd) |
| 3812 | + return -EINVAL; |
| 3813 | + |
| 3814 | + if (!shmem_file(memfd) && !is_file_hugepages(memfd)) |
| 3815 | + return -EINVAL; |
| 3816 | + |
| 3817 | + if (end >= i_size_read(file_inode(memfd))) |
| 3818 | + return -EINVAL; |
| 3819 | + |
| 3820 | + if (is_file_hugepages(memfd)) { |
| 3821 | + h = hstate_file(memfd); |
| 3822 | + pgshift = huge_page_shift(h); |
| 3823 | + } |
| 3824 | + |
| 3825 | + flags = memalloc_pin_save(); |
| 3826 | + do { |
| 3827 | + nr_folios = 0; |
| 3828 | + start_idx = start >> pgshift; |
| 3829 | + end_idx = end >> pgshift; |
| 3830 | + if (is_file_hugepages(memfd)) { |
| 3831 | + start_idx <<= huge_page_order(h); |
| 3832 | + end_idx <<= huge_page_order(h); |
| 3833 | + } |
| 3834 | + |
| 3835 | + folio_batch_init(&fbatch); |
| 3836 | + while (start_idx <= end_idx && nr_folios < max_folios) { |
| 3837 | + /* |
| 3838 | + * In most cases, we should be able to find the folios |
| 3839 | + * in the page cache. If we cannot find them for some |
| 3840 | + * reason, we try to allocate them and add them to the |
| 3841 | + * page cache. |
| 3842 | + */ |
| 3843 | + nr_found = filemap_get_folios_contig(memfd->f_mapping, |
| 3844 | + &start_idx, |
| 3845 | + end_idx, |
| 3846 | + &fbatch); |
| 3847 | + if (folio) { |
| 3848 | + folio_put(folio); |
| 3849 | + folio = NULL; |
| 3850 | + } |
| 3851 | + |
| 3852 | + next_idx = 0; |
| 3853 | + for (i = 0; i < nr_found; i++) { |
| 3854 | + /* |
| 3855 | + * As there can be multiple entries for a |
| 3856 | + * given folio in the batch returned by |
| 3857 | + * filemap_get_folios_contig(), the below |
| 3858 | + * check is to ensure that we pin and return a |
| 3859 | + * unique set of folios between start and end. |
| 3860 | + */ |
| 3861 | + if (next_idx && |
| 3862 | + next_idx != folio_index(fbatch.folios[i])) |
| 3863 | + continue; |
| 3864 | + |
| 3865 | + folio = page_folio(&fbatch.folios[i]->page); |
| 3866 | + |
| 3867 | + if (try_grab_folio(folio, 1, FOLL_PIN)) { |
| 3868 | + folio_batch_release(&fbatch); |
| 3869 | + ret = -EINVAL; |
| 3870 | + goto err; |
| 3871 | + } |
| 3872 | + |
| 3873 | + if (nr_folios == 0) |
| 3874 | + *offset = offset_in_folio(folio, start); |
| 3875 | + |
| 3876 | + folios[nr_folios] = folio; |
| 3877 | + next_idx = folio_next_index(folio); |
| 3878 | + if (++nr_folios == max_folios) |
| 3879 | + break; |
| 3880 | + } |
| 3881 | + |
| 3882 | + folio = NULL; |
| 3883 | + folio_batch_release(&fbatch); |
| 3884 | + if (!nr_found) { |
| 3885 | + folio = memfd_alloc_folio(memfd, start_idx); |
| 3886 | + if (IS_ERR(folio)) { |
| 3887 | + ret = PTR_ERR(folio); |
| 3888 | + if (ret != -EEXIST) |
| 3889 | + goto err; |
| 3890 | + } |
| 3891 | + } |
| 3892 | + } |
| 3893 | + |
| 3894 | + ret = check_and_migrate_movable_folios(nr_folios, folios); |
| 3895 | + } while (ret == -EAGAIN); |
| 3896 | + |
| 3897 | + memalloc_pin_restore(flags); |
| 3898 | + return ret ? ret : nr_folios; |
| 3899 | +err: |
| 3900 | + memalloc_pin_restore(flags); |
| 3901 | + unpin_folios(folios, nr_folios); |
| 3902 | + |
| 3903 | + return ret; |
| 3904 | +} |
| 3905 | +EXPORT_SYMBOL_GPL(memfd_pin_folios); |
0 commit comments