
Commit 0c8b91e

vivekkreddy authored and akpm00 committed
udmabuf: add back support for mapping hugetlb pages
A user or admin can configure a VMM (Qemu) Guest's memory to be backed by hugetlb pages for various reasons. However, a Guest OS would still allocate (and pin) buffers that are backed by regular 4k sized pages. In order to map these buffers and create dma-bufs for them on the Host, we first need to find the hugetlb pages where the buffer allocations are located and then determine the offsets of individual chunks (within those pages) and use this information to eventually populate a scatterlist.

Testcase: default_hugepagesz=2M hugepagesz=2M hugepages=2500 options were passed to the Host kernel and Qemu was launched with these relevant options:

qemu-system-x86_64 -m 4096m....
-device virtio-gpu-pci,max_outputs=1,blob=true,xres=1920,yres=1080
-display gtk,gl=on
-object memory-backend-memfd,hugetlb=on,id=mem1,size=4096M
-machine memory-backend=mem1

Replacing -display gtk,gl=on with -display gtk,gl=off above would exercise the mmap handler.

Link: https://lkml.kernel.org/r/20240624063952.1572359-7-vivek.kasireddy@intel.com
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com> (v2)
Acked-by: Dave Airlie <airlied@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Junxiao Chang <junxiao.chang@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
1 parent 7d79cd7 commit 0c8b91e
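For orientation, below is a minimal userspace sketch of the path this patch serves: a buffer in a hugetlb-backed memfd is turned into a dma-buf through the udmabuf device. The memfd name and sizes are illustrative assumptions; the UDMABUF_CREATE ioctl and struct udmabuf_create are the driver's existing uapi from <linux/udmabuf.h>.

/* Hedged sketch, not from the patch: create a dma-buf from a
 * hugetlb-backed memfd via /dev/udmabuf. Sizes and the memfd name
 * are illustrative; error handling is minimal for brevity. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/udmabuf.h>

int main(void)
{
	/* Two default-sized huge pages (2M each with default_hugepagesz=2M). */
	const unsigned long size = 2 * (2UL << 20);
	struct udmabuf_create create = { 0 };
	int devfd, memfd, buffd;

	devfd = open("/dev/udmabuf", O_RDWR);
	if (devfd < 0)
		return 1;

	/* MFD_HUGETLB backs the memfd with default-sized hugetlb pages. */
	memfd = memfd_create("guest-ram", MFD_ALLOW_SEALING | MFD_HUGETLB);
	if (memfd < 0 || ftruncate(memfd, size) < 0)
		return 1;

	/* udmabuf requires F_SEAL_SHRINK and rejects F_SEAL_WRITE. */
	if (fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK) < 0)
		return 1;

	create.memfd  = memfd;
	create.flags  = UDMABUF_FLAGS_CLOEXEC;
	create.offset = 0;	/* page-aligned offset into the memfd */
	create.size   = size;	/* page-aligned length */

	/* On success this returns a dma-buf fd backed by 4k chunks of
	 * the hugetlb pages, as assembled by the code in this patch. */
	buffd = ioctl(devfd, UDMABUF_CREATE, &create);
	if (buffd < 0) {
		perror("UDMABUF_CREATE");
		return 1;
	}

	printf("dma-buf fd: %d\n", buffd);
	return 0;
}

Whether the memfd is shmem- or hugetlb-backed is detected in udmabuf_create() via shmem_file()/is_file_hugepages(), which routes to the corresponding handler in the diff below.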

File tree

1 file changed: +101 -21 lines changed

drivers/dma-buf/udmabuf.c

Lines changed: 101 additions & 21 deletions
@@ -10,6 +10,7 @@
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/shmem_fs.h>
+#include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/udmabuf.h>
 #include <linux/vmalloc.h>
@@ -28,6 +29,7 @@ struct udmabuf {
 	struct page **pages;
 	struct sg_table *sg;
 	struct miscdevice *device;
+	pgoff_t *offsets;
 };
 
 static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
@@ -41,6 +43,8 @@ static vm_fault_t udmabuf_vm_fault(struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	pfn = page_to_pfn(ubuf->pages[pgoff]);
+	pfn += ubuf->offsets[pgoff] >> PAGE_SHIFT;
+
 	return vmf_insert_pfn(vma, vmf->address, pfn);
 }
 
@@ -90,23 +94,29 @@ static struct sg_table *get_sg_table(struct device *dev, struct dma_buf *buf,
 {
 	struct udmabuf *ubuf = buf->priv;
 	struct sg_table *sg;
+	struct scatterlist *sgl;
+	unsigned int i = 0;
 	int ret;
 
 	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
 	if (!sg)
 		return ERR_PTR(-ENOMEM);
-	ret = sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
-					0, ubuf->pagecount << PAGE_SHIFT,
-					GFP_KERNEL);
+
+	ret = sg_alloc_table(sg, ubuf->pagecount, GFP_KERNEL);
 	if (ret < 0)
-		goto err;
+		goto err_alloc;
+
+	for_each_sg(sg->sgl, sgl, ubuf->pagecount, i)
+		sg_set_page(sgl, ubuf->pages[i], PAGE_SIZE, ubuf->offsets[i]);
+
 	ret = dma_map_sgtable(dev, sg, direction, 0);
 	if (ret < 0)
-		goto err;
+		goto err_map;
 	return sg;
 
-err:
+err_map:
 	sg_free_table(sg);
+err_alloc:
 	kfree(sg);
 	return ERR_PTR(ret);
 }
@@ -143,6 +153,7 @@ static void release_udmabuf(struct dma_buf *buf)
 
 	for (pg = 0; pg < ubuf->pagecount; pg++)
 		put_page(ubuf->pages[pg]);
+	kfree(ubuf->offsets);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 }
@@ -196,17 +207,77 @@ static const struct dma_buf_ops udmabuf_ops = {
 #define SEALS_WANTED (F_SEAL_SHRINK)
 #define SEALS_DENIED (F_SEAL_WRITE)
 
+static int handle_hugetlb_pages(struct udmabuf *ubuf, struct file *memfd,
+				pgoff_t offset, pgoff_t pgcnt,
+				pgoff_t *pgbuf)
+{
+	struct hstate *hpstate = hstate_file(memfd);
+	pgoff_t mapidx = offset >> huge_page_shift(hpstate);
+	pgoff_t subpgoff = (offset & ~huge_page_mask(hpstate)) >> PAGE_SHIFT;
+	pgoff_t maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT;
+	struct page *hpage = NULL;
+	struct folio *folio;
+	pgoff_t pgidx;
+
+	mapidx <<= huge_page_order(hpstate);
+	for (pgidx = 0; pgidx < pgcnt; pgidx++) {
+		if (!hpage) {
+			folio = __filemap_get_folio(memfd->f_mapping,
+						    mapidx,
+						    FGP_ACCESSED, 0);
+			if (IS_ERR(folio))
+				return PTR_ERR(folio);
+
+			hpage = &folio->page;
+		}
+
+		get_page(hpage);
+		ubuf->pages[*pgbuf] = hpage;
+		ubuf->offsets[*pgbuf] = subpgoff << PAGE_SHIFT;
+		(*pgbuf)++;
+		if (++subpgoff == maxsubpgs) {
+			put_page(hpage);
+			hpage = NULL;
+			subpgoff = 0;
+			mapidx += pages_per_huge_page(hpstate);
+		}
+	}
+
+	if (hpage)
+		put_page(hpage);
+
+	return 0;
+}
+
+static int handle_shmem_pages(struct udmabuf *ubuf, struct file *memfd,
+			      pgoff_t offset, pgoff_t pgcnt,
+			      pgoff_t *pgbuf)
+{
+	pgoff_t pgidx, pgoff = offset >> PAGE_SHIFT;
+	struct page *page;
+
+	for (pgidx = 0; pgidx < pgcnt; pgidx++) {
+		page = shmem_read_mapping_page(memfd->f_mapping,
+					       pgoff + pgidx);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		ubuf->pages[*pgbuf] = page;
+		(*pgbuf)++;
+	}
+
+	return 0;
+}
+
 static long udmabuf_create(struct miscdevice *device,
 			   struct udmabuf_create_list *head,
 			   struct udmabuf_create_item *list)
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 	struct file *memfd = NULL;
-	struct address_space *mapping = NULL;
 	struct udmabuf *ubuf;
 	struct dma_buf *buf;
-	pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
-	struct page *page;
+	pgoff_t pgcnt, pgbuf = 0, pglimit;
 	int seals, ret = -EINVAL;
 	u32 i, flags;
 
@@ -234,15 +305,20 @@ static long udmabuf_create(struct miscdevice *device,
 		ret = -ENOMEM;
 		goto err;
 	}
+	ubuf->offsets = kcalloc(ubuf->pagecount, sizeof(*ubuf->offsets),
+				GFP_KERNEL);
+	if (!ubuf->offsets) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	pgbuf = 0;
 	for (i = 0; i < head->count; i++) {
 		ret = -EBADFD;
 		memfd = fget(list[i].memfd);
 		if (!memfd)
 			goto err;
-		mapping = memfd->f_mapping;
-		if (!shmem_mapping(mapping))
+		if (!shmem_file(memfd) && !is_file_hugepages(memfd))
 			goto err;
 		seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
 		if (seals == -EINVAL)
@@ -251,16 +327,19 @@ static long udmabuf_create(struct miscdevice *device,
 		if ((seals & SEALS_WANTED) != SEALS_WANTED ||
 		    (seals & SEALS_DENIED) != 0)
 			goto err;
-		pgoff = list[i].offset >> PAGE_SHIFT;
-		pgcnt = list[i].size >> PAGE_SHIFT;
-		for (pgidx = 0; pgidx < pgcnt; pgidx++) {
-			page = shmem_read_mapping_page(mapping, pgoff + pgidx);
-			if (IS_ERR(page)) {
-				ret = PTR_ERR(page);
-				goto err;
-			}
-			ubuf->pages[pgbuf++] = page;
-		}
+
+		pgcnt = list[i].size >> PAGE_SHIFT;
+		if (is_file_hugepages(memfd))
+			ret = handle_hugetlb_pages(ubuf, memfd,
+						   list[i].offset,
+						   pgcnt, &pgbuf);
+		else
+			ret = handle_shmem_pages(ubuf, memfd,
+						 list[i].offset,
+						 pgcnt, &pgbuf);
+		if (ret < 0)
+			goto err;
+
 		fput(memfd);
 		memfd = NULL;
 	}
@@ -287,6 +366,7 @@ static long udmabuf_create(struct miscdevice *device,
 		put_page(ubuf->pages[--pgbuf]);
 	if (memfd)
 		fput(memfd);
+	kfree(ubuf->offsets);
 	kfree(ubuf->pages);
 	kfree(ubuf);
 	return ret;
