
Commit 3bcc520

Karol Wachowski authored and Stanislaw Gruszka committed
accel/ivpu: Make DMA allocations for MMU600 write combined
Previously, using the dma_alloc_wc() API, we created cache-coherent (mapped as write-back) mappings. Because we disable MMU600 snooping, costly page walks and cache flushes were required after each page table modification.

With write-combined buffers, a single write memory barrier is enough to flush the write-combined buffers to memory, which simplifies the driver and significantly reduces the time of map/unmap operations. Mapping time of 255 MB is reduced from 2.5 ms to 500 us.

Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231028155936.1183342-7-stanislaw.gruszka@linux.intel.com
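For orientation, here is a minimal standalone sketch (not part of the commit) of the pattern this change relies on: once the page table pages are mapped write-combined, a run of PTE stores needs only a single wmb() before the device TLB invalidation, instead of a clflush_cache_range() pass over every touched table. The helper name example_update_ptes, its parameters, and the 4 KB-per-entry stride are illustrative assumptions; IVPU_MMU_ENTRY_MAPPED, wmb() and the overall flow are taken from the diff below.

#include <linux/types.h>   /* u64, dma_addr_t */
#include <linux/mm.h>      /* PAGE_SIZE */
#include <asm/barrier.h>   /* wmb() */

/* Hypothetical helper: fill a run of entries in a write-combined PTE table.
 * IVPU_MMU_ENTRY_MAPPED comes from ivpu_mmu_context.c (see the diff below).
 */
static void example_update_ptes(u64 *pte_table, dma_addr_t dma_addr, int count)
{
        int i;

        /* Stores through the WC mapping are buffered by the CPU, not cached... */
        for (i = 0; i < count; i++)
                pte_table[i] = (dma_addr + i * PAGE_SIZE) | IVPU_MMU_ENTRY_MAPPED;

        /* ...so one write barrier drains them to memory for the non-snooping MMU600. */
        wmb();

        /* The caller then invalidates the device TLB, as ivpu_mmu_context_map_sgt() does. */
}

In the driver the barrier is issued once per map/unmap call, right before ivpu_mmu_invalidate_tlb(), which is what turns many per-table cache flushes into one cheap drain of the write-combining buffers.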
1 parent: e013aa9 · commit: 3bcc520

File tree

1 file changed: +63 -52 lines changed


drivers/accel/ivpu/ivpu_mmu_context.c

Lines changed: 63 additions & 52 deletions
@@ -5,6 +5,9 @@
 
 #include <linux/bitfield.h>
 #include <linux/highmem.h>
+#include <linux/set_memory.h>
+
+#include <drm/drm_cache.h>
 
 #include "ivpu_drv.h"
 #include "ivpu_hw.h"
@@ -38,12 +41,57 @@
 #define IVPU_MMU_ENTRY_MAPPED  (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
                                 IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
 
+static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
+{
+        dma_addr_t dma_addr;
+        struct page *page;
+        void *cpu;
+
+        page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+        if (!page)
+                return NULL;
+
+        set_pages_array_wc(&page, 1);
+
+        dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+        if (dma_mapping_error(vdev->drm.dev, dma_addr))
+                goto err_free_page;
+
+        cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+        if (!cpu)
+                goto err_dma_unmap_page;
+
+
+        *dma = dma_addr;
+        return cpu;
+
+err_dma_unmap_page:
+        dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+err_free_page:
+        put_page(page);
+        return NULL;
+}
+
+static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
+{
+        struct page *page;
+
+        if (cpu_addr) {
+                page = vmalloc_to_page(cpu_addr);
+                vunmap(cpu_addr);
+                dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
+                               DMA_BIDIRECTIONAL);
+                set_pages_array_wb(&page, 1);
+                put_page(page);
+        }
+}
+
 static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
         dma_addr_t pgd_dma;
 
-        pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
-                                                  GFP_KERNEL);
+        pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
         if (!pgtable->pgd_dma_ptr)
                 return -ENOMEM;
 
@@ -52,13 +100,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtab
         return 0;
 }
 
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
-{
-        if (cpu_addr)
-                dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
-                                  dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
-}
-
 static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
 {
         int pgd_idx, pud_idx, pmd_idx;
@@ -83,19 +124,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
                                 pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
                                 pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
 
-                                ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+                                ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma);
                         }
 
                         kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
-                        ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+                        ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
                 }
 
                 kfree(pgtable->pmd_ptrs[pgd_idx]);
                 kfree(pgtable->pte_ptrs[pgd_idx]);
-                ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+                ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
         }
 
-        ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+        ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
 }
 
 static u64*
@@ -107,7 +148,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
         if (pud_dma_ptr)
                 return pud_dma_ptr;
 
-        pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+        pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
         if (!pud_dma_ptr)
                 return NULL;
 
@@ -130,7 +171,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
         kfree(pgtable->pmd_ptrs[pgd_idx]);
 
 err_free_pud_dma_ptr:
-        ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+        ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
         return NULL;
 }
 
@@ -144,7 +185,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
         if (pmd_dma_ptr)
                 return pmd_dma_ptr;
 
-        pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+        pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
         if (!pmd_dma_ptr)
                 return NULL;
 
@@ -159,7 +200,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
         return pmd_dma_ptr;
 
 err_free_pmd_dma_ptr:
-        ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+        ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
         return NULL;
 }
 
@@ -173,7 +214,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
         if (pte_dma_ptr)
                 return pte_dma_ptr;
 
-        pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+        pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
         if (!pte_dma_ptr)
                 return NULL;
 
@@ -248,38 +289,6 @@ static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_ad
         ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
 }
 
-static void
-ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
-{
-        struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
-        u64 end_addr = vpu_addr + size;
-
-        /* Align to PMD entry (2 MB) */
-        vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
-
-        while (vpu_addr < end_addr) {
-                int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
-                u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
-
-                while (vpu_addr < end_addr && vpu_addr < pud_end) {
-                        int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
-                        u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
-
-                        while (vpu_addr < end_addr && vpu_addr < pmd_end) {
-                                int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-
-                                clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
-                                                    IVPU_MMU_PGTABLE_SIZE);
-                                vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
-                        }
-                        clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
-                                            IVPU_MMU_PGTABLE_SIZE);
-                }
-                clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
-        }
-        clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
-}
-
 static int
 ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
                            u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
@@ -352,10 +361,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
                         mutex_unlock(&ctx->lock);
                         return ret;
                 }
-                ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
                 vpu_addr += size;
         }
 
+        /* Ensure page table modifications are flushed from wc buffers to memory */
+        wmb();
         mutex_unlock(&ctx->lock);
 
         ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -381,10 +391,11 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
                 size_t size = sg_dma_len(sg) + sg->offset;
 
                 ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
-                ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
                 vpu_addr += size;
         }
 
+        /* Ensure page table modifications are flushed from wc buffers to memory */
+        wmb();
         mutex_unlock(&ctx->lock);
 
         ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
