Skip to content

Commit c7d9f77

Browse files
Nitin Gupta authored and davem330 committed
sparc64: Multi-page size support
Add support for using multiple hugepage sizes simultaneously on mainline. Currently, support for 256M has been added which can be used along with 8M pages. Page tables are set like this (e.g. for 256M page): VA + (8M * x) -> PA + (8M * x) (sz bit = 256M) where x in [0, 31] and TSB is set similarly: VA + (4M * x) -> PA + (4M * x) (sz bit = 256M) where x in [0, 63] - Testing Tested on Sonoma (which supports 256M pages) by running stream benchmark instances in parallel: one instance uses 8M pages and another uses 256M pages, consuming 48G each. Boot params used: default_hugepagesz=256M hugepagesz=256M hugepages=300 hugepagesz=8M hugepages=10000 Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 0d88b86 commit c7d9f77

File tree

8 files changed

+253
-62
lines changed

8 files changed

+253
-62
lines changed

arch/sparc/include/asm/page_64.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
#define HPAGE_SHIFT 23
1919
#define REAL_HPAGE_SHIFT 22
20-
20+
#define HPAGE_256MB_SHIFT 28
2121
#define REAL_HPAGE_SIZE (_AC(1,UL) << REAL_HPAGE_SHIFT)
2222

2323
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
@@ -26,6 +26,7 @@
2626
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
2727
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
2828
#define REAL_HPAGE_PER_HPAGE (_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
29+
#define HUGE_MAX_HSTATE 2
2930
#endif
3031

3132
#ifndef __ASSEMBLY__

arch/sparc/include/asm/pgtable_64.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,10 @@ static inline pgprot_t pgprot_noncached(pgprot_t prot)
375375
#define pgprot_noncached pgprot_noncached
376376

377377
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
378-
static inline unsigned long __pte_huge_mask(void)
378+
extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
379+
struct page *page, int writable);
380+
#define arch_make_huge_pte arch_make_huge_pte
381+
static inline unsigned long __pte_default_huge_mask(void)
379382
{
380383
unsigned long mask;
381384

@@ -395,12 +398,14 @@ static inline unsigned long __pte_huge_mask(void)
395398

396399
static inline pte_t pte_mkhuge(pte_t pte)
397400
{
398-
return __pte(pte_val(pte) | _PAGE_PMD_HUGE | __pte_huge_mask());
401+
return __pte(pte_val(pte) | __pte_default_huge_mask());
399402
}
400403

401-
static inline bool is_hugetlb_pte(pte_t pte)
404+
static inline bool is_default_hugetlb_pte(pte_t pte)
402405
{
403-
return !!(pte_val(pte) & __pte_huge_mask());
406+
unsigned long mask = __pte_default_huge_mask();
407+
408+
return (pte_val(pte) & mask) == mask;
404409
}
405410

406411
static inline bool is_hugetlb_pmd(pmd_t pmd)
@@ -875,10 +880,12 @@ static inline unsigned long pud_pfn(pud_t pud)
875880

876881
/* Actual page table PTE updates. */
877882
void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
878-
pte_t *ptep, pte_t orig, int fullmm);
883+
pte_t *ptep, pte_t orig, int fullmm,
884+
unsigned int hugepage_shift);
879885

880886
static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
881-
pte_t *ptep, pte_t orig, int fullmm)
887+
pte_t *ptep, pte_t orig, int fullmm,
888+
unsigned int hugepage_shift)
882889
{
883890
/* It is more efficient to let flush_tlb_kernel_range()
884891
* handle init_mm tlb flushes.
@@ -887,7 +894,7 @@ static void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
887894
* and SUN4V pte layout, so this inline test is fine.
888895
*/
889896
if (likely(mm != &init_mm) && pte_accessible(mm, orig))
890-
tlb_batch_add(mm, vaddr, ptep, orig, fullmm);
897+
tlb_batch_add(mm, vaddr, ptep, orig, fullmm, hugepage_shift);
891898
}
892899

893900
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -906,7 +913,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
906913
pte_t orig = *ptep;
907914

908915
*ptep = pte;
909-
maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm);
916+
maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm, PAGE_SHIFT);
910917
}
911918

912919
#define set_pte_at(mm,addr,ptep,pte) \

arch/sparc/include/asm/tlbflush_64.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
#define TLB_BATCH_NR 192
99

1010
struct tlb_batch {
11-
bool huge;
11+
unsigned int hugepage_shift;
1212
struct mm_struct *mm;
1313
unsigned long tlb_nr;
1414
unsigned long active;
@@ -17,7 +17,8 @@ struct tlb_batch {
1717

1818
void flush_tsb_kernel_range(unsigned long start, unsigned long end);
1919
void flush_tsb_user(struct tlb_batch *tb);
20-
void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge);
20+
void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
21+
unsigned int hugepage_shift);
2122

2223
/* TLB flush operations. */
2324

arch/sparc/kernel/tsb.S

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -117,26 +117,11 @@ tsb_miss_page_table_walk_sun4v_fastpath:
117117
/* Valid PTE is now in %g5. */
118118

119119
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
120-
661: sethi %uhi(_PAGE_SZALL_4U), %g7
120+
sethi %uhi(_PAGE_PMD_HUGE), %g7
121121
sllx %g7, 32, %g7
122-
.section .sun4v_2insn_patch, "ax"
123-
.word 661b
124-
mov _PAGE_SZALL_4V, %g7
125-
nop
126-
.previous
127-
128-
and %g5, %g7, %g2
129-
130-
661: sethi %uhi(_PAGE_SZHUGE_4U), %g7
131-
sllx %g7, 32, %g7
132-
.section .sun4v_2insn_patch, "ax"
133-
.word 661b
134-
mov _PAGE_SZHUGE_4V, %g7
135-
nop
136-
.previous
137122

138-
cmp %g2, %g7
139-
bne,pt %xcc, 60f
123+
andcc %g5, %g7, %g0
124+
be,pt %xcc, 60f
140125
nop
141126

142127
/* It is a huge page, use huge page TSB entry address we

arch/sparc/mm/hugetlbpage.c

Lines changed: 144 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
2828
unsigned long pgoff,
2929
unsigned long flags)
3030
{
31+
struct hstate *h = hstate_file(filp);
3132
unsigned long task_size = TASK_SIZE;
3233
struct vm_unmapped_area_info info;
3334

@@ -38,7 +39,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
3839
info.length = len;
3940
info.low_limit = TASK_UNMAPPED_BASE;
4041
info.high_limit = min(task_size, VA_EXCLUDE_START);
41-
info.align_mask = PAGE_MASK & ~HPAGE_MASK;
42+
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
4243
info.align_offset = 0;
4344
addr = vm_unmapped_area(&info);
4445

@@ -58,6 +59,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
5859
const unsigned long pgoff,
5960
const unsigned long flags)
6061
{
62+
struct hstate *h = hstate_file(filp);
6163
struct mm_struct *mm = current->mm;
6264
unsigned long addr = addr0;
6365
struct vm_unmapped_area_info info;
@@ -69,7 +71,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
6971
info.length = len;
7072
info.low_limit = PAGE_SIZE;
7173
info.high_limit = mm->mmap_base;
72-
info.align_mask = PAGE_MASK & ~HPAGE_MASK;
74+
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
7375
info.align_offset = 0;
7476
addr = vm_unmapped_area(&info);
7577

@@ -94,14 +96,15 @@ unsigned long
9496
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
9597
unsigned long len, unsigned long pgoff, unsigned long flags)
9698
{
99+
struct hstate *h = hstate_file(file);
97100
struct mm_struct *mm = current->mm;
98101
struct vm_area_struct *vma;
99102
unsigned long task_size = TASK_SIZE;
100103

101104
if (test_thread_flag(TIF_32BIT))
102105
task_size = STACK_TOP32;
103106

104-
if (len & ~HPAGE_MASK)
107+
if (len & ~huge_page_mask(h))
105108
return -EINVAL;
106109
if (len > task_size)
107110
return -ENOMEM;
@@ -113,7 +116,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
113116
}
114117

115118
if (addr) {
116-
addr = ALIGN(addr, HPAGE_SIZE);
119+
addr = ALIGN(addr, huge_page_size(h));
117120
vma = find_vma(mm, addr);
118121
if (task_size - len >= addr &&
119122
(!vma || addr + len <= vma->vm_start))
@@ -127,6 +130,112 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
127130
pgoff, flags);
128131
}
129132

133+
static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
134+
{
135+
return entry;
136+
}
137+
138+
static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
139+
{
140+
unsigned long hugepage_size = _PAGE_SZ4MB_4V;
141+
142+
pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
143+
144+
switch (shift) {
145+
case HPAGE_256MB_SHIFT:
146+
hugepage_size = _PAGE_SZ256MB_4V;
147+
pte_val(entry) |= _PAGE_PMD_HUGE;
148+
break;
149+
case HPAGE_SHIFT:
150+
pte_val(entry) |= _PAGE_PMD_HUGE;
151+
break;
152+
default:
153+
WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
154+
}
155+
156+
pte_val(entry) = pte_val(entry) | hugepage_size;
157+
return entry;
158+
}
159+
160+
static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
161+
{
162+
if (tlb_type == hypervisor)
163+
return sun4v_hugepage_shift_to_tte(entry, shift);
164+
else
165+
return sun4u_hugepage_shift_to_tte(entry, shift);
166+
}
167+
168+
pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
169+
struct page *page, int writeable)
170+
{
171+
unsigned int shift = huge_page_shift(hstate_vma(vma));
172+
173+
return hugepage_shift_to_tte(entry, shift);
174+
}
175+
176+
static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
177+
{
178+
unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
179+
unsigned int shift;
180+
181+
switch (tte_szbits) {
182+
case _PAGE_SZ256MB_4V:
183+
shift = HPAGE_256MB_SHIFT;
184+
break;
185+
case _PAGE_SZ4MB_4V:
186+
shift = REAL_HPAGE_SHIFT;
187+
break;
188+
default:
189+
shift = PAGE_SHIFT;
190+
break;
191+
}
192+
return shift;
193+
}
194+
195+
static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
196+
{
197+
unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
198+
unsigned int shift;
199+
200+
switch (tte_szbits) {
201+
case _PAGE_SZ256MB_4U:
202+
shift = HPAGE_256MB_SHIFT;
203+
break;
204+
case _PAGE_SZ4MB_4U:
205+
shift = REAL_HPAGE_SHIFT;
206+
break;
207+
default:
208+
shift = PAGE_SHIFT;
209+
break;
210+
}
211+
return shift;
212+
}
213+
214+
static unsigned int huge_tte_to_shift(pte_t entry)
215+
{
216+
unsigned long shift;
217+
218+
if (tlb_type == hypervisor)
219+
shift = sun4v_huge_tte_to_shift(entry);
220+
else
221+
shift = sun4u_huge_tte_to_shift(entry);
222+
223+
if (shift == PAGE_SHIFT)
224+
WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n",
225+
pte_val(entry));
226+
227+
return shift;
228+
}
229+
230+
static unsigned long huge_tte_to_size(pte_t pte)
231+
{
232+
unsigned long size = 1UL << huge_tte_to_shift(pte);
233+
234+
if (size == REAL_HPAGE_SIZE)
235+
size = HPAGE_SIZE;
236+
return size;
237+
}
238+
130239
pte_t *huge_pte_alloc(struct mm_struct *mm,
131240
unsigned long addr, unsigned long sz)
132241
{
@@ -160,35 +269,54 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
160269
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
161270
pte_t *ptep, pte_t entry)
162271
{
272+
unsigned int i, nptes, hugepage_shift;
273+
unsigned long size;
163274
pte_t orig;
164275

276+
size = huge_tte_to_size(entry);
277+
nptes = size >> PMD_SHIFT;
278+
165279
if (!pte_present(*ptep) && pte_present(entry))
166-
mm->context.hugetlb_pte_count++;
280+
mm->context.hugetlb_pte_count += nptes;
167281

168-
addr &= HPAGE_MASK;
282+
addr &= ~(size - 1);
169283
orig = *ptep;
170-
*ptep = entry;
284+
hugepage_shift = pte_none(orig) ? PAGE_SIZE : huge_tte_to_shift(orig);
171285

172-
/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
173-
maybe_tlb_batch_add(mm, addr, ptep, orig, 0);
174-
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0);
286+
for (i = 0; i < nptes; i++)
287+
ptep[i] = __pte(pte_val(entry) + (i << PMD_SHIFT));
288+
289+
maybe_tlb_batch_add(mm, addr, ptep, orig, 0, hugepage_shift);
290+
/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
291+
if (size == HPAGE_SIZE)
292+
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
293+
hugepage_shift);
175294
}
176295

177296
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
178297
pte_t *ptep)
179298
{
299+
unsigned int i, nptes, hugepage_shift;
300+
unsigned long size;
180301
pte_t entry;
181302

182303
entry = *ptep;
304+
size = huge_tte_to_size(entry);
305+
nptes = size >> PMD_SHIFT;
306+
hugepage_shift = pte_none(entry) ? PAGE_SIZE : huge_tte_to_shift(entry);
307+
183308
if (pte_present(entry))
184-
mm->context.hugetlb_pte_count--;
309+
mm->context.hugetlb_pte_count -= nptes;
185310

186-
addr &= HPAGE_MASK;
187-
*ptep = __pte(0UL);
311+
addr &= ~(size - 1);
312+
for (i = 0; i < nptes; i++)
313+
ptep[i] = __pte(0UL);
188314

189-
/* Issue TLB flush at REAL_HPAGE_SIZE boundaries */
190-
maybe_tlb_batch_add(mm, addr, ptep, entry, 0);
191-
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0);
315+
maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
316+
/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
317+
if (size == HPAGE_SIZE)
318+
maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
319+
hugepage_shift);
192320

193321
return entry;
194322
}

0 commit comments

Comments (0)