Skip to content

Commit 0807b85

Browse files
gerald-schaefer authored and hca committed
s390/mm: add support for RDP (Reset DAT-Protection)
RDP instruction allows to reset DAT-protection bit in a PTE, with less CPU synchronization overhead than IPTE instruction. In particular, IPTE can cause machine-wide synchronization overhead, and excessive IPTE usage can negatively impact machine performance. RDP can be used instead of IPTE, if the new PTE only differs in SW bits and _PAGE_PROTECT HW bit, for PTE protection changes from RO to RW. SW PTE bit changes are allowed, e.g. for dirty and young tracking, but none of the other HW-defined part of the PTE must change. This is because the architecture forbids such changes to an active and valid PTE, which is why invalidation with IPTE is always used first, before writing a new entry. The RDP optimization helps mainly for fault-driven SW dirty-bit tracking. Writable PTEs are initially always mapped with HW _PAGE_PROTECT bit set, to allow SW dirty-bit accounting on first write protection fault, where the DAT-protection would then be reset. The reset is now done with RDP instead of IPTE, if RDP instruction is available. RDP cannot always guarantee that the DAT-protection reset is propagated to all CPUs immediately. This means that spurious TLB protection faults on other CPUs can now occur. For this, common code provides a flush_tlb_fix_spurious_fault() handler, which will now be used to do a CPU-local TLB flush. However, this will clear the whole TLB of a CPU, and not just the affected entry. For more fine-grained flushing, by simply doing a (local) RDP again, flush_tlb_fix_spurious_fault() would need to also provide the PTE pointer. Note that spurious TLB protection faults cannot really be distinguished from racing pagetable updates, where another thread already installed the correct PTE. In such a case, the local TLB flush would be unnecessary overhead, but overall reduction of CPU synchronization overhead by not using IPTE is still expected to be beneficial. 
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com> Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
1 parent d939474 commit 0807b85

File tree

4 files changed

+90
-1
lines changed

4 files changed

+90
-1
lines changed

arch/s390/include/asm/pgtable.h

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,12 +182,20 @@ static inline int is_module_addr(void *addr)
182182
#define _PAGE_SOFT_DIRTY 0x000
183183
#endif
184184

185+
#define _PAGE_SW_BITS 0xffUL /* All SW bits */
186+
185187
#define _PAGE_SWP_EXCLUSIVE _PAGE_LARGE /* SW pte exclusive swap bit */
186188

187189
/* Set of bits not changed in pte_modify */
188190
#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
189191
_PAGE_YOUNG | _PAGE_SOFT_DIRTY)
190192

193+
/*
194+
* Mask of bits that must not be changed with RDP. Allow only _PAGE_PROTECT
195+
* HW bit and all SW bits.
196+
*/
197+
#define _PAGE_RDP_MASK ~(_PAGE_PROTECT | _PAGE_SW_BITS)
198+
191199
/*
192200
* handle_pte_fault uses pte_present and pte_none to find out the pte type
193201
* WITHOUT holding the page table lock. The _PAGE_PRESENT bit is used to
@@ -1052,6 +1060,19 @@ static inline pte_t pte_mkhuge(pte_t pte)
10521060
#define IPTE_NODAT 0x400
10531061
#define IPTE_GUEST_ASCE 0x800
10541062

1063+
/*
 * Issue the RDP (Reset DAT-Protection) instruction for the PTE at @ptep.
 *
 * @addr:  virtual address mapped by the PTE (page offset bits are masked off,
 *         optional flags from @opt are OR'ed into the second operand)
 * @ptep:  pointer to the page table entry whose DAT-protection is reset
 * @opt:   instruction option bits (0 from the callers visible in this commit)
 * @asce:  address space control element operand (0 from visible callers)
 * @local: immediate m4 field; selects the CPU-local form of the instruction
 *         (must be a compile-time constant, hence __always_inline and "i")
 */
static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep,
				       unsigned long opt, unsigned long asce,
				       int local)
{
	unsigned long pto;

	/* Page-table origin: physical address of ptep with the entry offset masked off */
	pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
	/*
	 * RDP, RRF-format opcode 0xb98b, emitted via .insn so older
	 * assemblers that do not know the mnemonic can still build this.
	 * "+m" (*ptep) tells the compiler the PTE is read and modified.
	 */
	asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]"
		     : "+m" (*ptep)
		     : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt),
		       [asce] "a" (asce), [m4] "i" (local));
}
1075+
10551076
static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep,
10561077
unsigned long opt, unsigned long asce,
10571078
int local)
@@ -1202,14 +1223,53 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
12021223
ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte));
12031224
}
12041225

1226+
/*
1227+
* Check if PTEs only differ in _PAGE_PROTECT HW bit, but also allow SW PTE
1228+
* bits in the comparison. Those might change e.g. because of dirty and young
1229+
* tracking.
1230+
*/
1231+
static inline int pte_allow_rdp(pte_t old, pte_t new)
1232+
{
1233+
/*
1234+
* Only allow changes from RO to RW
1235+
*/
1236+
if (!(pte_val(old) & _PAGE_PROTECT) || pte_val(new) & _PAGE_PROTECT)
1237+
return 0;
1238+
1239+
return (pte_val(old) & _PAGE_RDP_MASK) == (pte_val(new) & _PAGE_RDP_MASK);
1240+
}
1241+
1242+
/*
 * Arch override of the common-code spurious-fault handler: do a CPU-local
 * TLB flush, because RDP does not guarantee immediate propagation of the
 * DAT-protection reset to all CPUs.
 */
static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
						unsigned long address)
{
	/*
	 * RDP might not have propagated the PTE protection reset to all CPUs,
	 * so there could be spurious TLB protection faults.
	 * NOTE: This will also be called when a racing pagetable update on
	 * another thread already installed the correct PTE. Both cases cannot
	 * really be distinguished.
	 * Therefore, only do the local TLB flush when RDP can be used, to avoid
	 * unnecessary overhead.
	 */
	if (MACHINE_HAS_RDP)
		asm volatile("ptlb" : : : "memory");	/* purge this CPU's whole TLB */
}
/* Tell common code the architecture provides its own implementation */
#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
1258+
1259+
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
1260+
pte_t new);
1261+
12051262
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
12061263
static inline int ptep_set_access_flags(struct vm_area_struct *vma,
12071264
unsigned long addr, pte_t *ptep,
12081265
pte_t entry, int dirty)
12091266
{
12101267
if (pte_same(*ptep, entry))
12111268
return 0;
1212-
ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
1269+
if (MACHINE_HAS_RDP && !mm_has_pgste(vma->vm_mm) && pte_allow_rdp(*ptep, entry))
1270+
ptep_reset_dat_prot(vma->vm_mm, addr, ptep, entry);
1271+
else
1272+
ptep_xchg_direct(vma->vm_mm, addr, ptep, entry);
12131273
return 1;
12141274
}
12151275

arch/s390/include/asm/setup.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#define MACHINE_FLAG_GS BIT(16)
3535
#define MACHINE_FLAG_SCC BIT(17)
3636
#define MACHINE_FLAG_PCI_MIO BIT(18)
37+
#define MACHINE_FLAG_RDP BIT(19)
3738

3839
#define LPP_MAGIC BIT(31)
3940
#define LPP_PID_MASK _AC(0xffffffff, UL)
@@ -99,6 +100,7 @@ extern unsigned long mio_wb_bit_mask;
99100
#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
100101
#define MACHINE_HAS_SCC (S390_lowcore.machine_flags & MACHINE_FLAG_SCC)
101102
#define MACHINE_HAS_PCI_MIO (S390_lowcore.machine_flags & MACHINE_FLAG_PCI_MIO)
103+
#define MACHINE_HAS_RDP (S390_lowcore.machine_flags & MACHINE_FLAG_RDP)
102104

103105
/*
104106
* Console mode. Override with conmode=

arch/s390/kernel/early.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ static __init void detect_machine_facilities(void)
226226
S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO;
227227
/* the control bit is set during PCI initialization */
228228
}
229+
if (test_facility(194))
230+
S390_lowcore.machine_flags |= MACHINE_FLAG_RDP;
229231
}
230232

231233
static inline void save_vector_registers(void)

arch/s390/mm/pgtable.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,31 @@ pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
302302
}
303303
EXPORT_SYMBOL(ptep_xchg_direct);
304304

305+
/*
 * Caller must check that new PTE only differs in _PAGE_PROTECT HW bit, so that
 * RDP can be used instead of IPTE. See also comments at pte_allow_rdp().
 */
void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
			 pte_t new)
{
	preempt_disable();
	/* flush_count participates in the mm's TLB-flush synchronization protocol */
	atomic_inc(&mm->context.flush_count);
	/* Use the CPU-local RDP form only if this mm is active on this CPU alone */
	if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_rdp(addr, ptep, 0, 0, 1);
	else
		__ptep_rdp(addr, ptep, 0, 0, 0);
	/*
	 * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That
	 * means it is still valid and active, and must not be changed according
	 * to the architecture. But writing a new value that only differs in SW
	 * bits is allowed.
	 */
	set_pte(ptep, new);
	atomic_dec(&mm->context.flush_count);
	preempt_enable();
}
EXPORT_SYMBOL(ptep_reset_dat_prot);
329+
305330
pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
306331
pte_t *ptep, pte_t new)
307332
{

0 commit comments

Comments
 (0)