Skip to content

Commit a6a74e0

Browse files
ahrens authored and Christopher Siden committed
5498 kmem_reap does one xcall per page
5514 hat_unload_callback passes the wrong length to segvn_hat_unload_callback
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
1 parent fca543c commit a6a74e0

File tree

4 files changed

+88
-42
lines changed

4 files changed

+88
-42
lines changed

usr/src/uts/i86pc/vm/hat_i86.c

Lines changed: 71 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
*/
2828
/*
2929
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30+
* Copyright (c) 2014, 2015 by Delphix. All rights reserved.
3031
*/
3132

3233
/*
@@ -1928,6 +1929,7 @@ hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
19281929
{
19291930
hat_t *hat = (hat_t *)a1;
19301931
caddr_t addr = (caddr_t)a2;
1932+
size_t len = (size_t)a3;
19311933

19321934
/*
19331935
* If the target hat isn't the kernel and this CPU isn't operating
@@ -1937,10 +1939,11 @@ hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
19371939
return (0);
19381940

19391941
/*
1940-
* For a normal address, we just flush one page mapping
1942+
* For a normal address, we flush a range of contiguous mappings
19411943
*/
19421944
if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
1943-
mmu_tlbflush_entry(addr);
1945+
for (size_t i = 0; i < len; i += MMU_PAGESIZE)
1946+
mmu_tlbflush_entry(addr + i);
19441947
return (0);
19451948
}
19461949

@@ -2035,7 +2038,7 @@ tlb_service(void)
20352038
* all CPUs using a given hat.
20362039
*/
20372040
void
2038-
hat_tlb_inval(hat_t *hat, uintptr_t va)
2041+
hat_tlb_inval_range(hat_t *hat, uintptr_t va, size_t len)
20392042
{
20402043
extern int flushes_require_xcalls; /* from mp_startup.c */
20412044
cpuset_t justme;
@@ -2068,12 +2071,15 @@ hat_tlb_inval(hat_t *hat, uintptr_t va)
20682071
*/
20692072
if (panicstr || !flushes_require_xcalls) {
20702073
#ifdef __xpv
2071-
if (va == DEMAP_ALL_ADDR)
2074+
if (va == DEMAP_ALL_ADDR) {
20722075
xen_flush_tlb();
2073-
else
2074-
xen_flush_va((caddr_t)va);
2076+
} else {
2077+
for (size_t i = 0; i < len; i += MMU_PAGESIZE)
2078+
xen_flush_va((caddr_t)(va + i));
2079+
}
20752080
#else
2076-
(void) hati_demap_func((xc_arg_t)hat, (xc_arg_t)va, NULL);
2081+
(void) hati_demap_func((xc_arg_t)hat,
2082+
(xc_arg_t)va, (xc_arg_t)len);
20772083
#endif
20782084
return;
20792085
}
@@ -2124,31 +2130,44 @@ hat_tlb_inval(hat_t *hat, uintptr_t va)
21242130
CPUSET_ISEQUAL(cpus_to_shootdown, justme)) {
21252131

21262132
#ifdef __xpv
2127-
if (va == DEMAP_ALL_ADDR)
2133+
if (va == DEMAP_ALL_ADDR) {
21282134
xen_flush_tlb();
2129-
else
2130-
xen_flush_va((caddr_t)va);
2135+
} else {
2136+
for (size_t i = 0; i < len; i += MMU_PAGESIZE)
2137+
xen_flush_va((caddr_t)(va + i));
2138+
}
21312139
#else
2132-
(void) hati_demap_func((xc_arg_t)hat, (xc_arg_t)va, NULL);
2140+
(void) hati_demap_func((xc_arg_t)hat,
2141+
(xc_arg_t)va, (xc_arg_t)len);
21332142
#endif
21342143

21352144
} else {
21362145

21372146
CPUSET_ADD(cpus_to_shootdown, CPU->cpu_id);
21382147
#ifdef __xpv
2139-
if (va == DEMAP_ALL_ADDR)
2148+
if (va == DEMAP_ALL_ADDR) {
21402149
xen_gflush_tlb(cpus_to_shootdown);
2141-
else
2142-
xen_gflush_va((caddr_t)va, cpus_to_shootdown);
2150+
} else {
2151+
for (size_t i = 0; i < len; i += MMU_PAGESIZE) {
2152+
xen_gflush_va((caddr_t)(va + i),
2153+
cpus_to_shootdown);
2154+
}
2155+
}
21432156
#else
2144-
xc_call((xc_arg_t)hat, (xc_arg_t)va, NULL,
2157+
xc_call((xc_arg_t)hat, (xc_arg_t)va, (xc_arg_t)len,
21452158
CPUSET2BV(cpus_to_shootdown), hati_demap_func);
21462159
#endif
21472160

21482161
}
21492162
kpreempt_enable();
21502163
}
21512164

2165+
void
2166+
hat_tlb_inval(hat_t *hat, uintptr_t va)
2167+
{
2168+
hat_tlb_inval_range(hat, va, MMU_PAGESIZE);
2169+
}
2170+
21522171
/*
21532172
* Interior routine for HAT_UNLOADs from hat_unload_callback(),
21542173
* hat_kmap_unload() OR from hat_steal() code. This routine doesn't
@@ -2160,7 +2179,8 @@ hat_pte_unmap(
21602179
uint_t entry,
21612180
uint_t flags,
21622181
x86pte_t old_pte,
2163-
void *pte_ptr)
2182+
void *pte_ptr,
2183+
boolean_t tlb)
21642184
{
21652185
hat_t *hat = ht->ht_hat;
21662186
hment_t *hm = NULL;
@@ -2202,7 +2222,7 @@ hat_pte_unmap(
22022222
x86_hm_enter(pp);
22032223
}
22042224

2205-
old_pte = x86pte_inval(ht, entry, old_pte, pte_ptr);
2225+
old_pte = x86pte_inval(ht, entry, old_pte, pte_ptr, tlb);
22062226

22072227
/*
22082228
* If the page hadn't changed we've unmapped it and can proceed
@@ -2283,7 +2303,7 @@ hat_kmap_unload(caddr_t addr, size_t len, uint_t flags)
22832303
/*
22842304
* use mostly common code to unmap it.
22852305
*/
2286-
hat_pte_unmap(ht, entry, flags, old_pte, pte_ptr);
2306+
hat_pte_unmap(ht, entry, flags, old_pte, pte_ptr, B_TRUE);
22872307
}
22882308
}
22892309

@@ -2320,19 +2340,26 @@ typedef struct range_info {
23202340
level_t rng_level;
23212341
} range_info_t;
23222342

2343+
/*
2344+
* Invalidate the TLB, and perform the callback to the upper level VM system,
2345+
* for the specified ranges of contiguous pages.
2346+
*/
23232347
static void
2324-
handle_ranges(hat_callback_t *cb, uint_t cnt, range_info_t *range)
2348+
handle_ranges(hat_t *hat, hat_callback_t *cb, uint_t cnt, range_info_t *range)
23252349
{
2326-
/*
2327-
* do callbacks to upper level VM system
2328-
*/
2329-
while (cb != NULL && cnt > 0) {
2350+
while (cnt > 0) {
2351+
size_t len;
2352+
23302353
--cnt;
2331-
cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
2332-
cb->hcb_end_addr = cb->hcb_start_addr;
2333-
cb->hcb_end_addr +=
2334-
range[cnt].rng_cnt << LEVEL_SIZE(range[cnt].rng_level);
2335-
cb->hcb_function(cb);
2354+
len = range[cnt].rng_cnt << LEVEL_SHIFT(range[cnt].rng_level);
2355+
hat_tlb_inval_range(hat, (uintptr_t)range[cnt].rng_va, len);
2356+
2357+
if (cb != NULL) {
2358+
cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
2359+
cb->hcb_end_addr = cb->hcb_start_addr;
2360+
cb->hcb_end_addr += len;
2361+
cb->hcb_function(cb);
2362+
}
23362363
}
23372364
}
23382365

@@ -2376,8 +2403,10 @@ hat_unload_callback(
23762403
if (cb == NULL && len == MMU_PAGESIZE) {
23772404
ht = htable_getpte(hat, vaddr, &entry, &old_pte, 0);
23782405
if (ht != NULL) {
2379-
if (PTE_ISVALID(old_pte))
2380-
hat_pte_unmap(ht, entry, flags, old_pte, NULL);
2406+
if (PTE_ISVALID(old_pte)) {
2407+
hat_pte_unmap(ht, entry, flags, old_pte,
2408+
NULL, B_TRUE);
2409+
}
23812410
htable_release(ht);
23822411
}
23832412
XPV_ALLOW_MIGRATE();
@@ -2400,7 +2429,7 @@ hat_unload_callback(
24002429
if (vaddr != contig_va ||
24012430
(r_cnt > 0 && r[r_cnt - 1].rng_level != ht->ht_level)) {
24022431
if (r_cnt == MAX_UNLOAD_CNT) {
2403-
handle_ranges(cb, r_cnt, r);
2432+
handle_ranges(hat, cb, r_cnt, r);
24042433
r_cnt = 0;
24052434
}
24062435
r[r_cnt].rng_va = vaddr;
@@ -2410,10 +2439,16 @@ hat_unload_callback(
24102439
}
24112440

24122441
/*
2413-
* Unload one mapping from the page tables.
2442+
* Unload one mapping (for a single page) from the page tables.
2443+
* Note that we do not remove the mapping from the TLB yet,
2444+
* as indicated by the tlb=FALSE argument to hat_pte_unmap().
2445+
* handle_ranges() will clear the TLB entries with one call to
2446+
* hat_tlb_inval_range() per contiguous range. This is
2447+
* safe because the page can not be reused until the
2448+
* callback is made (or we return).
24142449
*/
24152450
entry = htable_va2entry(vaddr, ht);
2416-
hat_pte_unmap(ht, entry, flags, old_pte, NULL);
2451+
hat_pte_unmap(ht, entry, flags, old_pte, NULL, B_FALSE);
24172452
ASSERT(ht->ht_level <= mmu.max_page_level);
24182453
vaddr += LEVEL_SIZE(ht->ht_level);
24192454
contig_va = vaddr;
@@ -2426,7 +2461,7 @@ hat_unload_callback(
24262461
* handle last range for callbacks
24272462
*/
24282463
if (r_cnt > 0)
2429-
handle_ranges(cb, r_cnt, r);
2464+
handle_ranges(hat, cb, r_cnt, r);
24302465
XPV_ALLOW_MIGRATE();
24312466
}
24322467

@@ -3314,7 +3349,7 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
33143349
/*
33153350
* Invalidate the PTE and remove the hment.
33163351
*/
3317-
old_pte = x86pte_inval(ht, entry, 0, NULL);
3352+
old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
33183353
if (PTE2PFN(old_pte, ht->ht_level) != pfn) {
33193354
panic("x86pte_inval() failure found PTE = " FMT_PTE
33203355
" pfn being unmapped is %lx ht=0x%lx entry=0x%x",
@@ -4048,7 +4083,7 @@ clear_boot_mappings(uintptr_t low, uintptr_t high)
40484083
/*
40494084
* Unload the mapping from the page tables.
40504085
*/
4051-
(void) x86pte_inval(ht, entry, 0, NULL);
4086+
(void) x86pte_inval(ht, entry, 0, NULL, B_TRUE);
40524087
ASSERT(ht->ht_valid_cnt > 0);
40534088
HTABLE_DEC(ht->ht_valid_cnt);
40544089
PGCNT_DEC(ht->ht_hat, ht->ht_level);

usr/src/uts/i86pc/vm/hat_i86.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
2323
* Use is subject to license terms.
2424
*/
25+
/*
26+
* Copyright (c) 2014 by Delphix. All rights reserved.
27+
*/
2528

2629
#ifndef _VM_HAT_I86_H
2730
#define _VM_HAT_I86_H
@@ -227,7 +230,7 @@ extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
227230
extern void hat_kern_setup(void);
228231
extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
229232
extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
230-
x86pte_t old_pte, void *pte_ptr);
233+
x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
231234
extern void hat_init_finish(void);
232235
extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
233236
extern pfn_t hat_kpm_va2pfn(caddr_t);

usr/src/uts/i86pc/vm/htable.c

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121

2222
/*
2323
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24+
* Copyright (c) 2014 by Delphix. All rights reserved.
2425
*/
2526

2627
#include <sys/types.h>
@@ -475,7 +476,8 @@ htable_steal_active(hat_t *hat, uint_t cnt, uint_t threshold,
475476
pte = x86pte_get(ht, e);
476477
if (!PTE_ISVALID(pte))
477478
continue;
478-
hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL);
479+
hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL,
480+
B_TRUE);
479481
}
480482

481483
/*
@@ -2209,14 +2211,17 @@ x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new)
22092211
* Invalidate a page table entry as long as it currently maps something that
22102212
* matches the value determined by expect.
22112213
*
2212-
* Also invalidates any TLB entries and returns the previous value of the PTE.
2214+
* If tlb is set, also invalidates any TLB entries.
2215+
*
2216+
* Returns the previous value of the PTE.
22132217
*/
22142218
x86pte_t
22152219
x86pte_inval(
22162220
htable_t *ht,
22172221
uint_t entry,
22182222
x86pte_t expect,
2219-
x86pte_t *pte_ptr)
2223+
x86pte_t *pte_ptr,
2224+
boolean_t tlb)
22202225
{
22212226
x86pte_t *ptep;
22222227
x86pte_t oldpte;
@@ -2265,7 +2270,7 @@ x86pte_inval(
22652270
found = CAS_PTE(ptep, oldpte, 0);
22662271
XPV_DISALLOW_PAGETABLE_UPDATES();
22672272
} while (found != oldpte);
2268-
if (oldpte & (PT_REF | PT_MOD))
2273+
if (tlb && (oldpte & (PT_REF | PT_MOD)))
22692274
hat_tlb_inval(ht->ht_hat, htable_e2va(ht, entry));
22702275

22712276
done:

usr/src/uts/i86pc/vm/htable.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@
2222
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
2323
* Use is subject to license terms.
2424
*/
25+
/*
26+
* Copyright (c) 2014 by Delphix. All rights reserved.
27+
*/
2528

2629
#ifndef _VM_HTABLE_H
2730
#define _VM_HTABLE_H
@@ -271,7 +274,7 @@ extern x86pte_t x86pte_get(htable_t *, uint_t entry);
271274
extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);
272275

273276
extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
274-
x86pte_t old, x86pte_t *ptr);
277+
x86pte_t old, x86pte_t *ptr, boolean_t tlb);
275278

276279
extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
277280
x86pte_t old, x86pte_t new);

0 commit comments

Comments
 (0)