
5498 kmem_reap does one xcall per page

5514 hat_unload_callback passes the wrong length to segvn_hat_unload_callback
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
ahrens authored and Christopher Siden committed Jan 16, 2015
1 parent fca543c commit a6a74e0e62d62ff750cd4b790be5eacc99c3bb8c
Showing with 88 additions and 42 deletions.
  1. +71 −36 usr/src/uts/i86pc/vm/hat_i86.c
  2. +4 −1 usr/src/uts/i86pc/vm/hat_i86.h
  3. +9 −4 usr/src/uts/i86pc/vm/htable.c
  4. +4 −1 usr/src/uts/i86pc/vm/htable.h
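
The heart of 5498: unmapping N pages used to cost N cross-calls (one TLB shootdown per page), and after this change the shootdown takes an address range, so each contiguous run costs one cross-call. A hypothetical userspace sketch of the effect (not illumos code; demap_range() stands in for the real xc_call()/hati_demap_func() machinery):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define MMU_PAGESIZE	4096

static int xcalls;	/* simulated cross-call (IPI) count */

/* One simulated cross-call flushes every page in [va, va + len). */
static void
demap_range(uintptr_t va, size_t len)
{
	xcalls++;
	for (size_t i = 0; i < len; i += MMU_PAGESIZE)
		(void)(va + i);	/* mmu_tlbflush_entry(va + i) would run here */
}

int
main(void)
{
	size_t len = 512 * MMU_PAGESIZE;	/* one contiguous 2MB range */

	/* Old behavior: one cross-call per page. */
	for (size_t off = 0; off < len; off += MMU_PAGESIZE)
		demap_range(off, MMU_PAGESIZE);
	printf("per-page:  %d xcalls\n", xcalls);	/* 512 */

	/* New behavior: one cross-call for the whole range. */
	xcalls = 0;
	demap_range(0, len);
	printf("per-range: %d xcalls\n", xcalls);	/* 1 */
	return (0);
}
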
usr/src/uts/i86pc/vm/hat_i86.c
@@ -27,6 +27,7 @@
  */
 /*
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
  */
 
 /*
@@ -1928,6 +1929,7 @@ hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
 {
 	hat_t *hat = (hat_t *)a1;
 	caddr_t addr = (caddr_t)a2;
+	size_t len = (size_t)a3;
 
 	/*
 	 * If the target hat isn't the kernel and this CPU isn't operating
@@ -1937,10 +1939,11 @@ hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
 		return (0);
 
 	/*
-	 * For a normal address, we just flush one page mapping
+	 * For a normal address, we flush a range of contiguous mappings
 	 */
 	if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
-		mmu_tlbflush_entry(addr);
+		for (size_t i = 0; i < len; i += MMU_PAGESIZE)
+			mmu_tlbflush_entry(addr + i);
 		return (0);
 	}
 
@@ -2035,7 +2038,7 @@ tlb_service(void)
  * all CPUs using a given hat.
  */
 void
-hat_tlb_inval(hat_t *hat, uintptr_t va)
+hat_tlb_inval_range(hat_t *hat, uintptr_t va, size_t len)
 {
 	extern int flushes_require_xcalls;	/* from mp_startup.c */
 	cpuset_t justme;
@@ -2068,12 +2071,15 @@ hat_tlb_inval(hat_t *hat, uintptr_t va)
 	 */
 	if (panicstr || !flushes_require_xcalls) {
 #ifdef __xpv
-		if (va == DEMAP_ALL_ADDR)
+		if (va == DEMAP_ALL_ADDR) {
 			xen_flush_tlb();
-		else
-			xen_flush_va((caddr_t)va);
+		} else {
+			for (size_t i = 0; i < len; i += MMU_PAGESIZE)
+				xen_flush_va((caddr_t)(va + i));
+		}
 #else
-		(void) hati_demap_func((xc_arg_t)hat, (xc_arg_t)va, NULL);
+		(void) hati_demap_func((xc_arg_t)hat,
+		    (xc_arg_t)va, (xc_arg_t)len);
 #endif
 		return;
 	}
@@ -2124,31 +2130,44 @@ hat_tlb_inval(hat_t *hat, uintptr_t va)
 	    CPUSET_ISEQUAL(cpus_to_shootdown, justme)) {
 
 #ifdef __xpv
-		if (va == DEMAP_ALL_ADDR)
+		if (va == DEMAP_ALL_ADDR) {
 			xen_flush_tlb();
-		else
-			xen_flush_va((caddr_t)va);
+		} else {
+			for (size_t i = 0; i < len; i += MMU_PAGESIZE)
+				xen_flush_va((caddr_t)(va + i));
+		}
 #else
-		(void) hati_demap_func((xc_arg_t)hat, (xc_arg_t)va, NULL);
+		(void) hati_demap_func((xc_arg_t)hat,
+		    (xc_arg_t)va, (xc_arg_t)len);
 #endif
 
 	} else {
 
 		CPUSET_ADD(cpus_to_shootdown, CPU->cpu_id);
 #ifdef __xpv
-		if (va == DEMAP_ALL_ADDR)
+		if (va == DEMAP_ALL_ADDR) {
 			xen_gflush_tlb(cpus_to_shootdown);
-		else
-			xen_gflush_va((caddr_t)va, cpus_to_shootdown);
+		} else {
+			for (size_t i = 0; i < len; i += MMU_PAGESIZE) {
+				xen_gflush_va((caddr_t)(va + i),
+				    cpus_to_shootdown);
+			}
+		}
 #else
-		xc_call((xc_arg_t)hat, (xc_arg_t)va, NULL,
+		xc_call((xc_arg_t)hat, (xc_arg_t)va, (xc_arg_t)len,
 		    CPUSET2BV(cpus_to_shootdown), hati_demap_func);
 #endif
 
 	}
 	kpreempt_enable();
}
 
+void
+hat_tlb_inval(hat_t *hat, uintptr_t va)
+{
+	hat_tlb_inval_range(hat, va, MMU_PAGESIZE);
+}
+
 /*
  * Interior routine for HAT_UNLOADs from hat_unload_callback(),
  * hat_kmap_unload() OR from hat_steal() code. This routine doesn't
@@ -2160,7 +2179,8 @@ hat_pte_unmap(
 	uint_t entry,
 	uint_t flags,
 	x86pte_t old_pte,
-	void *pte_ptr)
+	void *pte_ptr,
+	boolean_t tlb)
 {
 	hat_t *hat = ht->ht_hat;
 	hment_t *hm = NULL;
@@ -2202,7 +2222,7 @@ hat_pte_unmap(
 		x86_hm_enter(pp);
 	}
 
-	old_pte = x86pte_inval(ht, entry, old_pte, pte_ptr);
+	old_pte = x86pte_inval(ht, entry, old_pte, pte_ptr, tlb);
 
 	/*
 	 * If the page hadn't changed we've unmapped it and can proceed
@@ -2283,7 +2303,7 @@ hat_kmap_unload(caddr_t addr, size_t len, uint_t flags)
 		/*
 		 * use mostly common code to unmap it.
 		 */
-		hat_pte_unmap(ht, entry, flags, old_pte, pte_ptr);
+		hat_pte_unmap(ht, entry, flags, old_pte, pte_ptr, B_TRUE);
 	}
 }
 
@@ -2320,19 +2340,26 @@ typedef struct range_info {
 	level_t rng_level;
 } range_info_t;
 
+/*
+ * Invalidate the TLB, and perform the callback to the upper level VM system,
+ * for the specified ranges of contiguous pages.
+ */
 static void
-handle_ranges(hat_callback_t *cb, uint_t cnt, range_info_t *range)
+handle_ranges(hat_t *hat, hat_callback_t *cb, uint_t cnt, range_info_t *range)
 {
-	/*
-	 * do callbacks to upper level VM system
-	 */
-	while (cb != NULL && cnt > 0) {
+	while (cnt > 0) {
		size_t len;
+
 		--cnt;
-		cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
-		cb->hcb_end_addr = cb->hcb_start_addr;
-		cb->hcb_end_addr +=
-		    range[cnt].rng_cnt << LEVEL_SIZE(range[cnt].rng_level);
-		cb->hcb_function(cb);
+		len = range[cnt].rng_cnt << LEVEL_SHIFT(range[cnt].rng_level);
+		hat_tlb_inval_range(hat, (uintptr_t)range[cnt].rng_va, len);
+
+		if (cb != NULL) {
+			cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
+			cb->hcb_end_addr = cb->hcb_start_addr;
+			cb->hcb_end_addr += len;
+			cb->hcb_function(cb);
+		}
 	}
 }
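
The handle_ranges() hunk above also carries the 5514 fix: the old code shifted rng_cnt by LEVEL_SIZE() (a size in bytes) where a bit count was needed, so segvn_hat_unload_callback received the wrong length. A hypothetical sketch of the distinction (not illumos code), assuming the i86pc values LEVEL_SHIFT(0) == 12 and LEVEL_SIZE(0) == 4096:

#include <stdio.h>
#include <stddef.h>

#define LEVEL_SHIFT(l)	((l) == 0 ? 12 : 21)		/* 4K / 2M pages */
#define LEVEL_SIZE(l)	((size_t)1 << LEVEL_SHIFT(l))

int
main(void)
{
	size_t rng_cnt = 8;	/* eight contiguous 4K pages */

	/* Correct: shift by the bit count, giving a length in bytes. */
	size_t good = rng_cnt << LEVEL_SHIFT(0);	/* 32768 */

	/*
	 * Buggy: shifting by LEVEL_SIZE(0) == 4096 exceeds the width of
	 * size_t, which is undefined behavior in C; in practice the
	 * callback received a garbage length.
	 */
	/* size_t bad = rng_cnt << LEVEL_SIZE(0); */

	printf("len = %zu bytes\n", good);
	return (0);
}
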

@@ -2376,8 +2403,10 @@ hat_unload_callback(
 	if (cb == NULL && len == MMU_PAGESIZE) {
 		ht = htable_getpte(hat, vaddr, &entry, &old_pte, 0);
 		if (ht != NULL) {
-			if (PTE_ISVALID(old_pte))
-				hat_pte_unmap(ht, entry, flags, old_pte, NULL);
+			if (PTE_ISVALID(old_pte)) {
+				hat_pte_unmap(ht, entry, flags, old_pte,
+				    NULL, B_TRUE);
+			}
 			htable_release(ht);
 		}
 		XPV_ALLOW_MIGRATE();
@@ -2400,7 +2429,7 @@ hat_unload_callback(
 		if (vaddr != contig_va ||
 		    (r_cnt > 0 && r[r_cnt - 1].rng_level != ht->ht_level)) {
 			if (r_cnt == MAX_UNLOAD_CNT) {
-				handle_ranges(cb, r_cnt, r);
+				handle_ranges(hat, cb, r_cnt, r);
 				r_cnt = 0;
 			}
 			r[r_cnt].rng_va = vaddr;
@@ -2410,10 +2439,16 @@ hat_unload_callback(
 		}
 
 		/*
-		 * Unload one mapping from the page tables.
+		 * Unload one mapping (for a single page) from the page tables.
+		 * Note that we do not remove the mapping from the TLB yet,
+		 * as indicated by the tlb=FALSE argument to hat_pte_unmap().
+		 * handle_ranges() will clear the TLB entries with one call to
+		 * hat_tlb_inval_range() per contiguous range. This is
+		 * safe because the page can not be reused until the
+		 * callback is made (or we return).
 		 */
 		entry = htable_va2entry(vaddr, ht);
-		hat_pte_unmap(ht, entry, flags, old_pte, NULL);
+		hat_pte_unmap(ht, entry, flags, old_pte, NULL, B_FALSE);
 		ASSERT(ht->ht_level <= mmu.max_page_level);
 		vaddr += LEVEL_SIZE(ht->ht_level);
 		contig_va = vaddr;
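
A hypothetical sketch (not illumos code) of the deferred-flush pattern the comment above describes: clear the PTEs with no per-page TLB shootdown, then issue one ranged invalidation per contiguous run. unmap_pte() and flush_range() stand in for hat_pte_unmap() and hat_tlb_inval_range().

#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>

#define PAGESIZE	4096

static void
unmap_pte(uintptr_t va, bool tlb)
{
	/* clear the page table entry; shoot down the TLB only if tlb */
	(void)va;
	(void)tlb;
}

static void
flush_range(uintptr_t va, size_t len)
{
	/* one cross-call covering all of [va, va + len) */
	(void)va;
	(void)len;
}

static void
unload_range(uintptr_t va, size_t len)
{
	/* tlb=false defers the shootdown instead of paying one per page */
	for (size_t off = 0; off < len; off += PAGESIZE)
		unmap_pte(va + off, false);

	/*
	 * Deferring is safe because the pages cannot be reused until the
	 * caller is notified (via the callback) or this function returns.
	 */
	flush_range(va, len);
}

int
main(void)
{
	unload_range(0x10000, 16 * PAGESIZE);	/* unmap 16 pages, 1 flush */
	return (0);
}
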
@@ -2426,7 +2461,7 @@ hat_unload_callback(
 	 * handle last range for callbacks
 	 */
 	if (r_cnt > 0)
-		handle_ranges(cb, r_cnt, r);
+		handle_ranges(hat, cb, r_cnt, r);
 	XPV_ALLOW_MIGRATE();
 }
 
@@ -3314,7 +3349,7 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
 	/*
 	 * Invalidate the PTE and remove the hment.
 	 */
-	old_pte = x86pte_inval(ht, entry, 0, NULL);
+	old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
 	if (PTE2PFN(old_pte, ht->ht_level) != pfn) {
 		panic("x86pte_inval() failure found PTE = " FMT_PTE
 		    " pfn being unmapped is %lx ht=0x%lx entry=0x%x",
@@ -4048,7 +4083,7 @@ clear_boot_mappings(uintptr_t low, uintptr_t high)
 		/*
 		 * Unload the mapping from the page tables.
 		 */
-		(void) x86pte_inval(ht, entry, 0, NULL);
+		(void) x86pte_inval(ht, entry, 0, NULL, B_TRUE);
 		ASSERT(ht->ht_valid_cnt > 0);
 		HTABLE_DEC(ht->ht_valid_cnt);
 		PGCNT_DEC(ht->ht_hat, ht->ht_level);
usr/src/uts/i86pc/vm/hat_i86.h
@@ -22,6 +22,9 @@
  * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
 
 #ifndef _VM_HAT_I86_H
 #define _VM_HAT_I86_H
@@ -227,7 +230,7 @@ extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
 extern void hat_kern_setup(void);
 extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
 extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
-	x86pte_t old_pte, void *pte_ptr);
+	x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
 extern void hat_init_finish(void);
 extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
 extern pfn_t hat_kpm_va2pfn(caddr_t);
usr/src/uts/i86pc/vm/htable.c
@@ -21,6 +21,7 @@
 
 /*
  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/types.h>
@@ -475,7 +476,8 @@ htable_steal_active(hat_t *hat, uint_t cnt, uint_t threshold,
 			pte = x86pte_get(ht, e);
 			if (!PTE_ISVALID(pte))
 				continue;
-			hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL);
+			hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL,
+			    B_TRUE);
 		}
 
 		/*
@@ -2209,14 +2211,17 @@ x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new)
  * Invalidate a page table entry as long as it currently maps something that
  * matches the value determined by expect.
  *
- * Also invalidates any TLB entries and returns the previous value of the PTE.
+ * If tlb is set, also invalidates any TLB entries.
+ *
+ * Returns the previous value of the PTE.
  */
 x86pte_t
 x86pte_inval(
 	htable_t *ht,
 	uint_t entry,
 	x86pte_t expect,
-	x86pte_t *pte_ptr)
+	x86pte_t *pte_ptr,
+	boolean_t tlb)
 {
 	x86pte_t *ptep;
 	x86pte_t oldpte;
@@ -2265,7 +2270,7 @@ x86pte_inval(
 		found = CAS_PTE(ptep, oldpte, 0);
 		XPV_DISALLOW_PAGETABLE_UPDATES();
 	} while (found != oldpte);
-	if (oldpte & (PT_REF | PT_MOD))
+	if (tlb && (oldpte & (PT_REF | PT_MOD)))
 		hat_tlb_inval(ht->ht_hat, htable_e2va(ht, entry));
 
 done:
usr/src/uts/i86pc/vm/htable.h
@@ -22,6 +22,9 @@
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
 
 #ifndef _VM_HTABLE_H
 #define _VM_HTABLE_H
@@ -271,7 +274,7 @@ extern x86pte_t x86pte_get(htable_t *, uint_t entry);
 extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);
 
 extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
-	x86pte_t old, x86pte_t *ptr);
+	x86pte_t old, x86pte_t *ptr, boolean_t tlb);
 
 extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
 	x86pte_t old, x86pte_t new);