Skip to content

Commit

Permalink
5498 kmem_reap does one xcall per page
Browse files Browse the repository at this point in the history
5514 hat_unload_callback passes the wrong length to segvn_hat_unload_callback
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
  • Loading branch information
ahrens authored and Christopher Siden committed Jan 16, 2015
1 parent fca543c commit a6a74e0
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 42 deletions.
107 changes: 71 additions & 36 deletions usr/src/uts/i86pc/vm/hat_i86.c
Expand Up @@ -27,6 +27,7 @@
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, 2015 by Delphix. All rights reserved.
*/

/*
Expand Down Expand Up @@ -1928,6 +1929,7 @@ hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
{
hat_t *hat = (hat_t *)a1;
caddr_t addr = (caddr_t)a2;
size_t len = (size_t)a3;

/*
* If the target hat isn't the kernel and this CPU isn't operating
Expand All @@ -1937,10 +1939,11 @@ hati_demap_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
return (0);

/*
* For a normal address, we just flush one page mapping
* For a normal address, we flush a range of contiguous mappings
*/
if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
mmu_tlbflush_entry(addr);
for (size_t i = 0; i < len; i += MMU_PAGESIZE)
mmu_tlbflush_entry(addr + i);
return (0);
}

Expand Down Expand Up @@ -2035,7 +2038,7 @@ tlb_service(void)
* all CPUs using a given hat.
*/
void
hat_tlb_inval(hat_t *hat, uintptr_t va)
hat_tlb_inval_range(hat_t *hat, uintptr_t va, size_t len)
{
extern int flushes_require_xcalls; /* from mp_startup.c */
cpuset_t justme;
Expand Down Expand Up @@ -2068,12 +2071,15 @@ hat_tlb_inval(hat_t *hat, uintptr_t va)
*/
if (panicstr || !flushes_require_xcalls) {
#ifdef __xpv
if (va == DEMAP_ALL_ADDR)
if (va == DEMAP_ALL_ADDR) {
xen_flush_tlb();
else
xen_flush_va((caddr_t)va);
} else {
for (size_t i = 0; i < len; i += MMU_PAGESIZE)
xen_flush_va((caddr_t)(va + i));
}
#else
(void) hati_demap_func((xc_arg_t)hat, (xc_arg_t)va, NULL);
(void) hati_demap_func((xc_arg_t)hat,
(xc_arg_t)va, (xc_arg_t)len);
#endif
return;
}
Expand Down Expand Up @@ -2124,31 +2130,44 @@ hat_tlb_inval(hat_t *hat, uintptr_t va)
CPUSET_ISEQUAL(cpus_to_shootdown, justme)) {

#ifdef __xpv
if (va == DEMAP_ALL_ADDR)
if (va == DEMAP_ALL_ADDR) {
xen_flush_tlb();
else
xen_flush_va((caddr_t)va);
} else {
for (size_t i = 0; i < len; i += MMU_PAGESIZE)
xen_flush_va((caddr_t)(va + i));
}
#else
(void) hati_demap_func((xc_arg_t)hat, (xc_arg_t)va, NULL);
(void) hati_demap_func((xc_arg_t)hat,
(xc_arg_t)va, (xc_arg_t)len);
#endif

} else {

CPUSET_ADD(cpus_to_shootdown, CPU->cpu_id);
#ifdef __xpv
if (va == DEMAP_ALL_ADDR)
if (va == DEMAP_ALL_ADDR) {
xen_gflush_tlb(cpus_to_shootdown);
else
xen_gflush_va((caddr_t)va, cpus_to_shootdown);
} else {
for (size_t i = 0; i < len; i += MMU_PAGESIZE) {
xen_gflush_va((caddr_t)(va + i),
cpus_to_shootdown);
}
}
#else
xc_call((xc_arg_t)hat, (xc_arg_t)va, NULL,
xc_call((xc_arg_t)hat, (xc_arg_t)va, (xc_arg_t)len,
CPUSET2BV(cpus_to_shootdown), hati_demap_func);
#endif

}
kpreempt_enable();
}

/*
 * Single-page TLB invalidation entry point.  Forwards to
 * hat_tlb_inval_range() with a length of MMU_PAGESIZE, so the hat and
 * va arguments are passed through unchanged and exactly one page is covered.
 */
void
hat_tlb_inval(hat_t *hat, uintptr_t va)
{
hat_tlb_inval_range(hat, va, MMU_PAGESIZE);
}

/*
* Interior routine for HAT_UNLOADs from hat_unload_callback(),
* hat_kmap_unload() OR from hat_steal() code. This routine doesn't
Expand All @@ -2160,7 +2179,8 @@ hat_pte_unmap(
uint_t entry,
uint_t flags,
x86pte_t old_pte,
void *pte_ptr)
void *pte_ptr,
boolean_t tlb)
{
hat_t *hat = ht->ht_hat;
hment_t *hm = NULL;
Expand Down Expand Up @@ -2202,7 +2222,7 @@ hat_pte_unmap(
x86_hm_enter(pp);
}

old_pte = x86pte_inval(ht, entry, old_pte, pte_ptr);
old_pte = x86pte_inval(ht, entry, old_pte, pte_ptr, tlb);

/*
* If the page hadn't changed we've unmapped it and can proceed
Expand Down Expand Up @@ -2283,7 +2303,7 @@ hat_kmap_unload(caddr_t addr, size_t len, uint_t flags)
/*
* use mostly common code to unmap it.
*/
hat_pte_unmap(ht, entry, flags, old_pte, pte_ptr);
hat_pte_unmap(ht, entry, flags, old_pte, pte_ptr, B_TRUE);
}
}

Expand Down Expand Up @@ -2320,19 +2340,26 @@ typedef struct range_info {
level_t rng_level;
} range_info_t;

/*
* Invalidate the TLB, and perform the callback to the upper level VM system,
* for the specified ranges of contiguous pages.
*/
static void
handle_ranges(hat_callback_t *cb, uint_t cnt, range_info_t *range)
handle_ranges(hat_t *hat, hat_callback_t *cb, uint_t cnt, range_info_t *range)
{
/*
* do callbacks to upper level VM system
*/
while (cb != NULL && cnt > 0) {
while (cnt > 0) {
size_t len;

--cnt;
cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
cb->hcb_end_addr = cb->hcb_start_addr;
cb->hcb_end_addr +=
range[cnt].rng_cnt << LEVEL_SIZE(range[cnt].rng_level);
cb->hcb_function(cb);
len = range[cnt].rng_cnt << LEVEL_SHIFT(range[cnt].rng_level);
hat_tlb_inval_range(hat, (uintptr_t)range[cnt].rng_va, len);

if (cb != NULL) {
cb->hcb_start_addr = (caddr_t)range[cnt].rng_va;
cb->hcb_end_addr = cb->hcb_start_addr;
cb->hcb_end_addr += len;
cb->hcb_function(cb);
}
}
}

Expand Down Expand Up @@ -2376,8 +2403,10 @@ hat_unload_callback(
if (cb == NULL && len == MMU_PAGESIZE) {
ht = htable_getpte(hat, vaddr, &entry, &old_pte, 0);
if (ht != NULL) {
if (PTE_ISVALID(old_pte))
hat_pte_unmap(ht, entry, flags, old_pte, NULL);
if (PTE_ISVALID(old_pte)) {
hat_pte_unmap(ht, entry, flags, old_pte,
NULL, B_TRUE);
}
htable_release(ht);
}
XPV_ALLOW_MIGRATE();
Expand All @@ -2400,7 +2429,7 @@ hat_unload_callback(
if (vaddr != contig_va ||
(r_cnt > 0 && r[r_cnt - 1].rng_level != ht->ht_level)) {
if (r_cnt == MAX_UNLOAD_CNT) {
handle_ranges(cb, r_cnt, r);
handle_ranges(hat, cb, r_cnt, r);
r_cnt = 0;
}
r[r_cnt].rng_va = vaddr;
Expand All @@ -2410,10 +2439,16 @@ hat_unload_callback(
}

/*
* Unload one mapping from the page tables.
* Unload one mapping (for a single page) from the page tables.
* Note that we do not remove the mapping from the TLB yet,
* as indicated by the tlb=B_FALSE argument to hat_pte_unmap().
* handle_ranges() will clear the TLB entries with one call to
* hat_tlb_inval_range() per contiguous range. This is
* safe because the page cannot be reused until the
* callback is made (or we return).
*/
entry = htable_va2entry(vaddr, ht);
hat_pte_unmap(ht, entry, flags, old_pte, NULL);
hat_pte_unmap(ht, entry, flags, old_pte, NULL, B_FALSE);
ASSERT(ht->ht_level <= mmu.max_page_level);
vaddr += LEVEL_SIZE(ht->ht_level);
contig_va = vaddr;
Expand All @@ -2426,7 +2461,7 @@ hat_unload_callback(
* handle last range for callbacks
*/
if (r_cnt > 0)
handle_ranges(cb, r_cnt, r);
handle_ranges(hat, cb, r_cnt, r);
XPV_ALLOW_MIGRATE();
}

Expand Down Expand Up @@ -3314,7 +3349,7 @@ hati_page_unmap(page_t *pp, htable_t *ht, uint_t entry)
/*
* Invalidate the PTE and remove the hment.
*/
old_pte = x86pte_inval(ht, entry, 0, NULL);
old_pte = x86pte_inval(ht, entry, 0, NULL, B_TRUE);
if (PTE2PFN(old_pte, ht->ht_level) != pfn) {
panic("x86pte_inval() failure found PTE = " FMT_PTE
" pfn being unmapped is %lx ht=0x%lx entry=0x%x",
Expand Down Expand Up @@ -4048,7 +4083,7 @@ clear_boot_mappings(uintptr_t low, uintptr_t high)
/*
* Unload the mapping from the page tables.
*/
(void) x86pte_inval(ht, entry, 0, NULL);
(void) x86pte_inval(ht, entry, 0, NULL, B_TRUE);
ASSERT(ht->ht_valid_cnt > 0);
HTABLE_DEC(ht->ht_valid_cnt);
PGCNT_DEC(ht->ht_hat, ht->ht_level);
Expand Down
5 changes: 4 additions & 1 deletion usr/src/uts/i86pc/vm/hat_i86.h
Expand Up @@ -22,6 +22,9 @@
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
*/

#ifndef _VM_HAT_I86_H
#define _VM_HAT_I86_H
Expand Down Expand Up @@ -227,7 +230,7 @@ extern void hat_kern_alloc(caddr_t segmap_base, size_t segmap_size,
extern void hat_kern_setup(void);
extern void hat_tlb_inval(struct hat *hat, uintptr_t va);
extern void hat_pte_unmap(htable_t *ht, uint_t entry, uint_t flags,
x86pte_t old_pte, void *pte_ptr);
x86pte_t old_pte, void *pte_ptr, boolean_t tlb);
extern void hat_init_finish(void);
extern caddr_t hat_kpm_pfn2va(pfn_t pfn);
extern pfn_t hat_kpm_va2pfn(caddr_t);
Expand Down
13 changes: 9 additions & 4 deletions usr/src/uts/i86pc/vm/htable.c
Expand Up @@ -21,6 +21,7 @@

/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 by Delphix. All rights reserved.
*/

#include <sys/types.h>
Expand Down Expand Up @@ -475,7 +476,8 @@ htable_steal_active(hat_t *hat, uint_t cnt, uint_t threshold,
pte = x86pte_get(ht, e);
if (!PTE_ISVALID(pte))
continue;
hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL);
hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL,
B_TRUE);
}

/*
Expand Down Expand Up @@ -2209,14 +2211,17 @@ x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new)
* Invalidate a page table entry as long as it currently maps something that
* matches the value determined by expect.
*
* Also invalidates any TLB entries and returns the previous value of the PTE.
* If tlb is set and the old PTE had PT_REF or PT_MOD set, also invalidates
* the TLB entry for this mapping.
*
* Returns the previous value of the PTE.
*/
x86pte_t
x86pte_inval(
htable_t *ht,
uint_t entry,
x86pte_t expect,
x86pte_t *pte_ptr)
x86pte_t *pte_ptr,
boolean_t tlb)
{
x86pte_t *ptep;
x86pte_t oldpte;
Expand Down Expand Up @@ -2265,7 +2270,7 @@ x86pte_inval(
found = CAS_PTE(ptep, oldpte, 0);
XPV_DISALLOW_PAGETABLE_UPDATES();
} while (found != oldpte);
if (oldpte & (PT_REF | PT_MOD))
if (tlb && (oldpte & (PT_REF | PT_MOD)))
hat_tlb_inval(ht->ht_hat, htable_e2va(ht, entry));

done:
Expand Down
5 changes: 4 additions & 1 deletion usr/src/uts/i86pc/vm/htable.h
Expand Up @@ -22,6 +22,9 @@
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
*/

#ifndef _VM_HTABLE_H
#define _VM_HTABLE_H
Expand Down Expand Up @@ -271,7 +274,7 @@ extern x86pte_t x86pte_get(htable_t *, uint_t entry);
extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);

extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
x86pte_t old, x86pte_t *ptr);
x86pte_t old, x86pte_t *ptr, boolean_t tlb);

extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
x86pte_t old, x86pte_t new);
Expand Down

0 comments on commit a6a74e0

Please sign in to comment.