Skip to content

Commit

Permalink
runtime: use MADV_FREE on Linux if available
Browse files Browse the repository at this point in the history
On Linux, sysUnused currently uses madvise(MADV_DONTNEED) to signal the
kernel that a range of allocated memory contains unneeded data. After a
successful call, the range (but not the data it contained before the
call to madvise) is still available but the first access to that range
will unconditionally incur a page fault (needed to 0-fill the range).

A faster alternative is MADV_FREE, available since Linux 4.5. The
mechanism is very similar, but the page fault will only be incurred if
the kernel, between the call to madvise and the first access, decides to
reuse that memory for something else.

In sysUnused, test whether MADV_FREE is supported and fall back to
MADV_DONTNEED in case it isn't. This requires making the return value of
the madvise syscall available to the caller, so change runtime.madvise
to return it.

Fixes #23687

Change-Id: I962c3429000dd9f4a00846461ad128b71201bb04
Reviewed-on: https://go-review.googlesource.com/135395
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
tklauser committed Sep 18, 2018
1 parent a0f5d5f commit 77f9b27
Show file tree
Hide file tree
Showing 31 changed files with 77 additions and 37 deletions.
5 changes: 4 additions & 1 deletion src/runtime/defs2_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ const (
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED

MADV_DONTNEED = C.MADV_DONTNEED
MADV_DONTNEED = C.MADV_DONTNEED
MADV_FREE = C.MADV_FREE
MADV_HUGEPAGE = C.MADV_HUGEPAGE
MADV_NOHUGEPAGE = C.MADV_NOHUGEPAGE

SA_RESTART = C.SA_RESTART
SA_ONSTACK = C.SA_ONSTACK
Expand Down
5 changes: 4 additions & 1 deletion src/runtime/defs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,10 @@ const (
MAP_PRIVATE = C.MAP_PRIVATE
MAP_FIXED = C.MAP_FIXED

MADV_DONTNEED = C.MADV_DONTNEED
MADV_DONTNEED = C.MADV_DONTNEED
MADV_FREE = C.MADV_FREE
MADV_HUGEPAGE = C.MADV_HUGEPAGE
MADV_NOHUGEPAGE = C.MADV_NOHUGEPAGE

SA_RESTART = C.SA_RESTART
SA_ONSTACK = C.SA_ONSTACK
Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_386.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_arm.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_mips64x.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_mipsx.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_ppc64.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_ppc64le.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
1 change: 1 addition & 0 deletions src/runtime/defs_linux_s390x.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const (
_MAP_FIXED = 0x10

_MADV_DONTNEED = 0x4
_MADV_FREE = 0x8
_MADV_HUGEPAGE = 0xe
_MADV_NOHUGEPAGE = 0xf

Expand Down
13 changes: 11 additions & 2 deletions src/runtime/mem_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package runtime

import (
"runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
Expand Down Expand Up @@ -34,10 +35,12 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
return p
}

var adviseUnused = uint32(_MADV_FREE)

func sysUnused(v unsafe.Pointer, n uintptr) {
// By default, Linux's "transparent huge page" support will
// merge pages into a huge page if there's even a single
// present regular page, undoing the effects of the DONTNEED
// present regular page, undoing the effects of madvise(adviseUnused)
// below. On amd64, that means khugepaged can turn a single
// 4KB page to 2MB, bloating the process's RSS by as much as
// 512X. (See issue #8832 and Linux kernel bug
Expand Down Expand Up @@ -102,7 +105,13 @@ func sysUnused(v unsafe.Pointer, n uintptr) {
throw("unaligned sysUnused")
}

madvise(v, n, _MADV_DONTNEED)
advise := atomic.Load(&adviseUnused)
if errno := madvise(v, n, int32(advise)); advise == _MADV_FREE && errno != 0 {
// MADV_FREE was added in Linux 4.5. Fall back to MADV_DONTNEED if it is
// not supported.
atomic.Store(&adviseUnused, _MADV_DONTNEED)
madvise(v, n, _MADV_DONTNEED)
}
}

func sysUsed(v unsafe.Pointer, n uintptr) {
Expand Down
3 changes: 2 additions & 1 deletion src/runtime/stubs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ func write(fd uintptr, p unsafe.Pointer, n int32) int32
//go:noescape
func open(name *byte, mode, perm int32) int32

func madvise(addr unsafe.Pointer, n uintptr, flags int32)
// The return value is used by sysUnused on Linux to detect whether MADV_FREE is supported.
func madvise(addr unsafe.Pointer, n uintptr, flags int32) int32

// exitThread terminates the current thread, writing *wait = 0 when
// the stack is safe to reclaim.
Expand Down
6 changes: 4 additions & 2 deletions src/runtime/sys_dragonfly_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX
MOVQ $75, AX // madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET

TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
MOVQ new+0(FP), DI
MOVQ old+8(FP), SI
Expand Down
4 changes: 3 additions & 1 deletion src/runtime/sys_freebsd_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
TEXT runtime·madvise(SB),NOSPLIT,$-4
MOVL $75, AX // madvise
INT $0x80
// ignore failure - maybe pages are locked
JAE 2(PC)
MOVL $-1, AX
MOVL AX, ret+12(FP)
RET

TEXT runtime·setitimer(SB), NOSPLIT, $-4
Expand Down
6 changes: 4 additions & 2 deletions src/runtime/sys_freebsd_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,11 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX
MOVQ $75, AX // madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET

TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
MOVQ new+0(FP), DI
MOVQ old+8(FP), SI
Expand Down
15 changes: 8 additions & 7 deletions src/runtime/sys_freebsd_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,15 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
RET

TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW addr+0(FP), R0 // arg 1 addr
MOVW n+4(FP), R1 // arg 2 len
MOVW flags+8(FP), R2 // arg 3 flags
MOVW $SYS_madvise, R7
SWI $0
// ignore failure - maybe pages are locked
MOVW addr+0(FP), R0 // arg 1 addr
MOVW n+4(FP), R1 // arg 2 len
MOVW flags+8(FP), R2 // arg 3 flags
MOVW $SYS_madvise, R7
SWI $0
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET

TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
MOVW new+0(FP), R0
MOVW old+4(FP), R1
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL n+4(FP), CX
MOVL flags+8(FP), DX
INVOKE_SYSCALL
// ignore failure - maybe pages are locked
MOVL AX, ret+12(FP)
RET

// int32 futex(int32 *uaddr, int32 op, int32 val,
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX
MOVQ $SYS_madvise, AX
SYSCALL
// ignore failure - maybe pages are locked
MOVL AX, ret+24(FP)
RET

// int64 futex(int32 *uaddr, int32 op, int32 val,
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW flags+8(FP), R2
MOVW $SYS_madvise, R7
SWI $0
// ignore failure - maybe pages are locked
MOVW R0, ret+12(FP)
RET

TEXT runtime·setitimer(SB),NOSPLIT,$0
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVW flags+16(FP), R2
MOVD $SYS_madvise, R8
SVC
// ignore failure - maybe pages are locked
MOVW R0, ret+24(FP)
RET

// int64 futex(int32 *uaddr, int32 op, int32 val,
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_mips64x.s
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVW flags+16(FP), R6
MOVV $SYS_madvise, R2
SYSCALL
// ignore failure - maybe pages are locked
MOVW R2, ret+24(FP)
RET

// int64 futex(int32 *uaddr, int32 op, int32 val,
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/sys_linux_mipsx.s
Original file line number Diff line number Diff line change
Expand Up @@ -302,13 +302,13 @@ TEXT runtime·munmap(SB),NOSPLIT,$0-8
UNDEF // crash
RET

TEXT runtime·madvise(SB),NOSPLIT,$0-12
TEXT runtime·madvise(SB),NOSPLIT,$0-16
MOVW addr+0(FP), R4
MOVW n+4(FP), R5
MOVW flags+8(FP), R6
MOVW $SYS_madvise, R2
SYSCALL
// ignore failure - maybe pages are locked
MOVW R2, ret+12(FP)
RET

// int32 futex(int32 *uaddr, int32 op, int32 val, struct timespec *timeout, int32 *uaddr2, int32 val2);
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_ppc64x.s
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVD n+8(FP), R4
MOVW flags+16(FP), R5
SYSCALL $SYS_madvise
// ignore failure - maybe pages are locked
MOVW R3, ret+24(FP)
RET

// int64 futex(int32 *uaddr, int32 op, int32 val,
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/sys_linux_s390x.s
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
MOVW flags+16(FP), R4
MOVW $SYS_madvise, R1
SYSCALL
// ignore failure - maybe pages are locked
MOVW R2, ret+24(FP)
RET

// int64 futex(int32 *uaddr, int32 op, int32 val,
Expand Down
4 changes: 3 additions & 1 deletion src/runtime/sys_netbsd_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ TEXT runtime·munmap(SB),NOSPLIT,$-4
TEXT runtime·madvise(SB),NOSPLIT,$-4
MOVL $75, AX // sys_madvise
INT $0x80
// ignore failure - maybe pages are locked
JAE 2(PC)
MOVL $-1, AX
MOVL AX, ret+12(FP)
RET

TEXT runtime·setitimer(SB),NOSPLIT,$-4
Expand Down
4 changes: 3 additions & 1 deletion src/runtime/sys_netbsd_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX // arg 3 - behav
MOVQ $75, AX // sys_madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET

TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
Expand Down
11 changes: 6 additions & 5 deletions src/runtime/sys_netbsd_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,12 @@ TEXT runtime·munmap(SB),NOSPLIT,$0
RET

TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW addr+0(FP), R0 // arg 1 - addr
MOVW n+4(FP), R1 // arg 2 - len
MOVW flags+8(FP), R2 // arg 3 - behav
SWI $0xa0004b // sys_madvise
// ignore failure - maybe pages are locked
MOVW addr+0(FP), R0 // arg 1 - addr
MOVW n+4(FP), R1 // arg 2 - len
MOVW flags+8(FP), R2 // arg 3 - behav
SWI $0xa0004b // sys_madvise
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET

TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
Expand Down
3 changes: 2 additions & 1 deletion src/runtime/sys_openbsd_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$-4
MOVL $75, AX // sys_madvise
INT $0x80
JAE 2(PC)
MOVL $0xf1, 0xf1 // crash
MOVL $-1, AX
MOVL AX, ret+12(FP)
RET

TEXT runtime·setitimer(SB),NOSPLIT,$-4
Expand Down
4 changes: 3 additions & 1 deletion src/runtime/sys_openbsd_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,9 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVL flags+16(FP), DX // arg 3 - behav
MOVQ $75, AX // sys_madvise
SYSCALL
// ignore failure - maybe pages are locked
JCC 2(PC)
MOVL $-1, AX
MOVL AX, ret+24(FP)
RET

TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
Expand Down
4 changes: 2 additions & 2 deletions src/runtime/sys_openbsd_arm.s
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,8 @@ TEXT runtime·madvise(SB),NOSPLIT,$0
MOVW flags+8(FP), R2 // arg 3 - flags
MOVW $75, R12 // sys_madvise
SWI $0
MOVW.CS $0, R8 // crash on syscall failure
MOVW.CS R8, (R8)
MOVW.CS $-1, R0
MOVW R0, ret+12(FP)
RET

TEXT runtime·setitimer(SB),NOSPLIT,$0
Expand Down

0 comments on commit 77f9b27

Please sign in to comment.