Permalink
Browse files

Implement Address Space Layout Randomization (ASLR)

With this change, randomization can be enabled for all non-fixed
mappings.  It means that the base address for the mapping is selected
with a guaranteed amount of entropy (bits). If the mapping was
requested to be superpage aligned, the randomization honours the
superpage attributes.

Although the value of ASLR is diminishing over time as exploit authors
work out simple ASLR bypass techniques, it eliminates the trivial
exploitation of certain vulnerabilities, at least in theory.  This
implementation is relatively small and happens at the correct
architectural level.  Also, it is not expected to introduce
regressions in existing cases when turned off (default for now), or
cause any significant maintenance burden.

The randomization is done on a best-effort basis - that is, the
allocator falls back to a first fit strategy if fragmentation prevents
entropy injection.  It is trivial to implement a strong mode where
failure to guarantee the requested amount of entropy results in
mapping request failure, but I do not consider that to be usable.

I have not fine-tuned the amount of entropy injected right now. It is
only a quantitative change that will not change the implementation.  The
current amount is controlled by aslr_pages_rnd.

To not spoil coalescing optimizations, to reduce the page table
fragmentation inherent to ASLR, and to keep the transient superpage
promotion for the malloced memory, locality clustering is implemented
for anonymous private mappings, which are automatically grouped until
fragmentation kicks in.  The initial location for the anon group range
is, of course, randomized.  This is controlled by vm.cluster_anon,
enabled by default.

The default mode keeps the sbrk area unpopulated by other mappings,
but this can be turned off, which gives much more breathing bits on
architectures with small address space, such as i386.  This is tied
with the question of following an application's hint about the mmap(2)
base address. Testing shows that ignoring the hint does not affect the
function of common applications, but I would expect more demanding
code could break. By default sbrk is preserved and mmap hints are
satisfied, which can be changed by using the
kern.elf{32,64}.aslr.honor_sbrk sysctl.

ASLR is enabled on per-ABI basis, and currently it is only allowed on
FreeBSD native i386 and amd64 (including compat 32bit) ABIs.  Support
for additional architectures will be added after further testing.

Both per-process and per-image controls are implemented:
- procctl(2) adds PROC_ASLR_CTL/PROC_ASLR_STATUS;
- NT_FREEBSD_FCTL_ASLR_DISABLE feature control note bit makes it possible
  to force ASLR off for the given binary.  (A tool to edit the feature
  control note is in development.)
Global controls are:
- kern.elf{32,64}.aslr.enable - for non-fixed mappings done by mmap(2);
- kern.elf{32,64}.aslr.pie_enable - for PIE image activation mappings;
- kern.elf{32,64}.aslr.honor_sbrk - allow to use sbrk area for mmap(2);
- vm.cluster_anon - enables anon mapping clustering.

PR:	208580 (exp runs)
Exp-runs done by:	antoine
Reviewed by:	markj (previous version)
Discussed with:	emaste
Tested by:	pho
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D5603
  • Loading branch information...
kostikbel committed Feb 10, 2019
1 parent d856ea7 commit 08849e56bae3a92bfbfee3bc1cbd6628dbd685f6
@@ -73,7 +73,8 @@ struct sysentvec elf64_freebsd_sysvec = {
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_FREEBSD | SV_LP64 | SV_SHP | SV_TIMEKEEP,
.sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_LP64 | SV_SHP |
SV_TIMEKEEP,
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
@@ -82,7 +82,7 @@ struct sysentvec elf32_freebsd_sysvec = {
.sv_maxssiz = NULL,
.sv_flags =
#if __ARM_ARCH >= 6
SV_SHP | SV_TIMEKEEP |
SV_ASLR | SV_SHP | SV_TIMEKEEP |
#endif
SV_ABI_FREEBSD | SV_ILP32,
.sv_set_syscall_retval = cpu_set_syscall_retval,
@@ -3328,6 +3328,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
int error, error1, flags, signum;

switch (uap->com) {
case PROC_ASLR_CTL:
case PROC_SPROTECT:
case PROC_TRACE_CTL:
case PROC_TRAPCAP_CTL:
@@ -3359,6 +3360,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
return (error);
data = &x.rk;
break;
case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
data = &flags;
@@ -3387,6 +3389,7 @@ freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap)
if (error == 0)
error = error1;
break;
case PROC_ASLR_STATUS:
case PROC_TRACE_STATUS:
case PROC_TRAPCAP_STATUS:
if (error == 0)
@@ -119,7 +119,7 @@ struct sysentvec ia32_freebsd_sysvec = {
.sv_setregs = ia32_setregs,
.sv_fixlimit = ia32_fixlimit,
.sv_maxssiz = &ia32_maxssiz,
.sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 |
.sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 |
SV_SHP | SV_TIMEKEEP,
.sv_set_syscall_retval = ia32_set_syscall_retval,
.sv_fetch_syscall_args = ia32_fetch_syscall_args,
@@ -75,8 +75,8 @@ struct sysentvec elf32_freebsd_sysvec = {
.sv_setregs = exec_setregs,
.sv_fixlimit = NULL,
.sv_maxssiz = NULL,
.sv_flags = SV_ABI_FREEBSD | SV_IA32 | SV_ILP32 | SV_SHP |
SV_TIMEKEEP,
.sv_flags = SV_ABI_FREEBSD | SV_ASLR | SV_IA32 | SV_ILP32 |
SV_SHP | SV_TIMEKEEP,
.sv_set_syscall_retval = cpu_set_syscall_retval,
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = syscallnames,
@@ -136,6 +136,27 @@ SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
"enable execution from readable segments");
#endif

SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0,
"");
#define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)

static int __elfN(aslr_enabled) = 0;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
&__elfN(aslr_enabled), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
": enable address map randomization");

static int __elfN(pie_aslr_enabled) = 0;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
&__elfN(pie_aslr_enabled), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
": enable address map randomization for PIE binaries");

static int __elfN(aslr_honor_sbrk) = 1;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
&__elfN(aslr_honor_sbrk), 0,
__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

#define trunc_page_ps(va, ps) rounddown2(va, ps)
@@ -773,6 +794,36 @@ __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
return (error);
}

static u_long
__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv,
u_int align)
{
u_long rbase, res;

MPASS(vm_map_min(map) <= minv);
MPASS(maxv <= vm_map_max(map));
MPASS(minv < maxv);
MPASS(minv + align < maxv);
arc4rand(&rbase, sizeof(rbase), 0);
res = roundup(minv, (u_long)align) + rbase % (maxv - minv);
res &= ~((u_long)align - 1);
if (res >= maxv)
res -= align;
KASSERT(res >= minv,
("res %#lx < minv %#lx, maxv %#lx rbase %#lx",
res, minv, maxv, rbase));
KASSERT(res < maxv,
("res %#lx > maxv %#lx, minv %#lx rbase %#lx",
res, maxv, minv, rbase));
return (res);
}

/*
* Impossible et_dyn_addr initial value indicating that the real base
* must be calculated later with some randomization applied.
*/
#define ET_DYN_ADDR_RAND 1

static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
@@ -781,13 +832,15 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
const Elf_Phdr *phdr;
Elf_Auxargs *elf_auxargs;
struct vmspace *vmspace;
vm_map_t map;
const char *err_str, *newinterp;
char *interp, *interp_buf, *path;
Elf_Brandinfo *brand_info;
struct sysentvec *sv;
vm_prot_t prot;
u_long text_size, data_size, total_size, text_addr, data_addr;
u_long seg_size, seg_addr, addr, baddr, et_dyn_addr, entry, proghdr;
u_long maxalign, mapsz, maxv, maxv1;
uint32_t fctl0;
int32_t osrel;
int error, i, n, interp_name_len, have_interp;
@@ -831,12 +884,17 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
err_str = newinterp = NULL;
interp = interp_buf = NULL;
td = curthread;
maxalign = PAGE_SIZE;
mapsz = 0;

for (i = 0; i < hdr->e_phnum; i++) {
switch (phdr[i].p_type) {
case PT_LOAD:
if (n == 0)
baddr = phdr[i].p_vaddr;
if (phdr[i].p_align > maxalign)
maxalign = phdr[i].p_align;
mapsz += phdr[i].p_memsz;
n++;
break;
case PT_INTERP:
@@ -897,6 +955,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
error = ENOEXEC;
goto ret;
}
sv = brand_info->sysvec;
et_dyn_addr = 0;
if (hdr->e_type == ET_DYN) {
if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
@@ -908,10 +967,18 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
* Honour the base load address from the dso if it is
* non-zero for some reason.
*/
if (baddr == 0)
et_dyn_addr = ET_DYN_LOAD_ADDR;
if (baddr == 0) {
if ((sv->sv_flags & SV_ASLR) == 0 ||
(fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0)
et_dyn_addr = ET_DYN_LOAD_ADDR;
else if ((__elfN(pie_aslr_enabled) &&
(imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) ||
(imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0)
et_dyn_addr = ET_DYN_ADDR_RAND;
else
et_dyn_addr = ET_DYN_LOAD_ADDR;
}
}
sv = brand_info->sysvec;
if (interp != NULL && brand_info->interp_newpath != NULL)
newinterp = brand_info->interp_newpath;

@@ -928,9 +995,54 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
*/
VOP_UNLOCK(imgp->vp, 0);

/*
* Decide whether to enable randomization of user mappings.
* First, reset user preferences for the setid binaries.
* Then, account for the support of the randomization by the
* ABI, by user preferences, and make special treatment for
* PIE binaries.
*/
if (imgp->credential_setid) {
PROC_LOCK(imgp->proc);
imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
PROC_UNLOCK(imgp->proc);
}
if ((sv->sv_flags & SV_ASLR) == 0 ||
(imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 ||
(fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) {
KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND,
("et_dyn_addr == RAND and !ASLR"));
} else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 ||
(__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) ||
et_dyn_addr == ET_DYN_ADDR_RAND) {
imgp->map_flags |= MAP_ASLR;
/*
* If user does not care about sbrk, utilize the bss
* grow region for mappings as well. We can select
* the base for the image anywere and still not suffer
* from the fragmentation.
*/
if (!__elfN(aslr_honor_sbrk) ||
(imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
imgp->map_flags |= MAP_ASLR_IGNSTART;
}

error = exec_new_vmspace(imgp, sv);
vmspace = imgp->proc->p_vmspace;
map = &vmspace->vm_map;

imgp->proc->p_sysent = sv;

maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
if (et_dyn_addr == ET_DYN_ADDR_RAND) {
KASSERT((map->flags & MAP_ASLR) != 0,
("ET_DYN_ADDR_RAND but !MAP_ASLR"));
et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map,
vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA),
/* reserve half of the address space to interpreter */
maxv / 2, 1UL << flsl(maxalign));
}

vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
if (error != 0)
goto ret;
@@ -1022,7 +1134,6 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
goto ret;
}

vmspace = imgp->proc->p_vmspace;
vmspace->vm_tsize = text_size >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
vmspace->vm_dsize = data_size >> PAGE_SHIFT;
@@ -1036,13 +1147,28 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
*/
addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td,
RLIMIT_DATA));
if ((map->flags & MAP_ASLR) != 0) {
maxv1 = maxv / 2 + addr / 2;
MPASS(maxv1 >= addr); /* No overflow */
map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
MAXPAGESIZES > 1 ? pagesizes[1] : pagesizes[0]);
} else {
map->anon_loc = addr;
}
PROC_UNLOCK(imgp->proc);

imgp->entry_addr = entry;

if (interp != NULL) {
have_interp = FALSE;
VOP_UNLOCK(imgp->vp, 0);
if ((map->flags & MAP_ASLR) != 0) {
/* Assume that interpeter fits into 1/4 of AS */
maxv1 = maxv / 2 + addr / 2;
MPASS(maxv1 >= addr); /* No overflow */
addr = __CONCAT(rnd_, __elfN(base))(map, addr,
maxv1, PAGE_SIZE);
}
if (brand_info->emul_path != NULL &&
brand_info->emul_path[0] != '\0') {
path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
@@ -1104,9 +1104,13 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
shmexit(vmspace);
pmap_remove_pages(vmspace_pmap(vmspace));
vm_map_remove(map, vm_map_min(map), vm_map_max(map));
/* An exec terminates mlockall(MCL_FUTURE). */
/*
* An exec terminates mlockall(MCL_FUTURE), ASLR state
* must be re-evaluated.
*/
vm_map_lock(map);
vm_map_modflags(map, 0, MAP_WIREFUTURE);
vm_map_modflags(map, 0, MAP_WIREFUTURE | MAP_ASLR |
MAP_ASLR_IGNSTART);
vm_map_unlock(map);
} else {
error = vmspace_exec(p, sv_minuser, sv->sv_maxuser);
@@ -1115,6 +1119,7 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
vmspace = p->p_vmspace;
map = &vmspace->vm_map;
}
map->flags |= imgp->map_flags;

/* Map a shared page */
obj = sv->sv_shared_page_obj;
@@ -466,7 +466,8 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
* Increase reference counts on shared objects.
*/
p2->p_flag = P_INMEM;
p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
p2->p_flag2 = p1->p_flag2 & (P2_ASLR_DISABLE | P2_ASLR_ENABLE |
P2_ASLR_IGNSTART | P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
p2->p_swtick = ticks;
if (p1->p_flag & P_PROFIL)
startprofclock(p2);
Oops, something went wrong.

0 comments on commit 08849e5

Please sign in to comment.