Skip to content

Commit 52a3d79

Browse files
committed
Merge branch 'arm64-bpf'
Zi Shen Lim says: ==================== arm64 BPF JIT updates Updates for arm64 eBPF JIT. The main addition here is implementation of bpf_tail_call. Changes since v2: - None. Resubmit per David Miller. Changes since v1: - Added patch #1 to address build error due to missing header inclusion in linux/bpf.h. (Thanks to suggestion and ack by Daniel Borkmann) Ordered it ahead of bpf_tail_call patch #2 so build error is not triggered. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents f6664f1 + 643c332 commit 52a3d79

File tree

3 files changed

+99
-16
lines changed

3 files changed

+99
-16
lines changed

arch/arm64/net/bpf_jit.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
/*
22
* BPF JIT compiler for ARM64
33
*
4-
* Copyright (C) 2014-2015 Zi Shen Lim <zlim.lnx@gmail.com>
4+
* Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
55
*
66
* This program is free software; you can redistribute it and/or modify
77
* it under the terms of the GNU General Public License version 2 as
@@ -55,6 +55,7 @@
5555
#define A64_BL(imm26) A64_BRANCH((imm26) << 2, LINK)
5656

5757
/* Unconditional branch (register) */
58+
#define A64_BR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_NOLINK)
5859
#define A64_BLR(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_LINK)
5960
#define A64_RET(Rn) aarch64_insn_gen_branch_reg(Rn, AARCH64_INSN_BRANCH_RETURN)
6061

arch/arm64/net/bpf_jit_comp.c

Lines changed: 96 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818

1919
#define pr_fmt(fmt) "bpf_jit: " fmt
2020

21+
#include <linux/bpf.h>
2122
#include <linux/filter.h>
2223
#include <linux/printk.h>
2324
#include <linux/skbuff.h>
@@ -33,6 +34,7 @@ int bpf_jit_enable __read_mostly;
3334

3435
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
3536
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
37+
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
3638

3739
/* Map BPF registers to A64 registers */
3840
static const int bpf2a64[] = {
@@ -54,6 +56,8 @@ static const int bpf2a64[] = {
5456
/* temporary registers for internal BPF JIT */
5557
[TMP_REG_1] = A64_R(10),
5658
[TMP_REG_2] = A64_R(11),
59+
/* tail_call_cnt */
60+
[TCALL_CNT] = A64_R(26),
5761
/* temporary register for blinding constants */
5862
[BPF_REG_AX] = A64_R(9),
5963
};
@@ -146,13 +150,18 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
146150

147151
#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
148152

149-
static void build_prologue(struct jit_ctx *ctx)
153+
#define PROLOGUE_OFFSET 8
154+
155+
static int build_prologue(struct jit_ctx *ctx)
150156
{
151157
const u8 r6 = bpf2a64[BPF_REG_6];
152158
const u8 r7 = bpf2a64[BPF_REG_7];
153159
const u8 r8 = bpf2a64[BPF_REG_8];
154160
const u8 r9 = bpf2a64[BPF_REG_9];
155161
const u8 fp = bpf2a64[BPF_REG_FP];
162+
const u8 tcc = bpf2a64[TCALL_CNT];
163+
const int idx0 = ctx->idx;
164+
int cur_offset;
156165

157166
/*
158167
* BPF prog stack layout
@@ -162,8 +171,6 @@ static void build_prologue(struct jit_ctx *ctx)
162171
* |FP/LR|
163172
* current A64_FP => -16:+-----+
164173
* | ... | callee saved registers
165-
* +-----+
166-
* | | x25/x26
167174
* BPF fp register => -64:+-----+ <= (BPF_FP)
168175
* | |
169176
* | ... | BPF prog stack
@@ -183,18 +190,90 @@ static void build_prologue(struct jit_ctx *ctx)
183190
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
184191
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
185192

186-
/* Save callee-saved register */
193+
/* Save callee-saved registers */
187194
emit(A64_PUSH(r6, r7, A64_SP), ctx);
188195
emit(A64_PUSH(r8, r9, A64_SP), ctx);
196+
emit(A64_PUSH(fp, tcc, A64_SP), ctx);
189197

190-
/* Save fp (x25) and x26. SP requires 16 bytes alignment */
191-
emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx);
192-
193-
/* Set up BPF prog stack base register (x25) */
198+
/* Set up BPF prog stack base register */
194199
emit(A64_MOV(1, fp, A64_SP), ctx);
195200

201+
/* Initialize tail_call_cnt */
202+
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
203+
196204
/* Set up function call stack */
197205
emit(A64_SUB_I(1, A64_SP, A64_SP, STACK_SIZE), ctx);
206+
207+
cur_offset = ctx->idx - idx0;
208+
if (cur_offset != PROLOGUE_OFFSET) {
209+
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
210+
cur_offset, PROLOGUE_OFFSET);
211+
return -1;
212+
}
213+
return 0;
214+
}
215+
216+
static int out_offset = -1; /* initialized on the first pass of build_body() */
217+
static int emit_bpf_tail_call(struct jit_ctx *ctx)
218+
{
219+
/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
220+
const u8 r2 = bpf2a64[BPF_REG_2];
221+
const u8 r3 = bpf2a64[BPF_REG_3];
222+
223+
const u8 tmp = bpf2a64[TMP_REG_1];
224+
const u8 prg = bpf2a64[TMP_REG_2];
225+
const u8 tcc = bpf2a64[TCALL_CNT];
226+
const int idx0 = ctx->idx;
227+
#define cur_offset (ctx->idx - idx0)
228+
#define jmp_offset (out_offset - (cur_offset))
229+
size_t off;
230+
231+
/* if (index >= array->map.max_entries)
232+
* goto out;
233+
*/
234+
off = offsetof(struct bpf_array, map.max_entries);
235+
emit_a64_mov_i64(tmp, off, ctx);
236+
emit(A64_LDR32(tmp, r2, tmp), ctx);
237+
emit(A64_CMP(0, r3, tmp), ctx);
238+
emit(A64_B_(A64_COND_GE, jmp_offset), ctx);
239+
240+
/* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
241+
* goto out;
242+
* tail_call_cnt++;
243+
*/
244+
emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
245+
emit(A64_CMP(1, tcc, tmp), ctx);
246+
emit(A64_B_(A64_COND_GT, jmp_offset), ctx);
247+
emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
248+
249+
/* prog = array->ptrs[index];
250+
* if (prog == NULL)
251+
* goto out;
252+
*/
253+
off = offsetof(struct bpf_array, ptrs);
254+
emit_a64_mov_i64(tmp, off, ctx);
255+
emit(A64_LDR64(tmp, r2, tmp), ctx);
256+
emit(A64_LDR64(prg, tmp, r3), ctx);
257+
emit(A64_CBZ(1, prg, jmp_offset), ctx);
258+
259+
/* goto *(prog->bpf_func + prologue_size); */
260+
off = offsetof(struct bpf_prog, bpf_func);
261+
emit_a64_mov_i64(tmp, off, ctx);
262+
emit(A64_LDR64(tmp, prg, tmp), ctx);
263+
emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
264+
emit(A64_BR(tmp), ctx);
265+
266+
/* out: */
267+
if (out_offset == -1)
268+
out_offset = cur_offset;
269+
if (cur_offset != out_offset) {
270+
pr_err_once("tail_call out_offset = %d, expected %d!\n",
271+
cur_offset, out_offset);
272+
return -1;
273+
}
274+
return 0;
275+
#undef cur_offset
276+
#undef jmp_offset
198277
}
199278

200279
static void build_epilogue(struct jit_ctx *ctx)
@@ -499,13 +578,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
499578
const u64 func = (u64)__bpf_call_base + imm;
500579

501580
emit_a64_mov_i64(tmp, func, ctx);
502-
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
503-
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
504581
emit(A64_BLR(tmp), ctx);
505582
emit(A64_MOV(1, r0, A64_R(0)), ctx);
506-
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
507583
break;
508584
}
585+
/* tail call */
586+
case BPF_JMP | BPF_CALL | BPF_X:
587+
if (emit_bpf_tail_call(ctx))
588+
return -EFAULT;
589+
break;
509590
/* function return */
510591
case BPF_JMP | BPF_EXIT:
511592
/* Optimization: when last instruction is EXIT,
@@ -650,11 +731,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
650731
emit_a64_mov_i64(r3, size, ctx);
651732
emit(A64_SUB_I(1, r4, fp, STACK_SIZE), ctx);
652733
emit_a64_mov_i64(r5, (unsigned long)bpf_load_pointer, ctx);
653-
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
654-
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
655734
emit(A64_BLR(r5), ctx);
656735
emit(A64_MOV(1, r0, A64_R(0)), ctx);
657-
emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
658736

659737
jmp_offset = epilogue_offset(ctx);
660738
check_imm19(jmp_offset);
@@ -780,7 +858,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
780858
goto out_off;
781859
}
782860

783-
build_prologue(&ctx);
861+
if (build_prologue(&ctx)) {
862+
prog = orig_prog;
863+
goto out_off;
864+
}
784865

785866
ctx.epilogue_offset = ctx.idx;
786867
build_epilogue(&ctx);

include/linux/bpf.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
#include <linux/workqueue.h>
1212
#include <linux/file.h>
1313
#include <linux/percpu.h>
14+
#include <linux/err.h>
1415

1516
struct bpf_map;
1617

0 commit comments

Comments (0)