This repository has been archived by the owner on Apr 13, 2023. It is now read-only.
forked from sched-ext/sched_ext
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sched_ext: Add scx_example_cgfifo BPF scheduler
Add the scx_example_cgfifo BPF scheduler, which uses a draft implementation of BPF rbtree to implement flattened hierarchical scheduling among cgroups and performs simple FIFO scheduling within each cgroup.

One shortcoming of the flattened hierarchy implementation is that it can be tricked by a thundering herd among cgroups. IOW, if there are many cgroups in a subtree which wake up at the same time, the subtree may consume more CPU time than configured. However, outside those situations, it can implement hierarchical weight-based control at a very low overhead.

The benchmark setup:

* AMD Ryzen 7 3800X 8-core machine w/ 32G memory
* Running two instances of apache, both serving a bash CGI script which invokes date, df, grep once and then does sha1sum of a 287k file.
* Another machine running wrk (https://github.com/wg/wrk) to generate load: `wrk -t16 -c60 -d30s http://TARGETIP/cgi-bin/example.sh`

Results (RPS):

1. No CPU control, single apache instance.

               RUN1     RUN2     RUN3     AVG
   CFS:        3620.49  3683.45  3664.30  3656.08
   CGFIFO:     3813.13  3738.71  3764.70  3772.18 (+3.2%)

2. CPU control w/ 4 level nesting, two apache instances with weights 200 and 100.

               RUN1     RUN2     RUN3     AVG
   CFS-100:    1570.99  1604.35  1555.19  1576.84
   CFS-200:    1871.55  1892.61  1842.94  1869.03
   CFS-SUM:    3442.54  3496.96  3398.13  3445.87
   CGFIFO-100: 1430.37  1484.21  1386.95  1433.84
   CGFIFO-200: 2381.34  2321.86  2407.51  2370.24
   CGFIFO-SUM: 3811.71  3806.07  3794.46  3804.08 (+10.4%)

NOT_SIGNED_OFF
- Loading branch information
Showing 5 changed files with 1,451 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
# SPDX-License-Identifier: GPL-2.0 | ||
include ../build/Build.include | ||
include ../scripts/Makefile.arch | ||
include ../scripts/Makefile.include | ||
|
||
# Toolchain selection: build with clang when LLVM is set, otherwise with
# $(CROSS_COMPILE)gcc. An LLVM value ending in '/' is used as a prefix for the
# clang binary name; a value starting with '-' is used as a suffix.
ifneq ($(LLVM),) | ||
ifneq ($(filter %/,$(LLVM)),) | ||
LLVM_PREFIX := $(LLVM) | ||
else ifneq ($(filter -%,$(LLVM)),) | ||
LLVM_SUFFIX := $(LLVM) | ||
endif | ||
|
||
# Default clang target triple for each known $(ARCH).
CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi | ||
CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu | ||
CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl | ||
CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu | ||
CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu | ||
CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu | ||
CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu | ||
CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu | ||
CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu | ||
CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) | ||
|
||
# Without CROSS_COMPILE the --target comes from the table above (and the build
# stops if $(ARCH) has no entry); with CROSS_COMPILE it is the basename of
# CROSS_COMPILE with the trailing '-' removed.
ifeq ($(CROSS_COMPILE),) | ||
ifeq ($(CLANG_TARGET_FLAGS),) | ||
$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk) | ||
else | ||
CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) | ||
endif # CLANG_TARGET_FLAGS | ||
else | ||
CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) | ||
endif # CROSS_COMPILE | ||
|
||
CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as | ||
else | ||
CC := $(CROSS_COMPILE)gcc | ||
endif # LLVM | ||
|
||
# Locations in the surrounding source tree, all derived from this directory
# and its parent ($(TOOLSDIR)).
CURDIR := $(abspath .) | ||
TOOLSDIR := $(abspath ..) | ||
LIBDIR := $(TOOLSDIR)/lib | ||
BPFDIR := $(LIBDIR)/bpf | ||
TOOLSINCDIR := $(TOOLSDIR)/include | ||
BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool | ||
APIDIR := $(TOOLSINCDIR)/uapi | ||
GENDIR := $(abspath ../../include/generated) | ||
GENHDR := $(GENDIR)/autoconf.h | ||
|
||
# Build outputs go under $(CURDIR)/tools. When cross-compiling, host-side
# artifacts get their own build and scratch directories; otherwise the HOST_*
# variables alias the target ones.
SCRATCH_DIR := $(CURDIR)/tools | ||
BUILD_DIR := $(SCRATCH_DIR)/build | ||
INCLUDE_DIR := $(SCRATCH_DIR)/include | ||
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a | ||
ifneq ($(CROSS_COMPILE),) | ||
HOST_BUILD_DIR := $(BUILD_DIR)/host | ||
HOST_SCRATCH_DIR := host-tools | ||
HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include | ||
else | ||
HOST_BUILD_DIR := $(BUILD_DIR) | ||
HOST_SCRATCH_DIR := $(SCRATCH_DIR) | ||
HOST_INCLUDE_DIR := $(INCLUDE_DIR) | ||
endif | ||
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a | ||
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids | ||
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool | ||
|
||
# Candidate vmlinux locations, in priority order. VMLINUX_BTF becomes the first
# candidate that exists; the build stops if none does. Both variables can be
# overridden from the command line or environment.
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ | ||
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ | ||
../../vmlinux \ | ||
/sys/kernel/btf/vmlinux \ | ||
/boot/vmlinux-$(shell uname -r) | ||
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) | ||
ifeq ($(VMLINUX_BTF),) | ||
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") | ||
endif | ||
|
||
# Use the locally built bpftool unless the caller supplies one.
BPFTOOL ?= $(DEFAULT_BPFTOOL) | ||
|
||
# Define HAVE_GENHDR only when the generated autoconf.h is present.
ifneq ($(wildcard $(GENHDR)),) | ||
GENFLAGS := -DHAVE_GENHDR | ||
endif | ||
|
||
# Flags for the userspace side of the build.
CFLAGS += -g -O2 -rdynamic -Wall -Werror $(GENFLAGS) \ | ||
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ | ||
-I$(TOOLSINCDIR) -I$(APIDIR) | ||
|
||
# Silence some warnings when compiled with clang | ||
ifneq ($(LLVM),) | ||
CFLAGS += -Wno-unused-command-line-argument | ||
endif | ||
|
||
# Libraries passed on the link line after the statically linked libbpf.a.
LDFLAGS = -lelf -lz | ||
|
||
# Non-empty when $(CC) predefines a little-endian __BYTE_ORDER__; evaluated
# lazily, i.e. the compiler is run each time the variable is expanded.
IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ | ||
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') | ||
|
||
# Get Clang's default includes on this system, as opposed to those seen by | ||
# '-target bpf'. This fixes "missing" files on some architectures/distros, | ||
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. | ||
# | ||
# Use '-idirafter': Don't interfere with include mechanics except where the | ||
# build would have failed anyways. | ||
#
# $(1) is the compiler to query. If that compiler predefines __riscv_xlen, the
# second shell call also emits matching -D__riscv_xlen and -D__BITS_PER_LONG
# definitions.
define get_sys_includes | ||
$(shell $(1) -v -E - </dev/null 2>&1 \ | ||
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ | ||
$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') | ||
endef | ||
|
||
# Flags for compiling the BPF side: target-arch define, endianness matching
# $(CC), local and UAPI include paths, plus the system include directories
# reported by $(CLANG). CLANG is not set in this file; presumably it comes
# from one of the included makefiles or the environment.
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ | ||
$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) \ | ||
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ | ||
-I../../include \ | ||
$(call get_sys_includes,$(CLANG)) \ | ||
-Wno-compare-distinct-pointer-types | ||
|
||
# Default goal: build the example scheduler binary.
all: scx_example_cgfifo | ||
|
||
# sort removes libbpf duplicates when not cross-building | ||
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \ | ||
$(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \ | ||
$(INCLUDE_DIR)) | ||
|
||
# Create each output directory on demand; other rules list these as
# order-only prerequisites.
$(MAKE_DIRS): | ||
$(call msg,MKDIR,,$@) | ||
$(Q)mkdir -p $@ | ||
|
||
# Build libbpf.a through a sub-make in $(BPFDIR), placing objects in
# $(BUILD_DIR)/libbpf/ and installing the libbpf headers under $(SCRATCH_DIR).
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ | ||
$(APIDIR)/linux/bpf.h \ | ||
| $(BUILD_DIR)/libbpf | ||
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ | ||
EXTRA_CFLAGS='-g -O0' \ | ||
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers | ||
|
||
# Build bpftool for the host: ARCH and CROSS_COMPILE are cleared and CC/LD are
# forced to the host tools, then the binary is installed under
# $(HOST_SCRATCH_DIR).
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ | ||
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool | ||
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ | ||
ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ | ||
EXTRA_CFLAGS='-g -O0' \ | ||
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ | ||
LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ | ||
LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ | ||
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin | ||
|
||
# Produce vmlinux.h: dump it from $(VMLINUX_BTF) with bpftool, or, when
# VMLINUX_H is set, copy that pre-generated header instead.
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) | ||
ifeq ($(VMLINUX_H),) | ||
$(call msg,GEN,,$@) | ||
$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ | ||
else | ||
$(call msg,CP,,$@) | ||
$(Q)cp "$(VMLINUX_H)" $@ | ||
endif | ||
|
||
# Compile a BPF source file to a BPF object with $(CLANG) (-target bpf,
# -mcpu=v3). libbpf is an order-only prerequisite.
%.bpf.o: %.bpf.c $(INCLUDE_DIR)/vmlinux.h scx_common.h \ | ||
| $(BPFOBJ) | ||
$(call msg,CLNG-BPF,,$@) | ||
$(Q)$(CLANG) $(BPF_CFLAGS) -O2 -target bpf -c $< -mcpu=v3 -o $@ | ||
|
||
# Generate the skeleton and subskeleton headers. The object is passed through
# 'bpftool gen object' three times; the diff of the second and third outputs
# fails the recipe if relinking does not produce a stable result.
%.skel.h: %.bpf.o $(BPFTOOL) | ||
$(call msg,GEN-SKEL,,$@) | ||
$(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $< | ||
$(Q)$(BPFTOOL) gen object $(<:.o=.linked2.o) $(<:.o=.linked1.o) | ||
$(Q)$(BPFTOOL) gen object $(<:.o=.linked3.o) $(<:.o=.linked2.o) | ||
$(Q)diff $(<:.o=.linked2.o) $(<:.o=.linked3.o) | ||
$(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked3.o) name $(<:.bpf.o=) > $@ | ||
$(Q)$(BPFTOOL) gen subskeleton $(<:.o=.linked3.o) name $(<:.bpf.o=) > $(@:.skel.h=.subskel.h) | ||
|
||
# Extra header dependency for the BPF object, on top of the pattern rule.
scx_example_cgfifo.bpf.o: scx_example_cgfifo.h | ||
|
||
# Build the userspace loader: compile the C file (which needs the generated
# skeleton header) and link it against $(HOST_BPFOBJ) and $(LDFLAGS).
# NOTE(review): this links the host libbpf.a with the target $(CC); confirm
# that is intended when CROSS_COMPILE is set.
scx_example_cgfifo: scx_example_cgfifo.c scx_example_cgfifo.skel.h | ||
$(CC) $(CFLAGS) -c $< -o $@.o | ||
$(CC) -o $@ $@.o $(HOST_BPFOBJ) $(LDFLAGS) | ||
|
||
# Remove the scratch directories, all objects and generated skeleton headers
# in this directory, and the built binary.
clean: | ||
rm -rf $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) | ||
rm -f *.o *.skel.h *.subskel.h | ||
rm -f scx_example_cgfifo | ||
|
||
.PHONY: all clean |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#ifndef __SCHED_EXT_COMMON_H | ||
#define __SCHED_EXT_COMMON_H | ||
|
||
#include "vmlinux.h" | ||
#include <bpf/bpf_helpers.h> | ||
#include <bpf/bpf_tracing.h> | ||
#include <bpf/bpf_core_read.h> | ||
#include <stdbool.h> | ||
#include <errno.h> | ||
|
||
/*
 * Temporary macro definitions of 64-bit constants; per the #warning below
 * they are meant to be dropped once BPF can handle 64-bit enums. The names
 * suggest a slice value (SCX_SLICE_INF), dispatch-queue IDs and ID flag bits
 * (SCX_DQ_*), and enqueue flags (SCX_ENQ_*) - confirm the exact meanings
 * against the kernel-side definitions, which are not visible here.
 */
#warning "remove the following once bpf can handle 64bit enums" | ||
#define SCX_SLICE_INF 0xffffffffffffffffLLU | ||
#define SCX_DQ_FLAG_BUILTIN (1LLU << 63) | ||
#define SCX_DQ_FLAG_LOCAL_ON (1LLU << 61) | ||
#define SCX_DQ_INVALID (SCX_DQ_FLAG_BUILTIN | 0) | ||
#define SCX_DQ_GLOBAL (SCX_DQ_FLAG_BUILTIN | 1) | ||
#define SCX_DQ_LOCAL (SCX_DQ_FLAG_BUILTIN | 2) | ||
#define SCX_DQ_LOCAL_ON (SCX_DQ_FLAG_BUILTIN | SCX_DQ_FLAG_LOCAL_ON) | ||
#define SCX_ENQ_PREEMPT (1LLU << 32) | ||
#define SCX_ENQ_REENQ (1LLU << 40) | ||
#define SCX_ENQ_LAST (1LLU << 41) | ||
#define SCX_ENQ_SCD_LOCAL (1LLU << 42) | ||
|
||
/*
 * scx_bpf_* helper declarations. All are extern and marked __ksym, so no
 * definition lives in this header; presumably they are resolved against
 * kernel symbols when the BPF program is loaded (confirm against libbpf's
 * __ksym definition). Their semantics are not visible from here.
 *
 * NOTE(review): scx_bpf_pick_idle_cpu() takes 'const cpumask_t *' while the
 * other declarations in this header use 'const struct cpumask *'; confirm
 * whether the difference is intentional.
 */
extern s32 scx_bpf_create_dq(u64 dq_id, s32 node) __ksym; | ||
extern s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags) __ksym; | ||
extern u32 scx_bpf_dispatch_nr_slots(void) __ksym; | ||
extern s32 scx_bpf_dispatch(struct task_struct *p, u64 dq_id, u64 enq_flags) __ksym; | ||
extern bool scx_bpf_consume(u64 dq_id) __ksym; | ||
extern void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; | ||
extern s32 scx_bpf_dq_nr_queued(u64 dq_id) __ksym; | ||
extern bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym; | ||
extern s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed) __ksym; | ||
extern bool scx_bpf_has_idle_cpus(void) __ksym; | ||
extern s32 scx_bpf_destroy_dq(u64 dq_id) __ksym; | ||
extern bool scx_bpf_task_running(const struct task_struct *p) __ksym; | ||
extern s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; | ||
extern const struct cpumask *scx_bpf_task_cpumask(const struct task_struct *p) __ksym; | ||
extern struct cgroup *scx_bpf_task_cgroup(const struct task_struct *p) __ksym; | ||
extern struct task_struct *scx_bpf_find_task_by_pid(s32 pid) __ksym; | ||
extern void scx_bpf_reenqueue_local(void) __ksym; | ||
|
||
/*
 * Further idle-CPU and cpumask helper declarations (extern, __ksym). The
 * *_untyped variants take the mask as an unsigned long instead of a typed
 * pointer - presumably the pointer value itself; confirm with the kernel
 * side before relying on it.
 */
extern s32 scx_bpf_pick_idle_cpu_untyped(unsigned long cpus_allowed) __ksym; | ||
extern bool scx_bpf_has_idle_cpus_among(const struct cpumask *cpus_allowed) __ksym; | ||
extern bool scx_bpf_has_idle_cpus_among_untyped(unsigned long cpus_allowed) __ksym; | ||
extern s32 scx_bpf_cpumask_test_cpu(s32 cpu, const struct cpumask *cpumask) __ksym; | ||
extern s32 scx_bpf_cpumask_first(const struct cpumask *cpus_allowed) __ksym; | ||
extern s32 scx_bpf_cpumask_first_untyped(unsigned long cpus_allowed) __ksym; | ||
extern bool scx_bpf_cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p) __ksym; | ||
|
||
/*
 * extl_bpf_* helper declarations (extern, __ksym). By signature they cover
 * initialisation/enabling, creating and looking up 'struct extl_sq' objects
 * by a u64 id, associating a task with an sq, single and double sq locking
 * (the unlock calls take no argument), and enqueueing/dequeueing tasks with
 * a u64 key. The actual semantics and locking rules are defined elsewhere
 * and are not visible in this header.
 */
extern int extl_bpf_init(u32 le_data_size_req, u32 sq_data_size_req) __ksym; | ||
extern int extl_bpf_enable(void) __ksym; | ||
extern struct extl_sq *extl_bpf_create_sq(u64 id) __ksym; | ||
extern void extl_bpf_set_task_sq(struct task_struct *p, struct extl_sq *sq) __ksym; | ||
extern struct extl_sq *extl_bpf_task_sq(struct task_struct *p) __ksym; | ||
extern struct extl_sq *extl_bpf_find_sq(u64 id) __ksym; | ||
extern void extl_bpf_sq_lock(struct extl_sq *sq) __ksym; | ||
extern void extl_bpf_sq_lock_by_task(struct task_struct *p) __ksym; | ||
extern void extl_bpf_sq_unlock(void) __ksym; | ||
extern void extl_bpf_sq_lock_double(struct extl_sq *sq0, struct extl_sq *sq1) __ksym; | ||
extern void extl_bpf_sq_lock_double_by_task(struct task_struct *p, struct extl_sq *sq) __ksym; | ||
extern void extl_bpf_sq_unlock_double(void) __ksym; | ||
extern void extl_bpf_enqueue_task(struct task_struct *p, u64 key) __ksym; | ||
extern bool extl_bpf_dequeue_task(struct task_struct *p) __ksym; | ||
extern void extl_bpf_dispatch_dequeue(struct task_struct *p) __ksym; | ||
extern struct task_struct *extl_bpf_sq_first_task(struct extl_sq *sq) __ksym; | ||
|
||
/*
 * Local copies of constants that BPF code needs as plain macros. Presumably
 * they mirror the kernel's task flag bits and clock id and are repeated here
 * because vmlinux.h does not provide preprocessor macros - TODO confirm the
 * values against the target kernel.
 */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ | ||
#define PF_EXITING 0x00000004 | ||
#define CLOCK_MONOTONIC 1 | ||
|
||
/*
 * BPF_STRUCT_OPS - define a struct_ops BPF program
 * @name: program name; also used to build the section name "struct_ops/<name>"
 * @args: the program's arguments, forwarded unchanged to BPF_PROG()
 *
 * Expands to SEC("struct_ops/<name>") followed by BPF_PROG(name, args...).
 * SEC() and BPF_PROG() are not defined in this header; presumably they come
 * from the libbpf headers included above.
 */
#define BPF_STRUCT_OPS(name, args...) \ | ||
SEC("struct_ops/"#name) \ | ||
BPF_PROG(name, ##args) | ||
|
||
/** | ||
 * MEMBER_VPTR - Obtain the verified pointer to a struct or array member | ||
 * @base: struct or array to index (the object itself, not a pointer to it) | ||
 * @member: dereferenced member (e.g. .field, [idx0][idx1], .field[idx0] ...) | ||
 * | ||
 * The verifier often gets confused by the instruction sequence the compiler | ||
 * generates for indexing struct fields or arrays. This macro forces the | ||
 * compiler to generate a code sequence which first calculates the byte offset, | ||
 * checks it against the struct or array size and adds that byte offset to the | ||
 * base address to generate the pointer to the member to help the verifier. | ||
 * | ||
 * Ideally, we want to abort if the calculated offset is out-of-bounds. However, | ||
 * BPF currently doesn't support abort, so evaluate to NULL instead. The caller | ||
 * must check for NULL and take appropriate action to appease the verifier. To | ||
 * avoid confusing the verifier, it's best to check for NULL and dereference | ||
 * immediately. | ||
 * | ||
 * vptr = MEMBER_VPTR(my_array, [i][j]); | ||
 * if (!vptr) | ||
 * return error; | ||
 * *vptr = new_value; | ||
 * | ||
 * @base is always parenthesized before @member is applied and its address is | ||
 * taken with &(base), so both arrays and structs work and expressions such as | ||
 * *ptr can be passed safely. sizeof(base) must cover the whole object: passing | ||
 * a pointer would make the bound meaningless, which the static assertion below | ||
 * catches whenever the member is larger than the pointer. | ||
 */ | ||
#define MEMBER_VPTR(base, member) (typeof((base) member) *)({ \ | ||
	u64 __base = (u64)&(base); \ | ||
	u64 __addr = (u64)&((base) member) - __base; \ | ||
	_Static_assert(sizeof(base) >= sizeof((base) member), \ | ||
		       "@base is smaller than @member, is @base a pointer?"); \ | ||
	asm volatile ( \ | ||
		"if %0 <= %[max] goto +2\n" \ | ||
		"%0 = 0\n" \ | ||
		"goto +1\n" \ | ||
		"%0 += %1\n" \ | ||
		: "+r"(__addr) \ | ||
		: "r"(__base), \ | ||
		  [max]"i"(sizeof(base) - sizeof((base) member))); \ | ||
	__addr; \ | ||
}) | ||
|
||
#endif /* __SCHED_EXT_COMMON_H */ |
Oops, something went wrong.