MuQSS version 0.104

ckolivas committed Oct 9, 2016
1 parent 523d939 commit 1e3f40f5448c4a7a4257f0908f4620b3de679472


@@ -0,0 +1,7 @@
MuQSS - The Multiple Queue Skiplist Scheduler by Con Kolivas.

See sched-BFS.txt for the basic design; MuQSS is a per-CPU runqueue variant
with one 8-level skiplist per runqueue and fine-grained locking for much
greater scalability.

Con Kolivas <kernel@kolivas.org> Sun, 2nd October 2016
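
For orientation, a skip list indexes each entry with a variable number of forward links so that lookup and removal stay O(log n); MuQSS caps this at 8 levels per runqueue. A minimal sketch of such a node in C, with hypothetical field names (the actual structure lives in the kernel sources not rendered in this diff):

/* Illustrative 8-level skip list node; names are assumptions,
 * not the real MuQSS identifiers. */
#define SKIPLIST_LEVELS 8

struct skiplist_node {
	int level;			/* highest level this node uses */
	u64 key;			/* sort key, e.g. a virtual deadline */
	void *value;			/* the task this node indexes */
	struct skiplist_node *next[SKIPLIST_LEVELS];	/* per-level forward links */
	struct skiplist_node *prev[SKIPLIST_LEVELS];	/* per-level backward links */
};

With 8 levels, a skip list comfortably indexes on the order of 2^8 = 256 entries, which is ample for the runnable tasks of a single CPU.
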
@@ -39,6 +39,7 @@ show up in /proc/sys/kernel:
- hung_task_timeout_secs
- hung_task_warnings
- kexec_load_disabled
- iso_cpu
- kptr_restrict
- kstack_depth_to_print [ X86 only ]
- l2cr [ PPC only ]
@@ -72,6 +73,7 @@ show up in /proc/sys/kernel:
- randomize_va_space
- real-root-dev ==> Documentation/initrd.txt
- reboot-cmd [ SPARC only ]
- rr_interval
- rtsig-max
- rtsig-nr
- sem
@@ -401,6 +403,16 @@ kernel stack.

==============================================================

iso_cpu: (MuQSS CPU scheduler only).

This sets the percentage of CPU that unprivileged SCHED_ISO tasks can
use, running effectively at realtime priority, averaged over a rolling
five seconds across the -whole- system, meaning all CPUs.

Set to 70 (percent) by default.
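
In sketch form (illustrative names, not the actual MuQSS code), the enforcement amounts to comparing the ticks consumed by SCHED_ISO tasks over the last five seconds against the allowed share of all CPU ticks:

/* Illustrative only: has SCHED_ISO exceeded its iso_cpu share?
 * iso_ticks:   ticks used by SCHED_ISO tasks in the last 5 seconds
 * total_ticks: 5 * HZ * number of online CPUs
 * iso_cpu:     the sysctl value, default 70 */
static int iso_over_limit(u64 iso_ticks, u64 total_ticks, unsigned int iso_cpu)
{
	return iso_ticks * 100 > total_ticks * iso_cpu;
}

While over the limit, ISO tasks are presumably demoted to normal scheduling until the rolling average falls back under the threshold.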

==============================================================

l2cr: (PPC only)

This flag controls the L2 cache of G3 processor boards. If
@@ -792,6 +804,20 @@ rebooting. ???

==============================================================

rr_interval: (MuQSS CPU scheduler only)

This is the smallest duration that any CPU process scheduling unit
will run for. Increasing this value can substantially increase the
throughput of CPU-bound tasks, at the expense of increased overall
latencies. Conversely, decreasing it will decrease average and maximum
latencies, at the expense of throughput. This value is in milliseconds
and the default chosen depends on the number of CPUs available at
scheduler initialisation, with a minimum of 6.

Valid values are from 1-1000.
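
The exact scaling formula is not given here; a plausible sketch of "depends on the number of cpus with a minimum of 6", clamped to the valid range, might look like this (an assumption, not the real initialisation code):

/* Assumed shape of the default calculation: start at the minimum of
 * 6 ms and grow roughly logarithmically with the online CPU count. */
static int default_rr_interval(int ncpus)
{
	int ms = 6;

	while (ncpus > 1) {
		ms++;
		ncpus >>= 1;
	}
	return ms <= 1000 ? ms : 1000;	/* keep within the valid 1-1000 range */
}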

==============================================================

rtsig-max & rtsig-nr:

The file rtsig-max can be used to tune the maximum number
@@ -63,11 +63,6 @@ static struct task_struct *spusched_task;
static struct timer_list spusched_timer;
static struct timer_list spuloadavg_timer;

-/*
- * Priority of a normal, non-rt, non-niced process (aka nice level 0).
- */
-#define NORMAL_PRIO 120
-
/*
* Frequency of the spu scheduler tick. By default we do one SPU scheduler
* tick for every 10 CPU scheduler ticks.
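
The removed define encoded the standard kernel mapping from nice level to static priority, and is presumably dropped here because MuQSS provides NORMAL_PRIO from its own headers, making the local copy redundant:

/* Standard kernel nice-to-priority mapping, for reference:
 *   prio = MAX_RT_PRIO (100) + nice + 20
 *   nice   0 -> 120 (the NORMAL_PRIO removed above)
 *   nice -20 -> 100, nice 19 -> 139 */
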
@@ -916,10 +916,26 @@ config SCHED_SMT
depends on SMP
---help---
SMT scheduler support improves the CPU scheduler's decision making
- when dealing with Intel Pentium 4 chips with HyperThreading at a
+ when dealing with Intel P4/Core 2 chips with HyperThreading at a
cost of slightly increased overhead in some places. If unsure say
N here.

config SMT_NICE
bool "SMT (Hyperthreading) aware nice priority and policy support"
depends on SCHED_MUQSS && SCHED_SMT
default y
---help---
Enabling Hyperthreading on Intel CPUs decreases the effectiveness of
'nice' levels and different scheduling policies (e.g. realtime) because
CPU power is shared between hyperthreads. SMT nice support makes each
logical CPU aware of what is running on its hyperthread siblings,
maintaining appropriate distribution of CPU time according to nice
levels and scheduling policies at the expense of slightly increased
overhead.

If unsure say Y here.
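
Conceptually (a sketch under assumed names, not the actual implementation), SMT nice means that before a task runs on one sibling, it is compared against whatever the other sibling is running, and the lower-priority side defers:

/* Sketch only: may @p run while the hyperthread sibling runs @s? */
static bool smt_may_run(struct task_struct *p, struct task_struct *s)
{
	if (!s)
		return true;		/* sibling idle: no conflict */
	if (rt_task(s) && !rt_task(p))
		return false;		/* don't rob a realtime sibling */
	/* illustrative threshold: a much nicer task defers to its sibling */
	return task_nice(p) - task_nice(s) < 5;
}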


config SCHED_MC
def_bool y
prompt "Multi-core scheduler support"
@@ -2001,7 +2017,7 @@ config HOTPLUG_CPU
config BOOTPARAM_HOTPLUG_CPU0
bool "Set default setting of cpu0_hotpluggable"
default n
- depends on HOTPLUG_CPU
+ depends on HOTPLUG_CPU && !SCHED_MUQSS
---help---
Set whether default state of cpu0_hotpluggable is on or off.

@@ -2030,7 +2046,7 @@ config BOOTPARAM_HOTPLUG_CPU0
config DEBUG_HOTPLUG_CPU0
def_bool n
prompt "Debug CPU0 hotplug"
- depends on HOTPLUG_CPU
+ depends on HOTPLUG_CPU && !SCHED_MUQSS
---help---
Enabling this option offlines CPU0 (if CPU0 can be offlined) as
soon as possible and boots up userspace with CPU0 offlined. User
@@ -31,8 +31,8 @@ struct cs_dbs_tuners {
};

/* Conservative governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
-#define DEF_FREQUENCY_DOWN_THRESHOLD (20)
+#define DEF_FREQUENCY_UP_THRESHOLD (63)
+#define DEF_FREQUENCY_DOWN_THRESHOLD (26)
#define DEF_FREQUENCY_STEP (5)
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (10)
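
With the new defaults, the conservative governor steps the frequency up by DEF_FREQUENCY_STEP (5% of max) when load exceeds 63%, and back down when load drops below 26%. A simplified sketch of that decision (not the actual kernel code):

/* Simplified conservative-governor step decision using the new
 * thresholds from this commit. */
static unsigned int cs_next_freq(unsigned int load, unsigned int cur,
				 unsigned int max)
{
	unsigned int step = max * 5 / 100;	/* DEF_FREQUENCY_STEP */

	if (load > 63) {			/* DEF_FREQUENCY_UP_THRESHOLD */
		cur += step;
		return cur > max ? max : cur;
	}
	if (load < 26)				/* DEF_FREQUENCY_DOWN_THRESHOLD */
		return cur > step ? cur - step : 0;
	return cur;				/* inside the band: hold */
}

Lowering the up-threshold from 80 to 63 makes the governor ramp the frequency up earlier, presumably to favour responsiveness under MuQSS.
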
@@ -20,7 +20,7 @@
#include "cpufreq_ondemand.h"

/* On-demand governor macros */
-#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define DEF_FREQUENCY_UP_THRESHOLD (63)
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
#define MICRO_FREQUENCY_UP_THRESHOLD (95)
@@ -134,7 +134,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
}

/*
- * Every sampling_rate, we check, if current idle time is less than 20%
+ * Every sampling_rate, we check, if current idle time is less than 37%
* (default), then we try to increase frequency. Else, we adjust the frequency
* proportional to load.
*/
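
The two edits agree: a load up-threshold of 63% is exactly the "idle time less than 37%" condition (100 - 63). As a self-contained sketch of the decision the comment describes:

/* Sketch: ondemand frequency selection with the new 63% threshold.
 * "load > 63" and "idle < 37%" are the same condition. */
static unsigned int od_next_freq(unsigned int load, unsigned int max)
{
	if (load > 63)			/* idle time below 37% (default) */
		return max;		/* burst straight to maximum */
	return max * load / 100;	/* otherwise proportional to load */
}
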
@@ -505,7 +505,7 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "0 0 0\n");
else
seq_printf(m, "%llu %llu %lu\n",
- (unsigned long long)task->se.sum_exec_runtime,
+ (unsigned long long)tsk_seruntime(task),
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
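
tsk_seruntime() abstracts where a task's accumulated runtime is stored, since MuQSS does not use the CFS scheduling entity (task->se). Its definition is not visible in this rendered diff; following the BFS lineage it is presumably something like:

/* Assumed definition, not shown in this diff: */
#ifdef CONFIG_SCHED_MUQSS
#define tsk_seruntime(t)	((t)->sched_time)
#else
#define tsk_seruntime(t)	((t)->se.sum_exec_runtime)
#endif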

@@ -157,8 +157,6 @@ extern struct task_group root_task_group;
# define INIT_VTIME(tsk)
#endif

-#define INIT_TASK_COMM "swapper"
-
#ifdef CONFIG_RT_MUTEXES
# define INIT_RT_MUTEXES(tsk) \
.pi_waiters = RB_ROOT, \
@@ -187,6 +185,77 @@ extern struct task_group root_task_group;
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
*/
#ifdef CONFIG_SCHED_MUQSS
#define INIT_TASK_COMM "MuQSS"
#define INIT_TASK(tsk) \
{ \
.state = 0, \
.stack = &init_thread_info, \
.usage = ATOMIC_INIT(2), \
.flags = PF_KTHREAD, \
.prio = NORMAL_PRIO, \
.static_prio = MAX_PRIO-20, \
.normal_prio = NORMAL_PRIO, \
.deadline = 0, \
.policy = SCHED_NORMAL, \
.cpus_allowed = CPU_MASK_ALL, \
.mm = NULL, \
.active_mm = &init_mm, \
.restart_block = { \
.fn = do_no_restart_syscall, \
}, \
.time_slice = 1000000, \
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
.parent = &tsk, \
.children = LIST_HEAD_INIT(tsk.children), \
.sibling = LIST_HEAD_INIT(tsk.sibling), \
.group_leader = &tsk, \
RCU_POINTER_INITIALIZER(real_cred, &init_cred), \
RCU_POINTER_INITIALIZER(cred, &init_cred), \
.comm = INIT_TASK_COMM, \
.thread = INIT_THREAD, \
.fs = &init_fs, \
.files = &init_files, \
.signal = &init_signals, \
.sighand = &init_sighand, \
.nsproxy = &init_nsproxy, \
.pending = { \
.list = LIST_HEAD_INIT(tsk.pending.list), \
.signal = {{0}}}, \
.blocked = {{0}}, \
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
.pids = { \
[PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
[PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
[PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), \
}, \
.thread_group = LIST_HEAD_INIT(tsk.thread_group), \
.thread_node = LIST_HEAD_INIT(init_signals.thread_head), \
INIT_IDS \
INIT_PERF_EVENTS(tsk) \
INIT_TRACE_IRQFLAGS \
INIT_LOCKDEP \
INIT_FTRACE_GRAPH \
INIT_TRACE_RECURSION \
INIT_TASK_RCU_PREEMPT(tsk) \
INIT_TASK_RCU_TASKS(tsk) \
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
INIT_PREV_CPUTIME(tsk) \
INIT_VTIME(tsk) \
INIT_NUMA_BALANCING(tsk) \
INIT_KASAN(tsk) \
}
#else /* CONFIG_SCHED_MUQSS */
#define INIT_TASK_COMM "swapper"
#define INIT_TASK(tsk) \
{ \
.state = 0, \
@@ -261,7 +330,7 @@ extern struct task_group root_task_group;
INIT_NUMA_BALANCING(tsk) \
INIT_KASAN(tsk) \
}

#endif /* CONFIG_SCHED_MUQSS */

#define INIT_CPU_TIMERS(cpu_timers) \
{ \
@@ -52,6 +52,8 @@ enum {
*/
static inline int task_nice_ioprio(struct task_struct *task)
{
+	if (iso_task(task))
+		return 0;
	return (task_nice(task) + 20) / 5;
}
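
The added branch pins SCHED_ISO tasks to the top best-effort I/O priority, matching their elevated CPU treatment. Worked through the existing formula:

/* (task_nice + 20) / 5 folds the 40 nice levels into 8 ioprio levels:
 *   nice -20 -> 0 (highest)
 *   nice  -1 -> 3
 *   nice   0 -> 4 (default)
 *   nice  19 -> 7 (lowest)
 * iso_task() now short-circuits SCHED_ISO tasks to level 0. */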

@@ -164,7 +164,7 @@ static inline u64 get_jiffies_64(void)
* Have the 32 bit jiffies value wrap 5 minutes after boot
* so jiffies wrap bugs show up earlier.
*/
-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
+#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-10*HZ))
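
The arithmetic behind the change, with HZ = 1000 as an example:

/* INITIAL_JIFFIES = (unsigned int)(-10 * 1000)
 *                 = 4294967296 - 10000 = 4294957296
 * i.e. the 32-bit jiffies counter now wraps 10 seconds after boot
 * (the comment above still says 5 minutes, which matched -300*HZ),
 * so jiffies wrap bugs surface even sooner. */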

/*
* Change timeval to jiffies, trying to avoid the
