Skip to content

Commit

Permalink
cgroup_timer_slack by Kirill A. Shutemov, v10
Browse files Browse the repository at this point in the history
Every task_struct has a timer_slack_ns value. This value is used to round up
poll() and select() timeout values. This feature can be useful in
a mobile environment where combined wakeups are desired.

Originally, prctl() was the only way to change the timer slack value of
a process. So you were not able to change the timer slack value of another
process.

cgroup subsys "timer_slack" implements timer slack controller. It
provides a way to set minimal timer slack value for a group of tasks.
If a task belongs to a cgroup with minimal timer slack value higher than
task's value, cgroup's value will be applied.

The timer slack controller makes it possible to set the timer slack value of
a process based on a policy. For example, you can create foreground and
background cgroups and move tasks between them based on system state.

Idea-by: Jacob Pan <jacob.jun.pan <at> linux.intel.com>
Signed-off-by: Kirill A. Shutemov <kirill <at> shutemov.name>

task_get_effective_timer_slack() returns timer slack value to be used
to configure per-task timers. It can be equal or higher than task's
timer slack value.

For now task_get_effective_timer_slack() returns timer_slack_ns of the
task. Timer slack cgroup controller will implement a bit more
sophisticated logic.

Signed-off-by: Kirill A. Shutemov <kirill <at> shutemov.name>

PR_GET_EFFECTIVE_TIMERSLACK allows a process to query its effective timer
slack value.

Signed-off-by: Kirill A. Shutemov <kirill <at> shutemov.name>
  • Loading branch information
gokhanmoral committed Aug 19, 2012
1 parent 89678d3 commit 2890c32
Show file tree
Hide file tree
Showing 12 changed files with 241 additions and 8 deletions.
72 changes: 72 additions & 0 deletions Documentation/cgroups/timer_slack.txt
@@ -0,0 +1,72 @@
Timer Slack Controller
======================

Overview
--------

Every task_struct has timer_slack_ns value. This value is used to round
up poll() and select() timeout values. This feature can be useful in
mobile environment where combined wakeups are desired.

Originally, prctl() was the only way to change the timer slack value of
a process. So you were not able to change the timer slack value of another
process.

cgroup subsys "timer_slack" implements timer slack controller. It
provides a way to set minimal timer slack value for a group of tasks.
If a task belongs to a cgroup with minimal timer slack value higher than
task's value, cgroup's value will be applied.

The timer slack controller makes it possible to set the timer slack value of
a process based on a policy. For example, you can create foreground and
background cgroups and move tasks between them based on system state.

User interface
--------------

To get timer slack controller functionality you need to enable it in
kernel configuration:

CONFIG_CGROUP_TIMER_SLACK=y

The controller provides two files:

# mount -t cgroup -o timer_slack none /sys/fs/cgroup
# ls /sys/fs/cgroup/timer_slack.*
/sys/fs/cgroup/timer_slack.effective_slack_ns
/sys/fs/cgroup/timer_slack.min_slack_ns

By default timer_slack.min_slack_ns is 0:

# cat /sys/fs/cgroup/timer_slack.min_slack_ns
0

You can set it to some value:

# echo 50000 > /sys/fs/cgroup/timer_slack.min_slack_ns
# cat /sys/fs/cgroup/timer_slack.min_slack_ns
50000

Tasks still can set task's value below 50000 using prctl(), but in this
case cgroup's value will be applied.

Timer slack controller supports hierarchical groups.

# mkdir /sys/fs/cgroup/a
# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns
50000
# echo 70000 > /sys/fs/cgroup/a/timer_slack.min_slack_ns
# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns
70000

You can set any value you want, but the effective value will be the highest
value up the hierarchy. You can see the effective timer slack value for the
cgroup in the timer_slack.effective_slack_ns file:

# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns
70000
# echo 100000 > /sys/fs/cgroup/timer_slack.min_slack_ns
# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns
70000
# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns
100000
7 changes: 2 additions & 5 deletions fs/select.c
Expand Up @@ -69,7 +69,6 @@ static long __estimate_accuracy(struct timespec *tv)

long select_estimate_accuracy(struct timespec *tv)
{
unsigned long ret;
struct timespec now;

/*
Expand All @@ -81,10 +80,8 @@ long select_estimate_accuracy(struct timespec *tv)

ktime_get_ts(&now);
now = timespec_sub(*tv, now);
ret = __estimate_accuracy(&now);
if (ret < current->timer_slack_ns)
return current->timer_slack_ns;
return ret;
return min_t(long, __estimate_accuracy(&now),
task_get_effective_timer_slack(current));
}


Expand Down
7 changes: 7 additions & 0 deletions include/linux/cgroup_subsys.h
Expand Up @@ -64,3 +64,10 @@ SUBSYS(perf)
#endif

/* */

#ifdef CONFIG_CGROUP_TIMER_SLACK
SUBSYS(timer_slack)
#endif

/* */

6 changes: 6 additions & 0 deletions include/linux/prctl.h
Expand Up @@ -102,4 +102,10 @@

#define PR_MCE_KILL_GET 34

/*
* Get effective timerslack value for the process.
* It can be higher than PR_GET_TIMERSLACK.
*/
#define PR_GET_EFFECTIVE_TIMERSLACK 35

#endif /* _LINUX_PRCTL_H */
10 changes: 10 additions & 0 deletions include/linux/sched.h
Expand Up @@ -2701,6 +2701,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}

/*
 * task_get_effective_timer_slack - timer slack to use for @tsk's timers.
 *
 * With CONFIG_CGROUP_TIMER_SLACK the function is provided out of line.
 * Without it, the effective value is just the task's own timer_slack_ns.
 */
#ifndef CONFIG_CGROUP_TIMER_SLACK
static inline unsigned long
task_get_effective_timer_slack(struct task_struct *tsk)
{
	return tsk->timer_slack_ns;
}
#else
extern unsigned long task_get_effective_timer_slack(struct task_struct *tsk);
#endif

#endif /* __KERNEL__ */

#endif
8 changes: 8 additions & 0 deletions init/Kconfig
Expand Up @@ -603,6 +603,14 @@ config CGROUP_FREEZER
Provides a way to freeze and unfreeze all tasks in a
cgroup.

config CGROUP_TIMER_SLACK
bool "Timer slack cgroup controller"
help
Provides a way to set minimal timer slack value for tasks in
a cgroup.
It's useful in mobile devices where certain background apps
are attached to a cgroup and combined wakeups are desired.

config CGROUP_DEVICE
bool "Device controller for cgroups"
help
Expand Down
1 change: 1 addition & 0 deletions kernel/Makefile
Expand Up @@ -61,6 +61,7 @@ obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_TIMER_SLACK) += cgroup_timer_slack.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
Expand Down
125 changes: 125 additions & 0 deletions kernel/cgroup_timer_slack.c
@@ -0,0 +1,125 @@
/*
* cgroup_timer_slack.c - control group timer slack subsystem
*
* Copyright Nokia Corporation, 2011
* Author: Kirill A. Shutemov
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/cgroup.h>
#include <linux/init_task.h>
#include <linux/slab.h>

/*
 * Declared early because cgroup_to_tslack() reads its subsys_id; the
 * initializer is further down in this file.
 */
struct cgroup_subsys timer_slack_subsys;

/* Per-cgroup state of the timer slack controller. */
struct tslack_cgroup {
/* Embedded subsystem state; cgroup_to_tslack() uses container_of() on it. */
struct cgroup_subsys_state css;
/* Value read and written through the "min_slack_ns" control file. */
unsigned long min_slack_ns;
};

/*
 * Map a cgroup to this controller's per-cgroup state by looking up the
 * subsystem state for timer_slack_subsys and stepping back to the
 * tslack_cgroup that embeds it.
 */
static struct tslack_cgroup *cgroup_to_tslack(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup,
						timer_slack_subsys.subsys_id),
			    struct tslack_cgroup, css);
}

/*
 * Allocate the controller state for a new cgroup.
 *
 * A cgroup with a parent starts with the parent's min_slack_ns; a cgroup
 * without a parent starts with 0.
 *
 * Returns the embedded css, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct cgroup_subsys_state *tslack_create(struct cgroup_subsys *subsys,
		struct cgroup *cgroup)
{
	struct tslack_cgroup *tslack;

	tslack = kmalloc(sizeof(*tslack), GFP_KERNEL);
	if (!tslack)
		return ERR_PTR(-ENOMEM);

	/* Inherit the parent's minimum when there is a parent. */
	tslack->min_slack_ns = cgroup->parent ?
		cgroup_to_tslack(cgroup->parent)->min_slack_ns : 0UL;

	return &tslack->css;
}

/*
 * Free the controller state that tslack_create() allocated for @cgroup.
 *
 * @subsys is unused; it is named like the same parameter in
 * tslack_create() and tslack_populate() so it is not mistaken for a
 * struct tslack_cgroup.
 */
static void tslack_destroy(struct cgroup_subsys *subsys,
		struct cgroup *cgroup)
{
	kfree(cgroup_to_tslack(cgroup));
}

static u64 tslack_read_min(struct cgroup *cgroup, struct cftype *cft)
{
return cgroup_to_tslack(cgroup)->min_slack_ns;
}

/*
 * write_u64 handler for "min_slack_ns": store @val as the cgroup's minimum.
 *
 * Returns 0 on success, or -EINVAL when @val does not fit the unsigned long
 * field it is stored in.
 */
static int tslack_write_min(struct cgroup *cgroup, struct cftype *cft, u64 val)
{
	struct tslack_cgroup *tslack;

	if (val > ULONG_MAX)
		return -EINVAL;

	tslack = cgroup_to_tslack(cgroup);
	tslack->min_slack_ns = val;
	return 0;
}

/*
 * read_u64 handler for "effective_slack_ns".
 *
 * Returns the largest min_slack_ns found on the path from @cgroup up through
 * every ancestor reachable via ->parent.  @cft is not used.
 */
static u64 tslack_read_effective(struct cgroup *cgroup, struct cftype *cft)
{
	unsigned long effective = cgroup_to_tslack(cgroup)->min_slack_ns;
	struct cgroup *ancestor;

	for (ancestor = cgroup->parent; ancestor; ancestor = ancestor->parent) {
		unsigned long candidate;

		candidate = cgroup_to_tslack(ancestor)->min_slack_ns;
		if (candidate > effective)
			effective = candidate;
	}

	return effective;
}

/* Control files registered for a cgroup by tslack_populate(). */
static struct cftype files[] = {
{
/* Readable and writable: the cgroup's own min_slack_ns. */
.name = "min_slack_ns",
.read_u64 = tslack_read_min,
.write_u64 = tslack_write_min,
},
{
/*
 * No write handler is set: reports the highest min_slack_ns among
 * the cgroup and its ancestors.
 */
.name = "effective_slack_ns",
.read_u64 = tslack_read_effective,
},
};

/*
 * Register the controller's control files (the files[] table) for @cgroup.
 * Returns whatever cgroup_add_files() returns.
 */
static int tslack_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
	int ret;

	ret = cgroup_add_files(cgroup, subsys, files, ARRAY_SIZE(files));
	return ret;
}

/*
 * Subsystem descriptor for the "timer_slack" controller: wires the
 * create/destroy/populate callbacks defined in this file to the cgroup core.
 */
struct cgroup_subsys timer_slack_subsys = {
.name = "timer_slack",
.subsys_id = timer_slack_subsys_id,
.create = tslack_create,
.destroy = tslack_destroy,
.populate = tslack_populate,
};

/*
 * task_get_effective_timer_slack - timer slack to use for @tsk's timers.
 *
 * Returns the larger of the task's own timer_slack_ns and the effective
 * slack of the timer_slack cgroup the task belongs to, so the result is
 * never below the task's own value.
 */
unsigned long task_get_effective_timer_slack(struct task_struct *tsk)
{
	unsigned long group_slack;
	unsigned long own_slack;

	/* The task's cgroup is looked up and walked under the RCU read lock. */
	rcu_read_lock();
	group_slack = tslack_read_effective(
			task_cgroup(tsk, timer_slack_subsys.subsys_id), NULL);
	rcu_read_unlock();

	own_slack = tsk->timer_slack_ns;
	if (own_slack > group_slack)
		return own_slack;
	return group_slack;
}
4 changes: 4 additions & 0 deletions kernel/fork.c
Expand Up @@ -1167,6 +1167,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

/*
* Save current task's (not effective) timer slack value as default
* timer slack value for new task.
*/
p->default_timer_slack_ns = current->timer_slack_ns;

task_io_accounting_init(&p->ioac);
Expand Down
4 changes: 2 additions & 2 deletions kernel/futex.c
Expand Up @@ -1875,7 +1875,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
current->timer_slack_ns);
task_get_effective_timer_slack(current));
}

retry:
Expand Down Expand Up @@ -2269,7 +2269,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
current->timer_slack_ns);
task_get_effective_timer_slack(current));
}

/*
Expand Down
2 changes: 1 addition & 1 deletion kernel/hrtimer.c
Expand Up @@ -1564,7 +1564,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
int ret = 0;
unsigned long slack;

slack = current->timer_slack_ns;
slack = task_get_effective_timer_slack(current);
if (rt_task(current))
slack = 0;

Expand Down
3 changes: 3 additions & 0 deletions kernel/sys.c
Expand Up @@ -1761,6 +1761,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_TIMERSLACK:
error = current->timer_slack_ns;
break;
case PR_GET_EFFECTIVE_TIMERSLACK:
error = task_get_effective_timer_slack(current);
break;
case PR_SET_TIMERSLACK:
if (arg2 <= 0)
current->timer_slack_ns =
Expand Down

0 comments on commit 2890c32

Please sign in to comment.