Skip to content

Commit

Permalink
mm: oom_kill: reap memory of a task that receives SIGKILL
Browse files Browse the repository at this point in the history
Free the pages in parallel for a task that receives SIGKILL, using the
oom_reaper. Freeing the pages this way helps return them to the buddy
system well in advance.
Memory is reaped for a process that received SIGKILL through either
sys_kill from user space or kill_pid from the kernel, provided that the
sending process has the CAP_KILL capability.
A sysctl interface, reap_mem_on_sigkill, is also added to turn this
feature on or off.

Change-Id: I21adb95de5e380a80d7eb0b87d9b5b553f52e28a
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
(cherry picked from commit f9920cf)
(cherry picked from commit 2d164d5fe2bef69c444fe0c2be444853903d8aff)
  • Loading branch information
Charan Teja Reddy authored and freak07 committed Apr 6, 2019
1 parent 44248a5 commit 2221b4d
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 19 deletions.
19 changes: 19 additions & 0 deletions Documentation/sysctl/vm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Currently, these files are in /proc/sys/vm:
- nr_trim_pages (only if CONFIG_MMU=n)
- numa_zonelist_order
- oom_dump_tasks
- reap_mem_on_sigkill
- oom_kill_allocating_task
- overcommit_kbytes
- overcommit_memory
Expand Down Expand Up @@ -635,6 +636,24 @@ The default value is 1 (enabled).

==============================================================

reap_mem_on_sigkill

This enables or disables memory reaping for a process that receives
SIGKILL. The sending process must have the CAP_KILL capability.

If this is set to 1, when a process receives SIGKILL from a process that
has the CAP_KILL capability, the receiving process is added to the
oom_reaper queue, from which the oom_reaper thread can pick it up and reap
its memory. This applies to a process that received SIGKILL through either
sys_kill from user space or kill_pid from the kernel.

If this is set to 0, memory is not reaped for a process that receives
SIGKILL, whether it was sent through sys_kill from user space or kill_pid
from the kernel.

The default value is 0 (disabled).

==============================================================

oom_kill_allocating_task

This enables or disables killing the OOM-triggering task in
Expand Down
4 changes: 4 additions & 0 deletions include/linux/oom.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,8 @@ extern void wake_oom_reaper(struct task_struct *tsk);
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
extern int sysctl_panic_on_oom;
extern int sysctl_reap_mem_on_sigkill;

/* calls for LMK reaper */
extern void add_to_oom_reaper(struct task_struct *p);
#endif /* _INCLUDE_LINUX_OOM_H */
7 changes: 6 additions & 1 deletion kernel/signal.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
#include <linux/compat.h>
#include <linux/cn_proc.h>
#include <linux/compiler.h>
#include <linux/oom.h>
#include <linux/capability.h>

#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
Expand Down Expand Up @@ -1311,8 +1313,11 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
ret = check_kill_permission(sig, info, p);
rcu_read_unlock();

if (!ret && sig)
if (!ret && sig) {
ret = do_send_sig_info(sig, info, p, true);
if (capable(CAP_KILL) && sig == SIGKILL)
add_to_oom_reaper(p);
}

return ret;
}
Expand Down
7 changes: 7 additions & 0 deletions kernel/sysctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,13 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "reap_mem_on_sigkill",
.data = &sysctl_reap_mem_on_sigkill,
.maxlen = sizeof(sysctl_reap_mem_on_sigkill),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "overcommit_ratio",
.data = &sysctl_overcommit_ratio,
Expand Down
56 changes: 38 additions & 18 deletions mm/oom_kill.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
int sysctl_reap_mem_on_sigkill;

DEFINE_MUTEX(oom_lock);

Expand Down Expand Up @@ -614,12 +615,15 @@ void wake_oom_reaper(struct task_struct *tsk)
if (!oom_reaper_th)
return;

/* move the lock here to avoid scenario of queuing
* the same task by both OOM killer and LMK.
/*
* Move the lock here to avoid scenario of queuing
* the same task by both OOM killer and any other SIGKILL
* path.
*/
spin_lock(&oom_reaper_lock);
/* mm is already queued? */
if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) {

/* tsk is already queued? */
if (tsk == oom_reaper_list || tsk->oom_reaper_list) {
spin_unlock(&oom_reaper_lock);
return;
}
Expand Down Expand Up @@ -650,6 +654,16 @@ static inline void wake_oom_reaper(struct task_struct *tsk)
}
#endif /* CONFIG_MMU */

/*
 * Record tsk's current mm as the oom_mm of its signal struct, unless an
 * oom_mm has already been recorded there.
 *
 * Only the caller that wins the cmpxchg() takes the extra mm_count
 * reference and sets MMF_OOM_VICTIM on the mm; everybody else returns
 * without touching anything.
 */
static void __mark_oom_victim(struct task_struct *tsk)
{
	struct mm_struct *victim_mm = tsk->mm;

	/* A non-NULL old value means oom_mm was already set: nothing to do. */
	if (cmpxchg(&tsk->signal->oom_mm, NULL, victim_mm))
		return;

	atomic_inc(&tsk->signal->oom_mm->mm_count);
	set_bit(MMF_OOM_VICTIM, &victim_mm->flags);
}

/**
* mark_oom_victim - mark the given task as OOM victim
* @tsk: task to mark
Expand All @@ -662,18 +676,13 @@ static inline void wake_oom_reaper(struct task_struct *tsk)
*/
static void mark_oom_victim(struct task_struct *tsk)
{
struct mm_struct *mm = tsk->mm;

WARN_ON(oom_killer_disabled);
/* OOM killer might race with memcg OOM */
if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
return;

/* oom_mm is bound to the signal struct life time. */
if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
atomic_inc(&tsk->signal->oom_mm->mm_count);
set_bit(MMF_OOM_VICTIM, &mm->flags);
}
__mark_oom_victim(tsk);

/*
* Make sure that the task is woken up from uninterruptible sleep
Expand Down Expand Up @@ -858,13 +867,6 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
* still freeing memory.
*/
read_lock(&tasklist_lock);

/*
* The task 'p' might have already exited before reaching here. The
* put_task_struct() will free task_struct 'p' while the loop still try
* to access the field of 'p', so, get an extra reference.
*/
get_task_struct(p);
for_each_thread(p, t) {
list_for_each_entry(child, &t->children, sibling) {
unsigned int child_points;
Expand All @@ -884,7 +886,6 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
}
}
}
put_task_struct(p);
read_unlock(&tasklist_lock);

p = find_lock_task_mm(victim);
Expand Down Expand Up @@ -1097,3 +1098,22 @@ void pagefault_out_of_memory(void)
out_of_memory(&oc);
mutex_unlock(&oom_lock);
}

/**
 * add_to_oom_reaper - hand a task over to the oom_reaper
 * @p: task whose memory should be reaped
 *
 * Does nothing unless sysctl_reap_mem_on_sigkill (the reap_mem_on_sigkill
 * sysctl) is non-zero. Otherwise, if task_will_free_mem() reports true for
 * the task, the task is marked through __mark_oom_victim() and passed to
 * wake_oom_reaper().
 */
void add_to_oom_reaper(struct task_struct *p)
__releases(p->alloc_lock)
{
/* Feature is switched off through the sysctl. */
if (!sysctl_reap_mem_on_sigkill)
return;

/*
 * NOTE(review): presumably returns a thread of p's group that still has
 * an mm, with its task lock held (dropped by task_unlock() below), or
 * NULL when there is none - confirm against find_lock_task_mm().
 */
p = find_lock_task_mm(p);
if (!p)
return;

/* Hold a task reference while it is being marked and queued. */
get_task_struct(p);
if (task_will_free_mem(p)) {
__mark_oom_victim(p);
wake_oom_reaper(p);
}
task_unlock(p);
put_task_struct(p);
}

0 comments on commit 2221b4d

Please sign in to comment.