
Commit f412ac0

Hugh Dickins authored and Linus Torvalds committed
[PATCH] mm: fix rss and mmlist locking
A couple of oddities were guarded by page_table_lock, no longer properly guarded when that is split.

The mm_counters of file_rss and anon_rss: make those an atomic_t, or an atomic64_t if the architecture supports it, in such a case. Definitions by courtesy of Christoph Lameter: who spent considerable effort on more scalable ways of counting, but found insufficient benefit in practice.

And adding an mm with swap to the mmlist for swapoff: the list is well-guarded by its own lock, but the list_empty check now has to be repeated inside it.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
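Not part of the commit: a minimal userspace sketch of the mmlist race the last paragraph describes and that the mm/memory.c and mm/rmap.c hunks below fix. A pthread mutex stands in for mmlist_lock and a toy list_head for the kernel's; the names fake_mm, global_list and add_once are invented for illustration. The point is that the cheap unlocked list_empty() check stays as a fast path, but must be repeated under the lock, because two tasks can both see the list empty before either takes the lock.

/* Userspace analogue of the check / lock / recheck pattern (not kernel code).
 * Build: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static struct list_head global_list = LIST_HEAD_INIT(global_list);
static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

struct fake_mm { struct list_head mmlist; };

static struct fake_mm mm = { .mmlist = LIST_HEAD_INIT(mm.mmlist) };

/* Called concurrently.  Without the recheck under the lock, two threads
 * can both observe list_empty() == true and both link the same node,
 * corrupting the list.  The unlocked check remains as the fast path. */
static void *add_once(void *arg)
{
	(void)arg;
	if (list_empty(&mm.mmlist)) {		/* unlocked fast path */
		pthread_mutex_lock(&global_lock);
		if (list_empty(&mm.mmlist))	/* recheck under the lock */
			list_add(&mm.mmlist, &global_list);
		pthread_mutex_unlock(&global_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, add_once, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("linked: %s\n", !list_empty(&mm.mmlist) ? "yes" : "no");
	return 0;
}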
1 parent: 4c21e2f · commit: f412ac0

3 files changed: +43 −6 lines changed

include/linux/sched.h
Lines changed: 38 additions & 4 deletions

@@ -249,13 +249,47 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+#ifdef ATOMIC64_INIT
+#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
+typedef atomic64_t mm_counter_t;
+#else /* !ATOMIC64_INIT */
+/*
+ * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
+ * that is, at 16TB if using 4kB page size.
+ */
+#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
+typedef atomic_t mm_counter_t;
+#endif /* !ATOMIC64_INIT */
+
+#else /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
 #define set_mm_counter(mm, member, value) (mm)->_##member = (value)
 #define get_mm_counter(mm, member) ((mm)->_##member)
 #define add_mm_counter(mm, member, value) (mm)->_##member += (value)
 #define inc_mm_counter(mm, member) (mm)->_##member++
 #define dec_mm_counter(mm, member) (mm)->_##member--
-#define get_mm_rss(mm) ((mm)->_file_rss + (mm)->_anon_rss)
+typedef unsigned long mm_counter_t;
+
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
 
+#define get_mm_rss(mm) \
+	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
 #define update_hiwater_rss(mm)	do {			\
 	unsigned long _rss = get_mm_rss(mm);		\
 	if ((mm)->hiwater_rss < _rss)			\
@@ -266,8 +300,6 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 	(mm)->hiwater_vm = (mm)->total_vm;		\
 } while (0)
 
-typedef unsigned long mm_counter_t;
-
 struct mm_struct {
 	struct vm_area_struct * mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
@@ -291,7 +323,9 @@ struct mm_struct {
 					 * by mmlist_lock
 					 */
 
-	/* Special counters protected by the page_table_lock */
+	/* Special counters, in some configurations protected by the
+	 * page_table_lock, in other configurations by being atomic.
+	 */
 	mm_counter_t _file_rss;
 	mm_counter_t _anon_rss;
 
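Not part of the commit: a userspace mock-up of the accessor pattern the hunk above sets up, only to show how the _##member token pasting keeps callers identical whichever representation is compiled in. MOCK_SPLIT_PTLOCK and struct mock_mm are invented names, and C11 stdatomic stands in for the kernel's atomic_t/atomic64_t.

/* Userspace sketch of the mm counter accessors (not the kernel header).
 * Build: cc -std=c11 counters.c
 */
#include <stdio.h>
#include <stdatomic.h>

#define MOCK_SPLIT_PTLOCK 1	/* stands in for NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS */

#if MOCK_SPLIT_PTLOCK
/* counters may be updated without a lock, so back them with atomics */
typedef atomic_ulong mm_counter_t;
#define set_mm_counter(mm, member, value) atomic_store(&(mm)->_##member, value)
#define get_mm_counter(mm, member) ((unsigned long)atomic_load(&(mm)->_##member))
#define add_mm_counter(mm, member, value) atomic_fetch_add(&(mm)->_##member, value)
#define inc_mm_counter(mm, member) atomic_fetch_add(&(mm)->_##member, 1)
#define dec_mm_counter(mm, member) atomic_fetch_sub(&(mm)->_##member, 1)
#else
/* plain counters; in the kernel the caller would hold page_table_lock */
typedef unsigned long mm_counter_t;
#define set_mm_counter(mm, member, value) ((mm)->_##member = (value))
#define get_mm_counter(mm, member) ((mm)->_##member)
#define add_mm_counter(mm, member, value) ((mm)->_##member += (value))
#define inc_mm_counter(mm, member) ((mm)->_##member++)
#define dec_mm_counter(mm, member) ((mm)->_##member--)
#endif

#define get_mm_rss(mm) \
	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

struct mock_mm {
	mm_counter_t _file_rss;	/* leading underscore pushes callers */
	mm_counter_t _anon_rss;	/* through the accessor macros       */
};

int main(void)
{
	struct mock_mm mm;

	set_mm_counter(&mm, file_rss, 0);
	set_mm_counter(&mm, anon_rss, 0);
	inc_mm_counter(&mm, anon_rss);		/* e.g. a new anonymous page   */
	add_mm_counter(&mm, file_rss, 2);	/* e.g. two file-backed pages  */
	dec_mm_counter(&mm, file_rss);

	/* call sites look the same in both configurations */
	printf("rss = %lu\n", get_mm_rss(&mm));
	return 0;
}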

mm/memory.c
Lines changed: 3 additions & 1 deletion

@@ -372,7 +372,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		/* make sure dst_mm is on swapoff's mmlist. */
 		if (unlikely(list_empty(&dst_mm->mmlist))) {
 			spin_lock(&mmlist_lock);
-			list_add(&dst_mm->mmlist, &src_mm->mmlist);
+			if (list_empty(&dst_mm->mmlist))
+				list_add(&dst_mm->mmlist,
+						&src_mm->mmlist);
 			spin_unlock(&mmlist_lock);
 		}
 	}

mm/rmap.c
Lines changed: 2 additions & 1 deletion

@@ -559,7 +559,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
 		swap_duplicate(entry);
 		if (list_empty(&mm->mmlist)) {
 			spin_lock(&mmlist_lock);
-			list_add(&mm->mmlist, &init_mm.mmlist);
+			if (list_empty(&mm->mmlist))
+				list_add(&mm->mmlist, &init_mm.mmlist);
 			spin_unlock(&mmlist_lock);
 		}
 		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
