Skip to content

Commit 286e04b

Browse files
rgushchin authored and torvalds committed
mm: memcg/slab: allocate obj_cgroups for non-root slab pages
Allocate and release memory to store obj_cgroup pointers for each non-root
slab page. Reuse page->mem_cgroup pointer to store a pointer to the
allocated space.

This commit temporarily increases the memory footprint of the kernel memory
accounting. To store obj_cgroup pointers we'll need a place for an
objcg_pointer for each allocated object. However, the following patches in
the series will enable sharing of slab pages between memory cgroups, which
will dramatically increase the total slab utilization. And the final memory
footprint will be significantly smaller than before.

To distinguish between obj_cgroups and memcg pointers in case when it's not
obvious which one is used (as in page_cgroup_ino()), let's always set the
lowest bit in the obj_cgroup case.

The original obj_cgroups pointer is marked to be ignored by kmemleak, which
otherwise would report a memory leak for each allocated vector.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-8-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent bf4f059 commit 286e04b

File tree

5 files changed

+81
-4
lines changed

5 files changed

+81
-4
lines changed

include/linux/mm_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,10 @@ struct page {
198198
atomic_t _refcount;
199199

200200
#ifdef CONFIG_MEMCG
201-
struct mem_cgroup *mem_cgroup;
201+
union {
202+
struct mem_cgroup *mem_cgroup;
203+
struct obj_cgroup **obj_cgroups;
204+
};
202205
#endif
203206

204207
/*

include/linux/slab_def.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,10 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
114114
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
115115
}
116116

117+
static inline int objs_per_slab_page(const struct kmem_cache *cache,
118+
const struct page *page)
119+
{
120+
return cache->num;
121+
}
122+
117123
#endif /* _LINUX_SLAB_DEF_H */

include/linux/slub_def.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,4 +198,9 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
198198
return __obj_to_index(cache, page_address(page), obj);
199199
}
200200

201+
static inline int objs_per_slab_page(const struct kmem_cache *cache,
202+
const struct page *page)
203+
{
204+
return page->objects;
205+
}
201206
#endif /* _LINUX_SLUB_DEF_H */

mm/memcontrol.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -569,10 +569,21 @@ ino_t page_cgroup_ino(struct page *page)
569569
unsigned long ino = 0;
570570

571571
rcu_read_lock();
572-
if (PageSlab(page) && !PageTail(page))
572+
if (PageSlab(page) && !PageTail(page)) {
573573
memcg = memcg_from_slab_page(page);
574-
else
575-
memcg = READ_ONCE(page->mem_cgroup);
574+
} else {
575+
memcg = page->mem_cgroup;
576+
577+
/*
578+
* The lowest bit set means that memcg isn't a valid
579+
* memcg pointer, but a obj_cgroups pointer.
580+
* In this case the page is shared and doesn't belong
581+
* to any specific memory cgroup.
582+
*/
583+
if ((unsigned long) memcg & 0x1UL)
584+
memcg = NULL;
585+
}
586+
576587
while (memcg && !(memcg->css.flags & CSS_ONLINE))
577588
memcg = parent_mem_cgroup(memcg);
578589
if (memcg)

mm/slab.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ struct memcg_cache_params {
109109
#include <linux/kmemleak.h>
110110
#include <linux/random.h>
111111
#include <linux/sched/mm.h>
112+
#include <linux/kmemleak.h>
112113

113114
/*
114115
* State of the slab allocator.
@@ -348,6 +349,18 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
348349
return s->memcg_params.root_cache;
349350
}
350351

352+
static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
353+
{
354+
/*
355+
* page->mem_cgroup and page->obj_cgroups are sharing the same
356+
* space. To distinguish between them in case we don't know for sure
357+
* that the page is a slab page (e.g. page_cgroup_ino()), let's
358+
* always set the lowest bit of obj_cgroups.
359+
*/
360+
return (struct obj_cgroup **)
361+
((unsigned long)page->obj_cgroups & ~0x1UL);
362+
}
363+
351364
/*
352365
* Expects a pointer to a slab page. Please note, that PageSlab() check
353366
* isn't sufficient, as it returns true also for tail compound slab pages,
@@ -435,6 +448,28 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
435448
percpu_ref_put_many(&s->memcg_params.refcnt, nr_pages);
436449
}
437450

451+
static inline int memcg_alloc_page_obj_cgroups(struct page *page,
452+
struct kmem_cache *s, gfp_t gfp)
453+
{
454+
unsigned int objects = objs_per_slab_page(s, page);
455+
void *vec;
456+
457+
vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
458+
page_to_nid(page));
459+
if (!vec)
460+
return -ENOMEM;
461+
462+
kmemleak_not_leak(vec);
463+
page->obj_cgroups = (struct obj_cgroup **) ((unsigned long)vec | 0x1UL);
464+
return 0;
465+
}
466+
467+
static inline void memcg_free_page_obj_cgroups(struct page *page)
468+
{
469+
kfree(page_obj_cgroups(page));
470+
page->obj_cgroups = NULL;
471+
}
472+
438473
extern void slab_init_memcg_params(struct kmem_cache *);
439474
extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg);
440475

@@ -484,6 +519,16 @@ static inline void memcg_uncharge_slab(struct page *page, int order,
484519
{
485520
}
486521

522+
static inline int memcg_alloc_page_obj_cgroups(struct page *page,
523+
struct kmem_cache *s, gfp_t gfp)
524+
{
525+
return 0;
526+
}
527+
528+
static inline void memcg_free_page_obj_cgroups(struct page *page)
529+
{
530+
}
531+
487532
static inline void slab_init_memcg_params(struct kmem_cache *s)
488533
{
489534
}
@@ -510,12 +555,18 @@ static __always_inline int charge_slab_page(struct page *page,
510555
gfp_t gfp, int order,
511556
struct kmem_cache *s)
512557
{
558+
int ret;
559+
513560
if (is_root_cache(s)) {
514561
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
515562
PAGE_SIZE << order);
516563
return 0;
517564
}
518565

566+
ret = memcg_alloc_page_obj_cgroups(page, s, gfp);
567+
if (ret)
568+
return ret;
569+
519570
return memcg_charge_slab(page, gfp, order, s);
520571
}
521572

@@ -528,6 +579,7 @@ static __always_inline void uncharge_slab_page(struct page *page, int order,
528579
return;
529580
}
530581

582+
memcg_free_page_obj_cgroups(page);
531583
memcg_uncharge_slab(page, order, s);
532584
}
533585

0 commit comments

Comments
 (0)