
Commit 3c7be18

rgushchin authored and torvalds committed
mm: memcg/percpu: account percpu memory to memory cgroups
Percpu memory is becoming more and more widely used by various subsystems, and the total amount of memory controlled by the percpu allocator can make up a good part of the total memory.

As an example, bpf maps can consume a lot of percpu memory, and they are created by a user. Also, some cgroup internals (e.g. memory controller statistics) can be quite large. On a machine with many CPUs and a large number of cgroups they can consume hundreds of megabytes. So the lack of memcg accounting creates a breach in the memory isolation. Similar to slab memory, percpu memory should be accounted by default.

To implement the percpu accounting, it's possible to take the slab memory accounting as a model to follow. Let's introduce two types of percpu chunks: root and memcg. What makes memcg chunks different is an additional space allocated to store memcg membership information. If __GFP_ACCOUNT is passed on allocation, a memcg chunk should be used. If it's possible to charge the corresponding size to the target memory cgroup, the allocation is performed and the memcg ownership data is recorded. System-wide allocations are performed using root chunks, so there is no additional memory overhead.

To implement fast reparenting of percpu memory on memcg removal, we don't store mem_cgroup pointers directly: instead we use the obj_cgroup API, introduced for slab accounting.

[akpm@linux-foundation.org: fix CONFIG_MEMCG_KMEM=n build errors and warning]
[akpm@linux-foundation.org: move unreachable code, per Roman]
[cuibixuan@huawei.com: mm/percpu: fix 'defined but not used' warning]
Link: http://lkml.kernel.org/r/6d41b939-a741-b521-a7a2-e7296ec16219@huawei.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Tobin C. Harding <tobin@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: Bixuan Cui <cuibixuan@huawei.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Link: http://lkml.kernel.org/r/20200623184515.4132564-3-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 5b32af9 commit 3c7be18
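For illustration, a minimal sketch of how a kernel-side caller opts into the new accounting: nothing changes except that __GFP_ACCOUNT (here via GFP_KERNEL_ACCOUNT) is passed to the regular percpu API. The struct and function names below are hypothetical; only alloc_percpu_gfp(), free_percpu() and the gfp flags are the real interface.

#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/errno.h>

/* Hypothetical per-CPU statistics object, used only for this example. */
struct example_stats {
	u64 hits;
	u64 misses;
};

static struct example_stats __percpu *example_counters;

static int example_init(void)
{
	/*
	 * GFP_KERNEL_ACCOUNT includes __GFP_ACCOUNT, so this allocation is
	 * served from a memcg-aware chunk and charged to the current memory
	 * cgroup instead of staying invisible to the memory controller.
	 */
	example_counters = alloc_percpu_gfp(struct example_stats,
					    GFP_KERNEL_ACCOUNT);
	if (!example_counters)
		return -ENOMEM;
	return 0;
}

static void example_exit(void)
{
	/* The memcg charge is dropped automatically when the object is freed. */
	free_percpu(example_counters);
}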

5 files changed, +246 -40 lines


mm/percpu-internal.h

Lines changed: 54 additions & 1 deletion
@@ -5,6 +5,25 @@
 #include <linux/types.h>
 #include <linux/percpu.h>
 
+/*
+ * There are two chunk types: root and memcg-aware.
+ * Chunks of each type have separate slots list.
+ *
+ * Memcg-aware chunks have an attached vector of obj_cgroup pointers, which is
+ * used to store memcg membership data of a percpu object. Obj_cgroups are
+ * ref-counted pointers to a memory cgroup with an ability to switch dynamically
+ * to the parent memory cgroup. This allows to reclaim a deleted memory cgroup
+ * without reclaiming of all outstanding objects, which hold a reference at it.
+ */
+enum pcpu_chunk_type {
+	PCPU_CHUNK_ROOT,
+#ifdef CONFIG_MEMCG_KMEM
+	PCPU_CHUNK_MEMCG,
+#endif
+	PCPU_NR_CHUNK_TYPES,
+	PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES
+};
+
 /*
  * pcpu_block_md is the metadata block struct.
  * Each chunk's bitmap is split into a number of full blocks.
@@ -54,6 +73,9 @@ struct pcpu_chunk {
 	int			end_offset;	/* additional area required to
 						   have the region end page
 						   aligned */
+#ifdef CONFIG_MEMCG_KMEM
+	struct obj_cgroup	**obj_cgroups;	/* vector of object cgroups */
+#endif
 
 	int			nr_pages;	/* # of pages served by this chunk */
 	int			nr_populated;	/* # of populated pages */
@@ -63,7 +85,7 @@ struct pcpu_chunk {
 
 extern spinlock_t pcpu_lock;
 
-extern struct list_head *pcpu_slot;
+extern struct list_head *pcpu_chunk_lists;
 extern int pcpu_nr_slots;
 extern int pcpu_nr_empty_pop_pages;
 
@@ -106,6 +128,37 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk)
 	return pcpu_nr_pages_to_map_bits(chunk->nr_pages);
 }
 
+#ifdef CONFIG_MEMCG_KMEM
+static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
+{
+	if (chunk->obj_cgroups)
+		return PCPU_CHUNK_MEMCG;
+	return PCPU_CHUNK_ROOT;
+}
+
+static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
+{
+	return chunk_type == PCPU_CHUNK_MEMCG;
+}
+
+#else
+static inline enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk)
+{
+	return PCPU_CHUNK_ROOT;
+}
+
+static inline bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type)
+{
+	return false;
+}
+#endif
+
+static inline struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type)
+{
+	return &pcpu_chunk_lists[pcpu_nr_slots *
+				 pcpu_is_memcg_chunk(chunk_type)];
+}
+
 #ifdef CONFIG_PERCPU_STATS
 
 #include <linux/spinlock.h>
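The pcpu_chunk_list() helper above works because the per-type slot lists live in one flat array: indexes 0 .. pcpu_nr_slots - 1 hold the root slots and, with CONFIG_MEMCG_KMEM, the next pcpu_nr_slots entries hold the memcg-aware slots, so pcpu_is_memcg_chunk() (0 or 1) simply selects the second half. The boot-time setup lives in mm/percpu.c, which this excerpt does not show; the fragment below is only a sketch under that assumed layout, and the helper name is made up.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/cache.h>
#include "percpu-internal.h"	/* pcpu_chunk_lists, pcpu_nr_slots, ... */

/* Hypothetical boot-time helper illustrating the assumed array layout. */
static void __init example_init_chunk_lists(void)
{
	enum pcpu_chunk_type type;
	int slot;

	/* One contiguous array: [root slots][memcg-aware slots]. */
	pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots *
					  sizeof(pcpu_chunk_lists[0]) *
					  PCPU_NR_CHUNK_TYPES,
					  SMP_CACHE_BYTES);
	if (!pcpu_chunk_lists)
		panic("%s: failed to allocate chunk lists\n", __func__);

	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
		for (slot = 0; slot < pcpu_nr_slots; slot++)
			INIT_LIST_HEAD(&pcpu_chunk_list(type)[slot]);
}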

mm/percpu-km.c

Lines changed: 3 additions & 2 deletions
@@ -44,15 +44,16 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	/* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
+static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
+					    gfp_t gfp)
 {
 	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
 	struct pcpu_chunk *chunk;
 	struct page *pages;
 	unsigned long flags;
 	int i;
 
-	chunk = pcpu_alloc_chunk(gfp);
+	chunk = pcpu_alloc_chunk(type, gfp);
 	if (!chunk)
 		return NULL;
 

mm/percpu-stats.c

Lines changed: 22 additions & 14 deletions
@@ -34,11 +34,15 @@ static int find_max_nr_alloc(void)
 {
 	struct pcpu_chunk *chunk;
 	int slot, max_nr_alloc;
+	enum pcpu_chunk_type type;
 
 	max_nr_alloc = 0;
-	for (slot = 0; slot < pcpu_nr_slots; slot++)
-		list_for_each_entry(chunk, &pcpu_slot[slot], list)
-			max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc);
+	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+		for (slot = 0; slot < pcpu_nr_slots; slot++)
+			list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
+					    list)
+				max_nr_alloc = max(max_nr_alloc,
+						   chunk->nr_alloc);
 
 	return max_nr_alloc;
 }
@@ -129,6 +133,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
 	P("cur_min_alloc", cur_min_alloc);
 	P("cur_med_alloc", cur_med_alloc);
 	P("cur_max_alloc", cur_max_alloc);
+#ifdef CONFIG_MEMCG_KMEM
+	P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk)));
+#endif
 	seq_putc(m, '\n');
 }
 
@@ -137,6 +144,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
 	struct pcpu_chunk *chunk;
 	int slot, max_nr_alloc;
 	int *buffer;
+	enum pcpu_chunk_type type;
 
 alloc_buffer:
 	spin_lock_irq(&pcpu_lock);
@@ -202,18 +210,18 @@ static int percpu_stats_show(struct seq_file *m, void *v)
 		chunk_map_stats(m, pcpu_reserved_chunk, buffer);
 	}
 
-	for (slot = 0; slot < pcpu_nr_slots; slot++) {
-		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
-			if (chunk == pcpu_first_chunk) {
-				seq_puts(m, "Chunk: <- First Chunk\n");
-				chunk_map_stats(m, chunk, buffer);
-
-
-			} else {
-				seq_puts(m, "Chunk:\n");
-				chunk_map_stats(m, chunk, buffer);
+	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
+		for (slot = 0; slot < pcpu_nr_slots; slot++) {
+			list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot],
+					    list) {
+				if (chunk == pcpu_first_chunk) {
+					seq_puts(m, "Chunk: <- First Chunk\n");
+					chunk_map_stats(m, chunk, buffer);
+				} else {
+					seq_puts(m, "Chunk:\n");
+					chunk_map_stats(m, chunk, buffer);
+				}
 			}
-
 		}
 	}
 
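With this change, each chunk entry in the percpu_stats debugfs output also reports whether the chunk is memcg-aware (0 for root chunks, 1 for memcg chunks). Going by the file's existing P() formatting, the extra line should look roughly like the following; treat the exact spacing as approximate:

Chunk:
  ...
  memcg_aware         :            1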

mm/percpu-vm.c

Lines changed: 3 additions & 2 deletions
@@ -328,12 +328,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
+static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type,
+					    gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	struct vm_struct **vms;
 
-	chunk = pcpu_alloc_chunk(gfp);
+	chunk = pcpu_alloc_chunk(type, gfp);
 	if (!chunk)
 		return NULL;
 
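Both chunk backends (percpu-km and percpu-vm) only forward the new type argument; choosing the type and charging the memory cgroup happen in the common allocator in mm/percpu.c, which is not part of this excerpt. The sketch below illustrates that decision with the obj_cgroup API mentioned in the commit message; the function name and the exact checks are assumptions, not the committed code.

#include <linux/gfp.h>
#include <linux/cpumask.h>
#include <linux/memcontrol.h>
#include "percpu-internal.h"	/* enum pcpu_chunk_type */

/*
 * Illustrative sketch: pick a chunk type for an allocation request and,
 * for accounted requests, charge the current memory cgroup up front.
 * A percpu object occupies memory on every possible CPU, hence the
 * size * num_possible_cpus() charge.
 */
static enum pcpu_chunk_type
example_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
{
	struct obj_cgroup *objcg;

	if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT))
		return PCPU_CHUNK_ROOT;		/* unaccounted: root chunk */

	objcg = get_obj_cgroup_from_current();
	if (!objcg)
		return PCPU_CHUNK_ROOT;

	if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) {
		obj_cgroup_put(objcg);
		return PCPU_FAIL_ALLOC;		/* over the memcg limit */
	}

	*objcgp = objcg;	/* later recorded in chunk->obj_cgroups */
	return PCPU_CHUNK_MEMCG;
}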
