Skip to content

Commit e8ea14c

Browse files
hnaz authored and torvalds committed
mm: memcontrol: take a css reference for each charged page
Charges currently pin the css indirectly by playing tricks during css_offline(): user pages stall the offlining process until all of them have been reparented, whereas kmemcg acquires a keep-alive reference if outstanding kernel pages are detected at that point. In preparation for removing all this complexity, make the pinning explicit and acquire a css reference for every charged page. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Vladimir Davydov <vdavydov@parallels.com> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: David Rientjes <rientjes@google.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 5ac8fb3 commit e8ea14c

File tree

3 files changed

+81
-13
lines changed

3 files changed

+81
-13
lines changed

include/linux/cgroup.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,19 @@ static inline void css_get(struct cgroup_subsys_state *css)
112112
percpu_ref_get(&css->refcnt);
113113
}
114114

115+
/**
116+
* css_get_many - obtain references on the specified css
117+
* @css: target css
118+
* @n: number of references to get
119+
*
120+
* The caller must already have a reference.
121+
*/
122+
static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
123+
{
124+
if (!(css->flags & CSS_NO_REF))
125+
percpu_ref_get_many(&css->refcnt, n);
126+
}
127+
115128
/**
116129
* css_tryget - try to obtain a reference on the specified css
117130
* @css: target css
@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css)
159172
percpu_ref_put(&css->refcnt);
160173
}
161174

175+
/**
176+
* css_put_many - put css references
177+
* @css: target css
178+
* @n: number of references to put
179+
*
180+
* Put references obtained via css_get() and css_tryget_online().
181+
*/
182+
static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
183+
{
184+
if (!(css->flags & CSS_NO_REF))
185+
percpu_ref_put_many(&css->refcnt, n);
186+
}
187+
162188
/* bits in struct cgroup flags field */
163189
enum {
164190
/* Control Group requires release notifications to userspace */

include/linux/percpu-refcount.h

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -147,27 +147,41 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
147147
}
148148

149149
/**
 * percpu_ref_get_many - increment a percpu refcount
 * @ref: percpu_ref to get
 * @nr: number of references to get
 *
 * Analogous to atomic_long_add().
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
{
	unsigned long __percpu *percpu_count;

	/*
	 * RCU-sched read side keeps the percpu-vs-atomic mode decision
	 * stable while the counter is updated.
	 */
	rcu_read_lock_sched();

	if (__ref_is_percpu(ref, &percpu_count))
		/* Fast path: bump this CPU's local counter by @nr. */
		this_cpu_add(*percpu_count, nr);
	else
		/* Ref has been switched to atomic mode. */
		atomic_long_add(nr, &ref->count);

	rcu_read_unlock_sched();
}
170171

172+
/**
 * percpu_ref_get - increment a percpu refcount
 * @ref: percpu_ref to get
 *
 * Analogous to atomic_long_inc().
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_get(struct percpu_ref *ref)
{
	/* Single-reference special case of percpu_ref_get_many(). */
	percpu_ref_get_many(ref, 1);
}
184+
171185
/**
172186
* percpu_ref_tryget - try to increment a percpu refcount
173187
* @ref: percpu_ref to try-get
@@ -231,28 +245,43 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
231245
}
232246

233247
/**
 * percpu_ref_put_many - decrement a percpu refcount
 * @ref: percpu_ref to put
 * @nr: number of references to put
 *
 * Decrement the refcount, and if 0, call the release function (which was passed
 * to percpu_ref_init())
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
{
	unsigned long __percpu *percpu_count;

	/*
	 * RCU-sched read side keeps the percpu-vs-atomic mode decision
	 * stable while the counter is updated.
	 */
	rcu_read_lock_sched();

	if (__ref_is_percpu(ref, &percpu_count))
		/* Fast path: drop @nr from this CPU's local counter. */
		this_cpu_sub(*percpu_count, nr);
	else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
		/* Last reference dropped in atomic mode: release the object. */
		ref->release(ref);

	rcu_read_unlock_sched();
}
255270

271+
/**
 * percpu_ref_put - decrement a percpu refcount
 * @ref: percpu_ref to put
 *
 * Decrement the refcount, and if 0, call the release function (which was passed
 * to percpu_ref_init())
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_put(struct percpu_ref *ref)
{
	/* Single-reference special case of percpu_ref_put_many(). */
	percpu_ref_put_many(ref, 1);
}
284+
256285
/**
257286
* percpu_ref_is_zero - test whether a percpu refcount reached zero
258287
* @ref: percpu_ref to test

mm/memcontrol.c

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2273,6 +2273,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
22732273
page_counter_uncharge(&old->memory, stock->nr_pages);
22742274
if (do_swap_account)
22752275
page_counter_uncharge(&old->memsw, stock->nr_pages);
2276+
css_put_many(&old->css, stock->nr_pages);
22762277
stock->nr_pages = 0;
22772278
}
22782279
stock->cached = NULL;
@@ -2530,6 +2531,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
25302531
return -EINTR;
25312532

25322533
done_restock:
2534+
css_get_many(&memcg->css, batch);
25332535
if (batch > nr_pages)
25342536
refill_stock(memcg, batch - nr_pages);
25352537
done:
@@ -2544,6 +2546,8 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
25442546
page_counter_uncharge(&memcg->memory, nr_pages);
25452547
if (do_swap_account)
25462548
page_counter_uncharge(&memcg->memsw, nr_pages);
2549+
2550+
css_put_many(&memcg->css, nr_pages);
25472551
}
25482552

25492553
/*
@@ -2739,6 +2743,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
27392743
page_counter_charge(&memcg->memory, nr_pages);
27402744
if (do_swap_account)
27412745
page_counter_charge(&memcg->memsw, nr_pages);
2746+
css_get_many(&memcg->css, nr_pages);
27422747
ret = 0;
27432748
} else if (ret)
27442749
page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -2754,8 +2759,10 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
27542759
page_counter_uncharge(&memcg->memsw, nr_pages);
27552760

27562761
/* Not down to 0 */
2757-
if (page_counter_uncharge(&memcg->kmem, nr_pages))
2762+
if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
2763+
css_put_many(&memcg->css, nr_pages);
27582764
return;
2765+
}
27592766

27602767
/*
27612768
* Releases a reference taken in kmem_cgroup_css_offline in case
@@ -2767,6 +2774,8 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
27672774
*/
27682775
if (memcg_kmem_test_and_clear_dead(memcg))
27692776
css_put(&memcg->css);
2777+
2778+
css_put_many(&memcg->css, nr_pages);
27702779
}
27712780

27722781
/*
@@ -3394,10 +3403,13 @@ static int mem_cgroup_move_parent(struct page *page,
33943403
ret = mem_cgroup_move_account(page, nr_pages,
33953404
pc, child, parent);
33963405
if (!ret) {
3406+
if (!mem_cgroup_is_root(parent))
3407+
css_get_many(&parent->css, nr_pages);
33973408
/* Take charge off the local counters */
33983409
page_counter_cancel(&child->memory, nr_pages);
33993410
if (do_swap_account)
34003411
page_counter_cancel(&child->memsw, nr_pages);
3412+
css_put_many(&child->css, nr_pages);
34013413
}
34023414

34033415
if (nr_pages > 1)
@@ -5767,7 +5779,6 @@ static void __mem_cgroup_clear_mc(void)
57675779
{
57685780
struct mem_cgroup *from = mc.from;
57695781
struct mem_cgroup *to = mc.to;
5770-
int i;
57715782

57725783
/* we must uncharge all the leftover precharges from mc.to */
57735784
if (mc.precharge) {
@@ -5795,8 +5806,7 @@ static void __mem_cgroup_clear_mc(void)
57955806
if (!mem_cgroup_is_root(mc.to))
57965807
page_counter_uncharge(&mc.to->memory, mc.moved_swap);
57975808

5798-
for (i = 0; i < mc.moved_swap; i++)
5799-
css_put(&mc.from->css);
5809+
css_put_many(&mc.from->css, mc.moved_swap);
58005810

58015811
/* we've already done css_get(mc.to) */
58025812
mc.moved_swap = 0;
@@ -6343,6 +6353,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
63436353
__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
63446354
memcg_check_events(memcg, dummy_page);
63456355
local_irq_restore(flags);
6356+
6357+
if (!mem_cgroup_is_root(memcg))
6358+
css_put_many(&memcg->css, max(nr_mem, nr_memsw));
63466359
}
63476360

63486361
static void uncharge_list(struct list_head *page_list)

0 commit comments

Comments
 (0)