Commit c80509e

shakeelbakpm00 authored and committed
memcg: completely decouple memcg and obj stocks
Let's completely decouple the memcg and obj per-cpu stocks. This will
allow the memcg per-cpu stocks to be used without disabling irqs, and
will also allow the obj stocks to be made nmi-safe independently, which
is required to make kmalloc/slab safe for allocations from nmi context.

Link: https://lkml.kernel.org/r/20250506225533.2580386-4-shakeel.butt@linux.dev
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
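The heart of the patch is splitting one per-cpu structure that carried two
local_trylock_t locks into two independent per-cpu structures with one lock
each. As a rough illustration of why that helps, here is a hypothetical
userspace analogue in plain C with pthreads; the names page_stock and
byte_stock are illustrative stand-ins for the kernel's memcg_stock and
obj_stock, and pthread_mutex_trylock stands in for local_trylock_irqsave:

/*
 * Hypothetical userspace sketch of the decoupling; not kernel code.
 * Two independent caches, each with its own trylock, instead of one
 * structure carrying two locks.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct page_stock {			/* stand-in for struct memcg_stock_pcp */
	pthread_mutex_t lock;
	unsigned int nr_pages;
};

struct byte_stock {			/* stand-in for struct obj_stock_pcp */
	pthread_mutex_t lock;
	unsigned int nr_bytes;
};

static struct page_stock page_stock = { PTHREAD_MUTEX_INITIALIZER, 64 };
static struct byte_stock byte_stock = { PTHREAD_MUTEX_INITIALIZER, 4096 };

/* Mirrors the shape of consume_stock(): give up if the lock is busy. */
static bool consume_pages(unsigned int nr)
{
	bool ret = false;

	if (pthread_mutex_trylock(&page_stock.lock))
		return false;	/* contended: caller takes the slow path */
	if (page_stock.nr_pages >= nr) {
		page_stock.nr_pages -= nr;
		ret = true;
	}
	pthread_mutex_unlock(&page_stock.lock);
	return ret;
}

/* Mirrors the shape of drain_obj_stock(): only the byte cache's lock. */
static void drain_bytes(void)
{
	pthread_mutex_lock(&byte_stock.lock);
	byte_stock.nr_bytes = 0;
	pthread_mutex_unlock(&byte_stock.lock);
}

int main(void)
{
	drain_bytes();		/* never contends with consume_pages() */
	printf("consumed 32 pages: %d\n", consume_pages(32));
	return 0;
}

Because the caches no longer share a structure or a lock, draining object
bytes never contends with the page-charge fast path, and each side can adopt
different locking rules later (irq-less use for memcg stocks, nmi safety for
obj stocks) without dragging the other along.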
1 parent: 3523dd7

1 file changed, +92 -57 lines

mm/memcontrol.c

Lines changed: 92 additions & 57 deletions
@@ -1778,12 +1778,22 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
  * nr_pages in a single cacheline. This may change in future.
  */
 #define NR_MEMCG_STOCK 7
+#define FLUSHING_CACHED_CHARGE	0
 struct memcg_stock_pcp {
-	local_trylock_t memcg_lock;
+	local_trylock_t lock;
 	uint8_t nr_pages[NR_MEMCG_STOCK];
 	struct mem_cgroup *cached[NR_MEMCG_STOCK];
 
-	local_trylock_t obj_lock;
+	struct work_struct work;
+	unsigned long flags;
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
+	.lock = INIT_LOCAL_TRYLOCK(lock),
+};
+
+struct obj_stock_pcp {
+	local_trylock_t lock;
 	unsigned int nr_bytes;
 	struct obj_cgroup *cached_objcg;
 	struct pglist_data *cached_pgdat;
@@ -1792,16 +1802,16 @@ struct memcg_stock_pcp {
 
 	struct work_struct work;
 	unsigned long flags;
-#define FLUSHING_CACHED_CHARGE	0
 };
-static DEFINE_PER_CPU_ALIGNED(struct memcg_stock_pcp, memcg_stock) = {
-	.memcg_lock = INIT_LOCAL_TRYLOCK(memcg_lock),
-	.obj_lock = INIT_LOCAL_TRYLOCK(obj_lock),
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_stock_pcp, obj_stock) = {
+	.lock = INIT_LOCAL_TRYLOCK(lock),
 };
+
 static DEFINE_MUTEX(percpu_charge_mutex);
 
-static void drain_obj_stock(struct memcg_stock_pcp *stock);
-static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+static void drain_obj_stock(struct obj_stock_pcp *stock);
+static bool obj_stock_flush_required(struct obj_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg);
 
 /**
@@ -1824,7 +1834,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	int i;
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags))
+	    !local_trylock_irqsave(&memcg_stock.lock, flags))
 		return ret;
 
 	stock = this_cpu_ptr(&memcg_stock);
@@ -1841,7 +1851,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 			break;
 	}
 
-	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.lock, flags);
 
 	return ret;
 }
@@ -1882,24 +1892,38 @@ static void drain_stock_fully(struct memcg_stock_pcp *stock)
 		drain_stock(stock, i);
 }
 
-static void drain_local_stock(struct work_struct *dummy)
+static void drain_local_memcg_stock(struct work_struct *dummy)
 {
 	struct memcg_stock_pcp *stock;
 	unsigned long flags;
 
 	if (WARN_ONCE(!in_task(), "drain in non-task context"))
 		return;
 
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
-	stock = this_cpu_ptr(&memcg_stock);
-	drain_obj_stock(stock);
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	local_lock_irqsave(&memcg_stock.lock, flags);
 
-	local_lock_irqsave(&memcg_stock.memcg_lock, flags);
 	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock_fully(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
-	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
+
+	local_unlock_irqrestore(&memcg_stock.lock, flags);
+}
+
+static void drain_local_obj_stock(struct work_struct *dummy)
+{
+	struct obj_stock_pcp *stock;
+	unsigned long flags;
+
+	if (WARN_ONCE(!in_task(), "drain in non-task context"))
+		return;
+
+	local_lock_irqsave(&obj_stock.lock, flags);
+
+	stock = this_cpu_ptr(&obj_stock);
+	drain_obj_stock(stock);
+	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 }
 
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -1922,10 +1946,10 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 	VM_WARN_ON_ONCE(mem_cgroup_is_root(memcg));
 
 	if (nr_pages > MEMCG_CHARGE_BATCH ||
-	    !local_trylock_irqsave(&memcg_stock.memcg_lock, flags)) {
+	    !local_trylock_irqsave(&memcg_stock.lock, flags)) {
 		/*
 		 * In case of larger than batch refill or unlikely failure to
-		 * lock the percpu memcg_lock, uncharge memcg directly.
+		 * lock the percpu memcg_stock.lock, uncharge memcg directly.
 		 */
 		memcg_uncharge(memcg, nr_pages);
 		return;
@@ -1957,23 +1981,17 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 		WRITE_ONCE(stock->nr_pages[i], nr_pages);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.memcg_lock, flags);
+	local_unlock_irqrestore(&memcg_stock.lock, flags);
 }
 
-static bool is_drain_needed(struct memcg_stock_pcp *stock,
-			    struct mem_cgroup *root_memcg)
+static bool is_memcg_drain_needed(struct memcg_stock_pcp *stock,
+				  struct mem_cgroup *root_memcg)
 {
 	struct mem_cgroup *memcg;
 	bool flush = false;
 	int i;
 
 	rcu_read_lock();
-
-	if (obj_stock_flush_required(stock, root_memcg)) {
-		flush = true;
-		goto out;
-	}
-
 	for (i = 0; i < NR_MEMCG_STOCK; ++i) {
 		memcg = READ_ONCE(stock->cached[i]);
 		if (!memcg)
@@ -1985,7 +2003,6 @@ static bool is_drain_needed(struct memcg_stock_pcp *stock,
 			break;
 		}
 	}
-out:
 	rcu_read_unlock();
 	return flush;
 }
@@ -2010,15 +2027,27 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
 	migrate_disable();
 	curcpu = smp_processor_id();
 	for_each_online_cpu(cpu) {
-		struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
-		bool flush = is_drain_needed(stock, root_memcg);
+		struct memcg_stock_pcp *memcg_st = &per_cpu(memcg_stock, cpu);
+		struct obj_stock_pcp *obj_st = &per_cpu(obj_stock, cpu);
 
-		if (flush &&
-		    !test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
+		if (!test_bit(FLUSHING_CACHED_CHARGE, &memcg_st->flags) &&
+		    is_memcg_drain_needed(memcg_st, root_memcg) &&
+		    !test_and_set_bit(FLUSHING_CACHED_CHARGE,
+				      &memcg_st->flags)) {
 			if (cpu == curcpu)
-				drain_local_stock(&stock->work);
+				drain_local_memcg_stock(&memcg_st->work);
 			else if (!cpu_is_isolated(cpu))
-				schedule_work_on(cpu, &stock->work);
+				schedule_work_on(cpu, &memcg_st->work);
+		}
+
+		if (!test_bit(FLUSHING_CACHED_CHARGE, &obj_st->flags) &&
+		    obj_stock_flush_required(obj_st, root_memcg) &&
+		    !test_and_set_bit(FLUSHING_CACHED_CHARGE,
+				      &obj_st->flags)) {
+			if (cpu == curcpu)
+				drain_local_obj_stock(&obj_st->work);
+			else if (!cpu_is_isolated(cpu))
+				schedule_work_on(cpu, &obj_st->work);
 		}
 	}
 	migrate_enable();
@@ -2027,18 +2056,18 @@ void drain_all_stock(struct mem_cgroup *root_memcg)
 
 static int memcg_hotplug_cpu_dead(unsigned int cpu)
 {
-	struct memcg_stock_pcp *stock;
+	struct obj_stock_pcp *obj_st;
 	unsigned long flags;
 
-	stock = &per_cpu(memcg_stock, cpu);
+	obj_st = &per_cpu(obj_stock, cpu);
 
-	/* drain_obj_stock requires obj_lock */
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
-	drain_obj_stock(stock);
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	/* drain_obj_stock requires objstock.lock */
+	local_lock_irqsave(&obj_stock.lock, flags);
+	drain_obj_stock(obj_st);
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 
 	/* no need for the local lock */
-	drain_stock_fully(stock);
+	drain_stock_fully(&per_cpu(memcg_stock, cpu));
 
 	return 0;
 }
@@ -2835,7 +2864,7 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
 }
 
 static void __account_obj_stock(struct obj_cgroup *objcg,
-				struct memcg_stock_pcp *stock, int nr,
+				struct obj_stock_pcp *stock, int nr,
 				struct pglist_data *pgdat, enum node_stat_item idx)
 {
 	int *bytes;
@@ -2886,13 +2915,13 @@ static void __account_obj_stock(struct obj_cgroup *objcg,
 static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			      struct pglist_data *pgdat, enum node_stat_item idx)
 {
-	struct memcg_stock_pcp *stock;
+	struct obj_stock_pcp *stock;
 	unsigned long flags;
 	bool ret = false;
 
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
+	local_lock_irqsave(&obj_stock.lock, flags);
 
-	stock = this_cpu_ptr(&memcg_stock);
+	stock = this_cpu_ptr(&obj_stock);
 	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
 		stock->nr_bytes -= nr_bytes;
 		ret = true;
@@ -2901,12 +2930,12 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		__account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 
 	return ret;
 }
 
-static void drain_obj_stock(struct memcg_stock_pcp *stock)
+static void drain_obj_stock(struct obj_stock_pcp *stock)
 {
 	struct obj_cgroup *old = READ_ONCE(stock->cached_objcg);
 
@@ -2967,32 +2996,35 @@ static void drain_obj_stock(struct obj_stock_pcp *stock)
 	obj_cgroup_put(old);
 }
 
-static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
+static bool obj_stock_flush_required(struct obj_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg)
 {
 	struct obj_cgroup *objcg = READ_ONCE(stock->cached_objcg);
 	struct mem_cgroup *memcg;
+	bool flush = false;
 
+	rcu_read_lock();
 	if (objcg) {
 		memcg = obj_cgroup_memcg(objcg);
 		if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
-			return true;
+			flush = true;
 	}
+	rcu_read_unlock();
 
-	return false;
+	return flush;
 }
 
 static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 			     bool allow_uncharge, int nr_acct, struct pglist_data *pgdat,
 			     enum node_stat_item idx)
 {
-	struct memcg_stock_pcp *stock;
+	struct obj_stock_pcp *stock;
 	unsigned long flags;
 	unsigned int nr_pages = 0;
 
-	local_lock_irqsave(&memcg_stock.obj_lock, flags);
+	local_lock_irqsave(&obj_stock.lock, flags);
 
-	stock = this_cpu_ptr(&memcg_stock);
+	stock = this_cpu_ptr(&obj_stock);
 	if (READ_ONCE(stock->cached_objcg) != objcg) { /* reset if necessary */
 		drain_obj_stock(stock);
 		obj_cgroup_get(objcg);
@@ -3012,7 +3044,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	local_unlock_irqrestore(&memcg_stock.obj_lock, flags);
+	local_unlock_irqrestore(&obj_stock.lock, flags);
 
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
@@ -5077,9 +5109,12 @@ int __init mem_cgroup_init(void)
 	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
 				  memcg_hotplug_cpu_dead);
 
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
 		INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
-			  drain_local_stock);
+			  drain_local_memcg_stock);
+		INIT_WORK(&per_cpu_ptr(&obj_stock, cpu)->work,
+			  drain_local_obj_stock);
+	}
 
 	memcg_size = struct_size_t(struct mem_cgroup, nodeinfo, nr_node_ids);
 	memcg_cachep = kmem_cache_create("mem_cgroup", memcg_size, 0,
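
One idiom worth noting from the drain_all_stock() hunk above: each stock is
now claimed with a cheap test_bit() pre-check before the more expensive
test_and_set_bit(), so CPUs whose drain is already pending are skipped
without an atomic read-modify-write. A minimal userspace sketch of the same
idiom using C11 atomics (illustrative only; the kernel uses its own bitops,
not stdatomic):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define FLUSHING_CACHED_CHARGE	0

static atomic_ulong flags;	/* stand-in for stock->flags */

static bool claim_drain(void)
{
	unsigned long bit = 1UL << FLUSHING_CACHED_CHARGE;

	/* test_bit(): cheap read skips the RMW when a drain is pending */
	if (atomic_load_explicit(&flags, memory_order_relaxed) & bit)
		return false;
	/* test_and_set_bit(): exactly one caller wins the claim */
	return !(atomic_fetch_or(&flags, bit) & bit);
}

int main(void)
{
	printf("first claim:  %d\n", claim_drain());	/* 1: we own the drain */
	printf("second claim: %d\n", claim_drain());	/* 0: already claimed */
	return 0;
}

The winner is the one that runs or schedules the drain work; the flag is
cleared at the end of drain_local_memcg_stock()/drain_local_obj_stock(), as
shown in the diff.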
