 
 #include <trace/events/cgroup.h>
 
-static DEFINE_SPINLOCK(cgroup_rstat_lock);
-static DEFINE_PER_CPU(raw_spinlock_t, cgroup_rstat_cpu_lock);
+static DEFINE_SPINLOCK(rstat_base_lock);
+static DEFINE_PER_CPU(raw_spinlock_t, rstat_base_cpu_lock);
 
 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu);
 
@@ -26,30 +26,48 @@ static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu(
         return per_cpu_ptr(cgrp->rstat_base_cpu, cpu);
 }
 
+static spinlock_t *ss_rstat_lock(struct cgroup_subsys *ss)
+{
+        if (ss)
+                return &ss->rstat_ss_lock;
+
+        return &rstat_base_lock;
+}
+
+static raw_spinlock_t *ss_rstat_cpu_lock(struct cgroup_subsys *ss, int cpu)
+{
+        if (ss)
+                return per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu);
+
+        return per_cpu_ptr(&rstat_base_cpu_lock, cpu);
+}
+
 /*
- * Helper functions for rstat per CPU lock (cgroup_rstat_cpu_lock).
+ * Helper functions for rstat per CPU locks.
  *
  * This makes it easier to diagnose locking issues and contention in
  * production environments. The parameter @fast_path determine the
  * tracepoints being added, allowing us to diagnose "flush" related
  * operations without handling high-frequency fast-path "update" events.
  */
 static __always_inline
-unsigned long _css_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
-                struct cgroup_subsys_state *css, const bool fast_path)
+unsigned long _css_rstat_cpu_lock(struct cgroup_subsys_state *css, int cpu,
+                const bool fast_path)
 {
         struct cgroup *cgrp = css->cgroup;
+        raw_spinlock_t *cpu_lock;
         unsigned long flags;
         bool contended;
 
         /*
-         * The _irqsave() is needed because cgroup_rstat_lock is
-         * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring
-         * this lock with the _irq() suffix only disables interrupts on
-         * a non-PREEMPT_RT kernel. The raw_spinlock_t below disables
-         * interrupts on both configurations. The _irqsave() ensures
-         * that interrupts are always disabled and later restored.
+         * The _irqsave() is needed because the locks used for flushing are
+         * spinlock_t which is a sleeping lock on PREEMPT_RT. Acquiring this lock
+         * with the _irq() suffix only disables interrupts on a non-PREEMPT_RT
+         * kernel. The raw_spinlock_t below disables interrupts on both
+         * configurations. The _irqsave() ensures that interrupts are always
+         * disabled and later restored.
          */
+        cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
         contended = !raw_spin_trylock_irqsave(cpu_lock, flags);
         if (contended) {
                 if (fast_path)
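Not part of the patch, for orientation: the two accessors above choose between locks owned by a subsystem and the static base-stat locks. The sketch below shows the struct cgroup_subsys members they imply; the field types follow from &ss->rstat_ss_lock, per_cpu_ptr(ss->rstat_ss_cpu_lock, cpu) and the alloc_percpu(raw_spinlock_t) in ss_rstat_init() further down, but the real declaration (in include/linux/cgroup-defs.h, among the other members) lies outside the hunks shown here.

    /* Sketch only: lock members implied by ss_rstat_lock()/ss_rstat_cpu_lock(). */
    struct cgroup_subsys {
            /* ... existing callbacks and state elided ... */
            spinlock_t rstat_ss_lock;                    /* flush-side lock for this subsystem */
            raw_spinlock_t __percpu *rstat_ss_cpu_lock;  /* protects this subsystem's per-CPU updated lists */
    };
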
@@ -69,17 +87,18 @@ unsigned long _css_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
 }
 
 static __always_inline
-void _css_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
-                struct cgroup_subsys_state *css, unsigned long flags,
-                const bool fast_path)
+void _css_rstat_cpu_unlock(struct cgroup_subsys_state *css, int cpu,
+                unsigned long flags, const bool fast_path)
 {
         struct cgroup *cgrp = css->cgroup;
+        raw_spinlock_t *cpu_lock;
 
         if (fast_path)
                 trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
         else
                 trace_cgroup_rstat_cpu_unlock(cgrp, cpu, false);
 
+        cpu_lock = ss_rstat_cpu_lock(css->ss, cpu);
         raw_spin_unlock_irqrestore(cpu_lock, flags);
 }
 
@@ -94,7 +113,6 @@ void _css_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
  */
 __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
 {
-        raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
         unsigned long flags;
 
         /*
@@ -115,7 +133,7 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
         if (data_race(css_rstat_cpu(css, cpu)->updated_next))
                 return;
 
-        flags = _css_rstat_cpu_lock(cpu_lock, cpu, css, true);
+        flags = _css_rstat_cpu_lock(css, cpu, true);
 
         /* put @css and all ancestors on the corresponding updated lists */
         while (true) {
@@ -143,7 +161,7 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
                 css = parent;
         }
 
-        _css_rstat_cpu_unlock(cpu_lock, cpu, css, flags, true);
+        _css_rstat_cpu_unlock(css, cpu, flags, true);
 }
 
 /**
@@ -171,11 +189,11 @@ static struct cgroup_subsys_state *css_rstat_push_children(
         child->rstat_flush_next = NULL;
 
         /*
-         * The cgroup_rstat_lock must be held for the whole duration from
+         * The subsystem rstat lock must be held for the whole duration from
          * here as the rstat_flush_next list is being constructed to when
          * it is consumed later in css_rstat_flush().
          */
-        lockdep_assert_held(&cgroup_rstat_lock);
+        lockdep_assert_held(ss_rstat_lock(head->ss));
 
         /*
          * Notation: -> updated_next pointer
@@ -245,12 +263,11 @@ static struct cgroup_subsys_state *css_rstat_push_children(
 static struct cgroup_subsys_state *css_rstat_updated_list(
                 struct cgroup_subsys_state *root, int cpu)
 {
-        raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
         struct css_rstat_cpu *rstatc = css_rstat_cpu(root, cpu);
         struct cgroup_subsys_state *head = NULL, *parent, *child;
         unsigned long flags;
 
-        flags = _css_rstat_cpu_lock(cpu_lock, cpu, root, false);
+        flags = _css_rstat_cpu_lock(root, cpu, false);
 
         /* Return NULL if this subtree is not on-list */
         if (!rstatc->updated_next)
@@ -287,7 +304,7 @@ static struct cgroup_subsys_state *css_rstat_updated_list(
         if (child != root)
                 head = css_rstat_push_children(head, child, cpu);
 unlock_ret:
-        _css_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
+        _css_rstat_cpu_unlock(root, cpu, flags, false);
         return head;
 }
 
@@ -314,7 +331,7 @@ __weak noinline void bpf_rstat_flush(struct cgroup *cgrp,
 __bpf_hook_end();
 
 /*
- * Helper functions for locking cgroup_rstat_lock.
+ * Helper functions for locking.
  *
  * This makes it easier to diagnose locking issues and contention in
  * production environments. The parameter @cpu_in_loop indicate lock
@@ -324,27 +341,31 @@ __bpf_hook_end();
  */
 static inline void __css_rstat_lock(struct cgroup_subsys_state *css,
                 int cpu_in_loop)
-        __acquires(&cgroup_rstat_lock)
+        __acquires(ss_rstat_lock(css->ss))
 {
         struct cgroup *cgrp = css->cgroup;
+        spinlock_t *lock;
         bool contended;
 
-        contended = !spin_trylock_irq(&cgroup_rstat_lock);
+        lock = ss_rstat_lock(css->ss);
+        contended = !spin_trylock_irq(lock);
         if (contended) {
                 trace_cgroup_rstat_lock_contended(cgrp, cpu_in_loop, contended);
-                spin_lock_irq(&cgroup_rstat_lock);
+                spin_lock_irq(lock);
         }
         trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended);
 }
 
 static inline void __css_rstat_unlock(struct cgroup_subsys_state *css,
                 int cpu_in_loop)
-        __releases(&cgroup_rstat_lock)
+        __releases(ss_rstat_lock(css->ss))
 {
         struct cgroup *cgrp = css->cgroup;
+        spinlock_t *lock;
 
+        lock = ss_rstat_lock(css->ss);
         trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false);
-        spin_unlock_irq(&cgroup_rstat_lock);
+        spin_unlock_irq(lock);
 }
 
 /**
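Not part of the patch: a simplified sketch of how a flusher is expected to pair the helpers above. The flush-side spinlock from ss_rstat_lock() is held around each CPU's work, css_rstat_updated_list() takes the matching per-CPU raw spinlock internally, and the popped csses are walked through rstat_flush_next. The real consumer is css_rstat_flush(), whose body is outside the hunks shown, so the control flow and callback dispatch below are an approximation, not the actual implementation.

    /* Illustrative only; not the real css_rstat_flush() body. */
    static void example_flush(struct cgroup_subsys_state *css)
    {
            int cpu;

            for_each_possible_cpu(cpu) {
                    struct cgroup_subsys_state *pos;

                    /* per-subsystem flush lock (falls back to the base lock when css->ss is NULL) */
                    __css_rstat_lock(css, cpu);

                    /* pop this CPU's updated subtree; takes the per-CPU lock internally */
                    pos = css_rstat_updated_list(css, cpu);
                    for (; pos; pos = pos->rstat_flush_next) {
                            if (!pos->ss)
                                    cgroup_base_stat_flush(pos->cgroup, cpu);
                            else if (pos->ss->css_rstat_flush)
                                    pos->ss->css_rstat_flush(pos, cpu);
                    }

                    __css_rstat_unlock(css, cpu);
            }
    }
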
@@ -466,12 +487,29 @@ void css_rstat_exit(struct cgroup_subsys_state *css)
         css->rstat_cpu = NULL;
 }
 
-void __init cgroup_rstat_boot(void)
+/**
+ * ss_rstat_init - subsystem-specific rstat initialization
+ * @ss: target subsystem
+ *
+ * If @ss is NULL, the static locks associated with the base stats
+ * are initialized. If @ss is non-NULL, the subsystem-specific locks
+ * are initialized.
+ */
+int __init ss_rstat_init(struct cgroup_subsys *ss)
 {
         int cpu;
 
+        if (ss) {
+                ss->rstat_ss_cpu_lock = alloc_percpu(raw_spinlock_t);
+                if (!ss->rstat_ss_cpu_lock)
+                        return -ENOMEM;
+        }
+
+        spin_lock_init(ss_rstat_lock(ss));
         for_each_possible_cpu(cpu)
-                raw_spin_lock_init(per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu));
+                raw_spin_lock_init(ss_rstat_cpu_lock(ss, cpu));
+
+        return 0;
 }
 
 /*
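Not part of the patch: cgroup_rstat_boot() initialized a single global lock pair unconditionally, while ss_rstat_init() now returns an error because the per-subsystem per-CPU locks come from alloc_percpu(). The actual call sites are outside the hunks shown; the fragment below is only a sketch of the expected boot-time calling pattern, with a hypothetical caller name and BUG_ON() standing in for whatever error handling the real caller uses.

    /* Hypothetical caller sketch, assuming the usual for_each_subsys() iterator. */
    static void __init example_rstat_boot(void)
    {
            struct cgroup_subsys *ss;
            int ssid;

            /* base-stat locks: rstat_base_lock and rstat_base_cpu_lock */
            BUG_ON(ss_rstat_init(NULL));

            /* per-subsystem locks, including the alloc_percpu() that can fail */
            for_each_subsys(ss, ssid)
                    BUG_ON(ss_rstat_init(ss));
    }
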