@@ -50,8 +50,41 @@ static inline void set_nr_cpu_ids(unsigned int nr)
5050#endif
5151}
5252
53- /* Deprecated. Always use nr_cpu_ids. */
54- #define nr_cpumask_bits nr_cpu_ids
53+ /*
54+ * We have several different "preferred sizes" for the cpumask
55+ * operations, depending on operation.
56+ *
57+ * For example, the bitmap scanning and operating operations have
58+ * optimized routines that work for the single-word case, but only when
59+ * the size is constant. So if NR_CPUS fits in one single word, we are
60+ * better off using that small constant, in order to trigger the
61+ * optimized bit finding. That is 'small_cpumask_bits'.
62+ *
63+ * The clearing and copying operations will similarly perform better
64+ * with a constant size, but we limit that size arbitrarily to four
65+ * words. We call this 'large_cpumask_bits'.
66+ *
67+ * Finally, some operations just want the exact limit, either because
68+ * they set bits or just don't have any faster fixed-sized versions. We
69+ * call this just 'nr_cpumask_bits'.
70+ *
71+ * Note that these optional constants are always guaranteed to be at
72+ * least as big as 'nr_cpu_ids' itself is, and all our cpumask
73+ * allocations are at least that size (see cpumask_size()). The
74+ * optimization comes from being able to potentially use a compile-time
75+ * constant instead of a run-time generated exact number of CPUs.
76+ */
77+ #if NR_CPUS <= BITS_PER_LONG
78+ #define small_cpumask_bits ((unsigned int)NR_CPUS)
79+ #define large_cpumask_bits ((unsigned int)NR_CPUS)
80+ #elif NR_CPUS <= 4 * BITS_PER_LONG
81+ #define small_cpumask_bits nr_cpu_ids
82+ #define large_cpumask_bits ((unsigned int)NR_CPUS)
83+ #else
84+ #define small_cpumask_bits nr_cpu_ids
85+ #define large_cpumask_bits nr_cpu_ids
86+ #endif
87+ #define nr_cpumask_bits nr_cpu_ids
5588
5689/*
5790 * The following particular system cpumasks and operations manage
@@ -126,7 +159,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
126159 */
127160static inline unsigned int cpumask_first (const struct cpumask * srcp )
128161{
129- return find_first_bit (cpumask_bits (srcp ), nr_cpumask_bits );
162+ return find_first_bit (cpumask_bits (srcp ), small_cpumask_bits );
130163}
131164
132165/**
@@ -137,7 +170,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
137170 */
138171static inline unsigned int cpumask_first_zero (const struct cpumask * srcp )
139172{
140- return find_first_zero_bit (cpumask_bits (srcp ), nr_cpumask_bits );
173+ return find_first_zero_bit (cpumask_bits (srcp ), small_cpumask_bits );
141174}
142175
143176/**
@@ -150,7 +183,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
150183static inline
151184unsigned int cpumask_first_and (const struct cpumask * srcp1 , const struct cpumask * srcp2 )
152185{
153- return find_first_and_bit (cpumask_bits (srcp1 ), cpumask_bits (srcp2 ), nr_cpumask_bits );
186+ return find_first_and_bit (cpumask_bits (srcp1 ), cpumask_bits (srcp2 ), small_cpumask_bits );
154187}
155188
156189/**
@@ -161,7 +194,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
161194 */
162195static inline unsigned int cpumask_last (const struct cpumask * srcp )
163196{
164- return find_last_bit (cpumask_bits (srcp ), nr_cpumask_bits );
197+ return find_last_bit (cpumask_bits (srcp ), small_cpumask_bits );
165198}
166199
167200/**
@@ -177,7 +210,7 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
177210 /* -1 is a legal arg here. */
178211 if (n != -1 )
179212 cpumask_check (n );
180- return find_next_bit (cpumask_bits (srcp ), nr_cpumask_bits , n + 1 );
213+ return find_next_bit (cpumask_bits (srcp ), small_cpumask_bits , n + 1 );
181214}
182215
183216/**
@@ -192,7 +225,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
192225 /* -1 is a legal arg here. */
193226 if (n != -1 )
194227 cpumask_check (n );
195- return find_next_zero_bit (cpumask_bits (srcp ), nr_cpumask_bits , n + 1 );
228+ return find_next_zero_bit (cpumask_bits (srcp ), small_cpumask_bits , n + 1 );
196229}
197230
198231#if NR_CPUS == 1
@@ -235,7 +268,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
235268 if (n != -1 )
236269 cpumask_check (n );
237270 return find_next_and_bit (cpumask_bits (src1p ), cpumask_bits (src2p ),
238- nr_cpumask_bits , n + 1 );
271+ small_cpumask_bits , n + 1 );
239272}
240273
241274/**
@@ -246,17 +279,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
246279 * After the loop, cpu is >= nr_cpu_ids.
247280 */
248281#define for_each_cpu (cpu , mask ) \
249- for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
250-
251- /**
252- * for_each_cpu_not - iterate over every cpu in a complemented mask
253- * @cpu: the (optionally unsigned) integer iterator
254- * @mask: the cpumask pointer
255- *
256- * After the loop, cpu is >= nr_cpu_ids.
257- */
258- #define for_each_cpu_not (cpu , mask ) \
259- for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)
282+ for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
260283
261284#if NR_CPUS == 1
262285static inline
@@ -290,7 +313,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
290313 * After the loop, cpu is >= nr_cpu_ids.
291314 */
292315#define for_each_cpu_wrap (cpu , mask , start ) \
293- for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits , start)
316+ for_each_set_bit_wrap(cpu, cpumask_bits(mask), small_cpumask_bits , start)
294317
295318/**
296319 * for_each_cpu_and - iterate over every cpu in both masks
@@ -307,7 +330,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
307330 * After the loop, cpu is >= nr_cpu_ids.
308331 */
309332#define for_each_cpu_and (cpu , mask1 , mask2 ) \
310- for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits )
333+ for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits )
311334
312335/**
313336 * for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
@@ -325,7 +348,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
325348 * After the loop, cpu is >= nr_cpu_ids.
326349 */
327350#define for_each_cpu_andnot (cpu , mask1 , mask2 ) \
328- for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits )
351+ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits )
329352
330353/**
331354 * cpumask_any_but - return a "random" in a cpumask, but not this one.
@@ -356,7 +379,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
356379 */
357380static inline unsigned int cpumask_nth (unsigned int cpu , const struct cpumask * srcp )
358381{
359- return find_nth_bit (cpumask_bits (srcp ), nr_cpumask_bits , cpumask_check (cpu ));
382+ return find_nth_bit (cpumask_bits (srcp ), small_cpumask_bits , cpumask_check (cpu ));
360383}
361384
362385/**
@@ -372,7 +395,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
372395 const struct cpumask * srcp2 )
373396{
374397 return find_nth_and_bit (cpumask_bits (srcp1 ), cpumask_bits (srcp2 ),
375- nr_cpumask_bits , cpumask_check (cpu ));
398+ small_cpumask_bits , cpumask_check (cpu ));
376399}
377400
378401/**
@@ -388,7 +411,7 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
388411 const struct cpumask * srcp2 )
389412{
390413 return find_nth_andnot_bit (cpumask_bits (srcp1 ), cpumask_bits (srcp2 ),
391- nr_cpumask_bits , cpumask_check (cpu ));
414+ small_cpumask_bits , cpumask_check (cpu ));
392415}
393416
394417/**
@@ -408,7 +431,7 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp
408431 return find_nth_and_andnot_bit (cpumask_bits (srcp1 ),
409432 cpumask_bits (srcp2 ),
410433 cpumask_bits (srcp3 ),
411- nr_cpumask_bits , cpumask_check (cpu ));
434+ small_cpumask_bits , cpumask_check (cpu ));
412435}
413436
414437#define CPU_BITS_NONE \
@@ -495,10 +518,14 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
495518/**
496519 * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
497520 * @dstp: the cpumask pointer
521+ *
522+ * Note: since we set bits, we should use the tighter 'bitmap_set()' with
523+ * the exact number of bits, not 'bitmap_fill()' that will fill past the
524+ * end.
498525 */
499526static inline void cpumask_setall (struct cpumask * dstp )
500527{
501- bitmap_fill (cpumask_bits (dstp ), nr_cpumask_bits );
528+ bitmap_set (cpumask_bits (dstp ), 0 , nr_cpumask_bits );
502529}
503530
504531/**
@@ -507,7 +534,7 @@ static inline void cpumask_setall(struct cpumask *dstp)
507534 */
508535static inline void cpumask_clear (struct cpumask * dstp )
509536{
510- bitmap_zero (cpumask_bits (dstp ), nr_cpumask_bits );
537+ bitmap_zero (cpumask_bits (dstp ), large_cpumask_bits );
511538}
512539
513540/**
@@ -523,7 +550,7 @@ static inline bool cpumask_and(struct cpumask *dstp,
523550 const struct cpumask * src2p )
524551{
525552 return bitmap_and (cpumask_bits (dstp ), cpumask_bits (src1p ),
526- cpumask_bits (src2p ), nr_cpumask_bits );
553+ cpumask_bits (src2p ), small_cpumask_bits );
527554}
528555
529556/**
@@ -536,7 +563,7 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
536563 const struct cpumask * src2p )
537564{
538565 bitmap_or (cpumask_bits (dstp ), cpumask_bits (src1p ),
539- cpumask_bits (src2p ), nr_cpumask_bits );
566+ cpumask_bits (src2p ), small_cpumask_bits );
540567}
541568
542569/**
@@ -550,7 +577,7 @@ static inline void cpumask_xor(struct cpumask *dstp,
550577 const struct cpumask * src2p )
551578{
552579 bitmap_xor (cpumask_bits (dstp ), cpumask_bits (src1p ),
553- cpumask_bits (src2p ), nr_cpumask_bits );
580+ cpumask_bits (src2p ), small_cpumask_bits );
554581}
555582
556583/**
@@ -566,19 +593,7 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
566593 const struct cpumask * src2p )
567594{
568595 return bitmap_andnot (cpumask_bits (dstp ), cpumask_bits (src1p ),
569- cpumask_bits (src2p ), nr_cpumask_bits );
570- }
571-
572- /**
573- * cpumask_complement - *dstp = ~*srcp
574- * @dstp: the cpumask result
575- * @srcp: the input to invert
576- */
577- static inline void cpumask_complement (struct cpumask * dstp ,
578- const struct cpumask * srcp )
579- {
580- bitmap_complement (cpumask_bits (dstp ), cpumask_bits (srcp ),
581- nr_cpumask_bits );
596+ cpumask_bits (src2p ), small_cpumask_bits );
582597}
583598
584599/**
@@ -590,7 +605,7 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
590605 const struct cpumask * src2p )
591606{
592607 return bitmap_equal (cpumask_bits (src1p ), cpumask_bits (src2p ),
593- nr_cpumask_bits );
608+ small_cpumask_bits );
594609}
595610
596611/**
@@ -604,7 +619,7 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p,
604619 const struct cpumask * src3p )
605620{
606621 return bitmap_or_equal (cpumask_bits (src1p ), cpumask_bits (src2p ),
607- cpumask_bits (src3p ), nr_cpumask_bits );
622+ cpumask_bits (src3p ), small_cpumask_bits );
608623}
609624
610625/**
@@ -616,7 +631,7 @@ static inline bool cpumask_intersects(const struct cpumask *src1p,
616631 const struct cpumask * src2p )
617632{
618633 return bitmap_intersects (cpumask_bits (src1p ), cpumask_bits (src2p ),
619- nr_cpumask_bits );
634+ small_cpumask_bits );
620635}
621636
622637/**
@@ -630,7 +645,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
630645 const struct cpumask * src2p )
631646{
632647 return bitmap_subset (cpumask_bits (src1p ), cpumask_bits (src2p ),
633- nr_cpumask_bits );
648+ small_cpumask_bits );
634649}
635650
636651/**
@@ -639,7 +654,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
639654 */
640655static inline bool cpumask_empty (const struct cpumask * srcp )
641656{
642- return bitmap_empty (cpumask_bits (srcp ), nr_cpumask_bits );
657+ return bitmap_empty (cpumask_bits (srcp ), small_cpumask_bits );
643658}
644659
645660/**
@@ -657,7 +672,7 @@ static inline bool cpumask_full(const struct cpumask *srcp)
657672 */
658673static inline unsigned int cpumask_weight (const struct cpumask * srcp )
659674{
660- return bitmap_weight (cpumask_bits (srcp ), nr_cpumask_bits );
675+ return bitmap_weight (cpumask_bits (srcp ), small_cpumask_bits );
661676}
662677
663678/**
@@ -668,7 +683,7 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
668683static inline unsigned int cpumask_weight_and (const struct cpumask * srcp1 ,
669684 const struct cpumask * srcp2 )
670685{
671- return bitmap_weight_and (cpumask_bits (srcp1 ), cpumask_bits (srcp2 ), nr_cpumask_bits );
686+ return bitmap_weight_and (cpumask_bits (srcp1 ), cpumask_bits (srcp2 ), small_cpumask_bits );
672687}
673688
674689/**
@@ -681,7 +696,7 @@ static inline void cpumask_shift_right(struct cpumask *dstp,
681696 const struct cpumask * srcp , int n )
682697{
683698 bitmap_shift_right (cpumask_bits (dstp ), cpumask_bits (srcp ), n ,
684- nr_cpumask_bits );
699+ small_cpumask_bits );
685700}
686701
687702/**
@@ -705,7 +720,7 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
705720static inline void cpumask_copy (struct cpumask * dstp ,
706721 const struct cpumask * srcp )
707722{
708- bitmap_copy (cpumask_bits (dstp ), cpumask_bits (srcp ), nr_cpumask_bits );
723+ bitmap_copy (cpumask_bits (dstp ), cpumask_bits (srcp ), large_cpumask_bits );
709724}
710725
711726/**
@@ -789,7 +804,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
789804 */
790805static inline unsigned int cpumask_size (void )
791806{
792- return BITS_TO_LONGS (nr_cpumask_bits ) * sizeof (long );
807+ return BITS_TO_LONGS (large_cpumask_bits ) * sizeof (long );
793808}
794809
795810/*
0 commit comments