Commit d690cb8

Christoph Hellwig authored and Jens Axboe committed

block: add an API to atomically update queue limits

Add a new queue_limits_{start,commit}_update pair of functions that allows taking an atomic snapshot of the queue limits, updating it, and committing it if it passes validity checking. Also use the low-level validation helper to implement blk_set_default_limits instead of duplicating the initialization.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20240213073425.1621680-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>

1 parent c490f22, commit d690cb8
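To make the calling convention concrete, here is a minimal, hypothetical driver-side sketch. The function name and the field values are invented for illustration; only the two queue_limits_{start,commit}_update() calls come from this commit:

#include <linux/blkdev.h>

/*
 * Hypothetical example, not part of this commit: atomically update a
 * couple of limits on a live queue. The values are placeholders.
 */
static int example_update_limits(struct request_queue *q)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* takes q->limits_lock */
	lim.logical_block_size = 4096;		/* example value */
	lim.io_opt = 64 * SECTOR_SIZE;		/* example value */

	/* Validates the snapshot, applies it on success, drops the lock. */
	return queue_limits_commit_update(q, &lim);
}

Because the snapshot is modified off to the side and only applied after validation passes, readers never observe a half-updated queue_limits.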

4 files changed: 217 additions, 37 deletions

block/blk-core.c (1 addition, 0 deletions)

@@ -425,6 +425,7 @@ struct request_queue *blk_alloc_queue(int node_id)
 	mutex_init(&q->debugfs_mutex);
 	mutex_init(&q->sysfs_lock);
 	mutex_init(&q->sysfs_dir_lock);
+	mutex_init(&q->limits_lock);
 	mutex_init(&q->rq_qos_mutex);
 	spin_lock_init(&q->queue_lock);

block/blk-settings.c (192 additions, 36 deletions)

@@ -25,42 +25,6 @@ void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
 }
 EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);

-/**
- * blk_set_default_limits - reset limits to default values
- * @lim:  the queue_limits structure to reset
- *
- * Description:
- *   Returns a queue_limit struct to its default state.
- */
-void blk_set_default_limits(struct queue_limits *lim)
-{
-	lim->max_segments = BLK_MAX_SEGMENTS;
-	lim->max_discard_segments = 1;
-	lim->max_integrity_segments = 0;
-	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	lim->virt_boundary_mask = 0;
-	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
-	lim->max_user_sectors = lim->max_dev_sectors = 0;
-	lim->chunk_sectors = 0;
-	lim->max_write_zeroes_sectors = 0;
-	lim->max_zone_append_sectors = 0;
-	lim->max_discard_sectors = 0;
-	lim->max_hw_discard_sectors = 0;
-	lim->max_secure_erase_sectors = 0;
-	lim->discard_granularity = 512;
-	lim->discard_alignment = 0;
-	lim->discard_misaligned = 0;
-	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
-	lim->bounce = BLK_BOUNCE_NONE;
-	lim->alignment_offset = 0;
-	lim->io_opt = 0;
-	lim->misaligned = 0;
-	lim->zoned = false;
-	lim->zone_write_granularity = 0;
-	lim->dma_alignment = 511;
-}
-
 /**
  * blk_set_stacking_limits - set default limits for stacking devices
  * @lim:  the queue_limits structure to reset
@@ -101,6 +65,198 @@ static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
 	bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
 }

+static int blk_validate_zoned_limits(struct queue_limits *lim)
+{
+	if (!lim->zoned) {
+		if (WARN_ON_ONCE(lim->max_open_zones) ||
+		    WARN_ON_ONCE(lim->max_active_zones) ||
+		    WARN_ON_ONCE(lim->zone_write_granularity) ||
+		    WARN_ON_ONCE(lim->max_zone_append_sectors))
+			return -EINVAL;
+		return 0;
+	}
+
+	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
+		return -EINVAL;
+
+	if (lim->zone_write_granularity < lim->logical_block_size)
+		lim->zone_write_granularity = lim->logical_block_size;
+
+	if (lim->max_zone_append_sectors) {
+		/*
+		 * The Zone Append size is limited by the maximum I/O size
+		 * and the zone size given that it can't span zones.
+		 */
+		lim->max_zone_append_sectors =
+			min3(lim->max_hw_sectors,
+			     lim->max_zone_append_sectors,
+			     lim->chunk_sectors);
+	}
+
+	return 0;
+}
+
+/*
+ * Check that the limits in lim are valid, initialize defaults for unset
+ * values, and cap values based on others where needed.
+ */
+static int blk_validate_limits(struct queue_limits *lim)
+{
+	unsigned int max_hw_sectors;
+
+	/*
+	 * Unless otherwise specified, default to 512 byte logical blocks and a
+	 * physical block size equal to the logical block size.
+	 */
+	if (!lim->logical_block_size)
+		lim->logical_block_size = SECTOR_SIZE;
+	if (lim->physical_block_size < lim->logical_block_size)
+		lim->physical_block_size = lim->logical_block_size;
+
+	/*
+	 * The minimum I/O size defaults to the physical block size unless
+	 * explicitly overridden.
+	 */
+	if (lim->io_min < lim->physical_block_size)
+		lim->io_min = lim->physical_block_size;
+
+	/*
+	 * max_hw_sectors has a somewhat weird default for historical reasons,
+	 * but drivers really should set their own instead of relying on this
+	 * value.
+	 *
+	 * The block layer relies on the fact that every driver can
+	 * handle at least a page worth of data per I/O, and needs the value
+	 * aligned to the logical block size.
+	 */
+	if (!lim->max_hw_sectors)
+		lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	if (WARN_ON_ONCE(lim->max_hw_sectors < PAGE_SECTORS))
+		return -EINVAL;
+	lim->max_hw_sectors = round_down(lim->max_hw_sectors,
+			lim->logical_block_size >> SECTOR_SHIFT);
+
+	/*
+	 * The actual max_sectors value is a complex beast and also takes the
+	 * max_dev_sectors value (set by SCSI ULPs) and a user configurable
+	 * value into account. The ->max_sectors value is always calculated
+	 * from these, so directly setting it won't have any effect.
+	 */
+	max_hw_sectors = min_not_zero(lim->max_hw_sectors,
+				lim->max_dev_sectors);
+	if (lim->max_user_sectors) {
+		if (lim->max_user_sectors > max_hw_sectors ||
+		    lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
+			return -EINVAL;
+		lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
+	} else {
+		lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
+	}
+	lim->max_sectors = round_down(lim->max_sectors,
+			lim->logical_block_size >> SECTOR_SHIFT);
+
+	/*
+	 * Random default for the maximum number of segments. Drivers should
+	 * not rely on this and should set their own.
+	 */
+	if (!lim->max_segments)
+		lim->max_segments = BLK_MAX_SEGMENTS;
+
+	lim->max_discard_sectors = lim->max_hw_discard_sectors;
+	if (!lim->max_discard_segments)
+		lim->max_discard_segments = 1;
+
+	if (lim->discard_granularity < lim->physical_block_size)
+		lim->discard_granularity = lim->physical_block_size;
+
+	/*
+	 * By default there is no limit on the segment boundary alignment,
+	 * but if there is one it can't be smaller than the page size as
+	 * that would break all the normal I/O patterns.
+	 */
+	if (!lim->seg_boundary_mask)
+		lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
+	if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
+		return -EINVAL;
+
+	/*
+	 * The maximum segment size has an odd historic 64k default that
+	 * drivers probably should override. Just like the I/O size we
+	 * require drivers to at least handle a full page per segment.
+	 */
+	if (!lim->max_segment_size)
+		lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+	if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
+		return -EINVAL;
+
+	/*
+	 * Devices that require a virtual boundary do not support scatter/gather
+	 * I/O natively, but instead require a descriptor list entry for each
+	 * page (which might not be identical to the Linux PAGE_SIZE). Because
+	 * of that they are not limited by our notion of "segment size".
+	 */
+	if (lim->virt_boundary_mask) {
+		if (WARN_ON_ONCE(lim->max_segment_size &&
+				 lim->max_segment_size != UINT_MAX))
+			return -EINVAL;
+		lim->max_segment_size = UINT_MAX;
+	}
+
+	/*
+	 * We require drivers to at least do logical block aligned I/O, but
+	 * historically could not check for that due to the separate calls
+	 * to set the limits. Once the transition is finished the check
+	 * below should be narrowed down to check the logical block size.
+	 */
+	if (!lim->dma_alignment)
+		lim->dma_alignment = SECTOR_SIZE - 1;
+	if (WARN_ON_ONCE(lim->dma_alignment > PAGE_SIZE))
+		return -EINVAL;
+
+	if (lim->alignment_offset) {
+		lim->alignment_offset &= (lim->physical_block_size - 1);
+		lim->misaligned = 0;
+	}
+
+	return blk_validate_zoned_limits(lim);
+}
+
+/*
+ * Set the default limits for a newly allocated queue. @lim contains the
+ * initial limits set by the driver, which could be no limit in which case
+ * all fields are cleared to zero.
+ */
+int blk_set_default_limits(struct queue_limits *lim)
+{
+	return blk_validate_limits(lim);
+}
+
+/**
+ * queue_limits_commit_update - commit an atomic update of queue limits
+ * @q:		queue to update
+ * @lim:	limits to apply
+ *
+ * Apply the limits in @lim that were obtained from queue_limits_start_update()
+ * and updated by the caller to @q.
+ *
+ * Returns 0 if successful, else a negative error code.
+ */
+int queue_limits_commit_update(struct request_queue *q,
+		struct queue_limits *lim)
+	__releases(q->limits_lock)
+{
+	int error = blk_validate_limits(lim);
+
+	if (!error) {
+		q->limits = *lim;
+		if (q->disk)
+			blk_apply_bdi_limits(q->disk->bdi, lim);
+	}
+	mutex_unlock(&q->limits_lock);
+	return error;
+}
+EXPORT_SYMBOL_GPL(queue_limits_commit_update);
+
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
  * @q: the request queue for the device
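One detail worth calling out from the new code above: queue_limits_commit_update() is annotated __releases(q->limits_lock) and unlocks the mutex on the failure path as well, so a caller that gets an error back must not unlock again. A sketch of the failure case, using a value that blk_validate_limits() rejects (below PAGE_SIZE / SECTOR_SIZE):

	struct queue_limits lim;
	int error;

	lim = queue_limits_start_update(q);
	lim.max_user_sectors = 1;	/* < PAGE_SIZE / SECTOR_SIZE, so -EINVAL */
	error = queue_limits_commit_update(q, &lim);
	/* On error, q->limits is unchanged and limits_lock is already released. */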

block/blk.h (1 addition, 1 deletion)

@@ -330,7 +330,7 @@ void blk_rq_set_mixed_merge(struct request *rq);
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);

-void blk_set_default_limits(struct queue_limits *lim);
+int blk_set_default_limits(struct queue_limits *lim);
 int blk_dev_init(void);

 /*

include/linux/blkdev.h (23 additions, 0 deletions)

@@ -474,6 +474,7 @@ struct request_queue {

 	struct mutex		sysfs_lock;
 	struct mutex		sysfs_dir_lock;
+	struct mutex		limits_lock;

 	/*
 	 * for reusing dead hctx instance in case of updating
@@ -862,6 +863,28 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
 	return chunk_sectors - (offset & (chunk_sectors - 1));
 }

+/**
+ * queue_limits_start_update - start an atomic update of queue limits
+ * @q:		queue to update
+ *
+ * This function starts an atomic update of the queue limits. It takes a lock
+ * to prevent other updates and returns a snapshot of the current limits that
+ * the caller can modify. The caller must call queue_limits_commit_update()
+ * to finish the update.
+ *
+ * Context: process context. The caller must have frozen the queue or ensured
+ * that there is no outstanding I/O by other means.
+ */
+static inline struct queue_limits
+queue_limits_start_update(struct request_queue *q)
+	__acquires(q->limits_lock)
+{
+	mutex_lock(&q->limits_lock);
+	return q->limits;
+}
+int queue_limits_commit_update(struct request_queue *q,
+		struct queue_limits *lim);
+
 /*
  * Access functions for manipulating queue properties
  */
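The Context: note in queue_limits_start_update() requires the caller to have frozen the queue or otherwise guaranteed that no I/O is outstanding. One plausible pattern under that contract, using the existing blk_mq_freeze_queue()/blk_mq_unfreeze_queue() helpers (this pairing is an illustration, not something this commit prescribes):

	struct queue_limits lim;
	int error;

	blk_mq_freeze_queue(q);
	lim = queue_limits_start_update(q);
	lim.io_min = lim.physical_block_size;	/* example tweak */
	error = queue_limits_commit_update(q, &lim);
	blk_mq_unfreeze_queue(q);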
