Skip to content

Commit 30f5300

Browse files
committed
Merge patch series "iomap: incremental per-operation iter advance"
Brian Foster <bfoster@redhat.com> says:

This is a first pass at supporting more incremental, per-operation iomap_iter advancement. The motivation for this is folio_batch support for zero range, where the fs provides a batch of folios to process in certain situations. Since the batch may not be logically contiguous, processing loops require a bit more flexibility than the typical offset-based iteration.

The current iteration model basically has the operation _iter() handler lift the pos/length with respect to the current iomap out of the iomap_iter, process it locally, then return the result to be stored in iter.processed. The latter is overloaded with error status, so the handler must decide whether to return an error or a partial completion (e.g. consider a short write). iomap_iter() then uses the result to advance the iter and look up the next iomap.

The updated model proposed in this series is to allow an operation to advance the iter itself as subranges are processed and then return success or failure in iter.processed. Note that at least initially, this is implemented as an optional mode to minimize churn. This series converts operations that use iomap_write_begin(): buffered write, unshare, and zero range.

The main advantage of this is that the future folio_batch work can be plumbed down into the folio get path more naturally, and the associated codepath can advance the iter itself when appropriate rather than require each operation to manage the gaps in the range being processed. Some secondary advantages are a little less boilerplate code for walking ranges and clearer semantics for partial completions in the event of errors, etc.
* patches from https://lore.kernel.org/r/20250207143253.314068-1-bfoster@redhat.com:
  iomap: advance the iter directly on zero range
  iomap: advance the iter directly on unshare range
  iomap: advance the iter directly on buffered writes
  iomap: support incremental iomap_iter advances
  iomap: export iomap_iter_advance() and return remaining length
  iomap: lift iter termination logic from iomap_iter_advance()
  iomap: lift error code check out of iomap_iter_advance()
  iomap: refactor iomap_iter() length check and tracepoint
  iomap: split out iomap check and reset logic from iter advance
  iomap: factor out iomap length helper

Link: https://lore.kernel.org/r/20250207143253.314068-1-bfoster@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents f878973 + cbad829 commit 30f5300

File tree

3 files changed

+122
-79
lines changed

3 files changed

+122
-79
lines changed

fs/iomap/buffered-io.c

Lines changed: 31 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -905,8 +905,6 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
905905

906906
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
907907
{
908-
loff_t length = iomap_length(iter);
909-
loff_t pos = iter->pos;
910908
ssize_t total_written = 0;
911909
long status = 0;
912910
struct address_space *mapping = iter->inode->i_mapping;
@@ -919,7 +917,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
919917
size_t offset; /* Offset into folio */
920918
size_t bytes; /* Bytes to write to folio */
921919
size_t copied; /* Bytes copied from user */
922-
size_t written; /* Bytes have been written */
920+
u64 written; /* Bytes have been written */
921+
loff_t pos = iter->pos;
923922

924923
bytes = iov_iter_count(i);
925924
retry:
@@ -930,8 +929,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
930929
if (unlikely(status))
931930
break;
932931

933-
if (bytes > length)
934-
bytes = length;
932+
if (bytes > iomap_length(iter))
933+
bytes = iomap_length(iter);
935934

936935
/*
937936
* Bring in the user page that we'll copy from _first_.
@@ -1002,17 +1001,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
10021001
goto retry;
10031002
}
10041003
} else {
1005-
pos += written;
10061004
total_written += written;
1007-
length -= written;
1005+
iomap_iter_advance(iter, &written);
10081006
}
1009-
} while (iov_iter_count(i) && length);
1007+
} while (iov_iter_count(i) && iomap_length(iter));
10101008

1011-
if (status == -EAGAIN) {
1012-
iov_iter_revert(i, total_written);
1013-
return -EAGAIN;
1014-
}
1015-
return total_written ? total_written : status;
1009+
return total_written ? 0 : status;
10161010
}
10171011

10181012
ssize_t
@@ -1269,20 +1263,19 @@ EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
12691263
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
12701264
{
12711265
struct iomap *iomap = &iter->iomap;
1272-
loff_t pos = iter->pos;
1273-
loff_t length = iomap_length(iter);
1274-
loff_t written = 0;
1266+
u64 bytes = iomap_length(iter);
1267+
int status;
12751268

12761269
if (!iomap_want_unshare_iter(iter))
1277-
return length;
1270+
return iomap_iter_advance(iter, &bytes);
12781271

12791272
do {
12801273
struct folio *folio;
1281-
int status;
12821274
size_t offset;
1283-
size_t bytes = min_t(u64, SIZE_MAX, length);
1275+
loff_t pos = iter->pos;
12841276
bool ret;
12851277

1278+
bytes = min_t(u64, SIZE_MAX, bytes);
12861279
status = iomap_write_begin(iter, pos, bytes, &folio);
12871280
if (unlikely(status))
12881281
return status;
@@ -1300,14 +1293,14 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
13001293

13011294
cond_resched();
13021295

1303-
pos += bytes;
1304-
written += bytes;
1305-
length -= bytes;
1306-
13071296
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
1308-
} while (length > 0);
13091297

1310-
return written;
1298+
status = iomap_iter_advance(iter, &bytes);
1299+
if (status)
1300+
break;
1301+
} while (bytes > 0);
1302+
1303+
return status;
13111304
}
13121305

13131306
int
@@ -1348,17 +1341,16 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
13481341

13491342
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
13501343
{
1351-
loff_t pos = iter->pos;
1352-
loff_t length = iomap_length(iter);
1353-
loff_t written = 0;
1344+
u64 bytes = iomap_length(iter);
1345+
int status;
13541346

13551347
do {
13561348
struct folio *folio;
1357-
int status;
13581349
size_t offset;
1359-
size_t bytes = min_t(u64, SIZE_MAX, length);
1350+
loff_t pos = iter->pos;
13601351
bool ret;
13611352

1353+
bytes = min_t(u64, SIZE_MAX, bytes);
13621354
status = iomap_write_begin(iter, pos, bytes, &folio);
13631355
if (status)
13641356
return status;
@@ -1379,14 +1371,14 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
13791371
if (WARN_ON_ONCE(!ret))
13801372
return -EIO;
13811373

1382-
pos += bytes;
1383-
length -= bytes;
1384-
written += bytes;
1385-
} while (length > 0);
1374+
status = iomap_iter_advance(iter, &bytes);
1375+
if (status)
1376+
break;
1377+
} while (bytes > 0);
13861378

13871379
if (did_zero)
13881380
*did_zero = true;
1389-
return written;
1381+
return status;
13901382
}
13911383

13921384
int
@@ -1440,11 +1432,14 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
14401432

14411433
if (srcmap->type == IOMAP_HOLE ||
14421434
srcmap->type == IOMAP_UNWRITTEN) {
1443-
loff_t proc = iomap_length(&iter);
1435+
s64 proc;
14441436

14451437
if (range_dirty) {
14461438
range_dirty = false;
14471439
proc = iomap_zero_iter_flush_and_stale(&iter);
1440+
} else {
1441+
u64 length = iomap_length(&iter);
1442+
proc = iomap_iter_advance(&iter, &length);
14481443
}
14491444
iter.processed = proc;
14501445
continue;

fs/iomap/iter.c

Lines changed: 65 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,25 @@
77
#include <linux/iomap.h>
88
#include "trace.h"
99

10-
/*
11-
* Advance to the next range we need to map.
12-
*
13-
* If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully
14-
* processed - it was aborted because the extent the iomap spanned may have been
15-
* changed during the operation. In this case, the iteration behaviour is to
16-
* remap the unprocessed range of the iter, and that means we may need to remap
17-
* even when we've made no progress (i.e. iter->processed = 0). Hence the
18-
* "finished iterating" case needs to distinguish between
19-
* (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
20-
* need to remap the entire remaining range.
21-
*/
22-
static inline int iomap_iter_advance(struct iomap_iter *iter)
10+
static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
2311
{
24-
bool stale = iter->iomap.flags & IOMAP_F_STALE;
25-
int ret = 1;
26-
27-
/* handle the previous iteration (if any) */
28-
if (iter->iomap.length) {
29-
if (iter->processed < 0)
30-
return iter->processed;
31-
if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
32-
return -EIO;
33-
iter->pos += iter->processed;
34-
iter->len -= iter->processed;
35-
if (!iter->len || (!iter->processed && !stale))
36-
ret = 0;
37-
}
38-
39-
/* clear the per iteration state */
4012
iter->processed = 0;
4113
memset(&iter->iomap, 0, sizeof(iter->iomap));
4214
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
43-
return ret;
15+
}
16+
17+
/*
18+
* Advance the current iterator position and output the length remaining for the
19+
* current mapping.
20+
*/
21+
int iomap_iter_advance(struct iomap_iter *iter, u64 *count)
22+
{
23+
if (WARN_ON_ONCE(*count > iomap_length(iter)))
24+
return -EIO;
25+
iter->pos += *count;
26+
iter->len -= *count;
27+
*count = iomap_length(iter);
28+
return 0;
4429
}
4530

4631
static inline void iomap_iter_done(struct iomap_iter *iter)
@@ -50,6 +35,8 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
5035
WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
5136
WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE);
5237

38+
iter->iter_start_pos = iter->pos;
39+
5340
trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
5441
if (iter->srcmap.type != IOMAP_HOLE)
5542
trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
@@ -72,21 +59,62 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
7259
*/
7360
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
7461
{
62+
bool stale = iter->iomap.flags & IOMAP_F_STALE;
63+
ssize_t advanced = iter->processed > 0 ? iter->processed : 0;
64+
u64 olen = iter->len;
65+
s64 processed;
7566
int ret;
7667

77-
if (iter->iomap.length && ops->iomap_end) {
78-
ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter),
79-
iter->processed > 0 ? iter->processed : 0,
80-
iter->flags, &iter->iomap);
81-
if (ret < 0 && !iter->processed)
68+
trace_iomap_iter(iter, ops, _RET_IP_);
69+
70+
if (!iter->iomap.length)
71+
goto begin;
72+
73+
/*
74+
* If iter.processed is zero, the op may still have advanced the iter
75+
* itself. Calculate the advanced and original length bytes based on how
76+
* far pos has advanced for ->iomap_end().
77+
*/
78+
if (!advanced) {
79+
advanced = iter->pos - iter->iter_start_pos;
80+
olen += advanced;
81+
}
82+
83+
if (ops->iomap_end) {
84+
ret = ops->iomap_end(iter->inode, iter->iter_start_pos,
85+
iomap_length_trim(iter, iter->iter_start_pos,
86+
olen),
87+
advanced, iter->flags, &iter->iomap);
88+
if (ret < 0 && !advanced)
8289
return ret;
8390
}
8491

85-
trace_iomap_iter(iter, ops, _RET_IP_);
86-
ret = iomap_iter_advance(iter);
92+
processed = iter->processed;
93+
if (processed < 0) {
94+
iomap_iter_reset_iomap(iter);
95+
return processed;
96+
}
97+
98+
/*
99+
* Advance the iter and clear state from the previous iteration. This
100+
* passes iter->processed because that reflects the bytes processed but
101+
* not yet advanced by the iter handler.
102+
*
103+
* Use iter->len to determine whether to continue onto the next mapping.
104+
* Explicitly terminate in the case where the current iter has not
105+
* advanced at all (i.e. no work was done for some reason) unless the
106+
* mapping has been marked stale and needs to be reprocessed.
107+
*/
108+
ret = iomap_iter_advance(iter, &processed);
109+
if (!ret && iter->len > 0)
110+
ret = 1;
111+
if (ret > 0 && !advanced && !stale)
112+
ret = 0;
113+
iomap_iter_reset_iomap(iter);
87114
if (ret <= 0)
88115
return ret;
89116

117+
begin:
90118
ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
91119
&iter->iomap, &iter->srcmap);
92120
if (ret < 0)

include/linux/iomap.h

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -218,8 +218,11 @@ struct iomap_ops {
218218
* calls to iomap_iter(). Treat as read-only in the body.
219219
* @len: The remaining length of the file segment we're operating on.
220220
* It is updated at the same time as @pos.
221-
* @processed: The number of bytes processed by the body in the most recent
222-
* iteration, or a negative errno. 0 causes the iteration to stop.
221+
* @iter_start_pos: The original start pos for the current iomap. Used for
222+
* incremental iter advance.
223+
* @processed: The number of bytes the most recent iteration needs iomap_iter()
224+
* to advance the iter, zero if the iter was already advanced, or a
225+
* negative errno for an error during the operation.
223226
* @flags: Zero or more of the iomap_begin flags above.
224227
* @iomap: Map describing the I/O iteration
225228
* @srcmap: Source map for COW operations
@@ -228,6 +231,7 @@ struct iomap_iter {
228231
struct inode *inode;
229232
loff_t pos;
230233
u64 len;
234+
loff_t iter_start_pos;
231235
s64 processed;
232236
unsigned flags;
233237
struct iomap iomap;
@@ -236,20 +240,36 @@ struct iomap_iter {
236240
};
237241

238242
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
243+
int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
239244

240245
/**
241-
* iomap_length - length of the current iomap iteration
246+
* iomap_length_trim - trimmed length of the current iomap iteration
242247
* @iter: iteration structure
248+
* @pos: File position to trim from.
249+
* @len: Length of the mapping to trim to.
243250
*
244-
* Returns the length that the operation applies to for the current iteration.
251+
* Returns a trimmed length that the operation applies to for the current
252+
* iteration.
245253
*/
246-
static inline u64 iomap_length(const struct iomap_iter *iter)
254+
static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos,
255+
u64 len)
247256
{
248257
u64 end = iter->iomap.offset + iter->iomap.length;
249258

250259
if (iter->srcmap.type != IOMAP_HOLE)
251260
end = min(end, iter->srcmap.offset + iter->srcmap.length);
252-
return min(iter->len, end - iter->pos);
261+
return min(len, end - pos);
262+
}
263+
264+
/**
265+
* iomap_length - length of the current iomap iteration
266+
* @iter: iteration structure
267+
*
268+
* Returns the length that the operation applies to for the current iteration.
269+
*/
270+
static inline u64 iomap_length(const struct iomap_iter *iter)
271+
{
272+
return iomap_length_trim(iter, iter->pos, iter->len);
253273
}
254274

255275
/**

0 commit comments

Comments
 (0)