@@ -312,80 +312,80 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
312312}
313313
314314/*
315- * Figure out the bio's operation flags from the dio request, the
316- * mapping, and whether or not we want FUA. Note that we can end up
317- * clearing the WRITE_THROUGH flag in the dio request.
315+ * Use a FUA write if we need datasync semantics and this is a pure data I/O
316+ * that doesn't require any metadata updates (including after I/O completion
317+ * such as unwritten extent conversion) and the underlying device either
318+ * doesn't have a volatile write cache or supports FUA.
319+ * This allows us to avoid cache flushes on I/O completion.
318320 */
319- static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
320- 		const struct iomap *iomap, bool use_fua, bool atomic_hw)
321+ static inline bool iomap_dio_can_use_fua(const struct iomap *iomap,
322+ 		struct iomap_dio *dio)
321323{
322- 	blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
323-
324- 	if (!(dio->flags & IOMAP_DIO_WRITE))
325- 		return REQ_OP_READ;
326-
327- 	opflags |= REQ_OP_WRITE;
328- 	if (use_fua)
329- 		opflags |= REQ_FUA;
330- 	else
331- 		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
332- 	if (atomic_hw)
333- 		opflags |= REQ_ATOMIC;
334-
335- 	return opflags;
324+ 	if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_DIRTY))
325+ 		return false;
326+ 	if (!(dio->flags & IOMAP_DIO_WRITE_THROUGH))
327+ 		return false;
328+ 	return !bdev_write_cache(iomap->bdev) || bdev_fua(iomap->bdev);
336329}
337330
338331static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
339332{
340333 	const struct iomap *iomap = &iter->iomap;
341334 	struct inode *inode = iter->inode;
342335 	unsigned int fs_block_size = i_blocksize(inode), pad;
343- 	bool atomic_hw = iter->flags & IOMAP_ATOMIC_HW;
344336 	const loff_t length = iomap_length(iter);
345337 	loff_t pos = iter->pos;
346- 	blk_opf_t bio_opf;
338+ 	blk_opf_t bio_opf = REQ_SYNC | REQ_IDLE;
347339 	struct bio *bio;
348340 	bool need_zeroout = false;
349- 	bool use_fua = false;
350341 	int nr_pages, ret = 0;
351342 	u64 copied = 0;
352343 	size_t orig_count;
353344
354- 	if (atomic_hw && length != iter->len)
355- 		return -EINVAL;
356-
357345 	if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
358346 	    !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
359347 		return -EINVAL;
360348
361- 	if (iomap->type == IOMAP_UNWRITTEN) {
362- 		dio->flags |= IOMAP_DIO_UNWRITTEN;
363- 		need_zeroout = true;
364- 	}
349+ 	if (dio->flags & IOMAP_DIO_WRITE) {
350+ 		bio_opf |= REQ_OP_WRITE;
351+
352+ 		if (iter->flags & IOMAP_ATOMIC_HW) {
353+ 			if (length != iter->len)
354+ 				return -EINVAL;
355+ 			bio_opf |= REQ_ATOMIC;
356+ 		}
357+
358+ 		if (iomap->type == IOMAP_UNWRITTEN) {
359+ 			dio->flags |= IOMAP_DIO_UNWRITTEN;
360+ 			need_zeroout = true;
361+ 		}
365362
366- 	if (iomap->flags & IOMAP_F_SHARED)
367- 		dio->flags |= IOMAP_DIO_COW;
363+ 		if (iomap->flags & IOMAP_F_SHARED)
364+ 			dio->flags |= IOMAP_DIO_COW;
365+
366+ 		if (iomap->flags & IOMAP_F_NEW) {
367+ 			need_zeroout = true;
368+ 		} else if (iomap->type == IOMAP_MAPPED) {
369+ 			if (iomap_dio_can_use_fua(iomap, dio))
370+ 				bio_opf |= REQ_FUA;
371+ 			else
372+ 				dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
373+ 		}
368374
369- 	if (iomap->flags & IOMAP_F_NEW) {
370- 		need_zeroout = true;
371- 	} else if (iomap->type == IOMAP_MAPPED) {
372375 /*
373- * Use a FUA write if we need datasync semantics, this is a pure
374- * data IO that doesn't require any metadata updates (including
375- * after IO completion such as unwritten extent conversion) and
376- * the underlying device either supports FUA or doesn't have
377- * a volatile write cache. This allows us to avoid cache flushes
378- * on IO completion. If we can't use writethrough and need to
379- * sync, disable in-task completions as dio completion will
380- * need to call generic_write_sync() which will do a blocking
381- * fsync / cache flush call.
376+ * We can only do deferred completion for pure overwrites that
377+ * don't require additional I/O at completion time.
378+ *
379+ * This rules out writes that need zeroing or extent conversion,
380+ * extend the file size, or issue metadata I/O or cache flushes
381+ * during completion processing.
382382 */
383- 		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
384- 		    (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
385- 		    (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
386- 			use_fua = true;
387- 		else if (dio->flags & IOMAP_DIO_NEED_SYNC)
383+ 		if (need_zeroout || (pos >= i_size_read(inode)) ||
384+ 		    ((dio->flags & IOMAP_DIO_NEED_SYNC) &&
385+ 		     !(bio_opf & REQ_FUA)))
388386 			dio->flags &= ~IOMAP_DIO_CALLER_COMP;
387+ 	} else {
388+ 		bio_opf |= REQ_OP_READ;
389389 	}
390390
391391 /*
@@ -399,18 +399,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
399399 	if (!iov_iter_count(dio->submit.iter))
400400 		goto out;
401401
402- /*
403- * We can only do deferred completion for pure overwrites that
404- * don't require additional IO at completion. This rules out
405- * writes that need zeroing or extent conversion, extend
406- * the file size, or issue journal IO or cache flushes
407- * during completion processing.
408- */
409- 	if (need_zeroout ||
410- 	    ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
411- 	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
412- 		dio->flags &= ~IOMAP_DIO_CALLER_COMP;
413-
414402 /*
415403 * The rules for polled IO completions follow the guidelines as the
416404 * ones we set for inline and deferred completions. If none of those
@@ -428,8 +416,6 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
428416 goto out ;
429417 }
430418
431- 	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_hw);
432-
433419 	nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
434420 	do {
435421 		size_t n;
@@ -461,7 +447,7 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio)
461447 }
462448
463449 	n = bio->bi_iter.bi_size;
464- 	if (WARN_ON_ONCE(atomic_hw && n != length)) {
450+ 	if (WARN_ON_ONCE((bio_opf & REQ_ATOMIC) && n != length)) {
465451 /*
466452 * This bio should have covered the complete length,
467453 * which it doesn't, so error. We may need to zero out