@@ -270,22 +270,180 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
270270 return ret ;
271271}
272272
/*
 * Per-request state for multi-bio block device direct I/O.
 *
 * The struct is front-padded onto the first bio's allocation (blkdev_dio_pool
 * is created with offsetof(struct blkdev_dio, bio) of front pad, see
 * blkdev_init()), so it is obtained with container_of() from that bio and is
 * freed by the final bio_put(&dio->bio).
 */
struct blkdev_dio {
	union {
		struct kiocb		*iocb;	/* async: kiocb to complete */
		struct task_struct	*waiter;/* sync: task to wake on completion */
	};
	size_t		size;			/* total bytes submitted so far */
	atomic_t	ref;			/* outstanding bios; used only when multi_bio */
	bool		multi_bio : 1;		/* request spans more than one bio */
	bool		should_dirty : 1;	/* READ into user pages: dirty them on completion */
	bool		is_sync : 1;		/* caller waits in-line instead of ki_complete */
	struct bio	bio;			/* first bio; must be last (embedded, variable tail) */
};
285+
/* bioset front-padded for the embedded struct blkdev_dio (set up in blkdev_init()). */
static struct bio_set *blkdev_dio_pool __read_mostly;
287+
288+ static void blkdev_bio_end_io (struct bio * bio )
289+ {
290+ struct blkdev_dio * dio = bio -> bi_private ;
291+ bool should_dirty = dio -> should_dirty ;
292+
293+ if (dio -> multi_bio && !atomic_dec_and_test (& dio -> ref )) {
294+ if (bio -> bi_error && !dio -> bio .bi_error )
295+ dio -> bio .bi_error = bio -> bi_error ;
296+ } else {
297+ if (!dio -> is_sync ) {
298+ struct kiocb * iocb = dio -> iocb ;
299+ ssize_t ret = dio -> bio .bi_error ;
300+
301+ if (likely (!ret )) {
302+ ret = dio -> size ;
303+ iocb -> ki_pos += ret ;
304+ }
305+
306+ dio -> iocb -> ki_complete (iocb , ret , 0 );
307+ bio_put (& dio -> bio );
308+ } else {
309+ struct task_struct * waiter = dio -> waiter ;
310+
311+ WRITE_ONCE (dio -> waiter , NULL );
312+ wake_up_process (waiter );
313+ }
314+ }
315+
316+ if (should_dirty ) {
317+ bio_check_pages_dirty (bio );
318+ } else {
319+ struct bio_vec * bvec ;
320+ int i ;
321+
322+ bio_for_each_segment_all (bvec , bio , i )
323+ put_page (bvec -> bv_page );
324+ bio_put (bio );
325+ }
326+ }
327+
/*
 * Multi-bio direct I/O to a block device.  Splits the iov_iter into as many
 * bios as needed (each holding up to nr_pages pages), submits them, and for
 * sync kiocbs waits in-line for completion — optionally polling the queue
 * for IOCB_HIPRI requests.  Returns bytes transferred, -EIOCBQUEUED for
 * async submission, or a negative error.
 */
static ssize_t
__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	unsigned blkbits = blksize_bits(bdev_logical_block_size(bdev));
	struct blkdev_dio *dio;
	struct bio *bio;
	bool is_read = (iov_iter_rw(iter) == READ);
	loff_t pos = iocb->ki_pos;
	blk_qc_t qc = BLK_QC_T_NONE;	/* cookie of the last submitted bio, for polling */
	int ret;

	/* offset and all segment boundaries must be logical-block aligned */
	if ((pos | iov_iter_alignment(iter)) & ((1 << blkbits) - 1))
		return -EINVAL;

	/* first bio comes from the front-padded pool so dio is embedded before it */
	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, blkdev_dio_pool);
	bio_get(bio); /* extra ref for the completion handler */

	dio = container_of(bio, struct blkdev_dio, bio);
	dio->is_sync = is_sync_kiocb(iocb);
	if (dio->is_sync)
		dio->waiter = current;
	else
		dio->iocb = iocb;

	dio->size = 0;
	dio->multi_bio = false;
	/* only reads into plain user iovecs need the pages re-dirtied on completion */
	dio->should_dirty = is_read && (iter->type == ITER_IOVEC);

	for (;;) {
		bio->bi_bdev = bdev;
		bio->bi_iter.bi_sector = pos >> blkbits;
		bio->bi_private = dio;
		bio->bi_end_io = blkdev_bio_end_io;

		/* pin user pages and map them into this bio; advances iter */
		ret = bio_iov_iter_get_pages(bio, iter);
		if (unlikely(ret)) {
			/* fail this bio; end_io completes/frees it and finishes the dio */
			bio->bi_error = ret;
			bio_endio(bio);
			break;
		}

		if (is_read) {
			bio->bi_opf = REQ_OP_READ;
			if (dio->should_dirty)
				bio_set_pages_dirty(bio);
		} else {
			bio->bi_opf = dio_bio_write_op(iocb);
			task_io_account_write(bio->bi_iter.bi_size);
		}

		dio->size += bio->bi_iter.bi_size;
		pos += bio->bi_iter.bi_size;

		nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
		if (!nr_pages) {
			/* last bio: keep its cookie so HIPRI waiters can poll it */
			qc = submit_bio(bio);
			break;
		}

		if (!dio->multi_bio) {
			/* going multi-bio: one ref for each of the two bios in flight */
			dio->multi_bio = true;
			atomic_set(&dio->ref, 2);
		} else {
			atomic_inc(&dio->ref);
		}

		submit_bio(bio);
		/* follow-on bios don't need the front pad, plain bio_alloc is fine */
		bio = bio_alloc(GFP_KERNEL, nr_pages);
	}

	if (!dio->is_sync)
		return -EIOCBQUEUED;

	/* sync: wait until end_io clears dio->waiter */
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!READ_ONCE(dio->waiter))
			break;

		/* for HIPRI, busy-poll the queue instead of sleeping when possible */
		if (!(iocb->ki_flags & IOCB_HIPRI) ||
		    !blk_mq_poll(bdev_get_queue(bdev), qc))
			io_schedule();
	}
	__set_current_state(TASK_RUNNING);

	ret = dio->bio.bi_error;
	if (likely(!ret)) {
		ret = dio->size;
		iocb->ki_pos += ret;
	}

	/* drop the extra ref taken above; may free dio together with the first bio */
	bio_put(&dio->bio);
	return ret;
}
424+
425+ static ssize_t
426+ blkdev_direct_IO (struct kiocb * iocb , struct iov_iter * iter )
427+ {
278428 int nr_pages ;
279429
280430 nr_pages = iov_iter_npages (iter , BIO_MAX_PAGES + 1 );
281431 if (!nr_pages )
282432 return 0 ;
283433 if (is_sync_kiocb (iocb ) && nr_pages <= BIO_MAX_PAGES )
284434 return __blkdev_direct_IO_simple (iocb , iter , nr_pages );
285- return __blockdev_direct_IO (iocb , inode , I_BDEV (inode ), iter ,
286- blkdev_get_block , NULL , NULL ,
287- DIO_SKIP_DIO_COUNT );
435+
436+ return __blkdev_direct_IO (iocb , iter , min (nr_pages , BIO_MAX_PAGES ));
437+ }
438+
439+ static __init int blkdev_init (void )
440+ {
441+ blkdev_dio_pool = bioset_create (4 , offsetof(struct blkdev_dio , bio ));
442+ if (!blkdev_dio_pool )
443+ return - ENOMEM ;
444+ return 0 ;
288445}
446+ module_init (blkdev_init );
289447
290448int __sync_blockdev (struct block_device * bdev , int wait )
291449{
0 commit comments