@@ -165,6 +165,19 @@ struct io_kiocb {
165165#define IO_PLUG_THRESHOLD 2
166166#define IO_IOPOLL_BATCH 8
167167
168+ struct io_submit_state { /* per-batch submission state: a block plug plus a one-entry file reference cache */
169+ struct blk_plug plug ; /* started in io_submit_state_start(), finished in io_submit_state_end() */
170+
171+ /*
172+ * File reference cache
173+ */
174+ struct file * file ; /* cached file; NULL means the cache is empty */
175+ unsigned int fd ; /* descriptor that 'file' was looked up from */
176+ unsigned int has_refs ; /* references taken on 'file' by the last fget_many() */
177+ unsigned int used_refs ; /* how many of has_refs have been handed out to requests */
178+ unsigned int ios_left ; /* lookups still expected in this batch; sizes the next fget_many() */
179+ };
180+
168181static struct kmem_cache * req_cachep ;
169182
170183static const struct file_operations io_uring_fops ;
@@ -332,9 +345,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
332345 struct list_head * done )
333346{
334347 void * reqs [IO_IOPOLL_BATCH ];
348+ int file_count , to_free ;
349+ struct file * file = NULL ;
335350 struct io_kiocb * req ;
336- int to_free = 0 ;
337351
352+ file_count = to_free = 0 ;
338353 while (!list_empty (done )) {
339354 req = list_first_entry (done , struct io_kiocb , list );
340355 list_del (& req -> list );
@@ -344,12 +359,28 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
344359 reqs [to_free ++ ] = req ;
345360 (* nr_events )++ ;
346361
347- fput (req -> rw .ki_filp );
362+ /*
363+ * Batched puts of the same file, to avoid dirtying the
364+ * file usage count multiple times, if avoidable.
365+ */
366+ if (!file ) {
367+ file = req -> rw .ki_filp ;
368+ file_count = 1 ;
369+ } else if (file == req -> rw .ki_filp ) {
370+ file_count ++ ;
371+ } else {
372+ fput_many (file , file_count );
373+ file = req -> rw .ki_filp ;
374+ file_count = 1 ;
375+ }
376+
348377 if (to_free == ARRAY_SIZE (reqs ))
349378 io_free_req_many (ctx , reqs , & to_free );
350379 }
351380 io_commit_cqring (ctx );
352381
382+ if (file )
383+ fput_many (file , file_count );
353384 io_free_req_many (ctx , reqs , & to_free );
354385}
355386
@@ -530,6 +561,48 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
530561 list_add_tail (& req -> list , & ctx -> poll_list );
531562}
532563
564+ static void io_file_put (struct io_submit_state * state , struct file * file ) /* drop one ref on @file (no state), or flush the cached file's unused refs (state given) */
565+ {
566+ if (!state ) { /* no batch state: @file holds a single plain reference */
567+ fput (file );
568+ } else if (state -> file ) { /* batch state: @file is not consulted here, only the cached file is released */
569+ int diff = state -> has_refs - state -> used_refs ; /* references taken up front but never handed out */
570+
571+ if (diff )
572+ fput_many (state -> file , diff );
573+ state -> file = NULL ; /* empty the cache so the next io_file_get() takes fresh references */
574+ }
575+ }
576+
577+ /*
578+ * Get as many references to a file as we have IOs left in this submission,
579+ * assuming most submissions are for one file, or at least that each file
580+ * has more than one submission.
581+ */
582+ static struct file * io_file_get (struct io_submit_state * state , int fd ) /* returns the file for @fd; NULL when fget_many() finds none */
583+ {
584+ if (!state ) /* no batch state: plain single-reference lookup */
585+ return fget (fd );
586+
587+ if (state -> file ) {
588+ if (state -> fd == fd ) { /* NOTE(review): state->fd is unsigned int, fd is int - mixed-sign compare; confirm intended */
589+ state -> used_refs ++ ; /* cache hit: hand out one of the references taken earlier */
590+ state -> ios_left -- ;
591+ return state -> file ;
592+ }
593+ io_file_put (state , NULL ); /* different fd: give back the unused references and empty the cache */
594+ }
595+ state -> file = fget_many (fd , state -> ios_left ); /* take one reference per lookup still expected in this batch */
596+ if (!state -> file ) /* lookup failed: cache stays empty and ios_left is left unchanged */
597+ return NULL ;
598+
599+ state -> fd = fd ;
600+ state -> has_refs = state -> ios_left ; /* recorded before the decrement below, so it matches the fget_many() count */
601+ state -> used_refs = 1 ; /* the reference returned to this caller */
602+ state -> ios_left -- ;
603+ return state -> file ;
604+ }
605+
533606/*
534607 * If we tracked the file through the SCM inflight mechanism, we could support
535608 * any file. For now, just ensure that anything potentially problematic is done
@@ -548,7 +621,7 @@ static bool io_file_supports_async(struct file *file)
548621}
549622
550623static int io_prep_rw (struct io_kiocb * req , const struct io_uring_sqe * sqe ,
551- bool force_nonblock )
624+ bool force_nonblock , struct io_submit_state * state )
552625{
553626 struct io_ring_ctx * ctx = req -> ctx ;
554627 struct kiocb * kiocb = & req -> rw ;
@@ -560,7 +633,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
560633 return 0 ;
561634
562635 fd = READ_ONCE (sqe -> fd );
563- kiocb -> ki_filp = fget ( fd );
636+ kiocb -> ki_filp = io_file_get ( state , fd );
564637 if (unlikely (!kiocb -> ki_filp ))
565638 return - EBADF ;
566639 if (force_nonblock && !io_file_supports_async (kiocb -> ki_filp ))
@@ -604,7 +677,10 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
604677 }
605678 return 0 ;
606679out_fput :
607- fput (kiocb -> ki_filp );
680+ /* in case of error, we didn't use this file reference. drop it. */
681+ if (state )
682+ state -> used_refs -- ;
683+ io_file_put (state , kiocb -> ki_filp );
608684 return ret ;
609685}
610686
@@ -650,15 +726,15 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
650726}
651727
652728static ssize_t io_read (struct io_kiocb * req , const struct sqe_submit * s ,
653- bool force_nonblock )
729+ bool force_nonblock , struct io_submit_state * state )
654730{
655731 struct iovec inline_vecs [UIO_FASTIOV ], * iovec = inline_vecs ;
656732 struct kiocb * kiocb = & req -> rw ;
657733 struct iov_iter iter ;
658734 struct file * file ;
659735 ssize_t ret ;
660736
661- ret = io_prep_rw (req , s -> sqe , force_nonblock );
737+ ret = io_prep_rw (req , s -> sqe , force_nonblock , state );
662738 if (ret )
663739 return ret ;
664740 file = kiocb -> ki_filp ;
@@ -694,15 +770,15 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
694770}
695771
696772static ssize_t io_write (struct io_kiocb * req , const struct sqe_submit * s ,
697- bool force_nonblock )
773+ bool force_nonblock , struct io_submit_state * state )
698774{
699775 struct iovec inline_vecs [UIO_FASTIOV ], * iovec = inline_vecs ;
700776 struct kiocb * kiocb = & req -> rw ;
701777 struct iov_iter iter ;
702778 struct file * file ;
703779 ssize_t ret ;
704780
705- ret = io_prep_rw (req , s -> sqe , force_nonblock );
781+ ret = io_prep_rw (req , s -> sqe , force_nonblock , state );
706782 if (ret )
707783 return ret ;
708784 /* Hold on to the file for -EAGAIN */
@@ -826,7 +902,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
826902}
827903
828904static int __io_submit_sqe (struct io_ring_ctx * ctx , struct io_kiocb * req ,
829- const struct sqe_submit * s , bool force_nonblock )
905+ const struct sqe_submit * s , bool force_nonblock ,
906+ struct io_submit_state * state )
830907{
831908 ssize_t ret ;
832909 int opcode ;
@@ -841,10 +918,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
841918 ret = io_nop (req , req -> user_data );
842919 break ;
843920 case IORING_OP_READV :
844- ret = io_read (req , s , force_nonblock );
921+ ret = io_read (req , s , force_nonblock , state );
845922 break ;
846923 case IORING_OP_WRITEV :
847- ret = io_write (req , s , force_nonblock );
924+ ret = io_write (req , s , force_nonblock , state );
848925 break ;
849926 case IORING_OP_FSYNC :
850927 ret = io_fsync (req , s -> sqe , force_nonblock );
@@ -896,7 +973,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
896973 s -> needs_lock = true;
897974
898975 do {
899- ret = __io_submit_sqe (ctx , req , s , false);
976+ ret = __io_submit_sqe (ctx , req , s , false, NULL );
900977 /*
901978 * We can get EAGAIN for polled IO even though we're forcing
902979 * a sync submission from here, since we can't wait for
@@ -920,7 +997,8 @@ static void io_sq_wq_submit_work(struct work_struct *work)
920997 kfree (sqe );
921998}
922999
923- static int io_submit_sqe (struct io_ring_ctx * ctx , struct sqe_submit * s )
1000+ static int io_submit_sqe (struct io_ring_ctx * ctx , struct sqe_submit * s ,
1001+ struct io_submit_state * state )
9241002{
9251003 struct io_kiocb * req ;
9261004 ssize_t ret ;
@@ -935,7 +1013,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
9351013
9361014 req -> rw .ki_filp = NULL ;
9371015
938- ret = __io_submit_sqe (ctx , req , s , true);
1016+ ret = __io_submit_sqe (ctx , req , s , true, state );
9391017 if (ret == - EAGAIN ) {
9401018 struct io_uring_sqe * sqe_copy ;
9411019
@@ -956,6 +1034,26 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
9561034 return ret ;
9571035}
9581036
1037+ /*
1038+ * Batched submission is done, ensure local IO is flushed out.
1039+ */
1040+ static void io_submit_state_end (struct io_submit_state * state )
1041+ {
1042+ blk_finish_plug (& state -> plug ); /* pairs with blk_start_plug() in io_submit_state_start() */
1043+ io_file_put (state , NULL ); /* return any cached file references that were never handed out */
1044+ }
1045+
1046+ /*
1047+ * Start submission side cache.
1048+ */
1049+ static void io_submit_state_start (struct io_submit_state * state ,
1050+ struct io_ring_ctx * ctx , unsigned max_ios ) /* NOTE(review): ctx is not used in this function body */
1051+ {
1052+ blk_start_plug (& state -> plug );
1053+ state -> file = NULL ; /* cache starts empty; fd, has_refs and used_refs are not initialised here */
1054+ state -> ios_left = max_ios ; /* upper bound on file lookups for this batch */
1055+ }
1056+
9591057static void io_commit_sqring (struct io_ring_ctx * ctx )
9601058{
9611059 struct io_sq_ring * ring = ctx -> sq_ring ;
@@ -1029,11 +1127,13 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
10291127
10301128static int io_ring_submit (struct io_ring_ctx * ctx , unsigned int to_submit )
10311129{
1130+ struct io_submit_state state , * statep = NULL ;
10321131 int i , ret = 0 , submit = 0 ;
1033- struct blk_plug plug ;
10341132
1035- if (to_submit > IO_PLUG_THRESHOLD )
1036- blk_start_plug (& plug );
1133+ if (to_submit > IO_PLUG_THRESHOLD ) {
1134+ io_submit_state_start (& state , ctx , to_submit );
1135+ statep = & state ;
1136+ }
10371137
10381138 for (i = 0 ; i < to_submit ; i ++ ) {
10391139 struct sqe_submit s ;
@@ -1044,7 +1144,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
10441144 s .has_user = true;
10451145 s .needs_lock = false;
10461146
1047- ret = io_submit_sqe (ctx , & s );
1147+ ret = io_submit_sqe (ctx , & s , statep );
10481148 if (ret ) {
10491149 io_drop_sqring (ctx );
10501150 break ;
@@ -1054,8 +1154,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
10541154 }
10551155 io_commit_sqring (ctx );
10561156
1057- if (to_submit > IO_PLUG_THRESHOLD )
1058- blk_finish_plug ( & plug );
1157+ if (statep )
1158+ io_submit_state_end ( statep );
10591159
10601160 return submit ? submit : ret ;
10611161}
0 commit comments