4242#include "internal.h"
4343
4444/*
45- * How many user pages to map in one call to get_user_pages (). This determines
46- * the size of a structure in the slab cache
45+ * How many user pages to map in one call to iov_iter_extract_pages (). This
46+ * determines the size of a structure in the slab cache
4747 */
4848#define DIO_PAGES 64
4949
@@ -121,12 +121,13 @@ struct dio {
121121 struct inode * inode ;
122122 loff_t i_size ; /* i_size when submitted */
123123 dio_iodone_t * end_io ; /* IO completion function */
124+ bool is_pinned ; /* T if we have pins on the pages */
124125
125126 void * private ; /* copy from map_bh.b_private */
126127
127128 /* BIO completion state */
128129 spinlock_t bio_lock ; /* protects BIO fields below */
129- int page_errors ; /* errno from get_user_pages () */
130+ int page_errors ; /* err from iov_iter_extract_pages () */
130131 int is_async ; /* is IO async ? */
131132 bool defer_completion ; /* defer AIO completion to workqueue? */
132133 bool should_dirty ; /* if pages should be dirtied */
@@ -165,23 +166,22 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
165166 */
166167static inline int dio_refill_pages (struct dio * dio , struct dio_submit * sdio )
167168{
169+ struct page * * pages = dio -> pages ;
168170 const enum req_op dio_op = dio -> opf & REQ_OP_MASK ;
169171 ssize_t ret ;
170172
171- ret = iov_iter_get_pages2 (sdio -> iter , dio -> pages , LONG_MAX , DIO_PAGES ,
172- & sdio -> from );
173+ ret = iov_iter_extract_pages (sdio -> iter , & pages , LONG_MAX ,
174+ DIO_PAGES , 0 , & sdio -> from );
173175
174176 if (ret < 0 && sdio -> blocks_available && dio_op == REQ_OP_WRITE ) {
175- struct page * page = ZERO_PAGE (0 );
176177 /*
177178 * A memory fault, but the filesystem has some outstanding
178179 * mapped blocks. We need to use those blocks up to avoid
179180 * leaking stale data in the file.
180181 */
181182 if (dio -> page_errors == 0 )
182183 dio -> page_errors = ret ;
183- get_page (page );
184- dio -> pages [0 ] = page ;
184+ dio -> pages [0 ] = ZERO_PAGE (0 );
185185 sdio -> head = 0 ;
186186 sdio -> tail = 1 ;
187187 sdio -> from = 0 ;
@@ -201,9 +201,9 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
201201
202202/*
203203 * Get another userspace page. Returns an ERR_PTR on error. Pages are
204- * buffered inside the dio so that we can call get_user_pages() against a
205- * decent number of pages, less frequently. To provide nicer use of the
206- * L1 cache.
204+ * buffered inside the dio so that we can call iov_iter_extract_pages()
205+ * against a decent number of pages, less frequently. To provide nicer use of
206+ * the L1 cache.
207207 */
208208static inline struct page * dio_get_page (struct dio * dio ,
209209 struct dio_submit * sdio )
@@ -219,6 +219,18 @@ static inline struct page *dio_get_page(struct dio *dio,
219219 return dio -> pages [sdio -> head ];
220220}
221221
222+ static void dio_pin_page (struct dio * dio , struct page * page )
223+ {
224+ if (dio -> is_pinned )
225+ folio_add_pin (page_folio (page ));
226+ }
227+
228+ static void dio_unpin_page (struct dio * dio , struct page * page )
229+ {
230+ if (dio -> is_pinned )
231+ unpin_user_page (page );
232+ }
233+
222234/*
223235 * dio_complete() - called when all DIO BIO I/O has been completed
224236 *
@@ -402,8 +414,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
402414 bio -> bi_end_io = dio_bio_end_aio ;
403415 else
404416 bio -> bi_end_io = dio_bio_end_io ;
405- /* for now require references for all pages */
406- bio_set_flag (bio , BIO_PAGE_REFFED );
417+ if ( dio -> is_pinned )
418+ bio_set_flag (bio , BIO_PAGE_PINNED );
407419 sdio -> bio = bio ;
408420 sdio -> logical_offset_in_bio = sdio -> cur_page_fs_offset ;
409421}
@@ -444,8 +456,9 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
444456 */
445457static inline void dio_cleanup (struct dio * dio , struct dio_submit * sdio )
446458{
447- while (sdio -> head < sdio -> tail )
448- put_page (dio -> pages [sdio -> head ++ ]);
459+ if (dio -> is_pinned )
460+ unpin_user_pages (dio -> pages + sdio -> head ,
461+ sdio -> tail - sdio -> head );
449462}
450463
451464/*
@@ -676,7 +689,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
676689 *
677690 * Return zero on success. Non-zero means the caller needs to start a new BIO.
678691 */
679- static inline int dio_bio_add_page (struct dio_submit * sdio )
692+ static inline int dio_bio_add_page (struct dio * dio , struct dio_submit * sdio )
680693{
681694 int ret ;
682695
@@ -688,7 +701,7 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
688701 */
689702 if ((sdio -> cur_page_len + sdio -> cur_page_offset ) == PAGE_SIZE )
690703 sdio -> pages_in_io -- ;
691- get_page ( sdio -> cur_page );
704+ dio_pin_page ( dio , sdio -> cur_page );
692705 sdio -> final_block_in_bio = sdio -> cur_page_block +
693706 (sdio -> cur_page_len >> sdio -> blkbits );
694707 ret = 0 ;
@@ -743,11 +756,11 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
743756 goto out ;
744757 }
745758
746- if (dio_bio_add_page (sdio ) != 0 ) {
759+ if (dio_bio_add_page (dio , sdio ) != 0 ) {
747760 dio_bio_submit (dio , sdio );
748761 ret = dio_new_bio (dio , sdio , sdio -> cur_page_block , map_bh );
749762 if (ret == 0 ) {
750- ret = dio_bio_add_page (sdio );
763+ ret = dio_bio_add_page (dio , sdio );
751764 BUG_ON (ret != 0 );
752765 }
753766 }
@@ -804,13 +817,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
804817 */
805818 if (sdio -> cur_page ) {
806819 ret = dio_send_cur_page (dio , sdio , map_bh );
807- put_page ( sdio -> cur_page );
820+ dio_unpin_page ( dio , sdio -> cur_page );
808821 sdio -> cur_page = NULL ;
809822 if (ret )
810823 return ret ;
811824 }
812825
813- get_page ( page ); /* It is in dio */
826+ dio_pin_page ( dio , page ); /* It is in dio */
814827 sdio -> cur_page = page ;
815828 sdio -> cur_page_offset = offset ;
816829 sdio -> cur_page_len = len ;
@@ -825,7 +838,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
825838 ret = dio_send_cur_page (dio , sdio , map_bh );
826839 if (sdio -> bio )
827840 dio_bio_submit (dio , sdio );
828- put_page ( sdio -> cur_page );
841+ dio_unpin_page ( dio , sdio -> cur_page );
829842 sdio -> cur_page = NULL ;
830843 }
831844 return ret ;
@@ -926,7 +939,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
926939
927940 ret = get_more_blocks (dio , sdio , map_bh );
928941 if (ret ) {
929- put_page ( page );
942+ dio_unpin_page ( dio , page );
930943 goto out ;
931944 }
932945 if (!buffer_mapped (map_bh ))
@@ -971,7 +984,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
971984
972985 /* AKPM: eargh, -ENOTBLK is a hack */
973986 if (dio_op == REQ_OP_WRITE ) {
974- put_page ( page );
987+ dio_unpin_page ( dio , page );
975988 return - ENOTBLK ;
976989 }
977990
@@ -984,7 +997,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
984997 if (sdio -> block_in_file >=
985998 i_size_aligned >> blkbits ) {
986999 /* We hit eof */
987- put_page ( page );
1000+ dio_unpin_page ( dio , page );
9881001 goto out ;
9891002 }
9901003 zero_user (page , from , 1 << blkbits );
@@ -1024,7 +1037,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
10241037 sdio -> next_block_for_io ,
10251038 map_bh );
10261039 if (ret ) {
1027- put_page ( page );
1040+ dio_unpin_page ( dio , page );
10281041 goto out ;
10291042 }
10301043 sdio -> next_block_for_io += this_chunk_blocks ;
@@ -1039,8 +1052,8 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
10391052 break ;
10401053 }
10411054
1042- /* Drop the ref which was taken in get_user_pages() */
1043- put_page ( page );
1055+ /* Drop the pin which was taken in iov_iter_extract_pages() */
1056+ dio_unpin_page ( dio , page );
10441057 }
10451058out :
10461059 return ret ;
@@ -1135,6 +1148,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
11351148 /* will be released by direct_io_worker */
11361149 inode_lock (inode );
11371150 }
1151+ dio -> is_pinned = iov_iter_extract_will_pin (iter );
11381152
11391153 /* Once we sampled i_size check for reads beyond EOF */
11401154 dio -> i_size = i_size_read (inode );
@@ -1259,7 +1273,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
12591273 ret2 = dio_send_cur_page (dio , & sdio , & map_bh );
12601274 if (retval == 0 )
12611275 retval = ret2 ;
1262- put_page ( sdio .cur_page );
1276+ dio_unpin_page ( dio , sdio .cur_page );
12631277 sdio .cur_page = NULL ;
12641278 }
12651279 if (sdio .bio )
0 commit comments