@@ -811,22 +811,64 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
811811{
812812 struct file * file = iocb -> ki_filp ;
813813 struct gfs2_inode * ip = GFS2_I (file -> f_mapping -> host );
814- size_t count = iov_iter_count (to );
814+ size_t prev_count = 0 , window_size = 0 ;
815+ size_t written = 0 ;
815816 ssize_t ret ;
816817
817- if (!count )
818+ /*
819+ * In this function, we disable page faults when we're holding the
820+ * inode glock while doing I/O. If a page fault occurs, we indicate
821+ * that the inode glock may be dropped, fault in the pages manually,
822+ * and retry.
823+ *
824+ * Unlike generic_file_read_iter, for reads, iomap_dio_rw can trigger
825+ * physical as well as manual page faults, and we need to disable both
826+ * kinds.
827+ *
828+ * For direct I/O, gfs2 takes the inode glock in deferred mode. This
829+ * locking mode is compatible with other deferred holders, so multiple
830+ * processes and nodes can do direct I/O to a file at the same time.
831+ * There's no guarantee that reads or writes will be atomic. Any
832+ * coordination among readers and writers needs to happen externally.
833+ */
834+
835+ if (!iov_iter_count (to ))
818836 return 0 ; /* skip atime */
819837
820838 gfs2_holder_init (ip -> i_gl , LM_ST_DEFERRED , 0 , gh );
839+ retry :
821840 ret = gfs2_glock_nq (gh );
822841 if (ret )
823842 goto out_uninit ;
843+ retry_under_glock :
844+ pagefault_disable ();
845+ to -> nofault = true;
846+ ret = iomap_dio_rw (iocb , to , & gfs2_iomap_ops , NULL ,
847+ IOMAP_DIO_PARTIAL , written );
848+ to -> nofault = false;
849+ pagefault_enable ();
850+ if (ret > 0 )
851+ written = ret ;
824852
825- ret = iomap_dio_rw (iocb , to , & gfs2_iomap_ops , NULL , 0 , 0 );
826- gfs2_glock_dq (gh );
853+ if (should_fault_in_pages (ret , to , & prev_count , & window_size )) {
854+ size_t leftover ;
855+
856+ gfs2_holder_allow_demote (gh );
857+ leftover = fault_in_iov_iter_writeable (to , window_size );
858+ gfs2_holder_disallow_demote (gh );
859+ if (leftover != window_size ) {
860+ if (!gfs2_holder_queued (gh ))
861+ goto retry ;
862+ goto retry_under_glock ;
863+ }
864+ }
865+ if (gfs2_holder_queued (gh ))
866+ gfs2_glock_dq (gh );
827867out_uninit :
828868 gfs2_holder_uninit (gh );
829- return ret ;
869+ if (ret < 0 )
870+ return ret ;
871+ return written ;
830872}
831873
832874static ssize_t gfs2_file_direct_write (struct kiocb * iocb , struct iov_iter * from ,
@@ -835,10 +877,20 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
835877 struct file * file = iocb -> ki_filp ;
836878 struct inode * inode = file -> f_mapping -> host ;
837879 struct gfs2_inode * ip = GFS2_I (inode );
838- size_t len = iov_iter_count ( from ) ;
839- loff_t offset = iocb -> ki_pos ;
880+ size_t prev_count = 0 , window_size = 0 ;
881+ size_t read = 0 ;
840882 ssize_t ret ;
841883
884+ /*
885+ * In this function, we disable page faults when we're holding the
886+ * inode glock while doing I/O. If a page fault occurs, we indicate
887+ * that the inode glock may be dropped, fault in the pages manually,
888+ * and retry.
889+ *
890+ * For writes, iomap_dio_rw only triggers manual page faults, so we
891+ * don't need to disable physical ones.
892+ */
893+
842894 /*
843895 * Deferred lock, even if its a write, since we do no allocation on
844896 * this path. All we need to change is the atime, and this lock mode
@@ -848,22 +900,45 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
848900 * VFS does.
849901 */
850902 gfs2_holder_init (ip -> i_gl , LM_ST_DEFERRED , 0 , gh );
903+ retry :
851904 ret = gfs2_glock_nq (gh );
852905 if (ret )
853906 goto out_uninit ;
854-
907+ retry_under_glock :
855908 /* Silently fall back to buffered I/O when writing beyond EOF */
856- if (offset + len > i_size_read (& ip -> i_inode ))
909+ if (iocb -> ki_pos + iov_iter_count ( from ) > i_size_read (& ip -> i_inode ))
857910 goto out ;
858911
859- ret = iomap_dio_rw (iocb , from , & gfs2_iomap_ops , NULL , 0 , 0 );
912+ from -> nofault = true;
913+ ret = iomap_dio_rw (iocb , from , & gfs2_iomap_ops , NULL ,
914+ IOMAP_DIO_PARTIAL , read );
915+ from -> nofault = false;
916+
860917 if (ret == - ENOTBLK )
861918 ret = 0 ;
919+ if (ret > 0 )
920+ read = ret ;
921+
922+ if (should_fault_in_pages (ret , from , & prev_count , & window_size )) {
923+ size_t leftover ;
924+
925+ gfs2_holder_allow_demote (gh );
926+ leftover = fault_in_iov_iter_readable (from , window_size );
927+ gfs2_holder_disallow_demote (gh );
928+ if (leftover != window_size ) {
929+ if (!gfs2_holder_queued (gh ))
930+ goto retry ;
931+ goto retry_under_glock ;
932+ }
933+ }
862934out :
863- gfs2_glock_dq (gh );
935+ if (gfs2_holder_queued (gh ))
936+ gfs2_glock_dq (gh );
864937out_uninit :
865938 gfs2_holder_uninit (gh );
866- return ret ;
939+ if (ret < 0 )
940+ return ret ;
941+ return read ;
867942}
868943
869944static ssize_t gfs2_file_read_iter (struct kiocb * iocb , struct iov_iter * to )