@@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
124124 return - EINVAL ;
125125}
126126
127- static void nfs_direct_dirty_pages (struct page * * pages , unsigned int pgbase , size_t count )
128- {
129- unsigned int npages ;
130- unsigned int i ;
131-
132- if (count == 0 )
133- return ;
134- pages += (pgbase >> PAGE_SHIFT );
135- npages = (count + (pgbase & ~PAGE_MASK ) + PAGE_SIZE - 1 ) >> PAGE_SHIFT ;
136- for (i = 0 ; i < npages ; i ++ ) {
137- struct page * page = pages [i ];
138- if (!PageCompound (page ))
139- set_page_dirty (page );
140- }
141- }
142-
143127static void nfs_direct_release_pages (struct page * * pages , unsigned int npages )
144128{
145129 unsigned int i ;
@@ -226,177 +210,178 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
226210 nfs_direct_req_release (dreq );
227211}
228212
229- /*
230- * We must hold a reference to all the pages in this direct read request
231- * until the RPCs complete. This could be long *after* we are woken up in
232- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
233- */
234- static void nfs_direct_read_result (struct rpc_task * task , void * calldata )
213+ void nfs_direct_readpage_release (struct nfs_page * req )
235214{
236- struct nfs_read_data * data = calldata ;
237-
238- nfs_readpage_result (task , data );
215+ dprintk ("NFS: direct read done (%s/%lld %d@%lld)\n" ,
216+ req -> wb_context -> dentry -> d_inode -> i_sb -> s_id ,
217+ (long long )NFS_FILEID (req -> wb_context -> dentry -> d_inode ),
218+ req -> wb_bytes ,
219+ (long long )req_offset (req ));
220+ nfs_release_request (req );
239221}
240222
241- static void nfs_direct_read_release ( void * calldata )
223+ static void nfs_direct_read_completion ( struct nfs_pgio_header * hdr )
242224{
225+ unsigned long bytes = 0 ;
226+ struct nfs_direct_req * dreq = hdr -> dreq ;
243227
244- struct nfs_read_data * data = calldata ;
245- struct nfs_direct_req * dreq = (struct nfs_direct_req * )data -> header -> req ;
246- int status = data -> task .tk_status ;
228+ if (test_bit (NFS_IOHDR_REDO , & hdr -> flags ))
229+ goto out_put ;
247230
248231 spin_lock (& dreq -> lock );
249- if (unlikely (status < 0 )) {
250- dreq -> error = status ;
251- spin_unlock (& dreq -> lock );
232+ if (test_bit (NFS_IOHDR_ERROR , & hdr -> flags ) && (hdr -> good_bytes == 0 ))
233+ dreq -> error = hdr -> error ;
234+ else
235+ dreq -> count += hdr -> good_bytes ;
236+ spin_unlock (& dreq -> lock );
237+
238+ if (!test_bit (NFS_IOHDR_ERROR , & hdr -> flags )) {
239+ while (!list_empty (& hdr -> pages )) {
240+ struct nfs_page * req = nfs_list_entry (hdr -> pages .next );
241+ struct page * page = req -> wb_page ;
242+
243+ if (test_bit (NFS_IOHDR_EOF , & hdr -> flags )) {
244+ if (bytes > hdr -> good_bytes )
245+ zero_user (page , 0 , PAGE_SIZE );
246+ else if (hdr -> good_bytes - bytes < PAGE_SIZE )
247+ zero_user_segment (page ,
248+ hdr -> good_bytes & ~PAGE_MASK ,
249+ PAGE_SIZE );
250+ }
251+ bytes += req -> wb_bytes ;
252+ nfs_list_remove_request (req );
253+ nfs_direct_readpage_release (req );
254+ if (!PageCompound (page ))
255+ set_page_dirty (page );
256+ page_cache_release (page );
257+ }
252258 } else {
253- dreq -> count += data -> res .count ;
254- spin_unlock (& dreq -> lock );
255- nfs_direct_dirty_pages (data -> pages .pagevec ,
256- data -> args .pgbase ,
257- data -> res .count );
259+ while (!list_empty (& hdr -> pages )) {
260+ struct nfs_page * req = nfs_list_entry (hdr -> pages .next );
261+
262+ if (bytes < hdr -> good_bytes )
263+ if (!PageCompound (req -> wb_page ))
264+ set_page_dirty (req -> wb_page );
265+ bytes += req -> wb_bytes ;
266+ page_cache_release (req -> wb_page );
267+ nfs_list_remove_request (req );
268+ nfs_direct_readpage_release (req );
269+ }
258270 }
259- nfs_direct_release_pages (data -> pages .pagevec , data -> pages .npages );
260-
271+ out_put :
261272 if (put_dreq (dreq ))
262273 nfs_direct_complete (dreq );
263- nfs_readdata_release ( data );
274+ hdr -> release ( hdr );
264275}
265276
266- static const struct rpc_call_ops nfs_read_direct_ops = {
267- .rpc_call_prepare = nfs_read_prepare ,
268- .rpc_call_done = nfs_direct_read_result ,
269- .rpc_release = nfs_direct_read_release ,
270- };
271-
272- static void nfs_direct_readhdr_release (struct nfs_read_header * rhdr )
277+ static void nfs_sync_pgio_error (struct list_head * head )
273278{
274- struct nfs_read_data * data = & rhdr -> rpc_data ;
279+ struct nfs_page * req ;
275280
276- if (data -> pages .pagevec != data -> pages .page_array )
277- kfree (data -> pages .pagevec );
278- nfs_readhdr_free (& rhdr -> header );
281+ while (!list_empty (head )) {
282+ req = nfs_list_entry (head -> next );
283+ nfs_list_remove_request (req );
284+ nfs_release_request (req );
285+ }
279286}
280287
288+ static void nfs_direct_pgio_init (struct nfs_pgio_header * hdr )
289+ {
290+ get_dreq (hdr -> dreq );
291+ }
292+
293+ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
294+ .error_cleanup = nfs_sync_pgio_error ,
295+ .init_hdr = nfs_direct_pgio_init ,
296+ .completion = nfs_direct_read_completion ,
297+ };
298+
281299/*
282300 * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
283301 * operation. If nfs_readdata_alloc() or get_user_pages() fails,
284302 * bail and stop sending more reads. Read length accounting is
285303 * handled automatically by nfs_direct_read_result(). Otherwise, if
286304 * no requests have been sent, just return an error.
287305 */
288- static ssize_t nfs_direct_read_schedule_segment (struct nfs_direct_req * dreq ,
306+ static ssize_t nfs_direct_read_schedule_segment (struct nfs_pageio_descriptor * desc ,
289307 const struct iovec * iov ,
290308 loff_t pos )
291309{
310+ struct nfs_direct_req * dreq = desc -> pg_dreq ;
292311 struct nfs_open_context * ctx = dreq -> ctx ;
293312 struct inode * inode = ctx -> dentry -> d_inode ;
294313 unsigned long user_addr = (unsigned long )iov -> iov_base ;
295314 size_t count = iov -> iov_len ;
296315 size_t rsize = NFS_SERVER (inode )-> rsize ;
297- struct rpc_task * task ;
298- struct rpc_message msg = {
299- .rpc_cred = ctx -> cred ,
300- };
301- struct rpc_task_setup task_setup_data = {
302- .rpc_client = NFS_CLIENT (inode ),
303- .rpc_message = & msg ,
304- .callback_ops = & nfs_read_direct_ops ,
305- .workqueue = nfsiod_workqueue ,
306- .flags = RPC_TASK_ASYNC ,
307- };
308316 unsigned int pgbase ;
309317 int result ;
310318 ssize_t started = 0 ;
319+ struct page * * pagevec = NULL ;
320+ unsigned int npages ;
311321
312322 do {
313- struct nfs_read_header * rhdr ;
314- struct nfs_read_data * data ;
315- struct nfs_page_array * pages ;
316323 size_t bytes ;
324+ int i ;
317325
318326 pgbase = user_addr & ~PAGE_MASK ;
319- bytes = min (rsize ,count );
327+ bytes = min (max ( rsize , PAGE_SIZE ), count );
320328
321329 result = - ENOMEM ;
322- rhdr = nfs_readhdr_alloc ();
323- if (unlikely (!rhdr ))
324- break ;
325- data = nfs_readdata_alloc (& rhdr -> header , nfs_page_array_len (pgbase , bytes ));
326- if (!data ) {
327- nfs_readhdr_free (& rhdr -> header );
330+ npages = nfs_page_array_len (pgbase , bytes );
331+ if (!pagevec )
332+ pagevec = kmalloc (npages * sizeof (struct page * ),
333+ GFP_KERNEL );
334+ if (!pagevec )
328335 break ;
329- }
330- data -> header = & rhdr -> header ;
331- atomic_inc (& data -> header -> refcnt );
332- pages = & data -> pages ;
333-
334336 down_read (& current -> mm -> mmap_sem );
335337 result = get_user_pages (current , current -> mm , user_addr ,
336- pages -> npages , 1 , 0 , pages -> pagevec , NULL );
338+ npages , 1 , 0 , pagevec , NULL );
337339 up_read (& current -> mm -> mmap_sem );
338- if (result < 0 ) {
339- nfs_direct_readhdr_release (rhdr );
340+ if (result < 0 )
340341 break ;
341- }
342- if ((unsigned )result < pages -> npages ) {
342+ if ((unsigned )result < npages ) {
343343 bytes = result * PAGE_SIZE ;
344344 if (bytes <= pgbase ) {
345- nfs_direct_release_pages (pages -> pagevec , result );
346- nfs_direct_readhdr_release (rhdr );
345+ nfs_direct_release_pages (pagevec , result );
347346 break ;
348347 }
349348 bytes -= pgbase ;
350- pages -> npages = result ;
349+ npages = result ;
351350 }
352351
353- get_dreq (dreq );
354-
355- rhdr -> header .req = (struct nfs_page * ) dreq ;
356- rhdr -> header .inode = inode ;
357- rhdr -> header .cred = msg .rpc_cred ;
358- data -> args .fh = NFS_FH (inode );
359- data -> args .context = get_nfs_open_context (ctx );
360- data -> args .lock_context = dreq -> l_ctx ;
361- data -> args .offset = pos ;
362- data -> args .pgbase = pgbase ;
363- data -> args .pages = pages -> pagevec ;
364- data -> args .count = bytes ;
365- data -> res .fattr = & data -> fattr ;
366- data -> res .eof = 0 ;
367- data -> res .count = bytes ;
368- nfs_fattr_init (& data -> fattr );
369- msg .rpc_argp = & data -> args ;
370- msg .rpc_resp = & data -> res ;
371-
372- task_setup_data .task = & data -> task ;
373- task_setup_data .callback_data = data ;
374- NFS_PROTO (inode )-> read_setup (data , & msg );
375-
376- task = rpc_run_task (& task_setup_data );
377- if (IS_ERR (task ))
378- break ;
379-
380- dprintk ("NFS: %5u initiated direct read call "
381- "(req %s/%Ld, %zu bytes @ offset %Lu)\n" ,
382- task -> tk_pid ,
383- inode -> i_sb -> s_id ,
384- (long long )NFS_FILEID (inode ),
385- bytes ,
386- (unsigned long long )data -> args .offset );
387- rpc_put_task (task );
388-
389- started += bytes ;
390- user_addr += bytes ;
391- pos += bytes ;
392- /* FIXME: Remove this unnecessary math from final patch */
393- pgbase += bytes ;
394- pgbase &= ~PAGE_MASK ;
395- BUG_ON (pgbase != (user_addr & ~PAGE_MASK ));
396-
397- count -= bytes ;
352+ for (i = 0 ; i < npages ; i ++ ) {
353+ struct nfs_page * req ;
354+ unsigned int req_len = min (bytes , PAGE_SIZE - pgbase );
355+ /* XXX do we need to do the eof zeroing found in async_filler? */
356+ req = nfs_create_request (dreq -> ctx , dreq -> inode ,
357+ pagevec [i ],
358+ pgbase , req_len );
359+ if (IS_ERR (req )) {
360+ nfs_direct_release_pages (pagevec + i ,
361+ npages - i );
362+ result = PTR_ERR (req );
363+ break ;
364+ }
365+ req -> wb_index = pos >> PAGE_SHIFT ;
366+ req -> wb_offset = pos & ~PAGE_MASK ;
367+ if (!nfs_pageio_add_request (desc , req )) {
368+ result = desc -> pg_error ;
369+ nfs_release_request (req );
370+ nfs_direct_release_pages (pagevec + i ,
371+ npages - i );
372+ break ;
373+ }
374+ pgbase = 0 ;
375+ bytes -= req_len ;
376+ started += req_len ;
377+ user_addr += req_len ;
378+ pos += req_len ;
379+ count -= req_len ;
380+ }
398381 } while (count != 0 );
399382
383+ kfree (pagevec );
384+
400385 if (started )
401386 return started ;
402387 return result < 0 ? (ssize_t ) result : - EFAULT ;
@@ -407,15 +392,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
407392 unsigned long nr_segs ,
408393 loff_t pos )
409394{
395+ struct nfs_pageio_descriptor desc ;
410396 ssize_t result = - EINVAL ;
411397 size_t requested_bytes = 0 ;
412398 unsigned long seg ;
413399
400+ nfs_pageio_init_read (& desc , dreq -> inode ,
401+ & nfs_direct_read_completion_ops );
414402 get_dreq (dreq );
403+ desc .pg_dreq = dreq ;
415404
416405 for (seg = 0 ; seg < nr_segs ; seg ++ ) {
417406 const struct iovec * vec = & iov [seg ];
418- result = nfs_direct_read_schedule_segment (dreq , vec , pos );
407+ result = nfs_direct_read_schedule_segment (& desc , vec , pos );
419408 if (result < 0 )
420409 break ;
421410 requested_bytes += result ;
@@ -424,6 +413,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
424413 pos += vec -> iov_len ;
425414 }
426415
416+ nfs_pageio_complete (& desc );
417+
427418 /*
428419 * If no bytes were started, return the error, and let the
429420 * generic layer handle the completion.
0 commit comments