Skip to content

Commit 584aa81

Browse files
Fred Isaman authored and Trond Myklebust committed
NFS: rewrite directio read to use async coalesce code
This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
1 parent 1825a0d commit 584aa81

File tree

6 files changed

+138
-144
lines changed

6 files changed

+138
-144
lines changed

fs/nfs/direct.c

Lines changed: 123 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
124124
return -EINVAL;
125125
}
126126

127-
static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
128-
{
129-
unsigned int npages;
130-
unsigned int i;
131-
132-
if (count == 0)
133-
return;
134-
pages += (pgbase >> PAGE_SHIFT);
135-
npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
136-
for (i = 0; i < npages; i++) {
137-
struct page *page = pages[i];
138-
if (!PageCompound(page))
139-
set_page_dirty(page);
140-
}
141-
}
142-
143127
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
144128
{
145129
unsigned int i;
@@ -226,177 +210,178 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
226210
nfs_direct_req_release(dreq);
227211
}
228212

229-
/*
230-
* We must hold a reference to all the pages in this direct read request
231-
* until the RPCs complete. This could be long *after* we are woken up in
232-
* nfs_direct_wait (for instance, if someone hits ^C on a slow server).
233-
*/
234-
static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
213+
void nfs_direct_readpage_release(struct nfs_page *req)
235214
{
236-
struct nfs_read_data *data = calldata;
237-
238-
nfs_readpage_result(task, data);
215+
dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
216+
req->wb_context->dentry->d_inode->i_sb->s_id,
217+
(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
218+
req->wb_bytes,
219+
(long long)req_offset(req));
220+
nfs_release_request(req);
239221
}
240222

241-
static void nfs_direct_read_release(void *calldata)
223+
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
242224
{
225+
unsigned long bytes = 0;
226+
struct nfs_direct_req *dreq = hdr->dreq;
243227

244-
struct nfs_read_data *data = calldata;
245-
struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
246-
int status = data->task.tk_status;
228+
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
229+
goto out_put;
247230

248231
spin_lock(&dreq->lock);
249-
if (unlikely(status < 0)) {
250-
dreq->error = status;
251-
spin_unlock(&dreq->lock);
232+
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
233+
dreq->error = hdr->error;
234+
else
235+
dreq->count += hdr->good_bytes;
236+
spin_unlock(&dreq->lock);
237+
238+
if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
239+
while (!list_empty(&hdr->pages)) {
240+
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
241+
struct page *page = req->wb_page;
242+
243+
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
244+
if (bytes > hdr->good_bytes)
245+
zero_user(page, 0, PAGE_SIZE);
246+
else if (hdr->good_bytes - bytes < PAGE_SIZE)
247+
zero_user_segment(page,
248+
hdr->good_bytes & ~PAGE_MASK,
249+
PAGE_SIZE);
250+
}
251+
bytes += req->wb_bytes;
252+
nfs_list_remove_request(req);
253+
nfs_direct_readpage_release(req);
254+
if (!PageCompound(page))
255+
set_page_dirty(page);
256+
page_cache_release(page);
257+
}
252258
} else {
253-
dreq->count += data->res.count;
254-
spin_unlock(&dreq->lock);
255-
nfs_direct_dirty_pages(data->pages.pagevec,
256-
data->args.pgbase,
257-
data->res.count);
259+
while (!list_empty(&hdr->pages)) {
260+
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
261+
262+
if (bytes < hdr->good_bytes)
263+
if (!PageCompound(req->wb_page))
264+
set_page_dirty(req->wb_page);
265+
bytes += req->wb_bytes;
266+
page_cache_release(req->wb_page);
267+
nfs_list_remove_request(req);
268+
nfs_direct_readpage_release(req);
269+
}
258270
}
259-
nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
260-
271+
out_put:
261272
if (put_dreq(dreq))
262273
nfs_direct_complete(dreq);
263-
nfs_readdata_release(data);
274+
hdr->release(hdr);
264275
}
265276

266-
static const struct rpc_call_ops nfs_read_direct_ops = {
267-
.rpc_call_prepare = nfs_read_prepare,
268-
.rpc_call_done = nfs_direct_read_result,
269-
.rpc_release = nfs_direct_read_release,
270-
};
271-
272-
static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
277+
static void nfs_sync_pgio_error(struct list_head *head)
273278
{
274-
struct nfs_read_data *data = &rhdr->rpc_data;
279+
struct nfs_page *req;
275280

276-
if (data->pages.pagevec != data->pages.page_array)
277-
kfree(data->pages.pagevec);
278-
nfs_readhdr_free(&rhdr->header);
281+
while (!list_empty(head)) {
282+
req = nfs_list_entry(head->next);
283+
nfs_list_remove_request(req);
284+
nfs_release_request(req);
285+
}
279286
}
280287

288+
static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
289+
{
290+
get_dreq(hdr->dreq);
291+
}
292+
293+
static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
294+
.error_cleanup = nfs_sync_pgio_error,
295+
.init_hdr = nfs_direct_pgio_init,
296+
.completion = nfs_direct_read_completion,
297+
};
298+
281299
/*
282300
* For each rsize'd chunk of the user's buffer, dispatch an NFS READ
283301
* operation. If nfs_readdata_alloc() or get_user_pages() fails,
284302
* bail and stop sending more reads. Read length accounting is
285303
* handled automatically by nfs_direct_read_result(). Otherwise, if
286304
* no requests have been sent, just return an error.
287305
*/
288-
static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
306+
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
289307
const struct iovec *iov,
290308
loff_t pos)
291309
{
310+
struct nfs_direct_req *dreq = desc->pg_dreq;
292311
struct nfs_open_context *ctx = dreq->ctx;
293312
struct inode *inode = ctx->dentry->d_inode;
294313
unsigned long user_addr = (unsigned long)iov->iov_base;
295314
size_t count = iov->iov_len;
296315
size_t rsize = NFS_SERVER(inode)->rsize;
297-
struct rpc_task *task;
298-
struct rpc_message msg = {
299-
.rpc_cred = ctx->cred,
300-
};
301-
struct rpc_task_setup task_setup_data = {
302-
.rpc_client = NFS_CLIENT(inode),
303-
.rpc_message = &msg,
304-
.callback_ops = &nfs_read_direct_ops,
305-
.workqueue = nfsiod_workqueue,
306-
.flags = RPC_TASK_ASYNC,
307-
};
308316
unsigned int pgbase;
309317
int result;
310318
ssize_t started = 0;
319+
struct page **pagevec = NULL;
320+
unsigned int npages;
311321

312322
do {
313-
struct nfs_read_header *rhdr;
314-
struct nfs_read_data *data;
315-
struct nfs_page_array *pages;
316323
size_t bytes;
324+
int i;
317325

318326
pgbase = user_addr & ~PAGE_MASK;
319-
bytes = min(rsize,count);
327+
bytes = min(max(rsize, PAGE_SIZE), count);
320328

321329
result = -ENOMEM;
322-
rhdr = nfs_readhdr_alloc();
323-
if (unlikely(!rhdr))
324-
break;
325-
data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
326-
if (!data) {
327-
nfs_readhdr_free(&rhdr->header);
330+
npages = nfs_page_array_len(pgbase, bytes);
331+
if (!pagevec)
332+
pagevec = kmalloc(npages * sizeof(struct page *),
333+
GFP_KERNEL);
334+
if (!pagevec)
328335
break;
329-
}
330-
data->header = &rhdr->header;
331-
atomic_inc(&data->header->refcnt);
332-
pages = &data->pages;
333-
334336
down_read(&current->mm->mmap_sem);
335337
result = get_user_pages(current, current->mm, user_addr,
336-
pages->npages, 1, 0, pages->pagevec, NULL);
338+
npages, 1, 0, pagevec, NULL);
337339
up_read(&current->mm->mmap_sem);
338-
if (result < 0) {
339-
nfs_direct_readhdr_release(rhdr);
340+
if (result < 0)
340341
break;
341-
}
342-
if ((unsigned)result < pages->npages) {
342+
if ((unsigned)result < npages) {
343343
bytes = result * PAGE_SIZE;
344344
if (bytes <= pgbase) {
345-
nfs_direct_release_pages(pages->pagevec, result);
346-
nfs_direct_readhdr_release(rhdr);
345+
nfs_direct_release_pages(pagevec, result);
347346
break;
348347
}
349348
bytes -= pgbase;
350-
pages->npages = result;
349+
npages = result;
351350
}
352351

353-
get_dreq(dreq);
354-
355-
rhdr->header.req = (struct nfs_page *) dreq;
356-
rhdr->header.inode = inode;
357-
rhdr->header.cred = msg.rpc_cred;
358-
data->args.fh = NFS_FH(inode);
359-
data->args.context = get_nfs_open_context(ctx);
360-
data->args.lock_context = dreq->l_ctx;
361-
data->args.offset = pos;
362-
data->args.pgbase = pgbase;
363-
data->args.pages = pages->pagevec;
364-
data->args.count = bytes;
365-
data->res.fattr = &data->fattr;
366-
data->res.eof = 0;
367-
data->res.count = bytes;
368-
nfs_fattr_init(&data->fattr);
369-
msg.rpc_argp = &data->args;
370-
msg.rpc_resp = &data->res;
371-
372-
task_setup_data.task = &data->task;
373-
task_setup_data.callback_data = data;
374-
NFS_PROTO(inode)->read_setup(data, &msg);
375-
376-
task = rpc_run_task(&task_setup_data);
377-
if (IS_ERR(task))
378-
break;
379-
380-
dprintk("NFS: %5u initiated direct read call "
381-
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
382-
task->tk_pid,
383-
inode->i_sb->s_id,
384-
(long long)NFS_FILEID(inode),
385-
bytes,
386-
(unsigned long long)data->args.offset);
387-
rpc_put_task(task);
388-
389-
started += bytes;
390-
user_addr += bytes;
391-
pos += bytes;
392-
/* FIXME: Remove this unnecessary math from final patch */
393-
pgbase += bytes;
394-
pgbase &= ~PAGE_MASK;
395-
BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
396-
397-
count -= bytes;
352+
for (i = 0; i < npages; i++) {
353+
struct nfs_page *req;
354+
unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
355+
/* XXX do we need to do the eof zeroing found in async_filler? */
356+
req = nfs_create_request(dreq->ctx, dreq->inode,
357+
pagevec[i],
358+
pgbase, req_len);
359+
if (IS_ERR(req)) {
360+
nfs_direct_release_pages(pagevec + i,
361+
npages - i);
362+
result = PTR_ERR(req);
363+
break;
364+
}
365+
req->wb_index = pos >> PAGE_SHIFT;
366+
req->wb_offset = pos & ~PAGE_MASK;
367+
if (!nfs_pageio_add_request(desc, req)) {
368+
result = desc->pg_error;
369+
nfs_release_request(req);
370+
nfs_direct_release_pages(pagevec + i,
371+
npages - i);
372+
break;
373+
}
374+
pgbase = 0;
375+
bytes -= req_len;
376+
started += req_len;
377+
user_addr += req_len;
378+
pos += req_len;
379+
count -= req_len;
380+
}
398381
} while (count != 0);
399382

383+
kfree(pagevec);
384+
400385
if (started)
401386
return started;
402387
return result < 0 ? (ssize_t) result : -EFAULT;
@@ -407,15 +392,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
407392
unsigned long nr_segs,
408393
loff_t pos)
409394
{
395+
struct nfs_pageio_descriptor desc;
410396
ssize_t result = -EINVAL;
411397
size_t requested_bytes = 0;
412398
unsigned long seg;
413399

400+
nfs_pageio_init_read(&desc, dreq->inode,
401+
&nfs_direct_read_completion_ops);
414402
get_dreq(dreq);
403+
desc.pg_dreq = dreq;
415404

416405
for (seg = 0; seg < nr_segs; seg++) {
417406
const struct iovec *vec = &iov[seg];
418-
result = nfs_direct_read_schedule_segment(dreq, vec, pos);
407+
result = nfs_direct_read_schedule_segment(&desc, vec, pos);
419408
if (result < 0)
420409
break;
421410
requested_bytes += result;
@@ -424,6 +413,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
424413
pos += vec->iov_len;
425414
}
426415

416+
nfs_pageio_complete(&desc);
417+
427418
/*
428419
* If no bytes were started, return the error, and let the
429420
* generic layer handle the completion.

fs/nfs/internal.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,8 +304,9 @@ struct nfs_pgio_completion_ops;
304304
/* read.c */
305305
extern struct nfs_read_header *nfs_readhdr_alloc(void);
306306
extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
307-
extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
308-
unsigned int pagecount);
307+
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
308+
struct inode *inode,
309+
const struct nfs_pgio_completion_ops *compl_ops);
309310
extern int nfs_initiate_read(struct rpc_clnt *clnt,
310311
struct nfs_read_data *data,
311312
const struct rpc_call_ops *call_ops);

fs/nfs/pagelist.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
4848
hdr->cred = hdr->req->wb_context->cred;
4949
hdr->io_start = req_offset(hdr->req);
5050
hdr->good_bytes = desc->pg_count;
51+
hdr->dreq = desc->pg_dreq;
5152
hdr->release = release;
5253
hdr->completion_ops = desc->pg_completion_ops;
54+
if (hdr->completion_ops->init_hdr)
55+
hdr->completion_ops->init_hdr(hdr);
5356
}
5457

5558
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
@@ -116,9 +119,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
116119
req->wb_page = page;
117120
req->wb_index = page->index;
118121
page_cache_get(page);
119-
BUG_ON(PagePrivate(page));
120-
BUG_ON(!PageLocked(page));
121-
BUG_ON(page->mapping->host != inode);
122122
req->wb_offset = offset;
123123
req->wb_pgbase = offset;
124124
req->wb_bytes = count;
@@ -257,6 +257,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
257257
desc->pg_ioflags = io_flags;
258258
desc->pg_error = 0;
259259
desc->pg_lseg = NULL;
260+
desc->pg_dreq = NULL;
260261
}
261262

262263
/**

0 commit comments

Comments
 (0)