cifs: Change the I/O paths to use an iterator rather than a page list

Currently, the cifs I/O paths hand lists of pages from the VM interface
routines at the top all the way through the intervening layers to the
socket interface at the bottom.

This is a problem, however, for interfacing with netfslib which passes an
iterator through to the ->issue_read() method (and will pass an iterator
through to the ->issue_write() method in future).  Netfslib takes over
bounce buffering for direct I/O, async I/O and encrypted content, so cifs
doesn't need to do that.  Netfslib also converts IOVEC-type iterators into
BVEC-type iterators if necessary.

Further, cifs needs converting to use folios, and folios may come in a
variety of sizes, so a page list pointing to an array of heterogeneous
pages may cause problems in places such as where crypto is done.

Change the cifs I/O paths to hand iov_iter iterators all the way through
instead.

Notes:

 (1) Some old routines are #if'd out, to be removed in a follow-up patch,
     so as to avoid a confusing diff and keep the output easier to follow.
     I've removed outright those functions that don't overlap with
     anything added.

 (2) struct smb_rqst loses rq_pages, rq_offset, rq_npages, rq_pagesz and
     rq_tailsz which describe the pages forming the buffer; instead there's
     an rq_iter describing the source buffer and an rq_buffer which is used
     to hold the buffer for encryption.
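
     To illustrate, a caller that used to fill in the five page-list
     fields now describes its payload with a single iterator.  A minimal
     sketch, assuming a bio_vec array is already to hand; the helper and
     parameter names are hypothetical, not code from this patch, and
     ITER_SOURCE is the current spelling of the data direction (WRITE on
     older kernels):

        /* Hypothetical sketch: one iterator replaces the old five
         * rq_pages/rq_offset/rq_npages/rq_pagesz/rq_tailsz fields.
         */
        static void smb3_init_rqst(struct smb_rqst *rqst, struct kvec *iov,
                                   struct bio_vec *bvecs,
                                   unsigned int nr_bvecs, size_t payload_len)
        {
                rqst->rq_iov = iov;     /* protocol header kvecs, as before */
                rqst->rq_nvec = 2;
                iov_iter_bvec(&rqst->rq_iter, ITER_SOURCE, bvecs, nr_bvecs,
                              payload_len);
        }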

 (3) struct cifs_readdata and cifs_writedata are modified similarly to
     smb_rqst.  The ->read_into_pages() and ->copy_into_pages() methods
     are replaced by passing the iterator directly to the socket.

     The iterators are stored in these structs so that they are persistent
     and don't get deallocated when the function returns (unlike if they
     were stack variables).
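
     A minimal sketch of that point, with a hypothetical helper name; the
     iterator is embedded by value, so an async completion can still use
     it after the submitting function has returned:

        /* Hedged sketch: copy the caller's iterator into the request
         * struct by value; a pointer to a stack-resident iterator would
         * dangle once the submitting function returned.
         */
        static void cifs_save_write_iter(struct cifs_writedata *wdata,
                                         const struct iov_iter *source)
        {
                wdata->iter = *source;  /* struct copy, not a pointer */
        }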

 (4) Buffered writeback is overhauled, borrowing the code from the afs
     filesystem to gather up contiguous runs of folios.  The XARRAY-type
     iterator is then used to refer directly to the pagecache and can be
     passed to the socket to transmit data straight from there (see the
     sketch after the list below).

     This includes:

	cifs_extend_writeback()
	cifs_write_back_from_locked_folio()
	cifs_writepages_region()
	cifs_writepages()
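
     A simplified sketch of the pagecache-direct idea behind these
     functions; cifs_xmit_from_pagecache() is a made-up name, and locking,
     credits and error handling are elided:

        /* Hedged sketch: wrap the pagecache xarray of @mapping in an
         * XARRAY-type iterator covering [start, start + len) and hand
         * that to the transport; no intermediate page array is built.
         */
        static void cifs_xmit_from_pagecache(struct address_space *mapping,
                                             loff_t start, size_t len,
                                             struct smb_rqst *rqst)
        {
                iov_iter_xarray(&rqst->rq_iter, ITER_SOURCE,
                                &mapping->i_pages, start, len);
                /* ... issue rqst to the server ... */
        }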

 (5) Pages are converted to folios.

 (6) Direct I/O uses netfs_extract_user_iter() to create a BVEC-type
     iterator from an IOVEC/UBUF-type source iterator.
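
     Roughly what that conversion amounts to, sketched with the generic
     iov_iter helpers; the real netfs_extract_user_iter() also deals with
     pinning modes and cleanup, and all names below are local to the
     sketch:

        /* Hedged sketch: pull the pages backing a user iterator (taking
         * page refs the caller must drop later), then re-describe them
         * with a BVEC-type iterator so that lower layers never touch
         * userspace directly.
         */
        static ssize_t user_iter_to_bvec(struct iov_iter *src, size_t len,
                                         struct page **pages,
                                         struct bio_vec *bv,
                                         unsigned int max_pages,
                                         struct iov_iter *dst)
        {
                size_t off, remain, part;
                unsigned int i = 0;
                ssize_t got;

                got = iov_iter_get_pages2(src, pages, len, max_pages, &off);
                if (got <= 0)
                        return got;

                for (remain = got; remain; i++) {
                        part = min_t(size_t, remain, PAGE_SIZE - off);
                        bv[i].bv_page   = pages[i];
                        bv[i].bv_offset = off;
                        bv[i].bv_len    = part;
                        off = 0;
                        remain -= part;
                }
                iov_iter_bvec(dst, ITER_SOURCE, bv, i, got);
                return got;
        }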

 (7) smb2_get_aead_req() uses netfs_extract_iter_to_sg() to extract page
     fragments from the iterator into the scatterlists that the crypto
     layer prefers.
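
     A hedged sketch of the extraction step, restricted to a BVEC-type
     iterator for brevity; the real helper copes with the other iterator
     types as well:

        /* Hedged sketch: translate each fragment of a BVEC-type iterator
         * into an entry of a preallocated scatterlist for the crypto
         * layer.
         */
        static unsigned int bvec_iter_to_sg(const struct iov_iter *iter,
                                            struct scatterlist *sgl)
        {
                const struct bio_vec *bv = iter->bvec;
                size_t skip = iter->iov_offset;
                size_t left = iov_iter_count(iter);
                unsigned int nents = 0;

                for (; left; bv++) {
                        size_t len = min_t(size_t, bv->bv_len - skip, left);

                        sg_set_page(&sgl[nents++], bv->bv_page, len,
                                    bv->bv_offset + skip);
                        skip = 0;
                        left -= len;
                }
                return nents;
        }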

 (8) smb2_init_transform_rq() attaches pages to smb_rqst::rq_buffer, an
     xarray, to use as a bounce buffer for encryption.  An XARRAY-type
     iterator can then be used to pass the bounce buffer to lower layers.
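
     The bounce-buffer pattern, sketched with a hypothetical helper name;
     the real code lives in smb2_init_transform_rq(), and the error
     unwinding is abbreviated here:

        /* Hedged sketch: populate an xarray with freshly allocated pages
         * and wrap it in an XARRAY-type iterator for the lower layers.
         */
        static int smb3_alloc_bounce_buffer(struct xarray *buffer,
                                            size_t len,
                                            struct iov_iter *iter)
        {
                pgoff_t index, npages = DIV_ROUND_UP(len, PAGE_SIZE);

                xa_init(buffer);
                for (index = 0; index < npages; index++) {
                        struct page *page = alloc_page(GFP_KERNEL);
                        void *old;

                        if (!page)
                                return -ENOMEM;
                        old = xa_store(buffer, index, page, GFP_KERNEL);
                        if (xa_is_err(old)) {
                                __free_page(page);
                                return xa_err(old);
                        }
                }
                iov_iter_xarray(iter, ITER_SOURCE, buffer, 0, len);
                return 0;
        }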

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Paulo Alcantara <pc@cjr.nz>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
dhowells committed Jan 16, 2023
1 parent 175b548 · commit c42cd4a
Showing 14 changed files with 1,127 additions and 1,094 deletions.
1 change: 1 addition & 0 deletions fs/cifs/Kconfig
@@ -18,6 +18,7 @@ config CIFS
 	select DNS_RESOLVER
 	select ASN1
 	select OID_REGISTRY
+	select NETFS_SUPPORT
 	help
 	  This is the client VFS module for the SMB3 family of NAS protocols,
 	  (including support for the most recent, most secure dialect SMB3.1.1)
28 changes: 6 additions & 22 deletions fs/cifs/cifsencrypt.c
@@ -169,11 +169,11 @@ static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize,
 }
 
 int __cifs_calc_signature(struct smb_rqst *rqst,
-			struct TCP_Server_Info *server, char *signature,
-			struct shash_desc *shash)
+			  struct TCP_Server_Info *server, char *signature,
+			  struct shash_desc *shash)
 {
 	int i;
-	int rc;
+	ssize_t rc;
 	struct kvec *iov = rqst->rq_iov;
 	int n_vec = rqst->rq_nvec;
 
@@ -205,25 +205,9 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
 		}
 	}
 
-	/* now hash over the rq_pages array */
-	for (i = 0; i < rqst->rq_npages; i++) {
-		void *kaddr;
-		unsigned int len, offset;
-
-		rqst_page_get_length(rqst, i, &len, &offset);
-
-		kaddr = (char *) kmap(rqst->rq_pages[i]) + offset;
-
-		rc = crypto_shash_update(shash, kaddr, len);
-		if (rc) {
-			cifs_dbg(VFS, "%s: Could not update with payload\n",
-				 __func__);
-			kunmap(rqst->rq_pages[i]);
-			return rc;
-		}
-
-		kunmap(rqst->rq_pages[i]);
-	}
+	rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash);
+	if (rc < 0)
+		return rc;
 
 	rc = crypto_shash_final(shash, signature);
 	if (rc)
66 changes: 33 additions & 33 deletions fs/cifs/cifsglob.h
@@ -216,11 +216,8 @@ static inline void cifs_free_open_info(struct cifs_open_info_data *data)
 struct smb_rqst {
 	struct kvec *rq_iov;		/* array of kvecs */
 	unsigned int rq_nvec;		/* number of kvecs in array */
-	struct page **rq_pages;		/* pointer to array of page ptrs */
-	unsigned int rq_offset;		/* the offset to the 1st page */
-	unsigned int rq_npages;		/* number pages in array */
-	unsigned int rq_pagesz;		/* page size to use */
-	unsigned int rq_tailsz;		/* length of last page */
+	struct iov_iter rq_iter;	/* Data iterator */
+	struct xarray rq_buffer;	/* Page buffer for encryption */
 };
 
 struct mid_q_entry;
@@ -1426,10 +1423,11 @@ struct cifs_aio_ctx {
 	struct cifsFileInfo	*cfile;
 	struct bio_vec		*bv;
 	loff_t			pos;
-	unsigned int		npages;
+	unsigned int		nr_pinned_pages;
 	ssize_t			rc;
 	unsigned int		len;
 	unsigned int		total_len;
+	unsigned int		bv_cleanup_mode; /* How to clean up ->bv[] */
 	bool			should_dirty;
 	/*
 	 * Indicates if this aio_ctx is for direct_io,
@@ -1447,28 +1445,18 @@ struct cifs_readdata {
 	struct address_space		*mapping;
 	struct cifs_aio_ctx		*ctx;
 	__u64				offset;
+	ssize_t				got_bytes;
 	unsigned int			bytes;
-	unsigned int			got_bytes;
 	pid_t				pid;
 	int				result;
 	struct work_struct		work;
-	int (*read_into_pages)(struct TCP_Server_Info *server,
-			       struct cifs_readdata *rdata,
-			       unsigned int len);
-	int (*copy_into_pages)(struct TCP_Server_Info *server,
-			       struct cifs_readdata *rdata,
-			       struct iov_iter *iter);
+	struct iov_iter			iter;
 	struct kvec			iov[2];
 	struct TCP_Server_Info		*server;
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	struct smbd_mr			*mr;
 #endif
-	unsigned int			pagesz;
-	unsigned int			page_offset;
-	unsigned int			tailsz;
 	struct cifs_credits		credits;
-	unsigned int			nr_pages;
-	struct page			**pages;
 };
 
 /* asynchronous write support */
@@ -1480,6 +1468,8 @@ struct cifs_writedata {
 	struct work_struct		work;
 	struct cifsFileInfo		*cfile;
 	struct cifs_aio_ctx		*ctx;
+	struct iov_iter			iter;
+	struct bio_vec			*bv;
 	__u64				offset;
 	pid_t				pid;
 	unsigned int			bytes;
@@ -1488,12 +1478,7 @@ struct cifs_writedata {
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	struct smbd_mr			*mr;
 #endif
-	unsigned int			pagesz;
-	unsigned int			page_offset;
-	unsigned int			tailsz;
 	struct cifs_credits		credits;
-	unsigned int			nr_pages;
-	struct page			**pages;
 };
 
 /*
@@ -2153,9 +2138,9 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const
 	dst->FileNameLength = src->FileNameLength;
 }
 
-static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
-					    int num_rqst,
-					    const u8 *sig)
+static inline int cifs_get_num_sgs(const struct smb_rqst *rqst,
+				   int num_rqst,
+				   const u8 *sig)
 {
 	unsigned int len, skip;
 	unsigned int nents = 0;
@@ -2169,6 +2154,20 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
 	 * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
 	 */
 	for (i = 0; i < num_rqst; i++) {
+		/* We really don't want a mixture of pinned and unpinned pages
+		 * in the sglist.  It's hard to keep track of which is what.
+		 * Instead, we convert to a BVEC-type iterator higher up.
+		 */
+		if (WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter)))
+			return -EIO;
+
+		/* We also don't want to have any extra refs or pins
+		 * to clean up in the sglist.
+		 */
+		if (WARN_ON_ONCE(iov_iter_extract_mode(&rqst[i].rq_iter,
+						       FOLL_DEST_BUF)))
+			return -EIO;
+
 		/*
 		 * The first rqst has a transform header where the
 		 * first 20 bytes are not part of the encrypted blob.
@@ -2186,7 +2185,7 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
 				nents++;
 			}
 		}
-		nents += rqst[i].rq_npages;
+		nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX);
 	}
 	nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE);
 	return nents;
@@ -2195,9 +2194,9 @@ static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
 /* We can not use the normal sg_set_buf() as we will sometimes pass a
  * stack object as buf.
  */
-static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
-						  const void *buf,
-						  unsigned int buflen)
+static inline void cifs_sg_set_buf(struct sg_table *sgtable,
+				   const void *buf,
+				   unsigned int buflen)
 {
 	unsigned long addr = (unsigned long)buf;
 	unsigned int off = offset_in_page(addr);
@@ -2207,16 +2206,17 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
 		do {
 			unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off);
 
-			sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off);
+			sg_set_page(&sgtable->sgl[sgtable->nents++],
+				    vmalloc_to_page((void *)addr), len, off);
 
 			off = 0;
 			addr += PAGE_SIZE;
 			buflen -= len;
 		} while (buflen);
 	} else {
-		sg_set_page(sg++, virt_to_page(addr), buflen, off);
+		sg_set_page(&sgtable->sgl[sgtable->nents++],
+			    virt_to_page(addr), buflen, off);
 	}
-	return sg;
 }
 
 #endif /* _CIFS_GLOB_H */
8 changes: 1 addition & 7 deletions fs/cifs/cifsproto.h
@@ -584,10 +584,7 @@ int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid);
 int cifs_async_writev(struct cifs_writedata *wdata,
 		      void (*release)(struct kref *kref));
 void cifs_writev_complete(struct work_struct *work);
-struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
-					    work_func_t complete);
-struct cifs_writedata *cifs_writedata_direct_alloc(struct page **pages,
-						   work_func_t complete);
+struct cifs_writedata *cifs_writedata_alloc(work_func_t complete);
 void cifs_writedata_release(struct kref *refcount);
 int cifs_query_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
 			  struct cifs_sb_info *cifs_sb,
@@ -604,13 +601,10 @@ enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
 				enum securityEnum);
 struct cifs_aio_ctx *cifs_aio_ctx_alloc(void);
 void cifs_aio_ctx_release(struct kref *refcount);
-int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);
 
 int cifs_alloc_hash(const char *name, struct shash_desc **sdesc);
 void cifs_free_hash(struct shash_desc **sdesc);
 
-void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
-			  unsigned int *len, unsigned int *offset);
 struct cifs_chan *
 cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
 int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
13 changes: 3 additions & 10 deletions fs/cifs/cifssmb.c
@@ -24,6 +24,7 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/uaccess.h>
 #include "cifspdu.h"
+#include "cifsfs.h"
 #include "cifsglob.h"
 #include "cifsacl.h"
 #include "cifsproto.h"
@@ -1294,11 +1295,7 @@ cifs_readv_callback(struct mid_q_entry *mid)
 	struct TCP_Server_Info *server = tcon->ses->server;
 	struct smb_rqst rqst = { .rq_iov = rdata->iov,
 				 .rq_nvec = 2,
-				 .rq_pages = rdata->pages,
-				 .rq_offset = rdata->page_offset,
-				 .rq_npages = rdata->nr_pages,
-				 .rq_pagesz = rdata->pagesz,
-				 .rq_tailsz = rdata->tailsz };
+				 .rq_iter = rdata->iter };
 	struct cifs_credits credits = { .value = 1, .instance = 0 };
 
 	cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n",
@@ -1737,11 +1734,7 @@ cifs_async_writev(struct cifs_writedata *wdata,
 
 	rqst.rq_iov = iov;
 	rqst.rq_nvec = 2;
-	rqst.rq_pages = wdata->pages;
-	rqst.rq_offset = wdata->page_offset;
-	rqst.rq_npages = wdata->nr_pages;
-	rqst.rq_pagesz = wdata->pagesz;
-	rqst.rq_tailsz = wdata->tailsz;
+	rqst.rq_iter = wdata->iter;
 
 	cifs_dbg(FYI, "async write at %llu %u bytes\n",
 		 wdata->offset, wdata->bytes);
