Skip to content
Permalink
Browse files

dcp: add --chunksize and --blocksize options

Add command-line options for --chunksize and --blocksize, and add
them to the dcp.1 man page, since these options now exist for both
dcp and dcp1.

Improve the usage message for these options in dcp.1 to not be
self-referential, and explain what they are actually used for.
It was otherwise unclear what the difference between them was.

Keep the default values at 1MB for now, but it probably makes
sense to make them at least the Lustre stripe size to minimize
contention between multiple threads reading/writing the same file.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
  • Loading branch information...
adilger authored and adammoody committed May 31, 2019
1 parent eb56ee3 commit e559ef151dd45fe27494e4ae311dd292baac4ba9
Showing with 76 additions and 17 deletions.
  1. +19 −4 doc/rst/dcp.1.rst
  2. +19 −3 man/dcp.1
  3. +4 −1 src/common/mfu_flist.h
  4. +1 −1 src/common/mfu_flist_copy.c
  5. +27 −2 src/dcp/dcp.c
  6. +6 −6 src/dcp1/dcp1.c
@@ -11,26 +11,41 @@ DESCRIPTION

Parallel MPI application to recursively copy files and directories.

dcp is a file copy tool in the spirit of :manpage:`cp(1)` that evenly distributes
work across a large cluster without any centralized state. It is
dcp is a file copy tool in the spirit of :manpage:`cp(1)` that evenly
distributes the work of scanning the directory tree and copying file
data across a large cluster without any centralized state. It is
designed for copying files that are located on a distributed parallel
file system.
file system, and will split large file copies across multiple processes.

OPTIONS
-------

.. option:: -b, --blocksize SIZE

Set the I/O buffer to be SIZE bytes. Units like "MB" and "GB" may
immediately follow the number without spaces (e.g. 8MB). The default
blocksize is 1MB.

.. option:: -i, --input FILE

Read source list from FILE. FILE must be generated by another tool
from the mpiFileUtils suite.

.. option:: -k, --chunksize SIZE

Split large files into chunks of SIZE bytes to be processed. Multiple
process ranks may copy a large file in parallel. Units like "MB" and
"GB" may immediately follow the number without spaces (e.g. 64MB).
The default chunksize is 1MB.

.. option:: -p, --preserve

Preserve permissions, group, timestamps, and extended attributes.

.. option:: -s, --synchronous

Use synchronous read/write calls (open files with 0_DIRECT)
Use synchronous read/write calls (open files with O_DIRECT).
This also avoids caching the file data on the client nodes.

.. option:: -S, --sparse

@@ -38,18 +38,34 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
Parallel MPI application to recursively copy files and directories.
.sp
dcp is a file copy tool in the spirit of \fBcp(1)\fP that evenly distributes
work across a large cluster without any centralized state. It is
designed for copying files that are located on a distributed parallel
file system.
the work of scanning the directory tree and copying file data across a
large cluster without any centralized state. It is designed for copying
files that are located on a distributed parallel file system, and will
split large file copies across multiple processes.
.SH OPTIONS
.INDENT 0.0
.TP
.B \-b, \-\-blocksize SIZE
Set the I/O buffer to be SIZE bytes. Units like "MB" and "GB" may
immediately follow the number without spaces (e.g. 8MB). The default
blocksize is 1MB.
.UNINDENT
.INDENT 0.0
.TP
.B \-i, \-\-input FILE
Read source list from FILE. FILE must be generated by another tool
from the mpiFileUtils suite.
.UNINDENT
.INDENT 0.0
.TP
.B \-k, \-\-chunksize SIZE
Split large files into chunks of SIZE bytes to be processed. Multiple
process ranks may copy a large file in parallel. Units like "MB" and
"GB" may immediately follow the number without spaces (e.g. 64MB).
The default chunksize is 1MB.
.UNINDENT
.INDENT 0.0
.TP
.B \-p, \-\-preserve
Preserve permissions, group, timestamps, and extended attributes.
.UNINDENT
@@ -59,7 +59,10 @@ extern "C" {
#define DCOPY_DEF_PERMS_FILE (S_IRUSR | S_IWUSR)
#define DCOPY_DEF_PERMS_DIR (S_IRWXU)

/* buffer size to read/write data to file system */
/* default chunk size to split files into work units */
#define FD_CHUNK_SIZE (1*1024*1024)

/* default buffer size to read/write data to file system */
#define FD_BLOCK_SIZE (1*1024*1024)

/*
@@ -2768,7 +2768,7 @@ mfu_copy_opts_t* mfu_copy_opts_new(void)
opts->sparse = false;

/* Set default chunk size */
opts->chunk_size = 1*1024*1024;
opts->chunk_size = FD_CHUNK_SIZE;

/* temporaries used during the copy operation for buffers to read/write data */
opts->block_size = FD_BLOCK_SIZE;
@@ -65,7 +65,9 @@ void print_usage(void)
#ifdef LUSTRE_SUPPORT
/* printf(" -g, --grouplock <id> - use Lustre grouplock when reading/writing file\n"); */
#endif
printf(" -b, --blocksize - IO buffer size in bytes (default 1MB)\n");
printf(" -i, --input <file> - read source list from file\n");
printf(" -k, --chunksize - work size per task in bytes (default 1MB)\n");
printf(" -p, --preserve - preserve permissions, ownership, timestamps, extended attributes\n");
printf(" -s, --synchronous - use synchronous read/write calls (O_DIRECT)\n");
printf(" -S, --sparse - create sparse files when possible\n");
@@ -109,6 +111,8 @@ int main(int argc, char** argv)

int option_index = 0;
static struct option long_options[] = {
{"blocksize" , required_argument, 0, 'b'},
{"chunksize" , required_argument, 0, 'k'},
{"debug" , required_argument, 0, 'd'}, // undocumented
{"grouplock" , required_argument, 0, 'g'}, // untested
{"input" , required_argument, 0, 'i'},
@@ -123,10 +127,11 @@ int main(int argc, char** argv)
};

/* Parse options */
unsigned long long bytes = 0;
int usage = 0;
while(1) {
int c = getopt_long(
argc, argv, "d:g:i:psSvqh",
argc, argv, "b:d:g:i:k:psSvqh",
long_options, &option_index
);

@@ -135,6 +140,16 @@ int main(int argc, char** argv)
}

switch(c) {
case 'b':
if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS || bytes == 0) {
if (rank == 0)
MFU_LOG(MFU_LOG_ERR,
"Failed to parse block size: '%s'\n", optarg);
usage = 1;
} else {
mfu_copy_opts->block_size = (size_t)bytes;
}
break;
case 'd':
if(strncmp(optarg, "fatal", 5) == 0) {
CIRCLE_debug = CIRCLE_LOG_FATAL;
@@ -182,7 +197,7 @@ int main(int argc, char** argv)
case 'g':
mfu_copy_opts->grouplock_id = atoi(optarg);
if(rank == 0) {
MFU_LOG(MFU_LOG_INFO, "groulock ID: %d.",
MFU_LOG(MFU_LOG_INFO, "grouplock ID: %d.",
mfu_copy_opts->grouplock_id);
}
break;
@@ -193,6 +208,16 @@ int main(int argc, char** argv)
MFU_LOG(MFU_LOG_INFO, "Using input list.");
}
break;
case 'k':
if (mfu_abtoull(optarg, &bytes) != MFU_SUCCESS || bytes == 0) {
if (rank == 0)
MFU_LOG(MFU_LOG_ERR,
"Failed to parse chunk size: '%s'\n", optarg);
usage = 1;
} else {
mfu_copy_opts->chunk_size = bytes;
}
break;
case 'p':
mfu_copy_opts->preserve = true;
if(rank == 0) {
@@ -268,11 +268,11 @@ void DCOPY_print_usage(void)
printf(" -d, --debug <level> - specify debug verbosity level (default info)\n");
printf(" -f, --force - delete destination file if error on open\n");
printf(" -p, --preserve - preserve permissions, ownership, timestamps, extended attributes\n");
printf(" -p, --verbose - verbose output\n");
printf(" -p, --quiet - quiet output\n");
printf(" -v, --verbose - verbose output\n");
printf(" -q, --quiet - quiet output\n");
printf(" -s, --synchronous - use synchronous read/write calls (O_DIRECT)\n");
printf(" -k, --chunksize - specify chunksize in MB unit (default 1MB)\n");
printf(" -b, --blocksize - specify blocksize in MB unit (default 1MB)\n");
printf(" -k, --chunksize - work size per task in bytes (default 1MB)\n");
printf(" -b, --blocksize - IO buffer size in bytes (default 1MB)\n");
printf(" -h, --help - print usage\n");
printf("\n");
printf("Level: dbg,info,warn,err,fatal\n");
@@ -338,12 +338,12 @@ int main(int argc, \
DCOPY_user_opts.synchronous = false;

static struct option long_options[] = {
{"compare" , no_argument , 0, 'c'},
{"blocksize" , required_argument, 0, 'b'},
{"chunksize" , required_argument, 0, 'k'},
{"compare" , no_argument , 0, 'c'},
{"debug" , required_argument, 0, 'd'},
{"force" , no_argument , 0, 'f'},
{"help" , no_argument , 0, 'h'},
{"chunksize" , required_argument, 0, 'k'},
{"preserve" , no_argument , 0, 'p'},
{"verbose" , no_argument , 0, 'v'},
{"quiet" , no_argument , 0, 'q'},

0 comments on commit e559ef1

Please sign in to comment.
You can’t perform that action at this time.