Skip to content

Commit

Permalink
Illumos 4970-4974 - extreme rewind enhancements
Browse files Browse the repository at this point in the history
4970 need controls on i/o issued by zpool import -XF
4971 zpool import -T should accept hex values
4972 zpool import -T implies extreme rewind, and thus a scrub
4973 spa_load_retry retries the same txg
4974 spa_load_verify() reads all data twice
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
  https://www.illumos.org/issues/4970
  https://www.illumos.org/issues/4971
  https://www.illumos.org/issues/4972
  https://www.illumos.org/issues/4973
  https://www.illumos.org/issues/4974
  illumos/illumos-gate@e42d205

Notes:
    This set of patches adds a set of tunable parameters for the
    "extreme rewind" mode of pool import which allows control over
    the traversal performed during such an import.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes openzfs#2598
  • Loading branch information
ahrens authored and behlendorf committed Aug 26, 2014
1 parent 49ddb31 commit dea377c
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 15 deletions.
4 changes: 2 additions & 2 deletions cmd/zpool/zpool_main.c
Expand Up @@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
*/
Expand Down Expand Up @@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv)

case 'T':
errno = 0;
txg = strtoull(optarg, &endptr, 10);
txg = strtoull(optarg, &endptr, 0);
if (errno != 0 || *endptr != '\0') {
(void) fprintf(stderr,
gettext("invalid txg value\n"));
Expand Down
46 changes: 46 additions & 0 deletions man/man5/zfs-module-parameters.5
Expand Up @@ -230,6 +230,52 @@ they operate close to quota or capacity limits.
Default value: 24
.RE

.sp
.ne 2
.na
\fBspa_load_verify_data\fR (int)
.ad
.RS 12n
Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR)
import. Use 0 to disable and 1 to enable.

An extreme rewind import normally performs a full traversal of all
blocks in the pool for verification. If this parameter is set to 0,
the traversal skips non-metadata blocks. It can be toggled once the
import has started to stop or start the traversal of non-metadata blocks.
.sp
Default value: 1
.RE

.sp
.ne 2
.na
\fBspa_load_verify_metadata\fR (int)
.ad
.RS 12n
Whether to traverse blocks during an "extreme rewind" (\fB-X\fR)
pool import. Use 0 to disable and 1 to enable.

An extreme rewind import normally performs a full traversal of all
blocks in the pool for verification. If this parameter is set to 0,
the traversal is not performed. It can be toggled once the import has
started to stop or start the traversal.
.sp
Default value: 1
.RE

.sp
.ne 2
.na
\fBspa_load_verify_maxinflight\fR (int)
.ad
.RS 12n
Maximum concurrent I/Os during the traversal performed during an "extreme
rewind" (\fB-X\fR) pool import.
.sp
Default value: 10000
.RE

.sp
.ne 2
.na
Expand Down
79 changes: 66 additions & 13 deletions module/zfs/spa.c
Expand Up @@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio)
spa_load_error_t *sle = zio->io_private;
dmu_object_type_t type = BP_GET_TYPE(bp);
int error = zio->io_error;
spa_t *spa = zio->io_spa;

if (error) {
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
Expand All @@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio)
atomic_add_64(&sle->sle_data_count, 1);
}
zio_data_buf_free(zio->io_data, zio->io_size);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
cv_broadcast(&spa->spa_scrub_io_cv);
mutex_exit(&spa->spa_scrub_lock);
}

/*
* Tunables for the block traversal performed by spa_load_verify() while
* a pool is being imported.
*
* spa_load_verify_maxinflight: maximum number of concurrent scrub i/os to
* create while verifying a pool while importing it. spa_load_verify_cb()
* waits before issuing another read for as long as spa_scrub_inflight is
* at or above this value.
*/
int spa_load_verify_maxinflight = 10000;
/*
* When zero, spa_load_verify() does not call traverse_pool() and
* spa_load_verify_cb() returns without issuing a read, so the flag can
* also be cleared after a traversal has already begun.
*/
int spa_load_verify_metadata = B_TRUE;
/*
* When zero, spa_load_verify_cb() skips blocks whose buffer type is
* ARC_BUFC_DATA; all other blocks are still read.
*/
int spa_load_verify_data = B_TRUE;

/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
zio_t *rio = arg;
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
zio_t *rio;
size_t size;
void *data;

zio_nowait(zio_read(rio, spa, bp, data, size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
}
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
/*
* Note: normally this routine will not be called if
* spa_load_verify_metadata is not set. However, it may be useful
* to manually set the flag after the traversal has begun.
*/
if (!spa_load_verify_metadata)
return (0);
if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
return (0);

rio = arg;
size = BP_GET_PSIZE(bp);
data = zio_data_buf_alloc(size);

mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(rio, spa, bp, data, size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
return (0);
}

Expand All @@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa)
spa_load_error_t sle = { 0 };
zpool_rewind_policy_t policy;
boolean_t verify_ok = B_FALSE;
int error;
int error = 0;

zpool_get_rewind_policy(spa->spa_config, &policy);

Expand All @@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa)
rio = zio_root(spa, NULL, &sle,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
if (spa_load_verify_metadata) {
error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
spa_load_verify_cb, rio);
}

(void) zio_wait(rio);

Expand Down Expand Up @@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
spa_unload(spa);
spa_deactivate(spa);

spa->spa_load_max_txg--;
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;

spa_activate(spa, mode);
spa_async_suspend(spa);
Expand Down Expand Up @@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
spa->spa_load_max_txg = max_request;
if (max_request != UINT64_MAX)
spa->spa_extreme_rewind = B_TRUE;
}

load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
Expand Down Expand Up @@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify);
#endif

#if defined(_KERNEL) && defined(HAVE_SPL)
/*
* Register the three import-verification tunables as integer module
* parameters with mode 0644. This section is only compiled when both
* _KERNEL and HAVE_SPL are defined.
*/
module_param(spa_load_verify_maxinflight, int, 0644);
MODULE_PARM_DESC(spa_load_verify_maxinflight,
"Max concurrent traversal I/Os while verifying pool during import -X");

module_param(spa_load_verify_metadata, int, 0644);
MODULE_PARM_DESC(spa_load_verify_metadata,
"Set to traverse metadata on pool import");

module_param(spa_load_verify_data, int, 0644);
MODULE_PARM_DESC(spa_load_verify_data,
"Set to traverse data on pool import");
#endif

0 comments on commit dea377c

Please sign in to comment.