Skip to content

Commit

Permalink
10405 Implement ZFS sorted scans
Browse files Browse the repository at this point in the history
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Kody Kantor <kody.kantor@joyent.com>
Approved by: Dan McDonald <danmcd@joyent.com>
  • Loading branch information
tsoome committed May 13, 2019
1 parent 0250bb1 commit a3874b8
Show file tree
Hide file tree
Showing 45 changed files with 3,260 additions and 926 deletions.
10 changes: 5 additions & 5 deletions usr/src/cmd/zdb/zdb.c
Expand Up @@ -2386,8 +2386,6 @@ dump_dir(objset_t *os)
max_slot_used = object + dnode_slots - 1;
}

ASSERT3U(object_count, ==, usedobjs);

(void) printf("\n");

(void) printf(" Dnode slots:\n");
Expand All @@ -2410,6 +2408,8 @@ dump_dir(objset_t *os)
leaked_objects);
leaked_objects = 0;
}

ASSERT3U(object_count, ==, usedobjs);
}

static void
Expand Down Expand Up @@ -2964,7 +2964,7 @@ zdb_blkptr_done(zio_t *zio)
abd_free(zio->io_abd);

mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
spa->spa_load_verify_ios--;
cv_broadcast(&spa->spa_scrub_io_cv);

if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
Expand Down Expand Up @@ -3035,9 +3035,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
flags |= ZIO_FLAG_SPECULATIVE;

mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight > max_inflight)
while (spa->spa_load_verify_ios > max_inflight)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight++;
spa->spa_load_verify_ios++;
mutex_exit(&spa->spa_scrub_lock);

zio_nowait(zio_read(NULL, spa, bp, abd, size,
Expand Down
131 changes: 77 additions & 54 deletions usr/src/cmd/zpool/zpool_main.c
Expand Up @@ -1692,7 +1692,7 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
(void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &c);

if (ps && ps->pss_state == DSS_SCANNING &&
if (ps != NULL && ps->pss_state == DSS_SCANNING &&
vs->vs_scan_processed != 0 && children == 0) {
(void) printf(gettext(" (%s)"),
(ps->pss_func == POOL_SCAN_RESILVER) ?
Expand Down Expand Up @@ -4707,11 +4707,13 @@ static void
print_scan_status(pool_scan_stat_t *ps)
{
time_t start, end, pause;
uint64_t elapsed, mins_left, hours_left;
uint64_t pass_exam, examined, total;
uint_t rate;
uint64_t total_secs_left;
uint64_t elapsed, secs_left, mins_left, hours_left, days_left;
uint64_t pass_scanned, scanned, pass_issued, issued, total;
uint_t scan_rate, issue_rate;
double fraction_done;
char processed_buf[7], examined_buf[7], total_buf[7], rate_buf[7];
char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
char srate_buf[7], irate_buf[7];

(void) printf(gettext(" scan: "));

Expand All @@ -4725,30 +4727,37 @@ print_scan_status(pool_scan_stat_t *ps)
start = ps->pss_start_time;
end = ps->pss_end_time;
pause = ps->pss_pass_scrub_pause;

zfs_nicenum(ps->pss_processed, processed_buf, sizeof (processed_buf));

assert(ps->pss_func == POOL_SCAN_SCRUB ||
ps->pss_func == POOL_SCAN_RESILVER);

/*
* Scan is finished or canceled.
*/
if (ps->pss_state == DSS_FINISHED) {
uint64_t minutes_taken = (end - start) / 60;
char *fmt = NULL;
total_secs_left = end - start;
days_left = total_secs_left / 60 / 60 / 24;
hours_left = (total_secs_left / 60 / 60) % 24;
mins_left = (total_secs_left / 60) % 60;
secs_left = (total_secs_left % 60);

if (ps->pss_func == POOL_SCAN_SCRUB) {
fmt = gettext("scrub repaired %s in %lluh%um with "
"%llu errors on %s");
(void) printf(gettext("scrub repaired %s "
"in %llu days %02llu:%02llu:%02llu "
"with %llu errors on %s"), processed_buf,
(u_longlong_t)days_left, (u_longlong_t)hours_left,
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
(u_longlong_t)ps->pss_errors, ctime(&end));
} else if (ps->pss_func == POOL_SCAN_RESILVER) {
fmt = gettext("resilvered %s in %lluh%um with "
"%llu errors on %s");
(void) printf(gettext("resilvered %s "
"in %llu days %02llu:%02llu:%02llu "
"with %llu errors on %s"), processed_buf,
(u_longlong_t)days_left, (u_longlong_t)hours_left,
(u_longlong_t)mins_left, (u_longlong_t)secs_left,
(u_longlong_t)ps->pss_errors, ctime(&end));
}
/* LINTED */
(void) printf(fmt, processed_buf,
(u_longlong_t)(minutes_taken / 60),
(uint_t)(minutes_taken % 60),
(u_longlong_t)ps->pss_errors,
ctime((time_t *)&end));
return;
} else if (ps->pss_state == DSS_CANCELED) {
if (ps->pss_func == POOL_SCAN_SCRUB) {
Expand All @@ -4763,70 +4772,84 @@ print_scan_status(pool_scan_stat_t *ps)

assert(ps->pss_state == DSS_SCANNING);

/*
* Scan is in progress.
*/
/* Scan is in progress. Resilvers can't be paused. */
if (ps->pss_func == POOL_SCAN_SCRUB) {
if (pause == 0) {
(void) printf(gettext("scrub in progress since %s"),
ctime(&start));
} else {
char buf[32];
struct tm *p = localtime(&pause);
(void) strftime(buf, sizeof (buf), "%a %b %e %T %Y", p);
(void) printf(gettext("scrub paused since %s\n"), buf);
(void) printf(gettext("\tscrub started on %s"),
(void) printf(gettext("scrub paused since %s"),
ctime(&pause));
(void) printf(gettext("\tscrub started on %s"),
ctime(&start));
}
} else if (ps->pss_func == POOL_SCAN_RESILVER) {
(void) printf(gettext("resilver in progress since %s"),
ctime(&start));
}

examined = ps->pss_examined ? ps->pss_examined : 1;
scanned = ps->pss_examined;
pass_scanned = ps->pss_pass_exam;
issued = ps->pss_issued;
pass_issued = ps->pss_pass_issued;
total = ps->pss_to_examine;
fraction_done = (double)examined / total;

/* elapsed time for this pass */
/* we are only done with a block once we have issued the IO for it */
fraction_done = (double)issued / total;

/* elapsed time for this pass, rounding up to 1 if it's 0 */
elapsed = time(NULL) - ps->pss_pass_start;
elapsed -= ps->pss_pass_scrub_spent_paused;
elapsed = elapsed ? elapsed : 1;
pass_exam = ps->pss_pass_exam ? ps->pss_pass_exam : 1;
rate = pass_exam / elapsed;
rate = rate ? rate : 1;
mins_left = ((total - examined) / rate) / 60;
hours_left = mins_left / 60;

zfs_nicenum(examined, examined_buf, sizeof (examined_buf));
elapsed = (elapsed != 0) ? elapsed : 1;

scan_rate = pass_scanned / elapsed;
issue_rate = pass_issued / elapsed;
total_secs_left = (issue_rate != 0) ?
((total - issued) / issue_rate) : UINT64_MAX;

days_left = total_secs_left / 60 / 60 / 24;
hours_left = (total_secs_left / 60 / 60) % 24;
mins_left = (total_secs_left / 60) % 60;
secs_left = (total_secs_left % 60);

/* format all of the numbers we will be reporting */
zfs_nicenum(scanned, scanned_buf, sizeof (scanned_buf));
zfs_nicenum(issued, issued_buf, sizeof (issued_buf));
zfs_nicenum(total, total_buf, sizeof (total_buf));
zfs_nicenum(scan_rate, srate_buf, sizeof (srate_buf));
zfs_nicenum(issue_rate, irate_buf, sizeof (irate_buf));

/*
* do not print estimated time if hours_left is more than 30 days
* or we have a paused scrub
*/
/* do not print estimated time if we have a paused scrub */
if (pause == 0) {
zfs_nicenum(rate, rate_buf, sizeof (rate_buf));
(void) printf(gettext("\t%s scanned out of %s at %s/s"),
examined_buf, total_buf, rate_buf);
if (hours_left < (30 * 24)) {
(void) printf(gettext(", %lluh%um to go\n"),
(u_longlong_t)hours_left, (uint_t)(mins_left % 60));
} else {
(void) printf(gettext(
", (scan is slow, no estimated time)\n"));
}
(void) printf(gettext("\t%s scanned at %s/s, "
"%s issued at %s/s, %s total\n"),
scanned_buf, srate_buf, issued_buf, irate_buf, total_buf);
} else {
(void) printf(gettext("\t%s scanned out of %s\n"),
examined_buf, total_buf);
(void) printf(gettext("\t%s scanned, %s issued, %s total\n"),
scanned_buf, issued_buf, total_buf);
}

if (ps->pss_func == POOL_SCAN_RESILVER) {
(void) printf(gettext(" %s resilvered, %.2f%% done\n"),
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
processed_buf, 100 * fraction_done);
} else if (ps->pss_func == POOL_SCAN_SCRUB) {
(void) printf(gettext(" %s repaired, %.2f%% done\n"),
(void) printf(gettext("\t%s repaired, %.2f%% done"),
processed_buf, 100 * fraction_done);
}

if (pause == 0) {
if (issue_rate >= 10 * 1024 * 1024) {
(void) printf(gettext(", %llu days "
"%02llu:%02llu:%02llu to go\n"),
(u_longlong_t)days_left, (u_longlong_t)hours_left,
(u_longlong_t)mins_left, (u_longlong_t)secs_left);
} else {
(void) printf(gettext(", no estimated "
"completion time\n"));
}
} else {
(void) printf(gettext("\n"));
}
}

/*
Expand Down
6 changes: 3 additions & 3 deletions usr/src/cmd/ztest/ztest.c
Expand Up @@ -397,15 +397,15 @@ ztest_info_t ztest_info[] = {
{ ztest_fzap, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
{ ztest_spa_create_destroy, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_incessant },
{ ztest_ddt_repair, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
{ ztest_mmp_enable_disable, 1, &zopt_sometimes },
{ ztest_reguid, 1, &zopt_rarely },
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_scrub, 1, &zopt_often },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_sometimes },
{ ztest_vdev_attach_detach, 1, &zopt_incessant },
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
{ ztest_vdev_add_remove, 1,
&ztest_opts.zo_vdevtime },
Expand Down
1 change: 1 addition & 0 deletions usr/src/lib/libfakekernel/common/mapfile-vers
Expand Up @@ -216,6 +216,7 @@ SYMBOL_VERSION SUNWprivate_1.1 {
taskq_empty;
taskq_member;
taskq_wait;
taskq_wait_id;

thread_create;
thread_join;
Expand Down
6 changes: 6 additions & 0 deletions usr/src/lib/libfakekernel/common/taskq.c
Expand Up @@ -234,6 +234,12 @@ taskq_wait(taskq_t *tq)
mutex_exit(&tq->tq_lock);
}

/*
 * Wait variant that takes a task id.  This implementation does not use
 * the id (it is marked __unused); it simply calls taskq_wait() on the
 * whole queue.
 * NOTE(review): presumably that also covers the identified task, since it
 * belongs to tq -- confirm against taskq_wait()'s full body.
 */
void
taskq_wait_id(taskq_t *tq, taskqid_t id __unused)
{
taskq_wait(tq);
}

static void *
taskq_thread(void *arg)
{
Expand Down
2 changes: 1 addition & 1 deletion usr/src/lib/libzfs/common/libzfs_status.c
Expand Up @@ -225,7 +225,7 @@ check_status(nvlist_t *config, boolean_t isimport)
*/
(void) nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_SCAN_STATS,
(uint64_t **)&ps, &psc);
if (ps && ps->pss_func == POOL_SCAN_RESILVER &&
if (ps != NULL && ps->pss_func == POOL_SCAN_RESILVER &&
ps->pss_state == DSS_SCANNING)
return (ZPOOL_STATUS_RESILVERING);

Expand Down
Expand Up @@ -26,7 +26,9 @@
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg

verify_runnable "global"

log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
destroy_mirrors
Expand Up @@ -37,11 +37,8 @@ verify_disk_count "$DISKS" 2

default_mirror_setup_noexit $DISK1 $DISK2

mntpnt=$(get_prop mountpoint $TESTPOOL)
typeset -i i=0
while ((i < 10)); do
log_must mkfile 500M $mntpnt/bigfile.$i
((i += 1))
done
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)

# Create 256M of data
log_must file_write -b 1048576 -c 256 -o create -d 0 -f $mntpnt/bigfile
log_pass
Expand Up @@ -28,5 +28,8 @@
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
#

export DISK1=$(echo $DISKS | awk '{print $1}')
export DISK1=${DISKS%% *}
export DISK2=$(echo $DISKS | awk '{print $2}')

export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
Expand Up @@ -46,23 +46,31 @@
# 6. Verify zpool scrub -s succeeds when the system is scrubbing.
#
# NOTES:
# A 10ms delay is added to the ZIOs in order to ensure that the
# scrub does not complete before it has a chance to be cancelled.
# This can occur when testing with small pools or very fast hardware.
# Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
# low and adding a 50ms zio delay in order to ensure that the scrub does
# not complete early.
#

verify_runnable "global"

# Teardown for this test: run zinject's clear command, put the
# zfs_scan_vdev_limit tunable back to its default value, and delete the
# extra data file the test writes under $mntpnt.
function cleanup
{
# NOTE(review): `-c all` presumably cancels every registered injection
# handler, including the zio delay this test adds -- confirm with zinject(1M).
log_must zinject -c all
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
# -f keeps this from failing if the file was never created.
log_must rm -f $mntpnt/biggerfile
}

log_onexit cleanup

log_assert "Verify scrub, scrub -p, and scrub -s show the right status."

log_must zinject -d $DISK1 -D20:1 $TESTPOOL
# Create 1G of additional data
mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
log_must file_write -b 1048576 -c 1024 -o create -d 0 -f $mntpnt/biggerfile
log_must sync

log_must zinject -d $DISK1 -D50:1 $TESTPOOL
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_must zpool scrub -p $TESTPOOL
Expand Down
Expand Up @@ -43,23 +43,22 @@
# 2. Kick off a second scrub and verify it fails
#
# NOTES:
# A 10ms delay is added to the ZIOs in order to ensure that the
# scrub does not complete before it has a chance to be restarted.
# This can occur when testing with small pools or very fast hardware.
# Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
# low in order to ensure that the scrub does not complete early.
#

verify_runnable "global"

function cleanup
{
log_must zinject -c all
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
}

log_onexit cleanup

log_assert "Scrub command fails when there is already a scrub in progress"

log_must zinject -d $DISK1 -D10:1 $TESTPOOL
log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
log_must zpool scrub $TESTPOOL
log_must is_pool_scrubbing $TESTPOOL true
log_mustnot zpool scrub $TESTPOOL
Expand Down

0 comments on commit a3874b8

Please sign in to comment.