Skip to content

Commit

Permalink
Add callback for zfs_multihost_interval
Browse files Browse the repository at this point in the history
Add a callback to wake all running mmp threads when
zfs_multihost_interval is changed.

This is necessary when the interval is changed from a very large value
to a significantly lower one, while pools are imported that have the
multihost property enabled.

Without this commit, the mmp thread does not wake up and detect the new
interval until after it has waited the old multihost interval time.  A
user monitoring mmp writes via the provided kstat would be led to
believe that the changed setting did not work.

Added a test in the ZTS under mmp to verify the new functionality is
working.

Added a test to ztest which starts and stops mmp threads, and calls into
the code to signal sleeping mmp threads, to test for deadlocks or
similar locking issues.

Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Olaf Faaland <faaland1@llnl.gov>
Closes openzfs#6387
  • Loading branch information
ofaaland authored and behlendorf committed Jul 25, 2017
1 parent 60f5103 commit 0582e40
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 2 deletions.
43 changes: 43 additions & 0 deletions cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ ztest_func_t ztest_spa_create_destroy;
ztest_func_t ztest_fault_inject;
ztest_func_t ztest_ddt_repair;
ztest_func_t ztest_dmu_snapshot_hold;
ztest_func_t ztest_mmp_enable_disable;
ztest_func_t ztest_spa_rename;
ztest_func_t ztest_scrub;
ztest_func_t ztest_dsl_dataset_promote_busy;
Expand Down Expand Up @@ -375,6 +376,7 @@ ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_fault_inject, 1, &zopt_sometimes),
ZTI_INIT(ztest_ddt_repair, 1, &zopt_sometimes),
ZTI_INIT(ztest_dmu_snapshot_hold, 1, &zopt_sometimes),
ZTI_INIT(ztest_mmp_enable_disable, 1, &zopt_sometimes),
ZTI_INIT(ztest_reguid, 1, &zopt_rarely),
ZTI_INIT(ztest_spa_rename, 1, &zopt_rarely),
ZTI_INIT(ztest_scrub, 1, &zopt_rarely),
Expand Down Expand Up @@ -2660,6 +2662,47 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
(void) rw_unlock(&ztest_name_lock);
}

/*
 * Exercise the MMP thread startup and shutdown paths.
 *
 * When multihost is off on the ztest pool, flag it on and start the MMP
 * thread.  Then wait for a sync, signal all MMP threads, and wait for
 * another sync.  Finally, if multihost is on, stop the MMP thread and clear
 * the flag again.  Both the enable and the disable step run with SCL_CONFIG
 * held as reader and spa_props_lock held.
 *
 * Nothing is done when the zo_mmp_test option is set.  The protection and
 * property-related behaviour of MMP is covered by the ZTS, not by this test.
 */
/* ARGSUSED */
void
ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id)
{
	spa_t *pool = ztest_spa;

	if (ztest_opts.zo_mmp_test)
		return;

	/* Enable: only start a thread if multihost is currently off. */
	spa_config_enter(pool, SCL_CONFIG, FTAG, RW_READER);
	mutex_enter(&pool->spa_props_lock);
	if (!spa_multihost(pool)) {
		pool->spa_multihost = B_TRUE;
		mmp_thread_start(pool);
	}
	mutex_exit(&pool->spa_props_lock);
	spa_config_exit(pool, SCL_CONFIG, FTAG);

	/* Signal the MMP threads between two waits for the pool to sync. */
	txg_wait_synced(spa_get_dsl(pool), 0);
	mmp_signal_all_threads();
	txg_wait_synced(spa_get_dsl(pool), 0);

	/* Disable: stop the thread before clearing the multihost flag. */
	spa_config_enter(pool, SCL_CONFIG, FTAG, RW_READER);
	mutex_enter(&pool->spa_props_lock);
	if (spa_multihost(pool)) {
		mmp_thread_stop(pool);
		pool->spa_multihost = B_FALSE;
	}
	mutex_exit(&pool->spa_props_lock);
	spa_config_exit(pool, SCL_CONFIG, FTAG);
}

/* ARGSUSED */
void
ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
Expand Down
1 change: 1 addition & 0 deletions include/sys/mmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ extern void mmp_fini(struct spa *spa);
extern void mmp_thread_start(struct spa *spa);
extern void mmp_thread_stop(struct spa *spa);
extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
/* Wake the mmp thread of every active pool (e.g. after a tunable changed). */
extern void mmp_signal_all_threads(void);

/* Global tuning */
extern ulong_t zfs_multihost_interval;
Expand Down
50 changes: 49 additions & 1 deletion module/zfs/mmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -459,13 +459,61 @@ mmp_thread(spa_t *spa)
mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
}

/*
 * Wake one pool's MMP thread if it is sleeping on its condition variable,
 * so that it can pick up a changed module parameter.
 *
 * The broadcast is skipped when the pool has no MMP thread, since there is
 * nothing to wake.  Both the check and the broadcast happen with
 * mmp_thread_lock held.
 */
static void
mmp_signal_thread(spa_t *spa)
{
	mmp_thread_t *mt = &spa->spa_mmp;

	mutex_enter(&mt->mmp_thread_lock);
	if (mt->mmp_thread) {
		cv_broadcast(&mt->mmp_thread_cv);
	}
	mutex_exit(&mt->mmp_thread_lock);
}

/*
 * Signal the MMP thread of every pool whose state is POOL_STATE_ACTIVE.
 *
 * The pool list is walked with spa_next() while spa_namespace_lock is held;
 * pools in any other state are skipped.
 */
void
mmp_signal_all_threads(void)
{
	spa_t *pool;

	mutex_enter(&spa_namespace_lock);
	for (pool = spa_next(NULL); pool != NULL; pool = spa_next(pool)) {
		if (pool->spa_state != POOL_STATE_ACTIVE)
			continue;
		mmp_signal_thread(pool);
	}
	mutex_exit(&spa_namespace_lock);
}

#if defined(_KERNEL) && defined(HAVE_SPL)
#include <linux/mod_compat.h>

/*
 * Setter callback for the zfs_multihost_interval module parameter.
 *
 * Stores the new value with param_set_ulong().  If that does not report an
 * error (negative return), all MMP threads are signalled so they wake up
 * and see the new interval.  The return value of param_set_ulong() is
 * passed back to the caller unchanged.
 *
 * NOTE(review): mmp_signal_all_threads() takes spa_namespace_lock.  Confirm
 * this setter cannot run before that lock is initialized (for example when
 * the parameter is supplied at module load time).
 */
static int
param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
{
	int rc = param_set_ulong(val, kp);

	if (rc >= 0)
		mmp_signal_all_threads();

	return (rc);
}

/* BEGIN CSTYLED */
module_param(zfs_multihost_fail_intervals, uint, 0644);
MODULE_PARM_DESC(zfs_multihost_fail_intervals,
"Max allowed period without a successful mmp write");

module_param(zfs_multihost_interval, ulong, 0644);
module_param_call(zfs_multihost_interval, param_set_multihost_interval,
param_get_ulong, &zfs_multihost_interval, 0644);
MODULE_PARM_DESC(zfs_multihost_interval,
"Milliseconds between mmp writes to each leaf");

Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ tests = ['mmap_write_001_pos', 'mmap_read_001_pos']
[tests/functional/mmp]
tests = ['mmp_on_thread', 'mmp_on_uberblocks', 'mmp_on_off', 'mmp_interval',
'mmp_active_import', 'mmp_inactive_import', 'mmp_exported_import',
'mmp_write_uberblocks']
'mmp_write_uberblocks', 'mmp_reset_interval']

[tests/functional/mount]
tests = ['umount_001', 'umountall_001']
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/mmp/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dist_pkgdata_SCRIPTS = \
mmp_inactive_import.ksh \
mmp_exported_import.ksh \
mmp_write_uberblocks.ksh \
mmp_reset_interval.ksh \
setup.ksh \
cleanup.ksh \
mmp.kshlib \
Expand Down
2 changes: 2 additions & 0 deletions tests/zfs-tests/tests/functional/mmp/mmp.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ export TXG_TIMEOUT_DEFAULT=5
export MMP_POOL=mmppool
export MMP_DIR=$TEST_BASE_DIR/mmp
export MMP_HISTORY=100
export MMP_HISTORY_OFF=0

# Values for the zfs_multihost_interval tunable, in milliseconds.
# MMP_INTERVAL_HOUR is one hour (60 min * 60 s * 1000 ms).
export MMP_INTERVAL_HOUR=$((60*60*1000))
export MMP_INTERVAL_DEFAULT=1000
export MMP_INTERVAL_MIN=100

Expand Down
67 changes: 67 additions & 0 deletions tests/zfs-tests/tests/functional/mmp/mmp_reset_interval.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
#

# DESCRIPTION:
# Ensure that the MMP thread is notified when zfs_multihost_interval is
# reduced.
#
# STRATEGY:
# 1. Set zfs_multihost_interval to much longer than the test duration
# 2. Create a zpool and enable multihost
# 3. Record the line count of the pool's multihost kstat as a baseline
# 4. Set zfs_multihost_interval to 1 second
# 5. Sleep briefly
# 6. Verify MMP writes began (the kstat line count changed)
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/mmp/mmp.cfg
. $STF_SUITE/tests/functional/mmp/mmp.kshlib

verify_runnable "both"

# Exit handler for this test: calls default_cleanup_noexit, then puts
# zfs_multihost_interval back to $MMP_INTERVAL_DEFAULT and clears the hostid
# that the test set.
function cleanup
{
default_cleanup_noexit
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT
log_must mmp_clear_hostid
}

log_assert "mmp threads notified when zfs_multihost_interval reduced"
log_onexit cleanup

# Use an interval far longer than the test runs before the pool exists.
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_HOUR
log_must mmp_set_hostid $HOSTID1

default_setup_noexit $DISK
log_must zpool set multihost=on $TESTPOOL

# The number of lines in this kstat is compared before and after the change.
mmp_kstat=/proc/spl/kstat/zfs/$TESTPOOL/multihost

prev_count=$(wc -l $mmp_kstat | cut -f1 -d' ')

# Drop the interval; the mmp thread is expected to be woken by this.
log_must set_tunable64 zfs_multihost_interval $MMP_INTERVAL_DEFAULT

# Give the mmp write a moment to complete before sampling again.
sleep 1
curr_count=$(wc -l $mmp_kstat | cut -f1 -d' ')

# An unchanged line count means nothing new was recorded after the change.
if [ $curr_count -eq $prev_count ]; then
	log_fail "mmp writes did not start when zfs_multihost_interval reduced"
fi

log_pass "mmp threads notified when zfs_multihost_interval reduced"

0 comments on commit 0582e40

Please sign in to comment.