Skip to content

Commit

Permalink
Fix lock order inversion with zvol_open()
Browse files Browse the repository at this point in the history
zfsonlinux issue openzfs#3681 - lock order inversion between zvol_open() and
dsl_pool_sync()...zvol_rename_minors()

Remove trylock of spa_namespace_lock as it is no longer needed when
zvol minor operations are performed in a separate context with no
prior locking state; the spa_namespace_lock is no longer held
when bdev->bd_mutex or zfs_state_lock might be taken in the code
paths originating from the zvol minor operation callbacks.

Signed-off-by: Boris Protopopov <boris.protopopov@actifio.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes openzfs#3681
  • Loading branch information
bprotopopov authored and behlendorf committed Mar 10, 2016
1 parent a0bd735 commit 1ee159f
Showing 1 changed file with 20 additions and 43 deletions.
63 changes: 20 additions & 43 deletions module/zfs/zvol.c
Original file line number Diff line number Diff line change
Expand Up @@ -957,56 +957,27 @@ zvol_first_open(zvol_state_t *zv)
{
objset_t *os;
uint64_t volsize;
int locked = 0;
int error;
uint64_t ro;

/*
* In all other cases the spa_namespace_lock is taken before the
* bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
* function calls fops->open() with the bdev->bd_mutex lock held.
*
* To avoid a potential lock inversion deadlock we preemptively
* try to take the spa_namespace_lock(). Normally it will not
* be contended and this is safe because spa_open_common() handles
* the case where the caller already holds the spa_namespace_lock.
*
* When it is contended we risk a lock inversion if we were to
* block waiting for the lock. Luckily, the __blkdev_get()
* function allows us to return -ERESTARTSYS which will result in
* bdev->bd_mutex being dropped, reacquired, and fops->open() being
* called again. This process can be repeated safely until both
* locks are acquired.
*/
if (!mutex_owned(&spa_namespace_lock)) {
locked = mutex_tryenter(&spa_namespace_lock);
if (!locked)
return (-SET_ERROR(ERESTARTSYS));
}

error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
if (error)
goto out_mutex;

/* lie and say we're read-only */
error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
if (error)
goto out_mutex;
return (SET_ERROR(-error));

zv->zv_objset = os;

error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
if (error)
goto out_owned;

error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
if (error) {
dmu_objset_disown(os, zvol_tag);
zv->zv_objset = NULL;
goto out_mutex;
}
if (error)
goto out_owned;

zv->zv_objset = os;
error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
if (error) {
dmu_objset_disown(os, zvol_tag);
zv->zv_objset = NULL;
goto out_mutex;
}
if (error)
goto out_owned;

set_capacity(zv->zv_disk, volsize >> 9);
zv->zv_volsize = volsize;
Expand All @@ -1021,9 +992,11 @@ zvol_first_open(zvol_state_t *zv)
zv->zv_flags &= ~ZVOL_RDONLY;
}

out_mutex:
if (locked)
mutex_exit(&spa_namespace_lock);
out_owned:
if (error) {
dmu_objset_disown(os, zvol_tag);
zv->zv_objset = NULL;
}

return (SET_ERROR(-error));
}
Expand Down Expand Up @@ -1526,6 +1499,8 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
{
const char *name = (const char *)arg;

ASSERT0(MUTEX_HELD(&spa_namespace_lock));

/* skip the designated dataset */
if (name && strcmp(dsname, name) == 0)
return (0);
Expand All @@ -1550,6 +1525,8 @@ zvol_create_minors_cb(const char *dsname, void *arg)
uint64_t snapdev;
int error;

ASSERT0(MUTEX_HELD(&spa_namespace_lock));

error = dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL);
if (error)
return (0);
Expand Down

0 comments on commit 1ee159f

Please sign in to comment.