Skip to content

Commit

Permalink
Debugging for zio_wait() hangs
Browse files Browse the repository at this point in the history
From the information gathered about issue openzfs#2523 it's clear that
somehow the spin lock is being damaged.  This could occur if the
spin lock is reinitialized, the memory is accidentally overwritten,
or freed back to the cache to soon.  To determine exactly what is
happening this patch adds a couple new sanity tests.

* A zio->io_magic field is added before the io_lock.  This field
  is designed to act as a red zone allow us to detect if the zio
  has been written.

* The zio->io_magic field is also used to detect if somehow the
  constructor or destructor is running multiple for the object.
  This would effectively cause the spin lock to be reinitialized.

* The destructor has been updated to poison the entire structure.
  This should cause us to quickly detect any use-after-free bugs.

Once the root cause of this issue can be determined this patch
should be reverted.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue openzfs#2523
  • Loading branch information
behlendorf committed Sep 11, 2014
1 parent 52dd454 commit 6e47c4b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 4 deletions.
6 changes: 6 additions & 0 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ extern "C" {
*/
#define ZEC_MAGIC 0x210da7ab10c7a11ULL

/*
* Debug zio redzone magic
*/
#define ZIO_MAGIC 0x210210210210210ULL

typedef struct zio_eck {
uint64_t zec_magic; /* for validation, endianness */
zio_cksum_t zec_cksum; /* 256-bit checksum */
Expand Down Expand Up @@ -445,6 +450,7 @@ struct zio {
zio_gang_node_t *io_gang_tree;
void *io_executor;
void *io_waiter;
uint64_t io_magic;
kmutex_t io_lock;
kcondvar_t io_cv;

Expand Down
24 changes: 20 additions & 4 deletions module/zfs/zio.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ zio_cons(void *arg, void *unused, int kmflag)
{
zio_t *zio = arg;

/* Verify the constructor never runs twice on the same object. */
VERIFY3U(zio->io_magic, !=, ZIO_MAGIC);
bzero(zio, sizeof (zio_t));
zio->io_magic = ZIO_MAGIC;

mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);
Expand All @@ -120,6 +123,14 @@ zio_dest(void *arg, void *unused)
cv_destroy(&zio->io_cv);
list_destroy(&zio->io_parent_list);
list_destroy(&zio->io_child_list);

/*
* Verify the destructor never runs twice on the same object, and
* poison the object to catch any use-after-free type of bugs.
*/
VERIFY3U(zio->io_magic, !=, ~ZIO_MAGIC);
memset(zio, 'z', sizeof (zio_t));
zio->io_magic = ~ZIO_MAGIC;
}

void
Expand Down Expand Up @@ -553,6 +564,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(vd || stage == ZIO_STAGE_OPEN);

zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE);
VERIFY3U(zio->io_magic, ==, ZIO_MAGIC);

if (vd != NULL)
zio->io_child_type = ZIO_CHILD_VDEV;
Expand Down Expand Up @@ -647,6 +659,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
static void
zio_destroy(zio_t *zio)
{
VERIFY3U(zio->io_magic, ==, ZIO_MAGIC);
kmem_cache_free(zio_cache, zio);
}

Expand Down Expand Up @@ -1007,7 +1020,7 @@ zio_flush(zio_t *zio, vdev_t *vd)
void
zio_shrink(zio_t *zio, uint64_t size)
{
ASSERT(zio->io_executor == NULL);
VERIFY(zio->io_executor == NULL);
ASSERT(zio->io_orig_size == zio->io_size);
ASSERT(size <= zio->io_size);

Expand Down Expand Up @@ -1380,6 +1393,7 @@ __zio_execute(zio_t *zio)
ASSERT(!MUTEX_HELD(&zio->io_lock));
ASSERT(ISP2(stage));
ASSERT(zio->io_stall == NULL);
VERIFY3U(zio->io_magic, ==, ZIO_MAGIC);

do {
stage <<= 1;
Expand Down Expand Up @@ -1443,15 +1457,17 @@ zio_wait(zio_t *zio)
int error;

ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
ASSERT(zio->io_executor == NULL);
VERIFY(zio->io_executor == NULL);

zio->io_waiter = curthread;

__zio_execute(zio);

mutex_enter(&zio->io_lock);
while (zio->io_executor != NULL)
while (zio->io_executor != NULL) {
VERIFY3U(zio->io_magic, ==, ZIO_MAGIC);
cv_wait_io(&zio->io_cv, &zio->io_lock);
}
mutex_exit(&zio->io_lock);

error = zio->io_error;
Expand All @@ -1463,7 +1479,7 @@ zio_wait(zio_t *zio)
void
zio_nowait(zio_t *zio)
{
ASSERT(zio->io_executor == NULL);
VERIFY(zio->io_executor == NULL);

if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
zio_unique_parent(zio) == NULL) {
Expand Down

0 comments on commit 6e47c4b

Please sign in to comment.