3821 Race in rollback, zil close, and zil flush
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Andriy Gapon <avg@FreeBSD.org>
Approved by: Richard Lowe <richlowe@richlowe.net>
grwilson authored and ahrens committed Nov 6, 2016
1 parent 479ad5c commit 43297f9
Showing 3 changed files with 70 additions and 10 deletions.
11 changes: 9 additions & 2 deletions usr/src/uts/common/fs/zfs/dsl_pool.c
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
@@ -593,9 +593,16 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
 {
 	zilog_t *zilog;
 
-	while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) {
+	while (zilog = txg_list_head(&dp->dp_dirty_zilogs, txg)) {
 		dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
+		/*
+		 * We don't remove the zilog from the dp_dirty_zilogs
+		 * list until after we've cleaned it. This ensures that
+		 * callers of zilog_is_dirty() receive an accurate
+		 * answer when they are racing with the spa sync thread.
+		 */
 		zil_clean(zilog, txg);
+		(void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg);
 		ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
 		dmu_buf_rele(ds->ds_dbuf, zilog);
 	}
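
For illustration, here is a minimal user-land sketch of the ordering this hunk enforces. The list and helpers below are hypothetical stand-ins for txg_list_head(), txg_list_remove_this(), and zil_clean(), not the kernel APIs; the point is that a racing dirtiness check keeps answering "dirty" until the cleanup has actually happened.

#include <stdio.h>

/* Hypothetical stand-ins: dirtiness == membership on the dirty list. */
struct zil_model { struct zil_model *next; int pending_itxs; };
static struct zil_model *dirty_list;

static int
is_dirty(struct zil_model *z)		/* models txg_list_member() */
{
	for (struct zil_model *w = dirty_list; w != NULL; w = w->next)
		if (w == z)
			return (1);
	return (0);
}

static void
remove_this(struct zil_model *z)	/* models txg_list_remove_this() */
{
	for (struct zil_model **p = &dirty_list; *p != NULL;
	    p = &(*p)->next) {
		if (*p == z) {
			*p = (*p)->next;
			break;
		}
	}
}

int
main(void)
{
	struct zil_model z = { NULL, 1 };
	dirty_list = &z;

	z.pending_itxs = 0;	/* models zil_clean(): reap the itxs first */
	printf("dirty during cleanup: %d\n", is_dirty(&z));	/* still 1 */
	remove_this(&z);	/* only now drop it from the dirty list */
	printf("dirty after removal: %d\n", is_dirty(&z));	/* now 0 */
	return (0);
}

With the old txg_list_remove()-first ordering, the window between removal and zil_clean() is exactly where a dirtiness check could return B_FALSE while itxs were still pending.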
9 changes: 7 additions & 2 deletions usr/src/uts/common/fs/zfs/spa.c
@@ -6735,8 +6735,6 @@ spa_sync(spa_t *spa, uint64_t txg)
 		spa->spa_config_syncing = NULL;
 	}
 
-	spa->spa_ubsync = spa->spa_uberblock;
-
 	dsl_pool_sync_done(dp, txg);
 
 	mutex_enter(&spa->spa_alloc_lock);
@@ -6761,6 +6759,13 @@ spa_sync(spa_t *spa, uint64_t txg)
 
 	spa->spa_sync_pass = 0;
 
+	/*
+	 * Update the last synced uberblock here. We want to do this at
+	 * the end of spa_sync() so that consumers of spa_last_synced_txg()
+	 * will be guaranteed that all the processing associated with
+	 * that txg has been completed.
+	 */
+	spa->spa_ubsync = spa->spa_uberblock;
 	spa_config_exit(spa, SCL_CONFIG, FTAG);
 
 	spa_handle_ignored_writes(spa);
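
The publish-last pattern this hunk adopts can be sketched in user-land C11. The atomic below is a hypothetical stand-in for the synchronization the real code gets from its locks, not a claim about how spa_ubsync is actually protected.

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong last_synced_txg;	/* models spa_ubsync's txg */
static unsigned long cleaned_up_to;	/* per-txg work, e.g. zil_clean() */

static void
sync_one_txg(unsigned long txg)
{
	cleaned_up_to = txg;	/* models dsl_pool_sync_done() */
	/*
	 * Publish the txg only after every piece of per-txg processing
	 * is finished, so any reader that sees last_synced_txg >= txg
	 * may safely assume that processing is complete.
	 */
	atomic_store_explicit(&last_synced_txg, txg, memory_order_release);
}

int
main(void)
{
	sync_one_txg(42);
	if (atomic_load_explicit(&last_synced_txg,
	    memory_order_acquire) >= 42)
		printf("txg 42 done; cleaned through %lu\n", cleaned_up_to);
	return (0);
}

Moving the spa_ubsync update to after dsl_pool_sync_done() is what makes spa_last_synced_txg() trustworthy to the zil code below.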
60 changes: 54 additions & 6 deletions usr/src/uts/common/fs/zfs/zil.c
@@ -20,8 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Integros [integros.com]
 */

@@ -485,6 +484,27 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
 	}
 }
 
+/*
+ * Determine if the zil is dirty in the specified txg. Callers wanting to
+ * ensure that the dirty state does not change must hold the itxg_lock for
+ * the specified txg. Holding the lock will ensure that the zil cannot be
+ * dirtied (zil_itx_assign) or cleaned (zil_clean) while we check its current
+ * state.
+ */
+boolean_t
+zilog_is_dirty_in_txg(zilog_t *zilog, uint64_t txg)
+{
+	dsl_pool_t *dp = zilog->zl_dmu_pool;
+
+	if (txg_list_member(&dp->dp_dirty_zilogs, zilog, txg & TXG_MASK))
+		return (B_TRUE);
+	return (B_FALSE);
+}
+
+/*
+ * Determine if the zil is dirty. The zil is considered dirty if it has
+ * any pending itx records that have not been cleaned by zil_clean().
+ */
 boolean_t
 zilog_is_dirty(zilog_t *zilog)
 {
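
A caller of the new zilog_is_dirty_in_txg() is expected to hold the itxg_lock to get a stable answer. Below is a minimal pthread model of that contract, with hypothetical types standing in for itxg_t and its kmutex_t; only the TXG_SIZE/TXG_MASK constants mirror the real definitions.

#include <pthread.h>
#include <stdio.h>

#define	TXG_SIZE	4	/* matches the real zl_itxg[] ring size */
#define	TXG_MASK	(TXG_SIZE - 1)

/* Hypothetical per-txg slot standing in for itxg_t and its itxg_lock. */
struct slot {
	pthread_mutex_t lock;
	int dirty;
};
static struct slot slots[TXG_SIZE];

static int
is_dirty_in_txg(unsigned long txg)	/* models zilog_is_dirty_in_txg() */
{
	return (slots[txg & TXG_MASK].dirty);
}

int
main(void)
{
	unsigned long txg = 7;
	struct slot *s = &slots[txg & TXG_MASK];

	pthread_mutex_init(&s->lock, NULL);

	/*
	 * Holding the slot lock excludes both the dirtier (the model of
	 * zil_itx_assign()) and the cleaner (the model of zil_clean()),
	 * so the answer cannot change underneath us.
	 */
	pthread_mutex_lock(&s->lock);
	printf("txg %lu dirty: %d\n", txg, is_dirty_in_txg(txg));
	pthread_mutex_unlock(&s->lock);
	return (0);
}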
@@ -1048,8 +1068,6 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
 		return (NULL);
 
 	ASSERT(lwb->lwb_buf != NULL);
-	ASSERT(zilog_is_dirty(zilog) ||
-	    spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
 
 	if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
 		dlen = P2ROUNDUP_TYPED(
@@ -1285,6 +1303,8 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
 		 * this itxg. Save the itxs for release below.
 		 * This should be rare.
 		 */
+		zfs_dbgmsg("zil_itx_assign: missed itx cleanup for "
+		    "txg %llu", itxg->itxg_txg);
 		atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod);
 		itxg->itxg_sod = 0;
 		clean = itxg->itxg_itxs;
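
The new message fires when an itxg slot is about to be recycled while still holding another txg's itxs. Here is a toy model of that slot-reuse check, with hypothetical names throughout (the real code also releases the saved itxs outside its lock):

#include <stdio.h>

#define	TXG_SIZE	4
#define	TXG_MASK	(TXG_SIZE - 1)

/* Hypothetical model of recycling a zl_itxg[] slot in zil_itx_assign(). */
struct itxg_model { unsigned long txg; int itx_count; };
static struct itxg_model ring[TXG_SIZE];

static void
assign(unsigned long txg)
{
	struct itxg_model *s = &ring[txg & TXG_MASK];

	if (s->itx_count != 0 && s->txg != txg) {
		/*
		 * The slot still holds itxs from an older txg that were
		 * never reaped; note it (as the new zfs_dbgmsg() does)
		 * and release them before reusing the slot.
		 */
		printf("missed itx cleanup for txg %lu\n", s->txg);
		s->itx_count = 0;
	}
	s->txg = txg;
	s->itx_count++;
}

int
main(void)
{
	assign(3);
	assign(7);	/* 7 & TXG_MASK == 3 & TXG_MASK: stale txg 3 found */
	return (0);
}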
@@ -1382,6 +1402,11 @@ zil_get_commit_list(zilog_t *zilog)
 	else
 		otxg = spa_last_synced_txg(zilog->zl_spa) + 1;
 
+	/*
+	 * This is inherently racy, since there is nothing to prevent
+	 * the last synced txg from changing. That's okay since we'll
+	 * only commit things in the future.
+	 */
 	for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) {
 		itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];

@@ -1391,6 +1416,16 @@ zil_get_commit_list(zilog_t *zilog)
 			continue;
 		}
 
+		/*
+		 * If we're adding itx records to the zl_itx_commit_list,
+		 * then the zil better be dirty in this "txg". We can assert
+		 * that here since we're holding the itxg_lock which will
+		 * prevent spa_sync from cleaning it. Once we add the itxs
+		 * to the zl_itx_commit_list we must commit it to disk even
+		 * if it's unnecessary (i.e. the txg was synced).
+		 */
+		ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
+		    spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
 		list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
 		push_sod += itxg->itxg_sod;
 		itxg->itxg_sod = 0;
@@ -1416,6 +1451,10 @@ zil_async_to_sync(zilog_t *zilog, uint64_t foid)
 	else
 		otxg = spa_last_synced_txg(zilog->zl_spa) + 1;
 
+	/*
+	 * This is inherently racy, since there is nothing to prevent
+	 * the last synced txg from changing.
+	 */
 	for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) {
 		itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];
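
Both this loop and the one in zil_get_commit_list() visit only TXG_CONCURRENT_STATES txgs starting from the racily read last synced txg. A standalone sketch of that ring walk; the constants mirror the real definitions, everything else is illustrative:

#include <stdio.h>

#define	TXG_CONCURRENT_STATES	3	/* open, quiescing, syncing */
#define	TXG_SIZE		4
#define	TXG_MASK		(TXG_SIZE - 1)

int
main(void)
{
	unsigned long last_synced = 41;	/* a possibly stale observation */
	unsigned long otxg = last_synced + 1;

	/*
	 * At most TXG_CONCURRENT_STATES txgs can hold itxs at once, so
	 * scanning that many consecutive txgs from otxg covers every
	 * live slot of the ring; a stale otxg only scans slots that
	 * turn out to be empty or already synced.
	 */
	for (unsigned long txg = otxg;
	    txg < otxg + TXG_CONCURRENT_STATES; txg++)
		printf("txg %lu -> slot %lu\n", txg, txg & TXG_MASK);
	return (0);
}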

@@ -1487,8 +1526,14 @@ zil_commit_writer(zilog_t *zilog)
 	DTRACE_PROBE1(zil__cw1, zilog_t *, zilog);
 	while (itx = list_head(&zilog->zl_itx_commit_list)) {
 		txg = itx->itx_lr.lrc_txg;
-		ASSERT(txg);
+		ASSERT3U(txg, !=, 0);
 
+		/*
+		 * This is inherently racy and may result in us writing
+		 * out a log block for a txg that was just synced. This is
+		 * ok since we'll end up cleaning that log block the next
+		 * time we call zil_sync().
+		 */
 		if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa))
 			lwb = zil_lwb_commit(zilog, itx, lwb);
 		list_remove(&zilog->zl_itx_commit_list, itx);
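
Why is the stale read safe? Because spa_last_synced_txg() only moves forward, a racing observation can only be too small, and the check errs toward writing. A hypothetical reduction of the decision:

#include <stdio.h>

/*
 * Model of the zil_commit_writer() check: write the record out unless
 * its txg is already known to be synced.
 */
static int
must_write(unsigned long itx_txg, unsigned long observed_last_synced)
{
	return (itx_txg > observed_last_synced);
}

int
main(void)
{
	/*
	 * True last synced txg is 42. A stale observation (40) makes us
	 * write txg 41's block unnecessarily; zil_sync() cleans it up
	 * later. A skip is never wrong: observed <= true value, so
	 * itx_txg <= observed implies the txg really is on disk.
	 */
	printf("txg 41, saw 40: write=%d (redundant, but safe)\n",
	    must_write(41, 40));
	printf("txg 41, saw 42: write=%d (already synced)\n",
	    must_write(41, 42));
	return (0);
}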
@@ -1805,7 +1850,10 @@ zil_close(zilog_t *zilog)
 	mutex_exit(&zilog->zl_lock);
 	if (txg)
 		txg_wait_synced(zilog->zl_dmu_pool, txg);
-	ASSERT(!zilog_is_dirty(zilog));
+
+	if (zilog_is_dirty(zilog))
+		zfs_dbgmsg("zil (%p) is dirty, txg %llu", zilog, txg);
+	VERIFY(!zilog_is_dirty(zilog));
 
 	taskq_destroy(zilog->zl_clean_taskq);
 	zilog->zl_clean_taskq = NULL;
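
Here the ASSERT becomes a VERIFY, which in illumos stays armed in non-DEBUG builds, and a zfs_dbgmsg() breadcrumb is emitted first. A sketch of that log-before-fatal-check pattern under hypothetical names:

#include <stdio.h>
#include <stdlib.h>

static void
dbgmsg(const char *what, unsigned long txg)	/* stands in for zfs_dbgmsg() */
{
	fprintf(stderr, "dbg: %s, txg %lu\n", what, txg);
}

static void
close_check(int still_dirty, unsigned long txg)
{
	/*
	 * Record the interesting state before the fatal check, so a
	 * crash dump carries the breadcrumb explaining what tripped.
	 */
	if (still_dirty)
		dbgmsg("zil is dirty at close", txg);
	if (still_dirty)	/* models VERIFY(): fatal in all builds */
		abort();
}

int
main(void)
{
	close_check(0, 99);	/* a clean close: no message, no abort */
	printf("closed cleanly\n");
	return (0);
}

A plain ASSERT compiles away in production bits, so the race this change addresses could have slipped by silently; VERIFY keeps the check active everywhere.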