3145 single-copy arc
3212 ztest: race condition between vdev_online() and spa_vdev_remove()
Reviewed by: Matt Ahrens <matthew.ahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
Reviewed by: Justin T. Gibbs <gibbs@scsiguy.com>
Approved by: Eric Schrock <eric.schrock@delphix.com>
grwilson committed Sep 28, 2012
1 parent c13be35 commit 9253d63
Showing 5 changed files with 116 additions and 4 deletions.
3 changes: 2 additions & 1 deletion usr/src/cmd/mdb/common/modules/zfs/zfs.c
@@ -905,7 +905,8 @@ arc_print(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
const char *suffix;

static const char *bytestats[] = {
-	"p", "c", "c_min", "c_max", "size", NULL
+	"p", "c", "c_min", "c_max", "size", "duplicate_buffers_size",
+	NULL
};

static const char *extras[] = {
11 changes: 11 additions & 0 deletions usr/src/cmd/ztest/ztest.c
@@ -4695,7 +4695,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
if (islog)
(void) rw_unlock(&ztest_name_lock);
} else {
/*
* Ideally we would like to be able to randomly
* call vdev_[on|off]line without holding locks
* to force unpredictable failures but the side
* effects of vdev_[on|off]line prevent us from
* doing so. We grab the ztest_vdev_lock here to
* prevent a race between injection testing and
* aux_vdev removal.
*/
VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
(void) vdev_online(spa, guid0, 0, NULL);
VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
}
}

86 changes: 84 additions & 2 deletions usr/src/uts/common/fs/zfs/arc.c
@@ -188,6 +188,7 @@ uint64_t zfs_arc_meta_limit = 0;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
int zfs_disable_dup_eviction = 0;

/*
* Note that buffers can be in one of 6 states:
@@ -290,6 +291,9 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_size;
kstat_named_t arcstat_l2_hdr_size;
kstat_named_t arcstat_memory_throttle_count;
kstat_named_t arcstat_duplicate_buffers;
kstat_named_t arcstat_duplicate_buffers_size;
kstat_named_t arcstat_duplicate_reads;
} arc_stats_t;

static arc_stats_t arc_stats = {
@@ -345,7 +349,10 @@ static arc_stats_t arc_stats = {
{ "l2_io_error", KSTAT_DATA_UINT64 },
{ "l2_size", KSTAT_DATA_UINT64 },
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
-	{ "memory_throttle_count", KSTAT_DATA_UINT64 }
+	{ "memory_throttle_count", KSTAT_DATA_UINT64 },
+	{ "duplicate_buffers", KSTAT_DATA_UINT64 },
+	{ "duplicate_buffers_size", KSTAT_DATA_UINT64 },
+	{ "duplicate_reads", KSTAT_DATA_UINT64 }
};

#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
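
The ARCSTAT_BUMP(), ARCSTAT_BUMPDOWN() and ARCSTAT_INCR() calls used by the new duplicate-buffer accounting in this change go through helper macros that sit next to ARCSTAT() in arc.c. They are pre-existing and not part of this diff; a rough sketch is shown here only to clarify the counter updates below:

#define	ARCSTAT_INCR(stat, val) \
	atomic_add_64(&arc_stats.stat.value.ui64, (val))

#define	ARCSTAT_BUMP(stat)	ARCSTAT_INCR(stat, 1)
#define	ARCSTAT_BUMPDOWN(stat)	ARCSTAT_INCR(stat, -1)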
@@ -1360,6 +1367,17 @@ arc_buf_clone(arc_buf_t *from)
hdr->b_buf = buf;
arc_get_data_buf(buf);
bcopy(from->b_data, buf->b_data, size);

/*
* This buffer already exists in the arc so create a duplicate
* copy for the caller. If the buffer is associated with user data
* then track the size and number of duplicates. These stats will be
* updated as duplicate buffers are created and destroyed.
*/
if (hdr->b_type == ARC_BUFC_DATA) {
ARCSTAT_BUMP(arcstat_duplicate_buffers);
ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
}
hdr->b_datacnt += 1;
return (buf);
}
@@ -1458,6 +1476,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all)
ASSERT3U(state->arcs_size, >=, size);
atomic_add_64(&state->arcs_size, -size);
buf->b_data = NULL;

/*
* If we're destroying a duplicate buffer make sure
* that the appropriate statistics are updated.
*/
if (buf->b_hdr->b_datacnt > 1 &&
buf->b_hdr->b_type == ARC_BUFC_DATA) {
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
}
ASSERT(buf->b_hdr->b_datacnt > 0);
buf->b_hdr->b_datacnt -= 1;
}
@@ -1641,6 +1669,48 @@ arc_buf_size(arc_buf_t *buf)
return (buf->b_hdr->b_size);
}

/*
* Called from the DMU to determine if the current buffer should be
* evicted. In order to ensure proper locking, the eviction must be initiated
* from the DMU. Return true if the buffer is associated with user data and
* duplicate buffers still exist.
*/
boolean_t
arc_buf_eviction_needed(arc_buf_t *buf)
{
arc_buf_hdr_t *hdr;
boolean_t evict_needed = B_FALSE;

if (zfs_disable_dup_eviction)
return (B_FALSE);

mutex_enter(&buf->b_evict_lock);
hdr = buf->b_hdr;
if (hdr == NULL) {
/*
* We are in arc_do_user_evicts(); let that function
* perform the eviction.
*/
ASSERT(buf->b_data == NULL);
mutex_exit(&buf->b_evict_lock);
return (B_FALSE);
} else if (buf->b_data == NULL) {
/*
* We have already been added to the arc eviction list;
* recommend eviction.
*/
ASSERT3P(hdr, ==, &arc_eviction_hdr);
mutex_exit(&buf->b_evict_lock);
return (B_TRUE);
}

if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
evict_needed = B_TRUE;

mutex_exit(&buf->b_evict_lock);
return (evict_needed);
}

/*
* Evict buffers from list until we've removed the specified number of
* bytes. Move the removed buffers to the appropriate evict state.
@@ -2626,8 +2696,10 @@ arc_read_done(zio_t *zio)
abuf = buf;
for (acb = callback_list; acb; acb = acb->acb_next) {
if (acb->acb_done) {
-		if (abuf == NULL)
+		if (abuf == NULL) {
+			ARCSTAT_BUMP(arcstat_duplicate_reads);
			abuf = arc_buf_clone(buf);
+		}
acb->acb_buf = abuf;
abuf = NULL;
}
@@ -3166,6 +3238,16 @@ arc_release(arc_buf_t *buf, void *tag)
ASSERT3U(*size, >=, hdr->b_size);
atomic_add_64(size, -hdr->b_size);
}

/*
* We're releasing a duplicate user data buffer, update
* our statistics accordingly.
*/
if (hdr->b_type == ARC_BUFC_DATA) {
ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
ARCSTAT_INCR(arcstat_duplicate_buffers_size,
-hdr->b_size);
}
hdr->b_datacnt -= 1;
arc_cksum_verify(buf);
arc_buf_unwatch(buf);
19 changes: 18 additions & 1 deletion usr/src/uts/common/fs/zfs/dbuf.c
@@ -2089,7 +2089,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
-		if (!DBUF_IS_CACHEABLE(db))
+
+		/*
+		 * A dbuf will be eligible for eviction if either the
+		 * 'primarycache' property is set or a duplicate
+		 * copy of this buffer is already cached in the arc.
+		 *
+		 * In the case of the 'primarycache' a buffer
+		 * is considered for eviction if it matches the
+		 * criteria set in the property.
+		 *
+		 * To decide if our buffer is considered a
+		 * duplicate, we must call into the arc to determine
+		 * if multiple buffers are referencing the same
+		 * block on-disk. If so, then we simply evict
+		 * ourselves.
+		 */
+		if (!DBUF_IS_CACHEABLE(db) ||
+		    arc_buf_eviction_needed(db->db_buf))
dbuf_clear(db);
else
mutex_exit(&db->db_mtx);
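
For context when reading the new condition: DBUF_IS_CACHEABLE() is a pre-existing macro from dbuf.h that tests the dataset's 'primarycache' policy; only the arc_buf_eviction_needed() call is new here. A rough sketch of that macro, assuming the dbuf.h of this era (not part of this commit):

#define	DBUF_IS_CACHEABLE(_db)						\
	((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL ||	\
	(dbuf_is_metadata(_db) &&					\
	((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))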
1 change: 1 addition & 0 deletions usr/src/uts/common/fs/zfs/sys/arc.h
@@ -99,6 +99,7 @@ int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif

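The new counters can be observed at run time: they are published in the zfs:0:arcstats kstat and, with the mdb change above, reported by the ::arc dcmd alongside the other byte-valued ARC statistics. A minimal userland sketch for reading one of them through libkstat follows; the kstat module/name strings and error handling are illustrative assumptions rather than part of this commit (compile with -lkstat):

#include <stdio.h>
#include <kstat.h>

int
main(void)
{
	kstat_ctl_t *kc = kstat_open();
	kstat_t *ksp;
	kstat_named_t *kn;

	if (kc == NULL)
		return (1);

	/* ARC statistics live in the "arcstats" kstat of the zfs module. */
	ksp = kstat_lookup(kc, "zfs", 0, "arcstats");
	if (ksp == NULL || kstat_read(kc, ksp, NULL) == -1) {
		(void) kstat_close(kc);
		return (1);
	}

	kn = kstat_data_lookup(ksp, "duplicate_buffers_size");
	if (kn != NULL)
		(void) printf("duplicate_buffers_size = %llu\n",
		    (u_longlong_t)kn->value.ui64);

	(void) kstat_close(kc);
	return (0);
}

If the duplicate-eviction behaviour ever needs to be disabled for debugging, the zfs_disable_dup_eviction tunable added at the top of arc.c can be set to a non-zero value (for example via /etc/system); arc_buf_eviction_needed() then returns B_FALSE unconditionally while the duplicate statistics continue to be maintained.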