Skip to content

Commit b98f85d

Browse files
committed
fix l2arc compression buffers leak
We have observed that arc_release() can be called concurrently with a l2arc in-flight write. Also, we have observed that arc_hdr_destroy() can be called from arc_write_done() for a zio with ZIO_FLAG_IO_REWRITE flag in similar circumstances. Previously the l2arc headers would be freed while leaking their associated compression buffers. Now the buffers are placed on l2arc_free_on_write list for delayed freeing. This is similar to what was already done to arc buffers that were supposed to be freed concurrently with in-flight writes of those buffers. In addition to fixing the discovered leaks this change also adds some protective code to assert that a compression buffer associated with a l2arc header is never leaked. A new kstat l2_cdata_free_on_write is added. It keeps a count of delayed compression buffer frees which previously would have been leaks. Tested by: Vitalij Satanivskij <satan@ukr.net> et al Requested by: many MFC after: 2 weeks Sponsored by: HybridCluster / ClusterHQ
1 parent 8819c14 commit b98f85d

File tree

1 file changed

+55
-10
lines changed
  • sys/cddl/contrib/opensolaris/uts/common/fs/zfs

1 file changed

+55
-10
lines changed

sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ typedef struct arc_stats {
387387
kstat_named_t arcstat_l2_evict_lock_retry;
388388
kstat_named_t arcstat_l2_evict_reading;
389389
kstat_named_t arcstat_l2_free_on_write;
390+
kstat_named_t arcstat_l2_cdata_free_on_write;
390391
kstat_named_t arcstat_l2_abort_lowmem;
391392
kstat_named_t arcstat_l2_cksum_bad;
392393
kstat_named_t arcstat_l2_io_error;
@@ -464,6 +465,7 @@ static arc_stats_t arc_stats = {
464465
{ "l2_evict_lock_retry", KSTAT_DATA_UINT64 },
465466
{ "l2_evict_reading", KSTAT_DATA_UINT64 },
466467
{ "l2_free_on_write", KSTAT_DATA_UINT64 },
468+
{ "l2_cdata_free_on_write", KSTAT_DATA_UINT64 },
467469
{ "l2_abort_lowmem", KSTAT_DATA_UINT64 },
468470
{ "l2_cksum_bad", KSTAT_DATA_UINT64 },
469471
{ "l2_io_error", KSTAT_DATA_UINT64 },
@@ -1655,6 +1657,21 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag)
16551657
data, metadata, hits);
16561658
}
16571659

1660+
static void
1661+
arc_buf_free_on_write(void *data, size_t size,
1662+
void (*free_func)(void *, size_t))
1663+
{
1664+
l2arc_data_free_t *df;
1665+
1666+
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
1667+
df->l2df_data = data;
1668+
df->l2df_size = size;
1669+
df->l2df_func = free_func;
1670+
mutex_enter(&l2arc_free_on_write_mtx);
1671+
list_insert_head(l2arc_free_on_write, df);
1672+
mutex_exit(&l2arc_free_on_write_mtx);
1673+
}
1674+
16581675
/*
16591676
* Free the arc data buffer. If it is an l2arc write in progress,
16601677
* the buffer is placed on l2arc_free_on_write to be freed later.
@@ -1665,14 +1682,7 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
16651682
arc_buf_hdr_t *hdr = buf->b_hdr;
16661683

16671684
if (HDR_L2_WRITING(hdr)) {
1668-
l2arc_data_free_t *df;
1669-
df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
1670-
df->l2df_data = buf->b_data;
1671-
df->l2df_size = hdr->b_size;
1672-
df->l2df_func = free_func;
1673-
mutex_enter(&l2arc_free_on_write_mtx);
1674-
list_insert_head(l2arc_free_on_write, df);
1675-
mutex_exit(&l2arc_free_on_write_mtx);
1685+
arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
16761686
ARCSTAT_BUMP(arcstat_l2_free_on_write);
16771687
} else {
16781688
free_func(buf->b_data, hdr->b_size);
@@ -1683,6 +1693,23 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
16831693
* Free up buf->b_data and if 'remove' is set, then pull the
16841694
* arc_buf_t off of the the arc_buf_hdr_t's list and free it.
16851695
*/
1696+
static void
1697+
arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
1698+
{
1699+
l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
1700+
1701+
ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
1702+
1703+
if (l2hdr->b_tmp_cdata == NULL)
1704+
return;
1705+
1706+
ASSERT(HDR_L2_WRITING(hdr));
1707+
arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
1708+
zio_data_buf_free);
1709+
ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
1710+
l2hdr->b_tmp_cdata = NULL;
1711+
}
1712+
16861713
static void
16871714
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
16881715
{
@@ -1782,6 +1809,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
17821809
trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
17831810
hdr->b_size, 0);
17841811
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
1812+
arc_buf_l2_cdata_free(hdr);
17851813
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
17861814
ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
17871815
vdev_space_update(l2hdr->b_dev->l2ad_vdev,
@@ -3675,6 +3703,7 @@ arc_release(arc_buf_t *buf, void *tag)
36753703
l2hdr = hdr->b_l2hdr;
36763704
if (l2hdr) {
36773705
mutex_enter(&l2arc_buflist_mtx);
3706+
arc_buf_l2_cdata_free(hdr);
36783707
hdr->b_l2hdr = NULL;
36793708
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
36803709
}
@@ -4964,6 +4993,11 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
49644993
ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
49654994
bytes_evicted += abl2->b_asize;
49664995
ab->b_l2hdr = NULL;
4996+
/*
4997+
* We are destroying l2hdr, so ensure that
4998+
* its compressed buffer, if any, is not leaked.
4999+
*/
5000+
ASSERT(abl2->b_tmp_cdata == NULL);
49675001
kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
49685002
ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
49695003
}
@@ -5202,6 +5236,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
52025236
buf_data = l2hdr->b_tmp_cdata;
52035237
buf_sz = l2hdr->b_asize;
52045238

5239+
/*
5240+
* If the data has not been compressed, then clear b_tmp_cdata
5241+
* to make sure that it points only to a temporary compression
5242+
* buffer.
5243+
*/
5244+
if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
5245+
l2hdr->b_tmp_cdata = NULL;
5246+
52055247
/* Compression may have squashed the buffer to zero length. */
52065248
if (buf_sz != 0) {
52075249
uint64_t buf_p_sz;
@@ -5392,15 +5434,18 @@ l2arc_release_cdata_buf(arc_buf_hdr_t *ab)
53925434
{
53935435
l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
53945436

5395-
if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
5437+
ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
5438+
if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
53965439
/*
53975440
* If the data was compressed, then we've allocated a
53985441
* temporary buffer for it, so now we need to release it.
53995442
*/
54005443
ASSERT(l2hdr->b_tmp_cdata != NULL);
54015444
zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
5445+
l2hdr->b_tmp_cdata = NULL;
5446+
} else {
5447+
ASSERT(l2hdr->b_tmp_cdata == NULL);
54025448
}
5403-
l2hdr->b_tmp_cdata = NULL;
54045449
}
54055450

54065451
/*

0 commit comments

Comments
 (0)