Skip to content

Commit

Permalink
6569 large file delete can starve out write ops
Browse files Browse the repository at this point in the history
Reviewed by: Matt Ahrens <mahrens@delphix.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Approved by: Dan McDonald <danmcd@omniti.com>
  • Loading branch information
alek-p authored and ahrens committed Dec 31, 2016
1 parent 7748149 commit ff5177e
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 6 deletions.
57 changes: 51 additions & 6 deletions usr/src/uts/common/fs/zfs/dmu.c
Expand Up @@ -56,6 +56,14 @@
*/
int zfs_nopwrite_enabled = 1;

/*
* Tunable limiting how much data long-range frees may dirty, expressed
* as a percentage of zfs_dirty_data_max.
*
* dmu_free_long_range_impl() sums the bytes it has freed over all
* TXG_SIZE per-TXG counters; once that sum reaches the threshold,
* freeing further chunks waits until the next TXG opens.
*
* A value of zero disables this throttle. A value above 100 is not
* used as a percentage; the threshold then falls back to one quarter
* of zfs_dirty_data_max.
*/
uint32_t zfs_per_txg_dirty_frees_percent = 30;

const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
Expand Down Expand Up @@ -717,15 +725,25 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
{
uint64_t object_size = (dn->dn_maxblkid + 1) * dn->dn_datablksz;
int err;
uint64_t dirty_frees_threshold;
dsl_pool_t *dp = dmu_objset_pool(os);

if (offset >= object_size)
return (0);

if (zfs_per_txg_dirty_frees_percent <= 100)
dirty_frees_threshold =
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
else
dirty_frees_threshold = zfs_dirty_data_max / 4;

if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;

while (length != 0) {
uint64_t chunk_end, chunk_begin;
uint64_t chunk_end, chunk_begin, chunk_len;
uint64_t long_free_dirty_all_txgs = 0;
dmu_tx_t *tx;

if (dmu_objset_zfs_unmounting(dn->dn_objset))
return (SET_ERROR(EINTR));
Expand All @@ -739,9 +757,28 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
ASSERT3U(chunk_begin, >=, offset);
ASSERT3U(chunk_begin, <=, chunk_end);

dmu_tx_t *tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, dn->dn_object,
chunk_begin, chunk_end - chunk_begin);
chunk_len = chunk_end - chunk_begin;

mutex_enter(&dp->dp_lock);
for (int t = 0; t < TXG_SIZE; t++) {
long_free_dirty_all_txgs +=
dp->dp_long_free_dirty_pertxg[t];
}
mutex_exit(&dp->dp_lock);

/*
* To avoid filling up a TXG with just frees, wait for
* the next TXG to open before freeing more chunks once
* the threshold of dirty frees has been reached.
*/
if (dirty_frees_threshold != 0 &&
long_free_dirty_all_txgs >= dirty_frees_threshold) {
txg_wait_open(dp, 0);
continue;
}

tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, dn->dn_object, chunk_begin, chunk_len);

/*
* Mark this transaction as typically resulting in a net
Expand All @@ -753,10 +790,18 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
dmu_tx_abort(tx);
return (err);
}
dnode_free_range(dn, chunk_begin, chunk_end - chunk_begin, tx);

mutex_enter(&dp->dp_lock);
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
chunk_len;
mutex_exit(&dp->dp_lock);
DTRACE_PROBE3(free__long__range,
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
uint64_t, dmu_tx_get_txg(tx));
dnode_free_range(dn, chunk_begin, chunk_len, tx);
dmu_tx_commit(tx);

length -= chunk_end - chunk_begin;
length -= chunk_len;
}
return (0);
}
Expand Down
11 changes: 11 additions & 0 deletions usr/src/uts/common/fs/zfs/dsl_pool.c
Expand Up @@ -24,6 +24,7 @@
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/

#include <sys/dsl_pool.h>
Expand Down Expand Up @@ -492,6 +493,16 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);

/*
* Reset this TXG's long range free counter now that
* we're done syncing user data.
*/
mutex_enter(&dp->dp_lock);
ASSERT(spa_sync_pass(dp->dp_spa) == 1 ||
dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] == 0);
dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] = 0;
mutex_exit(&dp->dp_lock);

/*
* After the data blocks have been written (ensured by the zio_wait()
* above), update the user/group space accounting.
Expand Down
2 changes: 2 additions & 0 deletions usr/src/uts/common/fs/zfs/sys/dsl_pool.h
Expand Up @@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/

#ifndef _SYS_DSL_POOL_H
Expand Down Expand Up @@ -103,6 +104,7 @@ typedef struct dsl_pool {
kcondvar_t dp_spaceavail_cv;
uint64_t dp_dirty_pertxg[TXG_SIZE];
uint64_t dp_dirty_total;
uint64_t dp_long_free_dirty_pertxg[TXG_SIZE];
uint64_t dp_mos_used_delta;
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;
Expand Down

0 comments on commit ff5177e

Please sign in to comment.