7430 Backfill metadnode more intelligently
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
nedbass authored and ahrens committed Oct 22, 2016
1 parent 0e60744, commit af346df
Showing 4 changed files with 37 additions and 11 deletions.
usr/src/uts/common/fs/zfs/dmu_object.c (20 additions, 11 deletions)
@@ -36,33 +36,42 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
         uint64_t object;
-        uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
+        uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
             (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
         dnode_t *dn = NULL;
-        int restarted = B_FALSE;
 
         mutex_enter(&os->os_obj_lock);
         for (;;) {
                 object = os->os_obj_next;
                 /*
-                 * Each time we polish off an L2 bp worth of dnodes
-                 * (2^13 objects), move to another L2 bp that's still
-                 * reasonably sparse (at most 1/4 full). Look from the
-                 * beginning once, but after that keep looking from here.
-                 * If we can't find one, just keep going from here.
+                 * Each time we polish off a L1 bp worth of dnodes (2^12
+                 * objects), move to another L1 bp that's still reasonably
+                 * sparse (at most 1/4 full). Look from the beginning at most
+                 * once per txg, but after that keep looking from here.
+                 * os_scan_dnodes is set during txg sync if enough objects
+                 * have been freed since the previous rescan to justify
+                 * backfilling again. If we can't find a suitable block, just
+                 * keep going from here.
                  *
                  * Note that dmu_traverse depends on the behavior that we use
                  * multiple blocks of the dnode object before going back to
                  * reuse objects. Any change to this algorithm should preserve
                  * that property or find another solution to the issues
                  * described in traverse_visitbp.
                  */
-                if (P2PHASE(object, L2_dnode_count) == 0) {
-                        uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
-                        int error = dnode_next_offset(DMU_META_DNODE(os),
+
+                if (P2PHASE(object, L1_dnode_count) == 0) {
+                        uint64_t offset;
+                        int error;
+                        if (os->os_rescan_dnodes) {
+                                offset = 0;
+                                os->os_rescan_dnodes = B_FALSE;
+                        } else {
+                                offset = object << DNODE_SHIFT;
+                        }
+                        error = dnode_next_offset(DMU_META_DNODE(os),
                             DNODE_FIND_HOLE,
                             &offset, 2, DNODES_PER_BLOCK >> 2, 0);
-                        restarted = B_TRUE;
                         if (error == 0)
                                 object = offset >> DNODE_SHIFT;
                 }
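The arithmetic behind the renamed constant is worth spelling out. Below is a minimal sketch of the allocator-side check, assuming typical values that are not stated in this diff (512-byte dnodes, 32 dnodes per 16K dnode block, a 16K metadnode indirect block, 128-byte block pointers); DN_INDBLKSHIFT and the rescan flag are invented stand-ins for dn_indblkshift and os_rescan_dnodes.

/*
 * Toy model of the allocator-side logic in dmu_object_alloc().
 * The constant values below are typical assumptions, not taken from
 * this commit.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define DNODE_SHIFT             9       /* 512-byte dnodes (assumed) */
#define DNODES_PER_BLOCK        32      /* 16K dnode block / 512 (assumed) */
#define DN_INDBLKSHIFT          14      /* 16K indirect blocks (assumed) */
#define SPA_BLKPTRSHIFT         7       /* 128-byte block pointers */
#define P2PHASE(x, align)       ((x) & ((align) - 1))   /* offset within a power-of-2 span */

int
main(void)
{
        /* 32 dnodes per block << 7 blkptrs per L1 bp = 4096 dnodes per L1 bp */
        uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
            (DN_INDBLKSHIFT - SPA_BLKPTRSHIFT);
        printf("dnodes per L1 bp: %llu (2^12 = %d)\n",
            (unsigned long long)L1_dnode_count, 1 << 12);

        /* The sparse-block search only runs on L1 boundaries. */
        uint64_t object = 8192;         /* example object number */
        bool rescan = true;             /* stands in for os_rescan_dnodes */
        if (P2PHASE(object, L1_dnode_count) == 0) {
                /* Rescan from the start of the metadnode, else from here. */
                uint64_t offset = rescan ? 0 : object << DNODE_SHIFT;
                printf("search for a sparse dnode block from byte offset %llu\n",
                    (unsigned long long)offset);
        }
        return (0);
}

With those assumed values, one L1 indirect block maps 128 block pointers of 32 dnodes each, i.e. 2^12 dnodes, which matches the "2^12 objects" in the updated comment; note that the expression itself is unchanged in the hunk above, only the identifier is renamed from L2_dnode_count to L1_dnode_count.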
usr/src/uts/common/fs/zfs/dmu_objset.c (14 additions, 0 deletions)
@@ -67,6 +67,13 @@ krwlock_t os_lock;
  */
 int dmu_find_threads = 0;
 
+/*
+ * Backfill lower metadnode objects after this many have been freed.
+ * Backfilling negatively impacts object creation rates, so only do it
+ * if there are enough holes to fill.
+ */
+int dmu_rescan_dnode_threshold = 131072;
+
 static void dmu_objset_find_dp_cb(void *arg);
 
 void
@@ -1176,6 +1183,13 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
                 if (dr->dr_zio)
                         zio_nowait(dr->dr_zio);
         }
+
+        /* Enable dnode backfill if enough objects have been freed. */
+        if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
+                os->os_rescan_dnodes = B_TRUE;
+                os->os_freed_dnodes = 0;
+        }
+
         /*
          * Free intent log blocks up to this tx.
          */
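Taken together with the dnode_sync.c change below, these two hunks close the loop: each dnode freed during sync bumps os_freed_dnodes, and once the count reaches dmu_rescan_dnode_threshold (131072 by default) the sync path arms os_rescan_dnodes and resets the counter, letting dmu_object_alloc() restart its hole search from the start of the metadnode. The following is a minimal userland model of that gate; my_objset, freed_dnodes, rescan_dnodes, free_dnode, and sync_objset are invented stand-ins, and only the threshold value is taken from the diff.

/*
 * Toy model of the backfill gate added in dmu_objset_sync(). Names are
 * invented stand-ins for the new objset_t fields.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

static uint64_t rescan_threshold = 131072;      /* dmu_rescan_dnode_threshold */

struct my_objset {
        uint64_t freed_dnodes;          /* stands in for os_freed_dnodes */
        bool rescan_dnodes;             /* stands in for os_rescan_dnodes */
};

/* Mirrors the dnode_sync() hunk: count each dnode freed during sync. */
static void
free_dnode(struct my_objset *os)
{
        os->freed_dnodes++;
}

/* Mirrors the dmu_objset_sync() hunk: arm a rescan once enough were freed. */
static void
sync_objset(struct my_objset *os)
{
        if (os->freed_dnodes >= rescan_threshold) {
                os->rescan_dnodes = true;
                os->freed_dnodes = 0;
        }
}

int
main(void)
{
        struct my_objset os = { 0, false };

        /* Simulate a few txgs, freeing 50000 dnodes in each. */
        for (int txg = 1; txg <= 4; txg++) {
                for (int i = 0; i < 50000; i++)
                        free_dnode(&os);
                sync_objset(&os);
                printf("txg %d: freed_dnodes=%llu rescan=%d\n", txg,
                    (unsigned long long)os.freed_dnodes, os.rescan_dnodes);
        }
        return (0);
}

If the usual illumos mechanism for module globals applies here (an assumption, not something this commit documents), the threshold could presumably be lowered or raised from /etc/system, e.g. set zfs:dmu_rescan_dnode_threshold = 65536.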
usr/src/uts/common/fs/zfs/dnode_sync.c (1 addition, 0 deletions)
@@ -672,6 +672,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
         }
 
         if (freeing_dnode) {
+                dn->dn_objset->os_freed_dnodes++;
                 dnode_sync_free(dn, tx);
                 return;
         }
usr/src/uts/common/fs/zfs/sys/dmu_objset.h (2 additions, 0 deletions)
@@ -112,6 +112,8 @@ struct objset {
         zil_header_t os_zil_header;
         list_t os_synced_dnodes;
         uint64_t os_flags;
+        uint64_t os_freed_dnodes;
+        boolean_t os_rescan_dnodes;
 
         /* Protected by os_obj_lock */
         kmutex_t os_obj_lock;
