Skip to content

Commit

Permalink
7578 Fix/improve some aspects of ZIL writing.
Browse files Browse the repository at this point in the history
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Andriy Gapon <avg@FreeBSD.org>
Reviewed by: Steven Hartland <steven.hartland@multiplay.co.uk>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: Richard Elling <Richard.Elling@RichardElling.com>
Approved by: Robert Mustacchi <rm@joyent.com>
  • Loading branch information
amotin authored and Prakash Surya committed May 24, 2017
1 parent e6301a3 commit c5ee468
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 115 deletions.
1 change: 0 additions & 1 deletion usr/src/cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -1377,7 +1377,6 @@ ztest_log_write(ztest_ds_t *zd, dmu_tx_t *tx, lr_write_t *lr)
itx->itx_private = zd;
itx->itx_wr_state = write_state;
itx->itx_sync = (ztest_random(8) == 0);
itx->itx_sod += (write_state == WR_NEED_COPY ? lr->lr_length : 0);

bcopy(&lr->lr_common + 1, &itx->itx_lr + 1,
sizeof (*lr) - sizeof (lr_t));
Expand Down
1 change: 0 additions & 1 deletion usr/src/uts/common/fs/zfs/sys/zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ typedef struct itx {
void *itx_private; /* type-specific opaque data */
itx_wr_state_t itx_wr_state; /* write state */
uint8_t itx_sync; /* synchronous transaction */
uint64_t itx_sod; /* record size on disk */
uint64_t itx_oid; /* object id */
lr_t itx_lr; /* common part of log record */
/* followed by type-specific part of lr_xx_t and its immediate data */
Expand Down
20 changes: 18 additions & 2 deletions usr/src/uts/common/fs/zfs/sys/zil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ extern "C" {
typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
boolean_t lwb_slog; /* lwb_blk is on SLOG device */
int lwb_nused; /* # used bytes in buffer */
int lwb_sz; /* size of block and buffer */
char *lwb_buf; /* log write buffer */
Expand All @@ -62,7 +63,6 @@ typedef struct itxs {
typedef struct itxg {
kmutex_t itxg_lock; /* lock for this structure */
uint64_t itxg_txg; /* txg for this chain */
uint64_t itxg_sod; /* total size on disk for this txg */
itxs_t *itxg_itxs; /* sync and async itxs */
} itxg_t;

Expand Down Expand Up @@ -120,7 +120,6 @@ struct zilog {
kcondvar_t zl_cv_batch[2]; /* batch condition variables */
itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */
list_t zl_itx_commit_list; /* itx list to be committed */
uint64_t zl_itx_list_sz; /* total size of records on list */
uint64_t zl_cur_used; /* current commit log size used */
list_t zl_lwb_list; /* in-flight log write list */
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
Expand All @@ -140,9 +139,26 @@ typedef struct zil_bp_node {
avl_node_t zn_node;
} zil_bp_node_t;

/*
* Maximum amount of write data that can be put into a single log block:
* SPA_OLD_MAXBLOCKSIZE less the space taken by one zil_chain_t and one
* lr_write_t.
*/
#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
sizeof (lr_write_t))

/*
* Maximum amount of log space we agree to waste in order to reduce the
* number of WR_NEED_COPY chunks, and with it the zl_get_data() overhead.
* Defined as one eighth (~12%) of ZIL_MAX_LOG_DATA.
*/
#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8)

/*
* Maximum amount of write data for WR_COPIED.  Larger writes fall back to
* WR_NEED_COPY, which is more space efficient when we can't fit at least
* two log records into a maximum-sized log block.  Hence the limit is half
* of (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t)), less one lr_write_t.
*/
#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \
sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t))

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion usr/src/uts/common/fs/zfs/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);

extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
blkptr_t *old_bp, uint64_t size, boolean_t use_slog);
blkptr_t *old_bp, uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
Expand Down
37 changes: 15 additions & 22 deletions usr/src/uts/common/fs/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,20 +454,17 @@ void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
boolean_t slogging;
uintptr_t fsync_cnt;
ssize_t immediate_write_sz;

if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;

immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
? 0 : zfs_immediate_write_sz;

slogging = spa_has_slogs(zilog->zl_spa) &&
(zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
write_state = WR_INDIRECT;
else if (!spa_has_slogs(zilog->zl_spa) &&
resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
Expand All @@ -481,30 +478,26 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
while (resid) {
itx_t *itx;
lr_write_t *lr;
ssize_t len;
itx_wr_state_t wr_state = write_state;
ssize_t len = resid;

/*
* If the write would overflow the largest block then split it.
*/
if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
len = SPA_OLD_MAXBLOCKSIZE >> 1;
else
len = resid;
if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
wr_state = WR_NEED_COPY;
else if (wr_state == WR_INDIRECT)
len = MIN(blocksize - P2PHASE(off, blocksize), resid);

itx = zil_itx_create(txtype, sizeof (*lr) +
(write_state == WR_COPIED ? len : 0));
(wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
if (wr_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
write_state = WR_NEED_COPY;
wr_state = WR_NEED_COPY;
}

itx->itx_wr_state = write_state;
if (write_state == WR_NEED_COPY)
itx->itx_sod += len;
itx->itx_wr_state = wr_state;
lr->lr_foid = zp->z_id;
lr->lr_offset = off;
lr->lr_length = len;
Expand Down
Loading

0 comments on commit c5ee468

Please sign in to comment.