Skip to content

Commit

Permalink
Fix send/recv lost spill block
Browse files Browse the repository at this point in the history
When receiving an object in a send stream the receive_object()
function must determine if it is an existing or new object.  This
is normally straight forward since that object number will usually
not be allocated, and therefore it must be a new object.

However, when the object exists there are two possible scenarios.

1) The object may have been freed and an entirely new object
   allocated.  In which case it needs to be reallocated to free
   any attached spill block and to set the new attributes (i.e.
   block size, bonus size, etc).  Or,

2) The object's attributes, like block size, we're modified at
   the source but it is the same original object.  In which case
   only those attributes should be updated, and everything else
   preserved.

The issue is that this determination is accomplished using a set
of heuristics from the OBJECT record.  Unfortunately, these fields
aren't sufficient to always distinguish between these two cases.

The result of which is that a change in the objects block size will
result it in being reallocated.  As part of this reallocation any
spill block associated with the object will be freed.

When performing a normal send/recv this issue will most likely
manifest itself as a file with missing xattrs.  This is because
when the xattr=sa property is set the xattrs can be stored in
this lost spill block.

If this issue occurs when performing a raw send then the missing
spill block will trigger an authentication error.  This error will
prevent the receiving side for accessing the damaged dnode block.
Furthermore, if first dnode block is damaged in this way it will
make it impossible to mount the received snapshot.

This change resolves the issue by updating the sender to always
include a SPILL record for each OBJECT record with a spill block.
This allows the missing spill block to be recreated if it's freed
during receive_object().

The major advantage of this approach is that it is backwards
compatible with existing versions of 'zfs receive'.   This means
there's no need to add an incompatible feature flag which is only
understood by the latest versions.  Older versions of the software
which already know how to handle spill blocks will do the right thing.

The downside to this approach is that it can increases the size of
the stream due to the additional spill blocks.  Additionally, since
new spill blocks will be written the received snapshot will consume
more capacity.  These drawbacks can be largely mitigated by using
the large dnode feature which reduces the need for spill blocks.

Both the send_realloc_files and send_realloc_encrypted_files ZTS
test cases were updated to create xattrs in order to force spill
blocks.  As part of validating an incremental receive the contents
of all received xattrs are verified against the source snapshot.

OpenZFS-issue: https://www.illumos.org/issues/9952
FreeBSD-issue: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=233277

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue openzfs#6224
  • Loading branch information
behlendorf committed Apr 24, 2019
1 parent 17cbc2e commit a436e48
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 20 deletions.
1 change: 1 addition & 0 deletions include/sys/dmu_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ typedef struct dmu_sendarg {
objset_t *dsa_os;
zio_cksum_t dsa_zc;
uint64_t dsa_toguid;
uint64_t dsa_fromtxg;
int dsa_err;
dmu_pendop_t dsa_pending_op;
uint64_t dsa_featureflags;
Expand Down
4 changes: 2 additions & 2 deletions include/sys/dnode.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ typedef struct dnode_phys {
};
} dnode_phys_t;

#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
#define DN_SPILL_BLKPTR(dnp) ((blkptr_t *)((char *)(dnp) + \
(((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT)))

struct dnode {
/*
Expand Down
2 changes: 1 addition & 1 deletion module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2466,7 +2466,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(db->db_level == 0);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
ASSERT(arc_buf_lsize(buf) == db->db.db_size);
ASSERT3U(arc_buf_lsize(buf), ==, db->db.db_size);
ASSERT(tx->tx_txg != 0);

arc_return_buf(buf, db);
Expand Down
9 changes: 8 additions & 1 deletion module/zfs/dmu_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1699,9 +1699,16 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
return (err);
}

if (db_spill->db_size < drrs->drr_length)
/*
* Spill blocks may both grow and shrink. When a change in size
* occurs any existing dbuf must be updated to match the logical
* size of the provided arc_buf_t.
*/
if (db_spill->db_size != drrs->drr_length) {
dmu_buf_will_fill(db_spill, tx);
VERIFY(0 == dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
}

if (rwa->byteswap && !arc_is_encrypted(abuf) &&
arc_get_compression(abuf) == ZIO_COMPRESS_OFF) {
Expand Down
25 changes: 25 additions & 0 deletions module/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ typedef struct dump_bytes_io {
int dbi_len;
} dump_bytes_io_t;

static int do_dump(dmu_sendarg_t *dsa, struct send_block_record *data);

static void
dump_bytes_cb(void *arg)
{
Expand Down Expand Up @@ -594,8 +596,30 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object,
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0)
return (SET_ERROR(EINTR));

/*
* Send the spill block if it existed before the fromtxg. This is
* required to ensure that the spill block is recreated if the dnode
* is reallocated on the receiving side. This may occur when certain
* attributes of the dnode change (block size, bonus size, etc).
*/
if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
(DN_SPILL_BLKPTR(dnp)->blk_birth <= dsp->dsa_fromtxg)) {
struct send_block_record record;

bzero(&record, sizeof (struct send_block_record));
record.eos_marker = B_FALSE;
record.bp = *DN_SPILL_BLKPTR(dnp);
SET_BOOKMARK(&(record.zb), dmu_objset_id(dsp->dsa_os),
object, 0, DMU_SPILL_BLKID);

if (do_dump(dsp, &record) != 0)
return (SET_ERROR(EINTR));
}

if (dsp->dsa_err != 0)
return (SET_ERROR(EINTR));

return (0);
}

Expand Down Expand Up @@ -1084,6 +1108,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
dsp->dsa_os = os;
dsp->dsa_off = off;
dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
dsp->dsa_fromtxg = fromtxg;
dsp->dsa_pending_op = PENDING_NONE;
dsp->dsa_featureflags = featureflags;
dsp->dsa_resume_object = resumeobj;
Expand Down
4 changes: 2 additions & 2 deletions tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -806,8 +806,8 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_hierarchy',
'send_encrypted_props', 'send_encrypted_truncated_files',
'send_freeobjects', 'send_realloc_dnode_size', 'send_realloc_files',
'send_realloc_encrypted_files', 'send_holds', 'send_hole_birth',
'send_mixed_raw', 'send-wDR_encrypted_zvol']
'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
'send_hole_birth', 'send_mixed_raw', 'send-wDR_encrypted_zvol']
tags = ['functional', 'rsend']

[tests/functional/scrub_mirror]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/rsend/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ dist_pkgdata_SCRIPTS = \
send_realloc_dnode_size.ksh \
send_realloc_files.ksh \
send_realloc_encrypted_files.ksh \
send_spill_block.ksh \
send_holds.ksh \
send_hole_birth.ksh \
send_mixed_raw.ksh \
Expand Down
23 changes: 16 additions & 7 deletions tests/zfs-tests/tests/functional/rsend/rsend.kshlib
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/include/math.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_set/zfs_set_common.kshlib
. $STF_SUITE/tests/functional/rsend/rsend.cfg

#
Expand Down Expand Up @@ -518,9 +519,13 @@ function churn_files
value=$((RANDOM % 5))
if [ $value -eq 0 -a $xattrs -ne 0 ]; then
attrname="testattr$((RANDOM % 3))"
attrlen="$(((RANDOM % 1000) + 1))"
attrvalue="$(random_string VALID_NAME_CHAR \
$attrlen)"
attr -qr $attrname $file_name || \
log_fail "Failed to remove $attrname"
attr -qs $attrname -V TestValue $file_name || \
attr -qs $attrname \
-V "$attrvalue" $file_name || \
log_fail "Failed to set $attrname"
elif [ $value -eq 1 ]; then
dd if=/dev/urandom of=$file_name \
Expand Down Expand Up @@ -548,9 +553,12 @@ function churn_files
if [ $xattrs -ne 0 ]; then
for j in {0..2}; do
attrname="testattr$j"
attr -qs $attrname -V TestValue \
$file_name || log_fail \
"Failed to set $attrname"
attrlen="$(((RANDOM % 1000) + 1))"
attrvalue="$(random_string \
VALID_NAME_CHAR $attrlen)"
attr -qs $attrname \
-V "$attrvalue" $file_name || \
log_fail "Failed to set $attrname"
done
fi
fi
Expand Down Expand Up @@ -791,10 +799,11 @@ function rand_set_prop
log_must eval "zfs set $prop='$value' $dtst"
}

# Generate a recursive checksum of a filesystems contents. Only file
# data is included in the checksum (no meta data, or xattrs).
# Generate a recursive checksum of a filesystem which includes the file
# contents and any associated xattrs.
function recursive_cksum
{
find $1 -type f -exec sha256sum {} \; | \
find $1 -type f -exec sh -c 'sha256sum {}; getfattr \
--absolute-names --only-values -d {} | sha256sum' \; | \
sort -k 2 | awk '{ print $1 }' | sha256sum
}
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,8 @@ for i in {1..5}; do

# Churn the filesystem in such a way that we're likely to be both
# allocating and reallocating objects in the incremental stream.
#
# Disable xattrs until the following spill block issue is resolved:
# https://github.com/openzfs/openzfs/pull/705
#
log_must churn_files 1000 524288 $POOL/fs 0
expected_cksum=$(recursive_cksum /$fs)
log_must churn_files 1000 524288 $POOL/fs
expected_cksum=$(recursive_cksum /$POOL/fs)

# Create a snapshot and use it to send an incremental stream.
this_snap=$((last_snap + 1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ for i in {1..5}; do
# Churn the filesystem in such a way that we're likely to be both
# allocating and reallocating objects in the incremental stream.
log_must churn_files 1000 524288 $POOL/fs
expected_cksum=$(recursive_cksum /$fs)
expected_cksum=$(recursive_cksum /$POOL/fs)

# Create a snapshot and use it to send an incremental stream.
this_snap=$((last_snap + 1))
Expand Down
155 changes: 155 additions & 0 deletions tests/zfs-tests/tests/functional/rsend/send_spill_block.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
#!/bin/ksh

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2019 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/rsend/rsend.kshlib

#
# Description:
# Verify spill blocks are correctly preserved.
#
# Strategy:
# 1) Create a set of files each containing some file data.
# 2) Add enough xattrs to the file to require a spill block.
# 3) Snapshot and send these files to a new dataset.
# 4) Modify the files and spill blocks in a variety of ways.
# 5) Send the changes using an incremental send stream.
# 6) Verify that all the xattrs (and thus the spill block) were
# preserved when receiving the incremental stream.
#

verify_runnable "both"

log_assert "Verify spill blocks are correctly preserved"

function cleanup
{
rm -f $BACKDIR/fs@*
destroy_dataset $POOL/fs "-rR"
destroy_dataset $POOL/newfs "-rR"
}

attrvalue="abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"

log_onexit cleanup

log_must zfs create $POOL/fs
log_must zfs set xattr=sa $POOL/fs
log_must zfs set dnodesize=legacy $POOL/fs
log_must zfs set recordsize=128k $POOL/fs

# Create 40 files each with a spill block containing xattrs. Each file
# will be modified in a different way to validate the incremental receive.
for i in {1..40}; do
file="/$POOL/fs/file$i"

log_must mkfile 16384 $file
for j in {1..20}; do
log_must attr -qs "testattr$j" -V "$attrvalue" $file
done
done

# Snapshot the pool and send it to the new dataset.
log_must zfs snapshot $POOL/fs@snap1
log_must eval "zfs send -e $POOL/fs@snap1 >$BACKDIR/fs@snap1"
log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs@snap1"

#
# Modify file[1-6]'s contents but not the spill blocks.
#
# file1 - Increase record size; single block
# file2 - Increase record size; multiple blocks
# file3 - Truncate file to zero size; single block
# file4 - Truncate file to smaller size; single block
# file5 - Truncate file to much larger size; add holes
# file6 - Truncate file to embedded size; embedded data
#
log_must mkfile 32768 /$POOL/fs/file1
log_must mkfile 1048576 /$POOL/fs/file2
log_must truncate -s 0 /$POOL/fs/file3
log_must truncate -s 8192 /$POOL/fs/file4
log_must truncate -s 1073741824 /$POOL/fs/file5
log_must truncate -s 50 /$POOL/fs/file6

#
# Modify file[11-16]'s contents and their spill blocks.
#
# file11 - Increase record size; single block
# file12 - Increase record size; multiple blocks
# file13 - Truncate file to zero size; single block
# file14 - Truncate file to smaller size; single block
# file15 - Truncate file to much larger size; add holes
# file16 - Truncate file to embedded size; embedded data
#
log_must mkfile 32768 /$POOL/fs/file11
log_must mkfile 1048576 /$POOL/fs/file12
log_must truncate -s 0 /$POOL/fs/file13
log_must truncate -s 8192 /$POOL/fs/file14
log_must truncate -s 1073741824 /$POOL/fs/file15
log_must truncate -s 50 /$POOL/fs/file16

for i in {11..20}; do
log_must attr -qr testattr1 /$POOL/fs/file$i
done

#
# Modify file[21-26]'s contents and remove their spill blocks.
#
# file21 - Increase record size; single block
# file22 - Increase record size; multiple blocks
# file23 - Truncate file to zero size; single block
# file24 - Truncate file to smaller size; single block
# file25 - Truncate file to much larger size; add holes
# file26 - Truncate file to embedded size; embedded data
#
log_must mkfile 32768 /$POOL/fs/file21
log_must mkfile 1048576 /$POOL/fs/file22
log_must truncate -s 0 /$POOL/fs/file23
log_must truncate -s 8192 /$POOL/fs/file24
log_must truncate -s 1073741824 /$POOL/fs/file25
log_must truncate -s 50 /$POOL/fs/file26

for i in {21..30}; do
for j in {1..20}; do
log_must attr -qr testattr$j /$POOL/fs/file$i
done
done

#
# Modify file[31-40]'s spill blocks but not the file contents.
#
for i in {31..40}; do
file="/$POOL/fs/file$i"
log_must attr -qr testattr$(((RANDOM % 20) + 1)) $file
log_must attr -qs testattr$(((RANDOM % 20) + 1)) -V "$attrvalue" $file
done

# Calculate the expected recursive checksum for the source.
expected_cksum=$(recursive_cksum /$POOL/fs)

# Snapshot the pool and send the incremental snapshot.
log_must zfs snapshot $POOL/fs@snap2
log_must eval "zfs send -e -i $POOL/fs@snap1 $POOL/fs@snap2 >$BACKDIR/fs@snap2"
log_must eval "zfs recv -F $POOL/newfs < $BACKDIR/fs@snap2"

# Validate the received copy using the received recursive checksum.
actual_cksum=$(recursive_cksum /$POOL/newfs)
if [[ "$expected_cksum" != "$actual_cksum" ]]; then
log_fail "Checksums differ ($expected_cksum != $actual_cksum)"
fi

log_pass "Verify spill blocks are correctly preserved"

0 comments on commit a436e48

Please sign in to comment.