Skip to content

Commit

Permalink
Improve zpool status output, list all affected datasets
Browse files Browse the repository at this point in the history
Currently, determining which datasets are affected by corruption is
a manual process.

The primary difficulty in reporting the list of affected snapshots is
that, since the error was initially found, the snapshot in which the
error originally occurred may have been deleted. To solve this issue, we
add to the stored error report the ID of the head dataset of the
original snapshot in which the error was detected. Then any time a
filesystem is deleted, the errors associated with it are deleted as
well. Any time a clone promote occurs, we modify reports associated with
the original head to refer to the new head. The stored error reports are
identified by this head ID; the birth time of the block in which the
error occurred, as well as some information about the error itself, is
also stored.

Once this information is stored, we can find the set of datasets
affected by an error by walking back the list of snapshots in the given
head until we find one with the appropriate birth txg, and then
traversing the snapshots of the clone family, terminating a branch if
the block was replaced in a given snapshot. Then we report this
information back to libzfs, and to the zpool status command, where it is
displayed as follows:

 pool: test
 state: ONLINE
status: One or more devices has experienced an error resulting in data
        corruption.  Applications may be affected.
action: Restore the file in question if possible.  Otherwise restore the
        entire pool from backup.
   see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-8A
  scan: scrub repaired 0B in 00:00:00 with 800 errors on Fri Dec  3
08:27:57 2021
config:

        NAME        STATE     READ WRITE CKSUM
        test        ONLINE       0     0     0
          sdb       ONLINE       0     0 1.58K

errors: Permanent errors have been detected in the following files:

        test@1:/test.0.0
        /test/test.0.0
        /test/1clone/test.0.0

A new feature flag is introduced to mark the presence of this change, as
well as promotion and backwards compatibility logic. This is an updated
version of openzfs#9175. Rebase required fixing the tests, updating the ABI of
libzfs, updating the man pages, fixing bugs, fixing the error returns,
and updating the old on-disk error logs to the new format when
activating the feature.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mark.maybee@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Co-authored-by: TulsiJain <tulsi.jain@delphix.com>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes openzfs#9175
Closes openzfs#12812
  • Loading branch information
gamanakis authored and andrewc12 committed Sep 23, 2022
1 parent 7752428 commit aba0908
Show file tree
Hide file tree
Showing 21 changed files with 1,219 additions and 194 deletions.
3 changes: 3 additions & 0 deletions include/sys/dsl_dataset.h
Expand Up @@ -487,6 +487,9 @@ boolean_t dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds,
void dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
uint64_t num_redact_snaps, dmu_tx_t *tx);

int dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,
uint64_t *oldest_dsobj);

#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
Expand Down
8 changes: 7 additions & 1 deletion include/sys/spa.h
Expand Up @@ -1144,11 +1144,17 @@ extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
extern int spa_get_errlog(spa_t *spa, void *uaddr, uint64_t *count);
extern void spa_errlog_rotate(spa_t *spa);
extern void spa_errlog_drain(spa_t *spa);
extern void spa_errlog_sync(spa_t *spa, uint64_t txg);
extern void spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub);
extern void spa_delete_dataset_errlog(spa_t *spa, uint64_t ds, dmu_tx_t *tx);
extern void spa_swap_errlog(spa_t *spa, uint64_t new_head_ds,
uint64_t old_head_ds, dmu_tx_t *tx);
extern void sync_error_list(spa_t *spa, avl_tree_t *t, uint64_t *obj,
dmu_tx_t *tx);
extern void spa_upgrade_errlog(spa_t *spa, dmu_tx_t *tx);

/* vdev cache */
extern void vdev_cache_stat_init(void);
Expand Down
14 changes: 14 additions & 0 deletions include/sys/zio.h
Expand Up @@ -283,6 +283,13 @@ extern const char *const zio_type_name[ZIO_TYPES];
* Note: this structure is passed between userland and the kernel, and is
* stored on disk (by virtue of being incorporated into other on-disk
* structures, e.g. dsl_scan_phys_t).
*
* If the head_errlog feature is enabled a different on-disk format for error
* logs is used. This introduces the use of an error bookmark, a four-tuple
* <object, level, blkid, birth> that uniquely identifies any error block
* in the pool. The birth transaction group is used to track whether the block
* has been overwritten by newer data or added to a snapshot since its marking
* as an error.
*/
struct zbookmark_phys {
uint64_t zb_objset;
Expand All @@ -291,6 +298,13 @@ struct zbookmark_phys {
uint64_t zb_blkid;
};

/*
 * Error bookmark for the head_errlog error log format: the four-tuple
 * <object, level, blkid, birth>.  In contrast to zbookmark_phys there is
 * no zb_objset member here; the birth transaction group of the block is
 * recorded instead.
 */
typedef struct zbookmark_err_phys {
uint64_t zb_object; /* "object" component of the tuple */
int64_t zb_level; /* "level" component; presumably the block's indirection level - confirm */
uint64_t zb_blkid; /* "blkid" component of the tuple */
uint64_t zb_birth; /* birth transaction group of the error block */
} zbookmark_err_phys_t;

#define SET_BOOKMARK(zb, objset, object, level, blkid) \
{ \
(zb)->zb_objset = objset; \
Expand Down
1 change: 1 addition & 0 deletions include/zfeature_common.h
Expand Up @@ -76,6 +76,7 @@ typedef enum spa_feature {
SPA_FEATURE_ZSTD_COMPRESS,
SPA_FEATURE_DRAID,
SPA_FEATURE_ZILSAXATTR,
SPA_FEATURE_HEAD_ERRLOG,
SPA_FEATURES
} spa_feature_t;

Expand Down
2 changes: 1 addition & 1 deletion lib/libnvpair/libnvpair.abi
Expand Up @@ -2794,7 +2794,7 @@
</abi-instr>
<abi-instr address-size='64' path='assert.c' language='LANG_C99'>
<function-decl name='libspl_set_assert_ok' mangled-name='libspl_set_assert_ok' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libspl_set_assert_ok'>
<parameter type-id='f58c8277' name='val'/>
<parameter type-id='c19b74c3' name='val'/>
<return type-id='48b5725f'/>
</function-decl>
<function-decl name='libspl_assertf' mangled-name='libspl_assertf' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='libspl_assertf'>
Expand Down
193 changes: 94 additions & 99 deletions lib/libuutil/libuutil.abi

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions lib/libzfs/libzfs.abi
Expand Up @@ -596,7 +596,7 @@
<elf-symbol name='fletcher_4_superscalar4_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='1960' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2016' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand Down Expand Up @@ -1855,8 +1855,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='../../module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='15680' id='9d60dcc5'>
<subrange length='35' type-id='7359adad' id='6e6845b5'/>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16128' id='9d5e9e2e'>
<subrange length='36' type-id='7359adad' id='ae666bde'/>
</array-type-def>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
Expand Down Expand Up @@ -1896,7 +1896,8 @@
<enumerator name='SPA_FEATURE_ZSTD_COMPRESS' value='32'/>
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
<enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
<enumerator name='SPA_FEATURES' value='35'/>
<enumerator name='SPA_FEATURE_HEAD_ERRLOG' value='35'/>
<enumerator name='SPA_FEATURES' value='36'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<enum-decl name='zfeature_flags' id='6db816a4'>
Expand Down Expand Up @@ -1954,7 +1955,7 @@
<qualified-type-def type-id='3eee3342' const='yes' id='0c1d5bbb'/>
<pointer-type-def type-id='0c1d5bbb' size-in-bits='64' id='a3372543'/>
<pointer-type-def type-id='d6618c78' size-in-bits='64' id='a8425263'/>
<var-decl name='spa_feature_table' type-id='9d60dcc5' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
<var-decl name='spa_feature_table' type-id='9d5e9e2e' mangled-name='spa_feature_table' visibility='default' elf-symbol-id='spa_feature_table'/>
<var-decl name='zfeature_checks_disable' type-id='c19b74c3' mangled-name='zfeature_checks_disable' visibility='default' elf-symbol-id='zfeature_checks_disable'/>
<function-decl name='zfeature_is_valid_guid' mangled-name='zfeature_is_valid_guid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfeature_is_valid_guid'>
<parameter type-id='80f4b756' name='name'/>
Expand Down
37 changes: 16 additions & 21 deletions lib/libzfsbootenv/libzfsbootenv.abi
Expand Up @@ -5,8 +5,6 @@
<dependency name='libc.so.6'/>
</elf-needed>
<elf-function-symbols>
<elf-symbol name='_fini' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='_init' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzbe_add_pair' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzbe_bootenv_print' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='lzbe_get_boot_device' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
Expand All @@ -20,6 +18,7 @@
<type-decl name='char' size-in-bits='8' id='a84c031d'/>
<type-decl name='int' size-in-bits='32' id='95e97e5e'/>
<type-decl name='unnamed-enum-underlying-type-32' is-anonymous='yes' size-in-bits='32' alignment-in-bits='32' id='9cac1fee'/>
<type-decl name='void' id='48b5725f'/>
<enum-decl name='lzbe_flags' id='2b77720b'>
<underlying-type type-id='9cac1fee'/>
<enumerator name='lzbe_add' value='0'/>
Expand Down Expand Up @@ -84,21 +83,16 @@
<array-type-def dimensions='1' type-id='a84c031d' size-in-bits='160' id='664ac0b7'>
<subrange length='20' type-id='7359adad' id='fdca39cf'/>
</array-type-def>
<class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
<class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
<class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
<type-decl name='long int' size-in-bits='64' id='bd54fe1a'/>
<type-decl name='signed char' size-in-bits='8' id='28577a57'/>
<type-decl name='unsigned short int' size-in-bits='16' id='8efea9e5'/>
<typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
<typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
<typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
<typedef-decl name='_IO_lock_t' type-id='48b5725f' id='bb4788fa'/>
<class-decl name='_IO_marker' size-in-bits='192' is-struct='yes' visibility='default' id='010ae0b9'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='_next' type-id='e4c6fa61' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='64'>
<var-decl name='_sbuf' type-id='dca988a5' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='128'>
<var-decl name='_pos' type-id='95e97e5e' visibility='default'/>
</data-member>
</class-decl>
<class-decl name='_IO_FILE' size-in-bits='1728' is-struct='yes' visibility='default' id='ec1ed955'>
<data-member access='public' layout-offset-in-bits='0'>
<var-decl name='_flags' type-id='95e97e5e' visibility='default'/>
Expand Down Expand Up @@ -167,16 +161,16 @@
<var-decl name='_offset' type-id='724e4de6' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1216'>
<var-decl name='__pad1' type-id='eaa32e2f' visibility='default'/>
<var-decl name='_codecvt' type-id='570f8c59' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1280'>
<var-decl name='__pad2' type-id='eaa32e2f' visibility='default'/>
<var-decl name='_wide_data' type-id='c65a1f29' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1344'>
<var-decl name='__pad3' type-id='eaa32e2f' visibility='default'/>
<var-decl name='_freeres_list' type-id='dca988a5' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1408'>
<var-decl name='__pad4' type-id='eaa32e2f' visibility='default'/>
<var-decl name='_freeres_buf' type-id='eaa32e2f' visibility='default'/>
</data-member>
<data-member access='public' layout-offset-in-bits='1472'>
<var-decl name='__pad5' type-id='b59d7dce' visibility='default'/>
Expand All @@ -188,19 +182,20 @@
<var-decl name='_unused2' type-id='664ac0b7' visibility='default'/>
</data-member>
</class-decl>
<typedef-decl name='__off_t' type-id='bd54fe1a' id='79989e9c'/>
<typedef-decl name='__off64_t' type-id='bd54fe1a' id='724e4de6'/>
<typedef-decl name='FILE' type-id='ec1ed955' id='aa12d1ba'/>
<pointer-type-def type-id='aa12d1ba' size-in-bits='64' id='822cd80b'/>
<pointer-type-def type-id='ec1ed955' size-in-bits='64' id='dca988a5'/>
<pointer-type-def type-id='a4036571' size-in-bits='64' id='570f8c59'/>
<pointer-type-def type-id='bb4788fa' size-in-bits='64' id='cecf4ea7'/>
<pointer-type-def type-id='010ae0b9' size-in-bits='64' id='e4c6fa61'/>
<pointer-type-def type-id='79bd3751' size-in-bits='64' id='c65a1f29'/>
<class-decl name='_IO_codecvt' is-struct='yes' visibility='default' is-declaration-only='yes' id='a4036571'/>
<class-decl name='_IO_marker' is-struct='yes' visibility='default' is-declaration-only='yes' id='010ae0b9'/>
<class-decl name='_IO_wide_data' is-struct='yes' visibility='default' is-declaration-only='yes' id='79bd3751'/>
<function-decl name='lzbe_bootenv_print' mangled-name='lzbe_bootenv_print' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='lzbe_bootenv_print'>
<parameter type-id='80f4b756' name='pool'/>
<parameter type-id='80f4b756' name='nvlist'/>
<parameter type-id='822cd80b' name='of'/>
<return type-id='95e97e5e'/>
</function-decl>
<type-decl name='void' id='48b5725f'/>
</abi-instr>
</abi-corpus>
7 changes: 7 additions & 0 deletions man/man4/zfs.4
Expand Up @@ -454,6 +454,13 @@ If we have less than this amount of free space,
most ZPL operations (e.g. write, create) will return
.Sy ENOSPC .
.
.It Sy spa_upgrade_errlog_limit Ns = Ns Sy 0 Pq uint
Limits the number of on-disk error log entries that will be converted to the
new format when enabling the
.Sy head_errlog
feature.
The default is to convert all log entries.
.
.It Sy vdev_removal_max_span Ns = Ns Sy 32768 Ns B Po 32kB Pc Pq int
During top-level vdev removal, chunks of data are copied from the vdev
which may include free space in order to trade bandwidth for IOPS.
Expand Down
11 changes: 11 additions & 0 deletions man/man7/zpool-features.7
Expand Up @@ -507,6 +507,17 @@ once either of the limit properties has been set on a dataset
and will never return to being
.Sy enabled .
.
.feature com.delphix head_errlog no
This feature enables the upgraded version of errlog, which required an on-disk
error log format change.
Now the error log of each head dataset is stored separately in the zap object
and keyed by the head id.
With this feature enabled, every dataset affected by an error block is listed
in the output of
.Nm zpool Cm status .
.Pp
\*[instant-never]
.
.feature com.delphix hole_birth no enabled_txg
This feature has/had bugs, the result of which is that, if you do a
.Nm zfs Cm send Fl i
Expand Down
7 changes: 7 additions & 0 deletions module/zcommon/zfeature_common.c
Expand Up @@ -696,6 +696,7 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);

{

static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
Expand All @@ -707,6 +708,12 @@ zpool_feature_init(void)
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
}

zfeature_register(SPA_FEATURE_HEAD_ERRLOG,
"com.delphix:head_errlog", "head_errlog",
"Support for per-dataset on-disk error logs.",
ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);

zfs_mod_list_supported_free(sfeatures);
}

Expand Down
40 changes: 40 additions & 0 deletions module/zfs/dsl_dataset.c
Expand Up @@ -3708,6 +3708,15 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)

dsl_dir_rele(odd, FTAG);
promote_rele(ddpa, FTAG);

/*
* Transfer common error blocks from old head to new head.
*/
if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG)) {
uint64_t old_head = origin_head->ds_object;
uint64_t new_head = hds->ds_object;
spa_swap_errlog(dp->dp_spa, new_head, old_head, tx);
}
}

/*
Expand Down Expand Up @@ -4924,6 +4933,37 @@ dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = ftuaa;
}

/*
 * Walk backwards from the head dataset head_ds along its chain of
 * previous snapshots and report, in *oldest_dsobj, the object number of
 * the dataset at which the walk stops.
 *
 * The walk moves from the current dataset to its ds_prev_snap_obj for as
 * long as such a previous snapshot exists and min_txg is strictly less
 * than the current dataset's ds_prev_snap_txg.  If the very first step
 * is not taken, the result is head_ds itself.
 *
 * Returns 0 on success.  If any dataset along the way cannot be held,
 * the error from dsl_dataset_hold_obj() is returned and *oldest_dsobj is
 * left untouched.
 */
int
dsl_dataset_oldest_snapshot(spa_t *spa, uint64_t head_ds, uint64_t min_txg,
    uint64_t *oldest_dsobj)
{
	dsl_pool_t *pool = spa->spa_dsl_pool;
	dsl_dataset_t *cur;
	int err;

	err = dsl_dataset_hold_obj(pool, head_ds, FTAG, &cur);
	if (err != 0)
		return (err);

	for (;;) {
		/*
		 * Copy the link out of the held dataset before it is
		 * released below.
		 */
		uint64_t older_obj = dsl_dataset_phys(cur)->ds_prev_snap_obj;
		uint64_t older_txg = dsl_dataset_phys(cur)->ds_prev_snap_txg;

		/* Stop at the end of the chain or once min_txg is reached. */
		if (older_obj == 0 || min_txg >= older_txg)
			break;

		dsl_dataset_rele(cur, FTAG);
		err = dsl_dataset_hold_obj(pool, older_obj, FTAG, &cur);
		if (err != 0)
			return (err);
	}

	*oldest_dsobj = cur->ds_object;
	dsl_dataset_rele(cur, FTAG);
	return (0);
}

#if defined(_LP64)
#define RECORDSIZE_PERM ZMOD_RW
#else
Expand Down
3 changes: 3 additions & 0 deletions module/zfs/dsl_destroy.c
Expand Up @@ -1153,6 +1153,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
dsl_dataset_rele(prev, FTAG);
}
/* Delete errlog. */
if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG))
spa_delete_dataset_errlog(dp->dp_spa, ds->ds_object, tx);
}

void
Expand Down

0 comments on commit aba0908

Please sign in to comment.