Skip to content

Commit

Permalink
xfs: Introduce per-inode 64-bit extent counters
Browse files Browse the repository at this point in the history
This commit introduces new fields in the on-disk inode format to support
64-bit data fork extent counters and 32-bit attribute fork extent
counters. The new fields will be used only when an inode has the
XFS_DIFLAG2_NREXT64 flag set. Otherwise, we continue to use the regular 32-bit
data fork extent counters and 16-bit attribute fork extent counters.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
Suggested-by: Dave Chinner <dchinner@redhat.com>
  • Loading branch information
Chandan Babu R authored and intel-lab-lkp committed Apr 6, 2022
1 parent 1a1af2f commit 28be4fd
Show file tree
Hide file tree
Showing 6 changed files with 203 additions and 29 deletions.
33 changes: 29 additions & 4 deletions fs/xfs/libxfs/xfs_format.h
Expand Up @@ -792,16 +792,41 @@ struct xfs_dinode {
__be32 di_nlink; /* number of links to file */
__be16 di_projid_lo; /* lower part of owner's project id */
__be16 di_projid_hi; /* higher part owner's project id */
__u8 di_pad[6]; /* unused, zeroed space */
__be16 di_flushiter; /* incremented on flush */
union {
/* Number of data fork extents if NREXT64 is set */
__be64 di_big_nextents;

/* Padding for V3 inodes without NREXT64 set. */
__be64 di_v3_pad;

/* Padding and inode flush counter for V2 inodes. */
struct {
__u8 di_v2_pad[6];
__be16 di_flushiter;
};
};
xfs_timestamp_t di_atime; /* time last accessed */
xfs_timestamp_t di_mtime; /* time last modified */
xfs_timestamp_t di_ctime; /* time created/inode modified */
__be64 di_size; /* number of bytes in file */
__be64 di_nblocks; /* # of direct & btree blocks used */
__be32 di_extsize; /* basic/minimum extent size for file */
__be32 di_nextents; /* number of extents in data fork */
__be16 di_anextents; /* number of extents in attribute fork*/
union {
/*
* For V2 inodes and V3 inodes without NREXT64 set, this
* is the number of data and attr fork extents.
*/
struct {
__be32 di_nextents;
__be16 di_anextents;
} __packed;

/* Number of attr fork extents if NREXT64 is set. */
struct {
__be32 di_big_anextents;
__be16 di_nrext64_pad;
} __packed;
} __packed;
__u8 di_forkoff; /* attr fork offs, <<3 for 64b align */
__s8 di_aformat; /* format of attr fork's data */
__be32 di_dmevmask; /* DMIG event mask */
Expand Down
49 changes: 45 additions & 4 deletions fs/xfs/libxfs/xfs_inode_buf.c
Expand Up @@ -279,6 +279,25 @@ xfs_inode_to_disk_ts(
return ts;
}

/*
 * Write the in-core data and attr fork extent counts into the on-disk
 * inode @to, picking the counter fields that match the inode's extent
 * counter layout.
 */
static inline void
xfs_inode_to_disk_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_dinode	*to)
{
	if (!xfs_inode_has_large_extent_counts(ip)) {
		/* Regular layout: 32-bit data fork, 16-bit attr fork counter. */
		to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
		to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
		return;
	}

	/* Large layout: 64-bit data fork, 32-bit attr fork counter. */
	to->di_big_nextents = cpu_to_be64(xfs_ifork_nextents(&ip->i_df));
	to->di_big_anextents = cpu_to_be32(xfs_ifork_nextents(ip->i_afp));

	/*
	 * The inode may be in the middle of an upgrade from the smaller
	 * counters, so the field that is no longer used must be cleared
	 * explicitly.
	 */
	to->di_nrext64_pad = cpu_to_be16(0);
}

void
xfs_inode_to_disk(
struct xfs_inode *ip,
Expand All @@ -296,7 +315,6 @@ xfs_inode_to_disk(
to->di_projid_lo = cpu_to_be16(ip->i_projid & 0xffff);
to->di_projid_hi = cpu_to_be16(ip->i_projid >> 16);

memset(to->di_pad, 0, sizeof(to->di_pad));
to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
Expand All @@ -307,8 +325,6 @@ xfs_inode_to_disk(
to->di_size = cpu_to_be64(ip->i_disk_size);
to->di_nblocks = cpu_to_be64(ip->i_nblocks);
to->di_extsize = cpu_to_be32(ip->i_extsize);
to->di_nextents = cpu_to_be32(xfs_ifork_nextents(&ip->i_df));
to->di_anextents = cpu_to_be16(xfs_ifork_nextents(ip->i_afp));
to->di_forkoff = ip->i_forkoff;
to->di_aformat = xfs_ifork_format(ip->i_afp);
to->di_flags = cpu_to_be16(ip->i_diflags);
Expand All @@ -323,11 +339,14 @@ xfs_inode_to_disk(
to->di_lsn = cpu_to_be64(lsn);
memset(to->di_pad2, 0, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
to->di_flushiter = 0;
to->di_v3_pad = 0;
} else {
to->di_version = 2;
to->di_flushiter = cpu_to_be16(ip->i_flushiter);
memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
}

xfs_inode_to_disk_iext_counters(ip, to);
}

static xfs_failaddr_t
Expand Down Expand Up @@ -398,6 +417,24 @@ xfs_dinode_verify_forkoff(
return NULL;
}

/*
 * Check that the on-disk inode's extent counter layout is consistent.
 *
 * Returns NULL when the checks pass, otherwise the address of the failing
 * check.
 */
static xfs_failaddr_t
xfs_dinode_verify_nrext64(
	struct xfs_mount	*mp,
	struct xfs_dinode	*dip)
{
	if (!xfs_dinode_has_large_extent_counts(dip)) {
		/*
		 * Without large counters, a V3 (or later) inode must have
		 * the 64-bit padding field zeroed.
		 */
		if (dip->di_version >= 3 && dip->di_v3_pad != 0)
			return __this_address;
		return NULL;
	}

	/* The inode claims large counters; the mount has to agree. */
	if (!xfs_has_large_extent_counts(mp))
		return __this_address;

	/* The padding next to the 32-bit attr fork counter must be zero. */
	if (dip->di_nrext64_pad != 0)
		return __this_address;

	return NULL;
}

xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
Expand Down Expand Up @@ -442,6 +479,10 @@ xfs_dinode_verify(
if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
return __this_address;

fa = xfs_dinode_verify_nrext64(mp, dip);
if (fa)
return fa;

nextents = xfs_dfork_data_extents(dip);
naextents = xfs_dfork_attr_extents(dip);
nblocks = be64_to_cpu(dip->di_nblocks);
Expand Down
6 changes: 6 additions & 0 deletions fs/xfs/libxfs/xfs_inode_fork.h
Expand Up @@ -158,13 +158,19 @@ static inline xfs_extnum_t
xfs_dfork_data_extents(
struct xfs_dinode *dip)
{
if (xfs_dinode_has_large_extent_counts(dip))
return be64_to_cpu(dip->di_big_nextents);

return be32_to_cpu(dip->di_nextents);
}

/*
 * Return the attr fork extent count stored in on-disk inode @dip, reading
 * the 16-bit field for the regular layout and the 32-bit field when the
 * inode uses large extent counters.
 */
static inline xfs_extnum_t
xfs_dfork_attr_extents(
	struct xfs_dinode	*dip)
{
	if (!xfs_dinode_has_large_extent_counts(dip))
		return be16_to_cpu(dip->di_anextents);

	return be32_to_cpu(dip->di_big_anextents);
}

Expand Down
33 changes: 29 additions & 4 deletions fs/xfs/libxfs/xfs_log_format.h
Expand Up @@ -388,16 +388,41 @@ struct xfs_log_dinode {
uint32_t di_nlink; /* number of links to file */
uint16_t di_projid_lo; /* lower part of owner's project id */
uint16_t di_projid_hi; /* higher part of owner's project id */
uint8_t di_pad[6]; /* unused, zeroed space */
uint16_t di_flushiter; /* incremented on flush */
union {
/* Number of data fork extents if NREXT64 is set */
uint64_t di_big_nextents;

/* Padding for V3 inodes without NREXT64 set. */
uint64_t di_v3_pad;

/* Padding and inode flush counter for V2 inodes. */
struct {
uint8_t di_v2_pad[6]; /* V2 inode zeroed space */
uint16_t di_flushiter; /* V2 inode incremented on flush */
};
};
xfs_log_timestamp_t di_atime; /* time last accessed */
xfs_log_timestamp_t di_mtime; /* time last modified */
xfs_log_timestamp_t di_ctime; /* time created/inode modified */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
uint32_t di_nextents; /* number of extents in data fork */
uint16_t di_anextents; /* number of extents in attribute fork*/
union {
/*
* For V2 inodes and V3 inodes without NREXT64 set, this
* is the number of data and attr fork extents.
*/
struct {
uint32_t di_nextents;
uint16_t di_anextents;
} __packed;

/* Number of attr fork extents if NREXT64 is set. */
struct {
uint32_t di_big_anextents;
uint16_t di_nrext64_pad;
} __packed;
} __packed;
uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
int8_t di_aformat; /* format of attr fork's data */
uint32_t di_dmevmask; /* DMIG event mask */
Expand Down
23 changes: 19 additions & 4 deletions fs/xfs/xfs_inode_item.c
Expand Up @@ -359,6 +359,21 @@ xfs_copy_dm_fields_to_log_dinode(
}
}

/*
 * Record the in-core data and attr fork extent counts in the log dinode
 * @to, using the counter fields that match the inode's extent counter
 * layout. Log dinode fields are stored in CPU byte order, so no endian
 * conversion is done here.
 */
static inline void
xfs_inode_to_log_dinode_iext_counters(
	struct xfs_inode	*ip,
	struct xfs_log_dinode	*to)
{
	if (!xfs_inode_has_large_extent_counts(ip)) {
		/* Regular layout: 32-bit data fork, 16-bit attr fork counter. */
		to->di_nextents = xfs_ifork_nextents(&ip->i_df);
		to->di_anextents = xfs_ifork_nextents(ip->i_afp);
		return;
	}

	/* Large layout: 64-bit data fork, 32-bit attr fork counter. */
	to->di_big_nextents = xfs_ifork_nextents(&ip->i_df);
	to->di_big_anextents = xfs_ifork_nextents(ip->i_afp);

	/* The padding that follows the attr fork counter is always zeroed. */
	to->di_nrext64_pad = 0;
}

static void
xfs_inode_to_log_dinode(
struct xfs_inode *ip,
Expand All @@ -374,7 +389,6 @@ xfs_inode_to_log_dinode(
to->di_projid_lo = ip->i_projid & 0xffff;
to->di_projid_hi = ip->i_projid >> 16;

memset(to->di_pad, 0, sizeof(to->di_pad));
memset(to->di_pad3, 0, sizeof(to->di_pad3));
to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
Expand All @@ -386,8 +400,6 @@ xfs_inode_to_log_dinode(
to->di_size = ip->i_disk_size;
to->di_nblocks = ip->i_nblocks;
to->di_extsize = ip->i_extsize;
to->di_nextents = xfs_ifork_nextents(&ip->i_df);
to->di_anextents = xfs_ifork_nextents(ip->i_afp);
to->di_forkoff = ip->i_forkoff;
to->di_aformat = xfs_ifork_format(ip->i_afp);
to->di_flags = ip->i_diflags;
Expand All @@ -407,11 +419,14 @@ xfs_inode_to_log_dinode(
to->di_lsn = lsn;
memset(to->di_pad2, 0, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
to->di_flushiter = 0;
to->di_v3_pad = 0;
} else {
to->di_version = 2;
to->di_flushiter = ip->i_flushiter;
memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
}

xfs_inode_to_log_dinode_iext_counters(ip, to);
}

/*
Expand Down
88 changes: 75 additions & 13 deletions fs/xfs/xfs_inode_item_recover.c
Expand Up @@ -149,6 +149,22 @@ static inline bool xfs_log_dinode_has_large_extent_counts(
(ld->di_flags2 & XFS_DIFLAG2_NREXT64);
}

/*
 * Copy the extent counters from the log dinode @from into the on-disk
 * inode @to, converting each value to big-endian. The layout is chosen
 * from the log dinode itself.
 */
static inline void
xfs_log_dinode_to_disk_iext_counters(
	struct xfs_log_dinode	*from,
	struct xfs_dinode	*to)
{
	if (!xfs_log_dinode_has_large_extent_counts(from)) {
		/* Regular layout: 32-bit data fork, 16-bit attr fork counter. */
		to->di_nextents = cpu_to_be32(from->di_nextents);
		to->di_anextents = cpu_to_be16(from->di_anextents);
		return;
	}

	/*
	 * Large layout: 64-bit data fork, 32-bit attr fork counter. The
	 * padding is carried over from the log dinode rather than being
	 * forced to zero here.
	 */
	to->di_big_nextents = cpu_to_be64(from->di_big_nextents);
	to->di_big_anextents = cpu_to_be32(from->di_big_anextents);
	to->di_nrext64_pad = cpu_to_be16(from->di_nrext64_pad);
}

STATIC void
xfs_log_dinode_to_disk(
struct xfs_log_dinode *from,
Expand All @@ -165,7 +181,6 @@ xfs_log_dinode_to_disk(
to->di_nlink = cpu_to_be32(from->di_nlink);
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));

to->di_atime = xfs_log_dinode_to_disk_ts(from, from->di_atime);
to->di_mtime = xfs_log_dinode_to_disk_ts(from, from->di_mtime);
Expand All @@ -174,8 +189,6 @@ xfs_log_dinode_to_disk(
to->di_size = cpu_to_be64(from->di_size);
to->di_nblocks = cpu_to_be64(from->di_nblocks);
to->di_extsize = cpu_to_be32(from->di_extsize);
to->di_nextents = cpu_to_be32(from->di_nextents);
to->di_anextents = cpu_to_be16(from->di_anextents);
to->di_forkoff = from->di_forkoff;
to->di_aformat = from->di_aformat;
to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
Expand All @@ -193,10 +206,64 @@ xfs_log_dinode_to_disk(
to->di_lsn = cpu_to_be64(lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
to->di_v3_pad = from->di_v3_pad;
} else {
to->di_flushiter = cpu_to_be16(from->di_flushiter);
memcpy(to->di_v2_pad, from->di_v2_pad, sizeof(to->di_v2_pad));
}

xfs_log_dinode_to_disk_iext_counters(from, to);
}

/*
 * Sanity check the extent counters of a log dinode.
 *
 * The padding that belongs to the dinode's counter layout must be zero,
 * a dinode with large counters requires a mount with large extent counts,
 * and the data plus attr fork extent counts must not exceed the dinode's
 * block count.
 *
 * Returns 0 if the counters look sane. Otherwise a corruption report and
 * an alert are emitted and -EFSCORRUPTED is returned.
 */
STATIC int
xlog_dinode_verify_extent_counts(
	struct xfs_mount	*mp,
	struct xfs_log_dinode	*ldip)
{
	xfs_extnum_t		data_extents;
	xfs_aextnum_t		attr_extents;

	if (!xfs_log_dinode_has_large_extent_counts(ldip)) {
		/*
		 * Regular layout: on a V3 dinode the 64-bit field is pure
		 * padding and has to be zero.
		 */
		if (ldip->di_version == 3 && ldip->di_v3_pad != 0) {
			XFS_CORRUPTION_ERROR(
				"Bad log dinode di_v3_pad",
				XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
			xfs_alert(mp,
				"Bad inode 0x%llx, di_v3_pad 0x%llx",
				ldip->di_ino, ldip->di_v3_pad);
			return -EFSCORRUPTED;
		}

		data_extents = ldip->di_nextents;
		attr_extents = ldip->di_anextents;
	} else {
		/*
		 * Large layout: the mount must have large extent counts and
		 * the padding after the attr fork counter must be zero.
		 */
		if (!xfs_has_large_extent_counts(mp) ||
		    ldip->di_nrext64_pad != 0) {
			XFS_CORRUPTION_ERROR(
				"Bad log dinode large extent count format",
				XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
			xfs_alert(mp,
				"Bad inode 0x%llx, large extent counts %d, padding 0x%x",
				ldip->di_ino, xfs_has_large_extent_counts(mp),
				ldip->di_nrext64_pad);
			return -EFSCORRUPTED;
		}

		data_extents = ldip->di_big_nextents;
		attr_extents = ldip->di_big_anextents;
	}

	/* The two forks together cannot hold more extents than blocks. */
	if (unlikely(data_extents + attr_extents > ldip->di_nblocks)) {
		XFS_CORRUPTION_ERROR("Bad log dinode extent counts",
				XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
		xfs_alert(mp,
			"Bad inode 0x%llx, large extent counts %d, nextents 0x%llx, anextents 0x%x, nblocks 0x%llx",
			ldip->di_ino, xfs_has_large_extent_counts(mp),
			data_extents, attr_extents, ldip->di_nblocks);
		return -EFSCORRUPTED;
	}

	return 0;
}

STATIC int
Expand Down Expand Up @@ -347,16 +414,11 @@ xlog_recover_inode_commit_pass2(
goto out_release;
}
}
if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
XFS_CORRUPTION_ERROR("Bad log dinode extent counts",
XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
xfs_alert(mp,
"Bad inode 0x%llx, nextents 0x%x, anextents 0x%x, nblocks 0x%llx",
in_f->ilf_ino, ldip->di_nextents, ldip->di_anextents,
ldip->di_nblocks);
error = -EFSCORRUPTED;

error = xlog_dinode_verify_extent_counts(mp, ldip);
if (error)
goto out_release;
}

if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
XFS_CORRUPTION_ERROR("Bad log dinode fork offset",
XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
Expand Down

0 comments on commit 28be4fd

Please sign in to comment.