Skip to content

Commit 8016e29

Browse files
harshadjs authored and tytso committed
ext4: fast commit recovery path
This patch adds fast commit recovery path support for the Ext4 file system. We add several helper functions that are similar in spirit to the e2fsprogs journal recovery path handlers. Examples of such functions include a simple block allocator, an idempotent block bitmap update function, etc. Using these routines and the fast commit log in the fast commit area, the recovery path (ext4_fc_replay()) performs fast commit log recovery. Reported-by: kernel test robot <lkp@intel.com> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com> Link: https://lore.kernel.org/r/20201015203802.3597742-8-harshadshirwadkar@gmail.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
1 parent 5b849b5 commit 8016e29

File tree

14 files changed

+1821
-131
lines changed

14 files changed

+1821
-131
lines changed

fs/ext4/balloc.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,12 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
368368
struct buffer_head *bh)
369369
{
370370
ext4_fsblk_t blk;
371-
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
371+
struct ext4_group_info *grp;
372+
373+
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
374+
return 0;
375+
376+
grp = ext4_get_group_info(sb, block_group);
372377

373378
if (buffer_verified(bh))
374379
return 0;

fs/ext4/ext4.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,7 @@ struct ext4_inode_info {
11701170
#define EXT4_FC_COMMITTING 0x0010 /* File system undergoing a fast
11711171
* commit.
11721172
*/
1173+
#define EXT4_FC_REPLAY 0x0020 /* Fast commit replay ongoing */
11731174

11741175
/*
11751176
* Misc. filesystem flags
@@ -1666,6 +1667,10 @@ struct ext4_sb_info {
16661667
struct buffer_head *s_fc_bh;
16671668
struct ext4_fc_stats s_fc_stats;
16681669
u64 s_fc_avg_commit_time;
1670+
#ifdef CONFIG_EXT4_DEBUG
1671+
int s_fc_debug_max_replay;
1672+
#endif
1673+
struct ext4_fc_replay_state s_fc_replay_state;
16691674
};
16701675

16711676
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2708,6 +2713,7 @@ extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
27082713
struct dx_hash_info *hinfo);
27092714

27102715
/* ialloc.c */
2716+
extern int ext4_mark_inode_used(struct super_block *sb, int ino);
27112717
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
27122718
const struct qstr *qstr, __u32 goal,
27132719
uid_t *owner, __u32 i_flags,
@@ -2749,6 +2755,8 @@ void ext4_fc_stop_ineligible(struct super_block *sb);
27492755
void ext4_fc_start_update(struct inode *inode);
27502756
void ext4_fc_stop_update(struct inode *inode);
27512757
void ext4_fc_del(struct inode *inode);
2758+
bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block);
2759+
void ext4_fc_replay_cleanup(struct super_block *sb);
27522760
int ext4_fc_commit(journal_t *journal, tid_t commit_tid);
27532761
int __init ext4_fc_init_dentry_cache(void);
27542762

@@ -2781,8 +2789,12 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
27812789
ext4_fsblk_t block, unsigned long count);
27822790
extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
27832791
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
2792+
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
2793+
int len, int state);
27842794

27852795
/* inode.c */
2796+
void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
2797+
struct ext4_inode_info *ei);
27862798
int ext4_inode_is_fast_symlink(struct inode *inode);
27872799
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
27882800
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
@@ -2829,6 +2841,8 @@ extern int ext4_sync_inode(handle_t *, struct inode *);
28292841
extern void ext4_dirty_inode(struct inode *, int);
28302842
extern int ext4_change_inode_journal_flag(struct inode *, int);
28312843
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
2844+
extern int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino,
2845+
struct ext4_iloc *iloc);
28322846
extern int ext4_inode_attach_jinode(struct inode *inode);
28332847
extern int ext4_can_truncate(struct inode *inode);
28342848
extern int ext4_truncate(struct inode *);
@@ -2862,12 +2876,15 @@ extern int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
28622876
/* ioctl.c */
28632877
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
28642878
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
2879+
extern void ext4_reset_inode_seed(struct inode *inode);
28652880

28662881
/* migrate.c */
28672882
extern int ext4_ext_migrate(struct inode *);
28682883
extern int ext4_ind_migrate(struct inode *inode);
28692884

28702885
/* namei.c */
2886+
extern int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2887+
struct inode *inode);
28712888
extern int ext4_dirblock_csum_verify(struct inode *inode,
28722889
struct buffer_head *bh);
28732890
extern int ext4_orphan_add(handle_t *, struct inode *);
@@ -3447,6 +3464,10 @@ extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,
34473464
extern int ext4_ci_compare(const struct inode *parent,
34483465
const struct qstr *fname,
34493466
const struct qstr *entry, bool quick);
3467+
extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
3468+
struct inode *inode);
3469+
extern int __ext4_link(struct inode *dir, struct inode *inode,
3470+
struct dentry *dentry);
34503471

34513472
#define S_SHIFT 12
34523473
static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
@@ -3547,6 +3568,11 @@ extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);
35473568
extern int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
35483569
int check_cred, int restart_cred,
35493570
int revoke_cred);
3571+
extern void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end);
3572+
extern int ext4_ext_replay_set_iblocks(struct inode *inode);
3573+
extern int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
3574+
int len, int unwritten, ext4_fsblk_t pblk);
3575+
extern int ext4_ext_clear_bb(struct inode *inode);
35503576

35513577

35523578
/* move_extent.c */

fs/ext4/ext4_jbd2.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
100100
return ERR_PTR(err);
101101

102102
journal = EXT4_SB(sb)->s_journal;
103-
if (!journal)
103+
if (!journal || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
104104
return ext4_get_nojournal();
105105
return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
106106
GFP_NOFS, type, line);

fs/ext4/extents.c

Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5804,3 +5804,264 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
58045804

58055805
return err ? err : mapped;
58065806
}
5807+
5808+
/*
5809+
* Updates physical block address and unwritten status of extent
5810+
* starting at lblk start and of len. If such an extent doesn't exist,
5811+
* this function splits the extent tree appropriately to create an
5812+
* extent like this. This function is called in the fast commit
5813+
* replay path. Returns 0 on success and error on failure.
5814+
*/
5815+
int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
5816+
int len, int unwritten, ext4_fsblk_t pblk)
5817+
{
5818+
struct ext4_ext_path *path = NULL, *ppath;
5819+
struct ext4_extent *ex;
5820+
int ret;
5821+
5822+
path = ext4_find_extent(inode, start, NULL, 0);
5823+
if (!path)
5824+
return -EINVAL;
5825+
ex = path[path->p_depth].p_ext;
5826+
if (!ex) {
5827+
ret = -EFSCORRUPTED;
5828+
goto out;
5829+
}
5830+
5831+
if (le32_to_cpu(ex->ee_block) != start ||
5832+
ext4_ext_get_actual_len(ex) != len) {
5833+
/* We need to split this extent to match our extent first */
5834+
ppath = path;
5835+
down_write(&EXT4_I(inode)->i_data_sem);
5836+
ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
5837+
up_write(&EXT4_I(inode)->i_data_sem);
5838+
if (ret)
5839+
goto out;
5840+
kfree(path);
5841+
path = ext4_find_extent(inode, start, NULL, 0);
5842+
if (IS_ERR(path))
5843+
return -1;
5844+
ppath = path;
5845+
ex = path[path->p_depth].p_ext;
5846+
WARN_ON(le32_to_cpu(ex->ee_block) != start);
5847+
if (ext4_ext_get_actual_len(ex) != len) {
5848+
down_write(&EXT4_I(inode)->i_data_sem);
5849+
ret = ext4_force_split_extent_at(NULL, inode, &ppath,
5850+
start + len, 1);
5851+
up_write(&EXT4_I(inode)->i_data_sem);
5852+
if (ret)
5853+
goto out;
5854+
kfree(path);
5855+
path = ext4_find_extent(inode, start, NULL, 0);
5856+
if (IS_ERR(path))
5857+
return -EINVAL;
5858+
ex = path[path->p_depth].p_ext;
5859+
}
5860+
}
5861+
if (unwritten)
5862+
ext4_ext_mark_unwritten(ex);
5863+
else
5864+
ext4_ext_mark_initialized(ex);
5865+
ext4_ext_store_pblock(ex, pblk);
5866+
down_write(&EXT4_I(inode)->i_data_sem);
5867+
ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5868+
up_write(&EXT4_I(inode)->i_data_sem);
5869+
out:
5870+
ext4_ext_drop_refs(path);
5871+
kfree(path);
5872+
ext4_mark_inode_dirty(NULL, inode);
5873+
return ret;
5874+
}
5875+
5876+
/* Try to shrink the extent tree */
5877+
void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
5878+
{
5879+
struct ext4_ext_path *path = NULL;
5880+
struct ext4_extent *ex;
5881+
ext4_lblk_t old_cur, cur = 0;
5882+
5883+
while (cur < end) {
5884+
path = ext4_find_extent(inode, cur, NULL, 0);
5885+
if (IS_ERR(path))
5886+
return;
5887+
ex = path[path->p_depth].p_ext;
5888+
if (!ex) {
5889+
ext4_ext_drop_refs(path);
5890+
kfree(path);
5891+
ext4_mark_inode_dirty(NULL, inode);
5892+
return;
5893+
}
5894+
old_cur = cur;
5895+
cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5896+
if (cur <= old_cur)
5897+
cur = old_cur + 1;
5898+
ext4_ext_try_to_merge(NULL, inode, path, ex);
5899+
down_write(&EXT4_I(inode)->i_data_sem);
5900+
ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5901+
up_write(&EXT4_I(inode)->i_data_sem);
5902+
ext4_mark_inode_dirty(NULL, inode);
5903+
ext4_ext_drop_refs(path);
5904+
kfree(path);
5905+
}
5906+
}
5907+
5908+
/* Check if *cur is a hole and if it is, skip it */
5909+
static void skip_hole(struct inode *inode, ext4_lblk_t *cur)
5910+
{
5911+
int ret;
5912+
struct ext4_map_blocks map;
5913+
5914+
map.m_lblk = *cur;
5915+
map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
5916+
5917+
ret = ext4_map_blocks(NULL, inode, &map, 0);
5918+
if (ret != 0)
5919+
return;
5920+
*cur = *cur + map.m_len;
5921+
}
5922+
5923+
/* Count number of blocks used by this inode and update i_blocks */
5924+
int ext4_ext_replay_set_iblocks(struct inode *inode)
5925+
{
5926+
struct ext4_ext_path *path = NULL, *path2 = NULL;
5927+
struct ext4_extent *ex;
5928+
ext4_lblk_t cur = 0, end;
5929+
int numblks = 0, i, ret = 0;
5930+
ext4_fsblk_t cmp1, cmp2;
5931+
struct ext4_map_blocks map;
5932+
5933+
/* Determin the size of the file first */
5934+
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5935+
EXT4_EX_NOCACHE);
5936+
if (IS_ERR(path))
5937+
return PTR_ERR(path);
5938+
ex = path[path->p_depth].p_ext;
5939+
if (!ex) {
5940+
ext4_ext_drop_refs(path);
5941+
kfree(path);
5942+
goto out;
5943+
}
5944+
end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5945+
ext4_ext_drop_refs(path);
5946+
kfree(path);
5947+
5948+
/* Count the number of data blocks */
5949+
cur = 0;
5950+
while (cur < end) {
5951+
map.m_lblk = cur;
5952+
map.m_len = end - cur;
5953+
ret = ext4_map_blocks(NULL, inode, &map, 0);
5954+
if (ret < 0)
5955+
break;
5956+
if (ret > 0)
5957+
numblks += ret;
5958+
cur = cur + map.m_len;
5959+
}
5960+
5961+
/*
5962+
* Count the number of extent tree blocks. We do it by looking up
5963+
* two successive extents and determining the difference between
5964+
* their paths. When path is different for 2 successive extents
5965+
* we compare the blocks in the path at each level and increment
5966+
* iblocks by total number of differences found.
5967+
*/
5968+
cur = 0;
5969+
skip_hole(inode, &cur);
5970+
path = ext4_find_extent(inode, cur, NULL, 0);
5971+
if (IS_ERR(path))
5972+
goto out;
5973+
numblks += path->p_depth;
5974+
ext4_ext_drop_refs(path);
5975+
kfree(path);
5976+
while (cur < end) {
5977+
path = ext4_find_extent(inode, cur, NULL, 0);
5978+
if (IS_ERR(path))
5979+
break;
5980+
ex = path[path->p_depth].p_ext;
5981+
if (!ex) {
5982+
ext4_ext_drop_refs(path);
5983+
kfree(path);
5984+
return 0;
5985+
}
5986+
cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
5987+
ext4_ext_get_actual_len(ex));
5988+
skip_hole(inode, &cur);
5989+
5990+
path2 = ext4_find_extent(inode, cur, NULL, 0);
5991+
if (IS_ERR(path2)) {
5992+
ext4_ext_drop_refs(path);
5993+
kfree(path);
5994+
break;
5995+
}
5996+
ex = path2[path2->p_depth].p_ext;
5997+
for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
5998+
cmp1 = cmp2 = 0;
5999+
if (i <= path->p_depth)
6000+
cmp1 = path[i].p_bh ?
6001+
path[i].p_bh->b_blocknr : 0;
6002+
if (i <= path2->p_depth)
6003+
cmp2 = path2[i].p_bh ?
6004+
path2[i].p_bh->b_blocknr : 0;
6005+
if (cmp1 != cmp2 && cmp2 != 0)
6006+
numblks++;
6007+
}
6008+
ext4_ext_drop_refs(path);
6009+
ext4_ext_drop_refs(path2);
6010+
kfree(path);
6011+
kfree(path2);
6012+
}
6013+
6014+
out:
6015+
inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
6016+
ext4_mark_inode_dirty(NULL, inode);
6017+
return 0;
6018+
}
6019+
6020+
int ext4_ext_clear_bb(struct inode *inode)
6021+
{
6022+
struct ext4_ext_path *path = NULL;
6023+
struct ext4_extent *ex;
6024+
ext4_lblk_t cur = 0, end;
6025+
int j, ret = 0;
6026+
struct ext4_map_blocks map;
6027+
6028+
/* Determin the size of the file first */
6029+
path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
6030+
EXT4_EX_NOCACHE);
6031+
if (IS_ERR(path))
6032+
return PTR_ERR(path);
6033+
ex = path[path->p_depth].p_ext;
6034+
if (!ex) {
6035+
ext4_ext_drop_refs(path);
6036+
kfree(path);
6037+
return 0;
6038+
}
6039+
end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6040+
ext4_ext_drop_refs(path);
6041+
kfree(path);
6042+
6043+
cur = 0;
6044+
while (cur < end) {
6045+
map.m_lblk = cur;
6046+
map.m_len = end - cur;
6047+
ret = ext4_map_blocks(NULL, inode, &map, 0);
6048+
if (ret < 0)
6049+
break;
6050+
if (ret > 0) {
6051+
path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
6052+
if (!IS_ERR_OR_NULL(path)) {
6053+
for (j = 0; j < path->p_depth; j++) {
6054+
6055+
ext4_mb_mark_bb(inode->i_sb,
6056+
path[j].p_block, 1, 0);
6057+
}
6058+
ext4_ext_drop_refs(path);
6059+
kfree(path);
6060+
}
6061+
ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
6062+
}
6063+
cur = cur + map.m_len;
6064+
}
6065+
6066+
return 0;
6067+
}

0 commit comments

Comments
 (0)