Skip to content

Commit

Permalink
ext4: snapshot file
Browse files Browse the repository at this point in the history
Ext4 snapshot implementation as a file inside the file system.
Snapshot files are marked with the snapfile flag and have special
read-only address space ops.


Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
  • Loading branch information
Amir Goldstein committed Jun 6, 2011
1 parent 7f951ea commit 9d461a4
Show file tree
Hide file tree
Showing 7 changed files with 446 additions and 5 deletions.
70 changes: 67 additions & 3 deletions fs/ext4/ext4.h
Expand Up @@ -348,17 +348,23 @@ struct flex_groups {
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define EXT4_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
/* snapshot persistent flags */
#define EXT4_SNAPFILE_FL 0x01000000 /* snapshot file */
#define EXT4_SNAPFILE_DELETED_FL 0x04000000 /* snapshot is deleted */
#define EXT4_SNAPFILE_SHRUNK_FL 0x08000000 /* snapshot was shrunk */
/* end of snapshot flags */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */

#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */

#define EXT4_FL_USER_VISIBLE 0x014BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x014B80FF /* User modifiable flags */

/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL | EXT4_SNAPFILE_FL)

/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
Expand Down Expand Up @@ -405,6 +411,9 @@ enum {
EXT4_INODE_EXTENTS = 19, /* Inode uses extents */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
EXT4_INODE_EOFBLOCKS = 22, /* Blocks allocated beyond EOF */
EXT4_INODE_SNAPFILE = 24, /* Snapshot file/dir */
EXT4_INODE_SNAPFILE_DELETED = 26, /* Snapshot is deleted */
EXT4_INODE_SNAPFILE_SHRUNK = 27, /* Snapshot was shrunk */
EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
};

Expand Down Expand Up @@ -451,6 +460,9 @@ static inline void ext4_check_flag_values(void)
CHECK_FLAG_VALUE(EXTENTS);
CHECK_FLAG_VALUE(EA_INODE);
CHECK_FLAG_VALUE(EOFBLOCKS);
CHECK_FLAG_VALUE(SNAPFILE);
CHECK_FLAG_VALUE(SNAPFILE_DELETED);
CHECK_FLAG_VALUE(SNAPFILE_SHRUNK);
CHECK_FLAG_VALUE(RESERVED);
}

Expand Down Expand Up @@ -789,6 +801,14 @@ struct ext4_inode_info {

struct list_head i_orphan; /* unlinked but open inodes */

/*
* In-memory snapshot list overrides i_orphan to link snapshot inodes,
* but unlike the real orphan list, the next snapshot inode number
* is stored in i_next_snapshot_ino and not in i_dtime
*/
#define i_snaplist i_orphan
__u32 i_next_snapshot_ino;

/*
* i_disksize keeps track of what the inode size is ON DISK, not
* in memory. During truncate, i_size is set to the new size by
Expand Down Expand Up @@ -1145,6 +1165,8 @@ struct ext4_sb_info {
u32 s_max_batch_time;
u32 s_min_batch_time;
struct block_device *journal_bdev;
struct mutex s_snapshot_mutex; /* protects 2 fields below: */
struct inode *s_active_snapshot; /* [ s_snapshot_mutex ] */
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
Expand Down Expand Up @@ -1261,6 +1283,24 @@ enum {
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
EXT4_STATE_LAST
};

/*
* Snapshot dynamic state flags (starting at offset EXT4_STATE_LAST)
* These flags are read by GETSNAPFLAGS ioctl and interpreted by the lssnap
* utility. Do not change these values.
*/
enum {
EXT4_SNAPSTATE_LIST = 0, /* snapshot is on list (S) */
EXT4_SNAPSTATE_ENABLED = 1, /* snapshot is enabled (n) */
EXT4_SNAPSTATE_ACTIVE = 2, /* snapshot is active (a) */
EXT4_SNAPSTATE_INUSE = 3, /* snapshot is in-use (p) */
EXT4_SNAPSTATE_DELETED = 4, /* snapshot is deleted (s) */
EXT4_SNAPSTATE_SHRUNK = 5, /* snapshot was shrunk (h) */
EXT4_SNAPSTATE_OPEN = 6, /* snapshot is mounted (o) */
EXT4_SNAPSTATE_TAGGED = 7, /* snapshot is tagged (t) */
/*
 * Not a flag: the number of snapshot state bits defined above.
 * It is passed as the bit count to EXT4_INODE_FLAGS_FNS() to build
 * the mask used by ext4_get_snapstate_flags().
 */
EXT4_SNAPSTATE_LAST
};

#define EXT4_INODE_BIT_FNS(name, field, offset) \
Expand All @@ -1277,16 +1317,29 @@ static inline void ext4_clear_inode_##name(struct inode *inode, int bit) \
clear_bit(bit + (offset), &EXT4_I(inode)->i_##field); \
}

/*
 * EXT4_INODE_FLAGS_FNS(name, field, offset, count)
 *
 * Generates ext4_get_<name>_flags(inode), which returns a group of
 * @count adjacent bits of EXT4_I(inode)->i_<field>, starting at bit
 * @offset, shifted down so that the first bit of the group is bit 0.
 *
 * The result is returned as an int, so @count is expected to be smaller
 * than the width of int.
 *
 * Note: the last line of the definition must NOT end with a backslash,
 * otherwise the macro would silently absorb whatever line follows it.
 */
#define EXT4_INODE_FLAGS_FNS(name, field, offset, count)		\
static inline int ext4_get_##name##_flags(struct inode *inode)		\
{									\
	return (EXT4_I(inode)->i_##field >> (offset)) &			\
		((1UL << (count)) - 1);					\
}

EXT4_INODE_BIT_FNS(flag, flags, 0)
#if (BITS_PER_LONG < 64)
EXT4_INODE_BIT_FNS(state, state_flags, 0)
EXT4_INODE_BIT_FNS(snapstate, state_flags, EXT4_STATE_LAST)
EXT4_INODE_FLAGS_FNS(snapstate, state_flags, EXT4_STATE_LAST, \
EXT4_SNAPSTATE_LAST)

/*
 * Reset all dynamic state bits of @ei in one go by zeroing i_state_flags.
 * This is the variant built when BITS_PER_LONG < 64, where the state bits
 * (and the snapshot state bits that follow them) live in i_state_flags.
 */
static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
{
	ei->i_state_flags = 0;
}
#else
EXT4_INODE_BIT_FNS(state, flags, 32)
EXT4_INODE_BIT_FNS(snapstate, flags, 32 + EXT4_STATE_LAST)
EXT4_INODE_FLAGS_FNS(snapstate, flags, 32 + EXT4_STATE_LAST, \
EXT4_SNAPSTATE_LAST)

static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
{
Expand All @@ -1301,6 +1354,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#endif

#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
#define NEXT_SNAPSHOT(inode) (EXT4_I(inode)->i_next_snapshot_ino)

/*
* Codes for operating systems
Expand Down Expand Up @@ -1783,6 +1837,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);

/* snapshot_inode.c */
extern int ext4_snapshot_readpage(struct file *file, struct page *page);

/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
Expand Down Expand Up @@ -2006,6 +2064,12 @@ struct ext4_group_info {
void *bb_bitmap;
#endif
struct rw_semaphore alloc_sem;
/*
* bg_cow_bitmap is reset to zero at mount time and on every snapshot
* take, and is initialized lazily on the first block group write access.
* bg_cow_bitmap is protected by sb_bgl_lock().
*/
unsigned long bg_cow_bitmap; /* COW bitmap cache */
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
* regions, index is order.
* bb_counters[3] = 5 means
Expand Down
2 changes: 2 additions & 0 deletions fs/ext4/ext4_jbd2.h
Expand Up @@ -369,6 +369,8 @@ static inline int ext4_snapshot_should_move_data(struct inode *inode)
return 0;
if (EXT4_JOURNAL(inode) == NULL)
return 0;
if (ext4_snapshot_excluded(inode))
return 0;
/* when a data block is journaled, it is already COWed as metadata */
if (ext4_should_journal_data(inode))
return 0;
Expand Down
8 changes: 6 additions & 2 deletions fs/ext4/ialloc.c
Expand Up @@ -1049,8 +1049,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
goto fail_free_drop;

if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
/* set extent flag only for directory, file and normal symlink*/
if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
/*
* Set extent flag only for non-snapshot file, directory
* and normal symlink
*/
if ((S_ISREG(mode) && !ext4_snapshot_file(inode)) ||
S_ISDIR(mode) || S_ISLNK(mode)) {
ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_ext_tree_init(handle, inode);
}
Expand Down
29 changes: 29 additions & 0 deletions fs/ext4/inode.c
Expand Up @@ -4139,9 +4139,38 @@ static const struct address_space_operations ext4_da_aops = {
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
/*
 * ->writepage handler that refuses to write the page.
 *
 * The page is unlocked and -EIO is returned; @wbc is not used.
 */
static int ext4_no_writepage(struct page *page,
			     struct writeback_control *wbc)
{
	const int err = -EIO;

	unlock_page(page);
	return err;
}

/*
* Snapshot file page operations:
* always readpage (by page) with buffer tracked read.
* user cannot writepage or direct_IO to a snapshot file.
*
* snapshot file pages are written to disk after a COW operation in "ordered"
* mode and are never changed after that again, so there is no data corruption
* risk when using "ordered" mode on snapshot files.
* some snapshot data pages are written to disk by sync_dirty_buffer(), namely
* the snapshot COW bitmaps and a few initial blocks copied on snapshot_take().
*/
/*
 * Address space operations installed on snapshot file mappings.
 * Only the operations listed here are provided; every other member of
 * the table is left unset.
 */
static const struct address_space_operations ext4_snapfile_aops = {
/* reads go through the regular ext4 read paths */
.readpage = ext4_readpage,
.readpages = ext4_readpages,
/* writeback is refused: ext4_no_writepage() unlocks the page and returns -EIO */
.writepage = ext4_no_writepage,
.bmap = ext4_bmap,
.invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage,
};

void ext4_set_aops(struct inode *inode)
{
if (ext4_snapshot_file(inode))
inode->i_mapping->a_ops = &ext4_snapfile_aops;
else
if (ext4_should_order_data(inode) &&
test_opt(inode->i_sb, DELALLOC))
inode->i_mapping->a_ops = &ext4_da_aops;
Expand Down
106 changes: 106 additions & 0 deletions fs/ext4/snapshot.h
Expand Up @@ -288,6 +288,14 @@ static inline int ext4_snapshot_get_delete_access(handle_t *handle,

/* snapshot_ctl.c */

/*
* Snapshot constructor/destructor
*/
extern int ext4_snapshot_load(struct super_block *sb,
struct ext4_super_block *es, int read_only);
extern int ext4_snapshot_update(struct super_block *sb, int cleanup,
int read_only);
extern void ext4_snapshot_destroy(struct super_block *sb);

static inline int init_ext4_snapshot(void)
{
Expand All @@ -299,7 +307,105 @@ static inline void exit_ext4_snapshot(void)
}


/*
 * ext4_snapshot_file():
 * Tests if @inode is a snapshot file.
 *
 * Only regular files qualify: a non-regular inode carrying the snapfile
 * flag is a snapshots directory and yields 0.  For a regular file the
 * result is the state of the EXT4_INODE_SNAPFILE inode flag.
 */
static inline int ext4_snapshot_file(struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		return ext4_test_inode_flag(inode, EXT4_INODE_SNAPFILE);

	/* a snapshots directory */
	return 0;
}

/*
 * ext4_snapshot_list():
 * Tests if @inode is on the on-disk snapshot list, i.e. whether its
 * EXT4_SNAPSTATE_LIST dynamic state bit is set.
 */
static inline int ext4_snapshot_list(struct inode *inode)
{
	int on_list = ext4_test_inode_snapstate(inode, EXT4_SNAPSTATE_LIST);

	return on_list;
}

/*
 * ext4_snapshot_excluded():
 * Checks if the file should be excluded from snapshot.
 *
 * Returns 0 for normal file.
 * Returns > 0 for 'excluded' file.
 * Returns < 0 for 'ignored' file (stronger than 'excluded').
 *
 * Excluded and ignored file blocks are not moved to snapshot.
 * Ignored file metadata blocks are not COWed to snapshot.
 * Excluded file metadata blocks are zeroed in the snapshot file.
 * XXX: Excluded files code is experimental,
 *      but ignored files code isn't.
 *
 * In this implementation only snapshot files are reported (as 'ignored',
 * -1); no code path returns a positive value.  A NULL @inode and any
 * non-regular inode yield 0, because directory blocks and global
 * filesystem blocks cannot be 'excluded'.
 */
static inline int ext4_snapshot_excluded(struct inode *inode)
{
	/* snapshot files are 'ignored' */
	if (inode && S_ISREG(inode->i_mode) && ext4_snapshot_file(inode))
		return -1;

	return 0;
}

/*
 * ext4_snapshot_active():
 * Tests if the file system has an active snapshot.
 *
 * Returns 1 when @sbi->s_active_snapshot is set and 0 otherwise.  The
 * "has a snapshot" case is annotated as the unlikely one.
 */
static inline int ext4_snapshot_active(struct ext4_sb_info *sbi)
{
	return unlikely(sbi->s_active_snapshot) ? 1 : 0;
}

/*
* tests if the file system has an active snapshot and returns its inode.
* active snapshot is only changed under journal_lock_updates(),
* so it is safe to use the returned inode during a transaction.
*/
static inline struct inode *ext4_snapshot_has_active(struct super_block *sb)
{
return EXT4_SB(sb)->s_active_snapshot;
}

/*
* tests if @inode is the current active snapshot.
* active snapshot is only changed under journal_lock_updates(),
* so the test result never changes during a transaction.
*/
static inline int ext4_snapshot_is_active(struct inode *inode)
{
return (inode == EXT4_SB(inode->i_sb)->s_active_snapshot);
}


/*
 * The i_datasync_tid field of the active snapshot inode of @sb.
 * Expands to an lvalue: ext4_snapshot_set_tid() assigns to it and
 * ext4_snapshot_get_tid() reads it.  Both of them BUG_ON() a missing
 * active snapshot before using the macro; the macro itself performs
 * no such check.
 */
#define SNAPSHOT_TRANSACTION_ID(sb) \
((EXT4_I(EXT4_SB(sb)->s_active_snapshot))->i_datasync_tid)

/**
* set transaction ID for active snapshot
*
* this function is called after freeze_super() returns but before
* calling unfreeze_super() to record the tid at time when a snapshot is
* taken.
*/
static inline void ext4_snapshot_set_tid(struct super_block *sb)
{
BUG_ON(!ext4_snapshot_active(EXT4_SB(sb)));
SNAPSHOT_TRANSACTION_ID(sb) =
EXT4_SB(sb)->s_journal->j_transaction_sequence;
}

/*
 * ext4_snapshot_get_tid():
 * Get the transaction ID of the active snapshot, as recorded by
 * ext4_snapshot_set_tid().
 *
 * BUGs if @sb has no active snapshot.
 */
static inline tid_t ext4_snapshot_get_tid(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	BUG_ON(!ext4_snapshot_active(sbi));
	return SNAPSHOT_TRANSACTION_ID(sb);
}

/* test if thereis a mow that is in or before current transcation */
static inline int ext4_snapshot_mow_in_tid(struct inode *inode)
{
return tid_geq(EXT4_I(inode)->i_datasync_tid,
ext4_snapshot_get_tid(inode->i_sb));
}


#else /* CONFIG_EXT4_FS_SNAPSHOT */
Expand Down

0 comments on commit 9d461a4

Please sign in to comment.