Permalink
Browse files

e4snap: add snap clone file support

A snapclone file is used to support writable snapshots.
If a user wants to modify a snapshot, a snapclone file
is created and mounted via loop; the snapclone file stores
the diff between the original snapshot and the written snapshot.
  • Loading branch information...
1 parent 96e8649 commit 67fa93a19c6da1eb7bfe06c5d165282e09c0e608 @YANGYongqiang committed Jun 27, 2012
Showing with 549 additions and 8 deletions.
  1. +9 −0 fs/ext4/Kconfig
  2. +27 −0 fs/ext4/ext4.h
  3. +5 −0 fs/ext4/file.c
  4. +6 −0 fs/ext4/ialloc.c
  5. +110 −2 fs/ext4/inode.c
  6. +17 −2 fs/ext4/ioctl.c
  7. +15 −1 fs/ext4/namei.c
  8. +30 −0 fs/ext4/snapshot.h
  9. +319 −0 fs/ext4/snapshot_ctl.c
  10. +11 −3 fs/ext4/snapshot_inode.c
View
@@ -185,6 +185,15 @@ config EXT4_FS_SNAPSHOT_FILE
Snapshot files are marked with the snapfile flag and have special
read-only address space ops.
+config EXT4_FS_SNAPCLONE_FILE
@amir73il

amir73il Jul 1, 2012

to stay with convention I would use EXT4_FS_SNAPSHOT_CLONE,
(which depends on EXT4_FS_SNAPSHOT)

+ bool "snapclone file"
+ depends on EXT4_FS_SNAPSHOT
+ default y
+ help
+ Ext4 snapclone implementation as a file inside the file system.
+ Snapshot clones are marked with the snapclone flag and have special
+ address space ops supporting writable snapshot.
+
config EXT4_FS_SNAPSHOT_FILE_READ
bool "snapshot file - read through to block device"
depends on EXT4_FS_SNAPSHOT_FILE
View
@@ -472,12 +472,27 @@ struct flex_groups {
#define EXT4_SNAPFILE_FL 0x01000000 /* snapshot file */
#define EXT4_SNAPFILE_DELETED_FL 0x04000000 /* snapshot is deleted */
#define EXT4_SNAPFILE_SHRUNK_FL 0x08000000 /* snapshot was shrunk */
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+#define EXT4_SNAPCLONE_FL 0x10000000 /* snapshot clone file */
@amir73il

amir73il Jul 1, 2012

the flag I would call EXT4_SNAPFILE_CLONE_FL and I would avoid the extra ifdef here

+#endif
/* end of snapshot flags */
#endif
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
@amir73il

amir73il Jul 1, 2012

I would drop this ifdef and simply add the new flag to the visible/modify mask

+#define EXT4_FL_USER_VISIBLE 0x114BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x114B80FF /* User modifiable flags */
+
+/* Flags that should be inherited by new inodes from their parent. */
+#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
+ EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
+ EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
+ EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_SNAPFILE_FL | EXT4_SNAPCLONE_FL)
@amir73il

amir73il Jul 1, 2012

SNAPFILE_CLONE should be inherited (from parent directory)

+#else
#define EXT4_FL_USER_VISIBLE 0x014BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x014B80FF /* User modifiable flags */
@@ -487,6 +502,7 @@ struct flex_groups {
EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL | EXT4_SNAPFILE_FL)
+#endif
#else
#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x004B80FF /* User modifiable flags */
@@ -548,6 +564,9 @@ enum {
EXT4_INODE_SNAPFILE_DELETED = 26, /* Snapshot is deleted */
EXT4_INODE_SNAPFILE_SHRUNK = 27, /* Snapshot was shrunk */
#endif
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
@amir73il

amir73il Jul 1, 2012

I would leave all new ifdefs out of this file

+ EXT4_INODE_SNAPCLONE = 28, /* Snapclone file/dir */
+#endif
EXT4_INODE_RESERVED = 31, /* reserved for ext4 lib */
};
@@ -1079,6 +1098,9 @@ struct ext4_inode_info {
#define EXT4_FLAGS_IS_SNAPSHOT 0x0010 /* Is a snapshot image */
#define EXT4_FLAGS_FIX_SNAPSHOT 0x0020 /* Corrupted snapshot */
#define EXT4_FLAGS_FIX_EXCLUDE 0x0040 /* Bad exclude bitmap */
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+#define EXT4_FLAGS_IS_SNAPCLONE 0x0080 /* Is a snapclone image */
+#endif
#define EXT4_SET_FLAGS(sb, mask) \
do { \
@@ -1509,6 +1531,7 @@ enum {
EXT4_SNAPSTATE_SHRUNK = 5, /* snapshot was shrunk (h) */
EXT4_SNAPSTATE_OPEN = 6, /* snapshot is mounted (o) */
EXT4_SNAPSTATE_TAGGED = 7, /* snapshot is tagged (t) */
+ EXT4_SNAPSTATE_CLONED = 8, /* snapshot is cloned (c) */
EXT4_SNAPSTATE_LAST
#endif
};
@@ -2228,6 +2251,10 @@ extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
__u32 start_minor_hash, __u32 *next_hash);
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+extern int ext4_add_nondir(handle_t *handle,
+ struct dentry *dentry, struct inode *inode);
+#endif
/* resize.c */
extern int ext4_group_add(struct super_block *sb,
View
@@ -50,6 +50,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
ext4_discard_preallocations(inode);
up_write(&EXT4_I(inode)->i_data_sem);
}
+
if (is_dx(inode) && filp->private_data)
ext4_htree_free_dir_info(filp->private_data);
@@ -169,7 +170,11 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
char buf[64], *cp;
#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE_PERM
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
@amir73il

amir73il Jul 1, 2012

I would lose the new ifdef and make ext4_snapclone_file() evaluate to 0 when SNAPSHOT_CLONE is not defined,
unless you want to use the new ifdefs for the extract_patches script?

+ if (ext4_snapshot_file(inode) && !ext4_snapclone_file(inode) &&
+#else
if (ext4_snapshot_file(inode) &&
+#endif
(filp->f_flags & O_ACCMODE) != O_RDONLY)
/*
* allow only read-only access to snapshot files
View
@@ -933,8 +933,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
* Set extent flag only for non-snapshot file, directory
* and normal symlink
*/
+#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE
@amir73il

amir73il Jul 1, 2012

I think you meant ifdef SNAPSHOT_CLONE; again, I would drop the extra ifdef

+ if ((S_ISREG(mode) && !ext4_snapshot_file(inode) &&
+ !ext4_snapclone_file(inode)) ||
@amir73il

amir73il Jul 1, 2012

why do you need to test snapclone_file here?
isn't snapclone_file a special case of snapshot_file?

@YANGYongqiang

YANGYongqiang Aug 12, 2012

Owner

Sorry, at first I cleared the snapshot flag on snapclone files, and this line is leftover code that I did not clean up.

+ S_ISDIR(mode) || S_ISLNK(mode)) {
+#else
if ((S_ISREG(mode) && !ext4_snapshot_file(inode)) ||
S_ISDIR(mode) || S_ISLNK(mode)) {
+#endif
#else
/* set extent flag only for directory, file and normal symlink*/
if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
View
@@ -142,6 +142,11 @@ void ext4_evict_inode(struct inode *inode)
ext4_ioend_wait(inode);
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
@amir73il

amir73il Jul 1, 2012

if you can think of ways to reduce the changes to inode.c it would be better.
for example, implement ext4_snapshot_evict_inode() op

+ if (ext4_snapclone_file(inode))
+ ext4_snapclone_destroy(inode);
+#endif
+
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
@@ -676,13 +681,30 @@ static int ext4_partial_write_begin(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = 1;
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode)) {
+ ret = ext4_snapshot_get_block(inode, iblock, bh, 0);
+ if (ret == 0) {
+ BUG_ON(!buffer_mapped(bh));
+ ret = 1;
+ }
+ } else {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ }
+#else
ret = ext4_map_blocks(NULL, inode, &map, 0);
+#endif
if (ret <= 0)
return ret;
if (!buffer_uptodate(bh) && !buffer_unwritten(bh)) {
/* map existing block for read */
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (!ext4_snapclone_file(inode))
+ map_bh(bh, inode->i_sb, map.m_pblk);
+#else
map_bh(bh, inode->i_sb, map.m_pblk);
+#endif
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
/* clear existing block mapping */
@@ -728,8 +750,14 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
}
#ifdef CONFIG_EXT4_FS_SNAPSHOT_HOOKS_DATA
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (((flags & EXT4_GET_BLOCKS_MOVE_ON_WRITE) ||
+ ext4_snapclone_file(inode)) &&
+ buffer_partial_write(bh)) {
+#else
if ((flags & EXT4_GET_BLOCKS_MOVE_ON_WRITE) &&
buffer_partial_write(bh)) {
+#endif
/* Read existing block data before moving it to snapshot */
ret = ext4_partial_write_begin(inode, iblock, bh);
if (ret < 0)
@@ -767,6 +795,10 @@ int ext4_get_block_mow(struct inode *inode, sector_t iblock,
{
int flags = create ? EXT4_GET_BLOCKS_CREATE : 0;
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode))
+ iblock = SNAPSHOT_IBLOCK(iblock);
@amir73il

amir73il Jul 1, 2012

it's very confusing that iblock means one thing for MOW and another thing for snapclone write.
at the very least, this line requires a comment to explain the difference.
considering the number of times you need to convert iblock in this function, it makes sense
to pass it SNAPSHOT_IBLOCK to begin with; then the name of the arg (iblock) would be correct

+#endif
if (ext4_snapshot_should_move_data(inode))
flags |= EXT4_GET_BLOCKS_MOVE_ON_WRITE;
return _ext4_get_block(inode, iblock, bh, flags);
@@ -1000,8 +1032,15 @@ static void ext4_snapshot_write_begin(struct inode *inode,
* guarantee this we have to know that the transaction is not restarted.
* Can we count on that?
*/
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if ((!EXT4_SNAPSHOTS(inode->i_sb) ||
+ !ext4_snapshot_should_move_data(inode)) &&
+ !(ext4_snapclone_file(inode)))
+
+#else
if (!EXT4_SNAPSHOTS(inode->i_sb) ||
!ext4_snapshot_should_move_data(inode))
+#endif
return;
if (!page_has_buffers(page))
@@ -1011,14 +1050,19 @@ static void ext4_snapshot_write_begin(struct inode *inode,
/*
* make sure that get_block() is called even if the buffer is
* mapped, but not if it is already a part of any transaction.
- * in data=ordered,the only mode supported by ext4, all dirty
+ * in data=ordered,the only mode supported by ext4 snapshot, all dirty
* data buffers are flushed on snapshot take via freeze_fs()
* API.
*/
if (!buffer_jbd(bh) && !buffer_delay(bh)) {
clear_buffer_mapped(bh);
/* explicitly request move-on-write */
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if ((!delay || ext4_snapclone_file(inode)) &&
+ len < PAGE_CACHE_SIZE)
+#else
if (!delay && len < PAGE_CACHE_SIZE)
+#endif
/* read block before moving it to snapshot */
set_buffer_partial_write(bh);
}
@@ -1753,6 +1797,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
* EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
* variables are updated after the blocks have been allocated.
*/
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(mpd->inode))
+ next = SNAPSHOT_IBLOCK(next);
+#endif
map.m_lblk = next;
map.m_len = max_blocks;
get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
@@ -2054,6 +2102,10 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
BUG_ON(create == 0);
BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode))
+ iblock = SNAPSHOT_IBLOCK(iblock);
+#endif
map.m_lblk = iblock;
map.m_len = 1;
@@ -3064,6 +3116,10 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
inode->i_ino, create);
if (ext4_snapshot_should_move_data(inode))
flags |= EXT4_GET_BLOCKS_MOVE_ON_WRITE;
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode))
+ iblock = SNAPSHOT_IBLOCK(iblock);
+#endif
return _ext4_get_block(inode, iblock, bh_result, flags);
#else
ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
@@ -3408,6 +3464,43 @@ static const struct address_space_operations ext4_da_aops = {
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
};
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+static const struct address_space_operations ext4_snapclone_da_aops = {
+#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE_READ
+ .readpage = ext4_snapshot_readpage,
+#else
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+#endif
+ .writepage = ext4_writepage,
+ .writepages = ext4_da_writepages,
+ .write_begin = ext4_da_write_begin,
+ .write_end = ext4_da_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_da_invalidatepage,
+ .releasepage = ext4_releasepage,
+};
+
+#endif
+
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+static const struct address_space_operations ext4_snapclone_aops = {
+#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE_READ
+ .readpage = ext4_snapshot_readpage,
+#else
+ .readpage = ext4_readpage,
+ .readpages = ext4_readpages,
+#endif
+ .writepage = ext4_writepage,
+ .write_begin = ext4_write_begin,
+ .write_end = ext4_ordered_write_end,
+ .bmap = ext4_bmap,
+ .invalidatepage = ext4_invalidatepage,
+ .releasepage = ext4_releasepage,
+};
+
+#endif
+
#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE
static int ext4_no_writepage(struct page *page,
struct writeback_control *wbc)
@@ -3443,6 +3536,15 @@ static const struct address_space_operations ext4_snapfile_aops = {
void ext4_set_aops(struct inode *inode)
{
+ /* We can not change order of snapclone and snapshot. */
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode) &&
+ test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_snapclone_da_aops;
+ else if (ext4_snapclone_file(inode))
+ inode->i_mapping->a_ops = &ext4_snapclone_aops;
+ else
+#endif
#ifdef CONFIG_EXT4_FS_SNAPSHOT_FILE
if (ext4_snapshot_file(inode))
inode->i_mapping->a_ops = &ext4_snapfile_aops;
@@ -4090,7 +4192,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
#endif
INIT_LIST_HEAD(&ei->i_orphan);
-
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode)) {
+ ret = ext4_snapclone_load(inode);
+ if (ret)
+ goto bad_inode;
+ }
+#endif
/*
* Set transaction id's of transactions that have to be committed
* to finish f[data]sync. We set them to currently running transaction
View
@@ -87,13 +87,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto flags_out;
}
#ifdef CONFIG_EXT4_FS_SNAPSHOT_CTL
-
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ /*
+ * The SNAPFILE and SNAPCLONE flag can only be changed on directories by
@amir73il

amir73il Jul 1, 2012

are you sure you need to set SNAPFILE_CLONE on directories?

+ * the relevant capability.
+ * It can only be inherited by regular files.
+ */
+ if ((flags ^ oldflags) &
+ (EXT4_SNAPFILE_FL | EXT4_SNAPCLONE_FL)) {
+#else
/*
* The SNAPFILE flag can only be changed on directories by
* the relevant capability.
* It can only be inherited by regular files.
*/
if ((flags ^ oldflags) & EXT4_SNAPFILE_FL) {
+#endif
if (!S_ISDIR(inode->i_mode)) {
err = -ENOTDIR;
goto flags_out;
@@ -236,8 +245,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
goto snapflags_err;
-
+#ifdef CONFIG_EXT4_FS_SNAPCLONE_FILE
+ if (ext4_snapclone_file(inode))
+ err = ext4_snapclone_set_flags(handle, inode, flags);
+ else
+ err = ext4_snapshot_set_flags(handle, inode, flags);
+#else
err = ext4_snapshot_set_flags(handle, inode, flags);
+#endif
if (err)
goto snapflags_err;
Oops, something went wrong.

1 comment on commit 67fa93a

nice work :-)

Please sign in to comment.