Skip to content

Commit 1265925

Browse files
naota authored and kdave committed
btrfs: implement log-structured superblock for ZONED mode
The superblock (and its copies) is the only data structure in btrfs which has a fixed location on a device. Since we cannot overwrite data in a sequential write required zone, we cannot place the superblock in such a zone. One easy solution is to limit the superblock and its copies to conventional zones only. However, this method has two downsides. The first is a reduced number of superblock copies: the second copy of the superblock is located at 256GB, which is in a sequential write required zone on typical devices on the market today, so the total number of superblocks (the primary and its copies) is limited to two. The second downside is that we cannot support devices which have no conventional zones at all. To solve these two problems, we employ superblock log writing. It uses two adjacent zones as a circular buffer to write updated superblocks. Once the first zone is filled up, it starts writing into the second one. Then, when both zones are filled up and before starting to write to the first zone again, it resets the first zone. We can determine the position of the latest superblock by reading the write pointer information from the device. One corner case is when both zones are full. For this situation, we read out the last superblock of each zone and compare them to determine which zone is older. The following zones are reserved as the circular buffer on ZONED btrfs: - The primary superblock: zones 0 and 1 - The first copy: zones 16 and 17 - The second copy: zone 1024 or the zone at 256GB, whichever is smaller, and the zone next to it. If these reserved zones are conventional, the superblock is written at a fixed location at the start of the zone without logging. Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent a589dde commit 1265925

File tree

6 files changed

+429
-12
lines changed

6 files changed

+429
-12
lines changed

fs/btrfs/block-group.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,6 +1679,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
16791679
static int exclude_super_stripes(struct btrfs_block_group *cache)
16801680
{
16811681
struct btrfs_fs_info *fs_info = cache->fs_info;
1682+
const bool zoned = btrfs_is_zoned(fs_info);
16821683
u64 bytenr;
16831684
u64 *logical;
16841685
int stripe_len;
@@ -1700,6 +1701,14 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
17001701
if (ret)
17011702
return ret;
17021703

1704+
/* Shouldn't have super stripes in sequential zones */
1705+
if (zoned && nr) {
1706+
btrfs_err(fs_info,
1707+
"zoned: block group %llu must not contain super block",
1708+
cache->start);
1709+
return -EUCLEAN;
1710+
}
1711+
17031712
while (nr--) {
17041713
u64 len = min_t(u64, stripe_len,
17051714
cache->start + cache->length - logical[nr]);

fs/btrfs/disk-io.c

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3488,10 +3488,17 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
34883488
{
34893489
struct btrfs_super_block *super;
34903490
struct page *page;
3491-
u64 bytenr;
3491+
u64 bytenr, bytenr_orig;
34923492
struct address_space *mapping = bdev->bd_inode->i_mapping;
3493+
int ret;
3494+
3495+
bytenr_orig = btrfs_sb_offset(copy_num);
3496+
ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
3497+
if (ret == -ENOENT)
3498+
return ERR_PTR(-EINVAL);
3499+
else if (ret)
3500+
return ERR_PTR(ret);
34933501

3494-
bytenr = btrfs_sb_offset(copy_num);
34953502
if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
34963503
return ERR_PTR(-EINVAL);
34973504

@@ -3505,7 +3512,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
35053512
return ERR_PTR(-ENODATA);
35063513
}
35073514

3508-
if (btrfs_super_bytenr(super) != bytenr) {
3515+
if (btrfs_super_bytenr(super) != bytenr_orig) {
35093516
btrfs_release_disk_super(super);
35103517
return ERR_PTR(-EINVAL);
35113518
}
@@ -3560,7 +3567,8 @@ static int write_dev_supers(struct btrfs_device *device,
35603567
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
35613568
int i;
35623569
int errors = 0;
3563-
u64 bytenr;
3570+
int ret;
3571+
u64 bytenr, bytenr_orig;
35643572

35653573
if (max_mirrors == 0)
35663574
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@@ -3572,12 +3580,22 @@ static int write_dev_supers(struct btrfs_device *device,
35723580
struct bio *bio;
35733581
struct btrfs_super_block *disk_super;
35743582

3575-
bytenr = btrfs_sb_offset(i);
3583+
bytenr_orig = btrfs_sb_offset(i);
3584+
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
3585+
if (ret == -ENOENT) {
3586+
continue;
3587+
} else if (ret < 0) {
3588+
btrfs_err(device->fs_info,
3589+
"couldn't get super block location for mirror %d",
3590+
i);
3591+
errors++;
3592+
continue;
3593+
}
35763594
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
35773595
device->commit_total_bytes)
35783596
break;
35793597

3580-
btrfs_set_super_bytenr(sb, bytenr);
3598+
btrfs_set_super_bytenr(sb, bytenr_orig);
35813599

35823600
crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
35833601
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
@@ -3622,6 +3640,7 @@ static int write_dev_supers(struct btrfs_device *device,
36223640
bio->bi_opf |= REQ_FUA;
36233641

36243642
btrfsic_submit_bio(bio);
3643+
btrfs_advance_sb_log(device, i);
36253644
}
36263645
return errors < i ? 0 : -1;
36273646
}
@@ -3638,6 +3657,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
36383657
int i;
36393658
int errors = 0;
36403659
bool primary_failed = false;
3660+
int ret;
36413661
u64 bytenr;
36423662

36433663
if (max_mirrors == 0)
@@ -3646,7 +3666,15 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
36463666
for (i = 0; i < max_mirrors; i++) {
36473667
struct page *page;
36483668

3649-
bytenr = btrfs_sb_offset(i);
3669+
ret = btrfs_sb_log_location(device, i, READ, &bytenr);
3670+
if (ret == -ENOENT) {
3671+
break;
3672+
} else if (ret < 0) {
3673+
errors++;
3674+
if (i == 0)
3675+
primary_failed = true;
3676+
continue;
3677+
}
36503678
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
36513679
device->commit_total_bytes)
36523680
break;

fs/btrfs/scrub.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "rcu-string.h"
2121
#include "raid56.h"
2222
#include "block-group.h"
23+
#include "zoned.h"
2324

2425
/*
2526
* This is only the first step towards a full-features scrub. It reads all
@@ -3732,6 +3733,8 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
37323733
if (bytenr + BTRFS_SUPER_INFO_SIZE >
37333734
scrub_dev->commit_total_bytes)
37343735
break;
3736+
if (!btrfs_check_super_location(scrub_dev, bytenr))
3737+
continue;
37353738

37363739
ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
37373740
scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,

fs/btrfs/volumes.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,7 +1276,7 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
12761276
}
12771277

12781278
static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
1279-
u64 bytenr)
1279+
u64 bytenr, u64 bytenr_orig)
12801280
{
12811281
struct btrfs_super_block *disk_super;
12821282
struct page *page;
@@ -1307,7 +1307,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
13071307
/* align our pointer to the offset of the super block */
13081308
disk_super = p + offset_in_page(bytenr);
13091309

1310-
if (btrfs_super_bytenr(disk_super) != bytenr ||
1310+
if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
13111311
btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
13121312
btrfs_release_disk_super(p);
13131313
return ERR_PTR(-EINVAL);
@@ -1342,7 +1342,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
13421342
bool new_device_added = false;
13431343
struct btrfs_device *device = NULL;
13441344
struct block_device *bdev;
1345-
u64 bytenr;
1345+
u64 bytenr, bytenr_orig;
1346+
int ret;
13461347

13471348
lockdep_assert_held(&uuid_mutex);
13481349

@@ -1352,14 +1353,18 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
13521353
* So, we need to add a special mount option to scan for
13531354
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
13541355
*/
1355-
bytenr = btrfs_sb_offset(0);
13561356
flags |= FMODE_EXCL;
13571357

13581358
bdev = blkdev_get_by_path(path, flags, holder);
13591359
if (IS_ERR(bdev))
13601360
return ERR_CAST(bdev);
13611361

1362-
disk_super = btrfs_read_disk_super(bdev, bytenr);
1362+
bytenr_orig = btrfs_sb_offset(0);
1363+
ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
1364+
if (ret)
1365+
return ERR_PTR(ret);
1366+
1367+
disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
13631368
if (IS_ERR(disk_super)) {
13641369
device = ERR_CAST(disk_super);
13651370
goto error_bdev_put;
@@ -2023,6 +2028,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
20232028
if (IS_ERR(disk_super))
20242029
continue;
20252030

2031+
if (bdev_is_zoned(bdev)) {
2032+
btrfs_reset_sb_log_zones(bdev, copy_num);
2033+
continue;
2034+
}
2035+
20262036
memset(&disk_super->magic, 0, sizeof(disk_super->magic));
20272037

20282038
page = virt_to_page(disk_super);

0 commit comments

Comments
 (0)