Skip to content

Commit de881df

Browse files
aravind-wdc and Jaegeuk Kim
authored and committed
f2fs: support zone capacity less than zone size
NVMe Zoned Namespace devices can have zone-capacity less than zone-size. Zone-capacity indicates the maximum number of sectors that are usable in a zone beginning from the first sector of the zone. This makes the sectors after the zone-capacity till zone-size unusable. This patch set tracks zone-size and zone-capacity in zoned devices and calculates the usable blocks per segment and usable segments per section. If zone-capacity is less than zone-size mark only those segments which start before zone-capacity as free segments. All segments at and beyond zone-capacity are treated as permanently used segments. In cases where zone-capacity does not align with segment size the last segment will start before zone-capacity and end beyond the zone-capacity of the zone. For such spanning segments only sectors within the zone-capacity are used. During writes and GC manage the usable segments in a section and usable blocks per segment. Segments which are beyond zone-capacity are never allocated, and do not need to be garbage collected, only the segments which are before zone-capacity need to be garbage collected. For spanning segments based on the number of usable blocks in that segment, write to blocks only up to zone-capacity. Zone-capacity is device specific and cannot be configured by the user. Since NVMe ZNS device zones are sequentially write only, a block device with conventional zones or any normal block device is needed along with the ZNS device for the metadata operations of F2fs. A typical nvme-cli output of a zoned device shows zone start and capacity and write pointer as below: SLBA: 0x0 WP: 0x0 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x20000 WP: 0x20000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ SLBA: 0x40000 WP: 0x40000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ Here zone size is 64MB, capacity is 49MB, WP is at zone start as the zones are in EMPTY state. 
For each zone, only zone start + 49MB is usable area, any lba/sector after 49MB cannot be read or written to, the drive will fail any attempts to read/write. So, the second zone starts at 64MB and is usable till 113MB (64 + 49) and the range between 113 and 128MB is again unusable. The next zone starts at 128MB, and so on. Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
1 parent 581cb3a commit de881df

File tree

7 files changed

+275
-37
lines changed

7 files changed

+275
-37
lines changed

Documentation/filesystems/f2fs.rst

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,3 +772,18 @@ Compress metadata layout::
772772
+-------------+-------------+----------+----------------------------+
773773
| data length | data chksum | reserved | compressed data |
774774
+-------------+-------------+----------+----------------------------+
775+
776+
NVMe Zoned Namespace devices
777+
----------------------------
778+
779+
- ZNS defines a per-zone capacity which can be equal to or less than the
780+
zone-size. Zone-capacity is the number of usable blocks in the zone.
781+
F2fs checks if zone-capacity is less than zone-size, if it is, then any
782+
segment which starts at or after the zone-capacity is marked as not-free in
783+
the free segment bitmap at initial mount time. These segments are marked
784+
as permanently used so they are not allocated for writes and
785+
consequently do not need to be garbage collected. In case the
786+
zone-capacity is not aligned to default segment size(2MB), then a segment
787+
can start before the zone-capacity and span across zone-capacity boundary.
788+
Such spanning segments are also considered as usable segments. All blocks
789+
past the zone-capacity are considered unusable in these segments.

fs/f2fs/f2fs.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,7 @@ struct f2fs_dev_info {
12091209
#ifdef CONFIG_BLK_DEV_ZONED
12101210
unsigned int nr_blkz; /* Total number of zones */
12111211
unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
1212+
block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
12121213
#endif
12131214
};
12141215

@@ -3378,6 +3379,10 @@ void f2fs_destroy_segment_manager_caches(void);
33783379
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
33793380
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
33803381
enum page_type type, enum temp_type temp);
3382+
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
3383+
unsigned int segno);
3384+
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
3385+
unsigned int segno);
33813386

33823387
/*
33833388
* checkpoint.c

fs/f2fs/gc.c

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -266,13 +266,14 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
266266
unsigned char age = 0;
267267
unsigned char u;
268268
unsigned int i;
269+
unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);
269270

270-
for (i = 0; i < sbi->segs_per_sec; i++)
271+
for (i = 0; i < usable_segs_per_sec; i++)
271272
mtime += get_seg_entry(sbi, start + i)->mtime;
272273
vblocks = get_valid_blocks(sbi, segno, true);
273274

274-
mtime = div_u64(mtime, sbi->segs_per_sec);
275-
vblocks = div_u64(vblocks, sbi->segs_per_sec);
275+
mtime = div_u64(mtime, usable_segs_per_sec);
276+
vblocks = div_u64(vblocks, usable_segs_per_sec);
276277

277278
u = (vblocks * 100) >> sbi->log_blocks_per_seg;
278279

@@ -536,6 +537,7 @@ static int gc_node_segment(struct f2fs_sb_info *sbi,
536537
int phase = 0;
537538
bool fggc = (gc_type == FG_GC);
538539
int submitted = 0;
540+
unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
539541

540542
start_addr = START_BLOCK(sbi, segno);
541543

@@ -545,7 +547,7 @@ static int gc_node_segment(struct f2fs_sb_info *sbi,
545547
if (fggc && phase == 2)
546548
atomic_inc(&sbi->wb_sync_req[NODE]);
547549

548-
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
550+
for (off = 0; off < usable_blks_in_seg; off++, entry++) {
549551
nid_t nid = le32_to_cpu(entry->nid);
550552
struct page *node_page;
551553
struct node_info ni;
@@ -1033,13 +1035,14 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
10331035
int off;
10341036
int phase = 0;
10351037
int submitted = 0;
1038+
unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
10361039

10371040
start_addr = START_BLOCK(sbi, segno);
10381041

10391042
next_step:
10401043
entry = sum;
10411044

1042-
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
1045+
for (off = 0; off < usable_blks_in_seg; off++, entry++) {
10431046
struct page *data_page;
10441047
struct inode *inode;
10451048
struct node_info dni; /* dnode info for the data */
@@ -1204,6 +1207,15 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
12041207
if (__is_large_section(sbi))
12051208
end_segno = rounddown(end_segno, sbi->segs_per_sec);
12061209

1210+
/*
1211+
* zone-capacity can be less than zone-size in zoned devices,
1212+
* resulting in less than expected usable segments in the zone,
1213+
* calculate the end segno in the zone which can be garbage collected
1214+
*/
1215+
if (f2fs_sb_has_blkzoned(sbi))
1216+
end_segno -= sbi->segs_per_sec -
1217+
f2fs_usable_segs_in_sec(sbi, segno);
1218+
12071219
/* readahead multi ssa blocks those have contiguous address */
12081220
if (__is_large_section(sbi))
12091221
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1356,7 +1368,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
13561368
goto stop;
13571369

13581370
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
1359-
if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
1371+
if (gc_type == FG_GC &&
1372+
seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
13601373
sec_freed++;
13611374
total_freed += seg_freed;
13621375

fs/f2fs/gc.h

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,49 @@ struct gc_inode_list {
4444
/*
4545
* inline functions
4646
*/
47+
48+
/*
49+
* On a Zoned device zone-capacity can be less than zone-size and if
50+
* zone-capacity is not aligned to f2fs segment size(2MB), then the segment
51+
* starting just before zone-capacity has some blocks spanning across the
52+
* zone-capacity, these blocks are not usable.
53+
* Such spanning segments can be in free list so calculate the sum of usable
54+
* blocks in currently free segments including normal and spanning segments.
55+
*/
56+
static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
57+
{
58+
block_t free_seg_blks = 0;
59+
struct free_segmap_info *free_i = FREE_I(sbi);
60+
int j;
61+
62+
spin_lock(&free_i->segmap_lock);
63+
for (j = 0; j < MAIN_SEGS(sbi); j++)
64+
if (!test_bit(j, free_i->free_segmap))
65+
free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
66+
spin_unlock(&free_i->segmap_lock);
67+
68+
return free_seg_blks;
69+
}
70+
71+
static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
72+
{
73+
if (f2fs_sb_has_blkzoned(sbi))
74+
return free_segs_blk_count_zoned(sbi);
75+
76+
return free_segments(sbi) << sbi->log_blocks_per_seg;
77+
}
78+
4779
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
4880
{
49-
if (free_segments(sbi) < overprovision_segments(sbi))
81+
block_t free_blks, ovp_blks;
82+
83+
free_blks = free_segs_blk_count(sbi);
84+
ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
85+
86+
if (free_blks < ovp_blks)
5087
return 0;
51-
else
52-
return (free_segments(sbi) - overprovision_segments(sbi))
53-
<< sbi->log_blocks_per_seg;
88+
89+
return free_blks - ovp_blks;
5490
}
5591

5692
static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)

0 commit comments

Comments
 (0)