Skip to content
Permalink
Browse files
f2fs: support zone capacity less than zone size
NVMe Zoned Namespace devices can have zone-capacity less than zone-size.
Zone-capacity indicates the maximum number of sectors that are usable in
a zone beginning from the first sector of the zone. This makes the sectors
sectors after the zone-capacity till zone-size to be unusable.
This patch set tracks zone-size and zone-capacity in zoned devices and
calculate the usable blocks per segment and usable segments per section.

If zone-capacity is less than zone-size mark only those segments which
start before zone-capacity as free segments. All segments at and beyond
zone-capacity are treated as permanently used segments. In cases where
zone-capacity does not align with segment size the last segment will start
before zone-capacity and end beyond the zone-capacity of the zone. For
such spanning segments only sectors within the zone-capacity are used.

During writes and GC manage the usable segments in a section and usable
blocks per segment. Segments which are beyond zone-capacity are never
allocated, and do not need to be garbage collected, only the segments
which are before zone-capacity needs to garbage collected.
For spanning segments based on the number of usable blocks in that
segment, write to blocks only up to zone-capacity.

Zone-capacity is device specific and cannot be configured by the user.
Since NVMe ZNS device zones are sequentially write only, a block device
with conventional zones or any normal block device is needed along with
the ZNS device for the metadata operations of F2fs.

A typical nvme-cli output of a zoned device shows zone start and capacity
and write pointer as below:

SLBA: 0x0     WP: 0x0     Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ
SLBA: 0x20000 WP: 0x20000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ
SLBA: 0x40000 WP: 0x40000 Cap: 0x18800 State: EMPTY Type: SEQWRITE_REQ

Here zone size is 64MB, capacity is 49MB, WP is at zone start as the zones
are in EMPTY state. For each zone, only zone start + 49MB is usable area,
any lba/sector after 49MB cannot be read or written to, the drive will fail
any attempts to read/write. So, the second zone starts at 64MB and is
usable till 113MB (64 + 49) and the range between 113 and 128MB is
again unusable. The next zone starts at 128MB, and so on.

Signed-off-by: Aravind Ramesh <aravind.ramesh@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
  • Loading branch information
aravind-wdc authored and Jaegeuk Kim committed Aug 4, 2020
1 parent 828add7 commit 550451dc977aeb0000104eb9777fd1dca07e5efb
Show file tree
Hide file tree
Showing 7 changed files with 275 additions and 37 deletions.
@@ -766,3 +766,18 @@ Compress metadata layout::
+-------------+-------------+----------+----------------------------+
| data length | data chksum | reserved | compressed data |
+-------------+-------------+----------+----------------------------+

NVMe Zoned Namespace devices
----------------------------

- ZNS defines a per-zone capacity which can be equal or less than the
zone-size. Zone-capacity is the number of usable blocks in the zone.
F2fs checks if zone-capacity is less than zone-size, if it is, then any
segment which starts after the zone-capacity is marked as not-free in
the free segment bitmap at initial mount time. These segments are marked
as permanently used so they are not allocated for writes and
consequently are not needed to be garbage collected. In case the
zone-capacity is not aligned to default segment size(2MB), then a segment
can start before the zone-capacity and span across zone-capacity boundary.
Such spanning segments are also considered as usable segments. All blocks
past the zone-capacity are considered unusable in these segments.
@@ -1209,6 +1209,7 @@ struct f2fs_dev_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_blkz; /* Total number of zones */
unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
#endif
};

@@ -3378,6 +3379,10 @@ void f2fs_destroy_segment_manager_caches(void);
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp);
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int segno);
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno);

/*
* checkpoint.c
@@ -266,13 +266,14 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned char age = 0;
unsigned char u;
unsigned int i;
unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);

for (i = 0; i < sbi->segs_per_sec; i++)
for (i = 0; i < usable_segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, true);

mtime = div_u64(mtime, sbi->segs_per_sec);
vblocks = div_u64(vblocks, sbi->segs_per_sec);
mtime = div_u64(mtime, usable_segs_per_sec);
vblocks = div_u64(vblocks, usable_segs_per_sec);

u = (vblocks * 100) >> sbi->log_blocks_per_seg;

@@ -536,6 +537,7 @@ static int gc_node_segment(struct f2fs_sb_info *sbi,
int phase = 0;
bool fggc = (gc_type == FG_GC);
int submitted = 0;
unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);

start_addr = START_BLOCK(sbi, segno);

@@ -545,7 +547,7 @@ static int gc_node_segment(struct f2fs_sb_info *sbi,
if (fggc && phase == 2)
atomic_inc(&sbi->wb_sync_req[NODE]);

for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
for (off = 0; off < usable_blks_in_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
@@ -1033,13 +1035,14 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
int off;
int phase = 0;
int submitted = 0;
unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);

start_addr = START_BLOCK(sbi, segno);

next_step:
entry = sum;

for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
for (off = 0; off < usable_blks_in_seg; off++, entry++) {
struct page *data_page;
struct inode *inode;
struct node_info dni; /* dnode info for the data */
@@ -1204,6 +1207,15 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
if (__is_large_section(sbi))
end_segno = rounddown(end_segno, sbi->segs_per_sec);

/*
* zone-capacity can be less than zone-size in zoned devices,
* resulting in less than expected usable segments in the zone,
* calculate the end segno in the zone which can be garbage collected
*/
if (f2fs_sb_has_blkzoned(sbi))
end_segno -= sbi->segs_per_sec -
f2fs_usable_segs_in_sec(sbi, segno);

/* readahead multi ssa blocks those have contiguous address */
if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1356,7 +1368,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
goto stop;

seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
if (gc_type == FG_GC &&
seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
total_freed += seg_freed;

@@ -44,13 +44,49 @@ struct gc_inode_list {
/*
* inline functions
*/

/*
* On a Zoned device zone-capacity can be less than zone-size and if
* zone-capacity is not aligned to f2fs segment size(2MB), then the segment
* starting just before zone-capacity has some blocks spanning across the
* zone-capacity, these blocks are not usable.
* Such spanning segments can be in free list so calculate the sum of usable
* blocks in currently free segments including normal and spanning segments.
*/
static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
{
block_t free_seg_blks = 0;
struct free_segmap_info *free_i = FREE_I(sbi);
int j;

spin_lock(&free_i->segmap_lock);
for (j = 0; j < MAIN_SEGS(sbi); j++)
if (!test_bit(j, free_i->free_segmap))
free_seg_blks += f2fs_usable_blks_in_seg(sbi, j);
spin_unlock(&free_i->segmap_lock);

return free_seg_blks;
}

static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
{
if (f2fs_sb_has_blkzoned(sbi))
return free_segs_blk_count_zoned(sbi);

return free_segments(sbi) << sbi->log_blocks_per_seg;
}

static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
{
if (free_segments(sbi) < overprovision_segments(sbi))
block_t free_blks, ovp_blks;

free_blks = free_segs_blk_count(sbi);
ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;

if (free_blks < ovp_blks)
return 0;
else
return (free_segments(sbi) - overprovision_segments(sbi))
<< sbi->log_blocks_per_seg;

return free_blks - ovp_blks;
}

static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)

0 comments on commit 550451d

Please sign in to comment.