Skip to content

Commit ece5cff

Browse files
committed
md: Support write-intent bitmaps with externally managed metadata.
In this case, the metadata must not be in the same sector as the bitmap. md will not read or write any bitmap metadata. Configuration must be done via sysfs, and when a recovery makes the array non-degraded again, writing 'true' to 'bitmap/can_clear' will allow bits in the bitmap to be cleared again. Signed-off-by: NeilBrown <neilb@suse.de>
1 parent 624ce4f commit ece5cff

File tree

4 files changed

+137
-33
lines changed

4 files changed

+137
-33
lines changed

Documentation/md.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,22 @@ All md devices contain:
322322
'backlog' sets a limit on the number of concurrent background
323323
writes. If there are more than this, new writes will be
324324
synchronous.
325+
bitmap/metadata
326+
This can be either 'internal' or 'external'.
327+
'internal' is the default and means the metadata for the bitmap
328+
is stored in the first 256 bytes of the allocated space and is
329+
managed by the md module.
330+
'external' means that bitmap metadata is managed externally to
331+
the kernel (i.e. by some userspace program).
332+
bitmap/can_clear
333+
This is either 'true' or 'false'. If 'true', then bits in the
334+
bitmap will be cleared when the corresponding blocks are thought
335+
to be in-sync. If 'false', bits will never be cleared.
336+
This is automatically set to 'false' if a write happens on a
337+
degraded array, or if the array becomes degraded during a write.
338+
When metadata is managed externally, it should be set to 'true'
339+
once the array becomes non-degraded, and this fact has been
340+
recorded in the metadata.
325341

326342

327343

drivers/md/bitmap.c

Lines changed: 119 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
497497

498498
if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
499499
return;
500+
if (bitmap->mddev->bitmap_info.external)
501+
return;
500502
spin_lock_irqsave(&bitmap->lock, flags);
501503
if (!bitmap->sb_page) { /* no superblock */
502504
spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
676678
* general bitmap file operations
677679
*/
678680

681+
/*
682+
* on-disk bitmap:
683+
*
684+
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
685+
* file a page at a time. Unless the bitmap metadata is managed externally, there's a superblock at the start of the file.
686+
*/
679687
/* calculate the index of the page that contains this bit */
680-
static inline unsigned long file_page_index(unsigned long chunk)
688+
static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
681689
{
682-
return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
690+
if (!bitmap->mddev->bitmap_info.external)
691+
chunk += sizeof(bitmap_super_t) << 3;
692+
return chunk >> PAGE_BIT_SHIFT;
683693
}
684694

685695
/* calculate the (bit) offset of this bit within a page */
686-
static inline unsigned long file_page_offset(unsigned long chunk)
696+
static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
687697
{
688-
return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
698+
if (!bitmap->mddev->bitmap_info.external)
699+
chunk += sizeof(bitmap_super_t) << 3;
700+
return chunk & (PAGE_BITS - 1);
689701
}
690702

691703
/*
@@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
698710
static inline struct page *filemap_get_page(struct bitmap *bitmap,
699711
unsigned long chunk)
700712
{
701-
if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
702-
return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
713+
if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
714+
return bitmap->filemap[file_page_index(bitmap, chunk)
715+
- file_page_index(bitmap, 0)];
703716
}
704717

705718

@@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
722735
spin_unlock_irqrestore(&bitmap->lock, flags);
723736

724737
while (pages--)
725-
if (map[pages]->index != 0) /* 0 is sb_page, release it below */
738+
if (map[pages] != sb_page) /* 0 is sb_page, release it below */
726739
free_buffers(map[pages]);
727740
kfree(map);
728741
kfree(attr);
@@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
833846

834847
page = filemap_get_page(bitmap, chunk);
835848
if (!page) return;
836-
bit = file_page_offset(chunk);
849+
bit = file_page_offset(bitmap, chunk);
837850

838851
/* set the bit */
839852
kaddr = kmap_atomic(page, KM_USER0);
@@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
931944
"recovery\n", bmname(bitmap));
932945

933946
bytes = (chunks + 7) / 8;
947+
if (!bitmap->mddev->bitmap_info.external)
948+
bytes += sizeof(bitmap_super_t);
934949

935-
num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
950+
951+
num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
936952

937-
if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
953+
if (file && i_size_read(file->f_mapping->host) < bytes) {
938954
printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
939955
bmname(bitmap),
940956
(unsigned long) i_size_read(file->f_mapping->host),
941-
bytes + sizeof(bitmap_super_t));
957+
bytes);
942958
goto err;
943959
}
944960

@@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
959975

960976
for (i = 0; i < chunks; i++) {
961977
int b;
962-
index = file_page_index(i);
963-
bit = file_page_offset(i);
978+
index = file_page_index(bitmap, i);
979+
bit = file_page_offset(bitmap, i);
964980
if (index != oldindex) { /* this is a new page, read it in */
965981
int count;
966982
/* unmap the old page, we're done with it */
967983
if (index == num_pages-1)
968-
count = bytes + sizeof(bitmap_super_t)
969-
- index * PAGE_SIZE;
984+
count = bytes - index * PAGE_SIZE;
970985
else
971986
count = PAGE_SIZE;
972-
if (index == 0) {
987+
if (index == 0 && bitmap->sb_page) {
973988
/*
974989
* if we're here then the superblock page
975990
* contains some bits (PAGE_SIZE != sizeof sb)
@@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
11641179
/* We are possibly going to clear some bits, so make
11651180
* sure that events_cleared is up-to-date.
11661181
*/
1167-
if (bitmap->need_sync) {
1182+
if (bitmap->need_sync &&
1183+
bitmap->mddev->bitmap_info.external == 0) {
11681184
bitmap_super_t *sb;
11691185
bitmap->need_sync = 0;
11701186
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev)
11741190
write_page(bitmap, bitmap->sb_page, 1);
11751191
}
11761192
spin_lock_irqsave(&bitmap->lock, flags);
1177-
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1193+
if (!bitmap->need_sync)
1194+
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
11781195
}
11791196
bmc = bitmap_get_counter(bitmap,
11801197
(sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
@@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
11891206
if (*bmc == 2) {
11901207
*bmc=1; /* maybe clear the bit next time */
11911208
set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1192-
} else if (*bmc == 1) {
1209+
} else if (*bmc == 1 && !bitmap->need_sync) {
11931210
/* we can clear the bit */
11941211
*bmc = 0;
11951212
bitmap_count_page(bitmap,
@@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
11991216
/* clear the bit */
12001217
paddr = kmap_atomic(page, KM_USER0);
12011218
if (bitmap->flags & BITMAP_HOSTENDIAN)
1202-
clear_bit(file_page_offset(j), paddr);
1219+
clear_bit(file_page_offset(bitmap, j),
1220+
paddr);
12031221
else
1204-
ext2_clear_bit(file_page_offset(j), paddr);
1222+
ext2_clear_bit(file_page_offset(bitmap, j),
1223+
paddr);
12051224
kunmap_atomic(paddr, KM_USER0);
12061225
}
12071226
} else
@@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
13561375
bitmap->events_cleared < bitmap->mddev->events) {
13571376
bitmap->events_cleared = bitmap->mddev->events;
13581377
bitmap->need_sync = 1;
1378+
sysfs_notify_dirent(bitmap->sysfs_can_clear);
13591379
}
13601380

13611381
if (!success && ! (*bmc & NEEDED_MASK))
@@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
16131633
if (mddev->thread)
16141634
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
16151635

1636+
if (bitmap->sysfs_can_clear)
1637+
sysfs_put(bitmap->sysfs_can_clear);
1638+
16161639
bitmap_free(bitmap);
16171640
}
16181641

@@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
16291652
struct file *file = mddev->bitmap_info.file;
16301653
int err;
16311654
sector_t start;
1655+
struct sysfs_dirent *bm;
16321656

16331657
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
16341658

@@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)
16481672

16491673
bitmap->mddev = mddev;
16501674

1675+
bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1676+
if (bm) {
1677+
bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1678+
sysfs_put(bm);
1679+
} else
1680+
bitmap->sysfs_can_clear = NULL;
1681+
16511682
bitmap->file = file;
16521683
if (file) {
16531684
get_file(file);
@@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
16581689
vfs_fsync(file, file->f_dentry, 1);
16591690
}
16601691
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1661-
err = bitmap_read_sb(bitmap);
1692+
if (!mddev->bitmap_info.external)
1693+
err = bitmap_read_sb(bitmap);
1694+
else {
1695+
err = 0;
1696+
if (mddev->bitmap_info.chunksize == 0 ||
1697+
mddev->bitmap_info.daemon_sleep == 0)
1698+
/* chunksize and time_base need to be
1699+
* set first. */
1700+
err = -EINVAL;
1701+
}
16621702
if (err)
16631703
goto error;
16641704

@@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
17771817
return rv;
17781818
if (offset == 0)
17791819
return -EINVAL;
1780-
if (mddev->major_version == 0 &&
1820+
if (mddev->bitmap_info.external == 0 &&
1821+
mddev->major_version == 0 &&
17811822
offset != mddev->bitmap_info.default_offset)
17821823
return -EINVAL;
17831824
mddev->bitmap_info.offset = offset;
@@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
19061947
static struct md_sysfs_entry bitmap_chunksize =
19071948
__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
19081949

1950+
static ssize_t metadata_show(mddev_t *mddev, char *page)
1951+
{
1952+
return sprintf(page, "%s\n", (mddev->bitmap_info.external
1953+
? "external" : "internal"));
1954+
}
1955+
1956+
static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
1957+
{
1958+
if (mddev->bitmap ||
1959+
mddev->bitmap_info.file ||
1960+
mddev->bitmap_info.offset)
1961+
return -EBUSY;
1962+
if (strncmp(buf, "external", 8) == 0)
1963+
mddev->bitmap_info.external = 1;
1964+
else if (strncmp(buf, "internal", 8) == 0)
1965+
mddev->bitmap_info.external = 0;
1966+
else
1967+
return -EINVAL;
1968+
return len;
1969+
}
1970+
1971+
static struct md_sysfs_entry bitmap_metadata =
1972+
__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
1973+
1974+
static ssize_t can_clear_show(mddev_t *mddev, char *page)
1975+
{
1976+
int len;
1977+
if (mddev->bitmap)
1978+
len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
1979+
"false" : "true"));
1980+
else
1981+
len = sprintf(page, "\n");
1982+
return len;
1983+
}
1984+
1985+
static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
1986+
{
1987+
if (mddev->bitmap == NULL)
1988+
return -ENOENT;
1989+
if (strncmp(buf, "false", 5) == 0)
1990+
mddev->bitmap->need_sync = 1;
1991+
else if (strncmp(buf, "true", 4) == 0) {
1992+
if (mddev->degraded)
1993+
return -EBUSY;
1994+
mddev->bitmap->need_sync = 0;
1995+
} else
1996+
return -EINVAL;
1997+
return len;
1998+
}
1999+
2000+
static struct md_sysfs_entry bitmap_can_clear =
2001+
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2002+
19092003
static struct attribute *md_bitmap_attrs[] = {
19102004
&bitmap_location.attr,
19112005
&bitmap_timeout.attr,
19122006
&bitmap_backlog.attr,
19132007
&bitmap_chunksize.attr,
2008+
&bitmap_metadata.attr,
2009+
&bitmap_can_clear.attr,
19142010
NULL
19152011
};
19162012
struct attribute_group md_bitmap_group = {

drivers/md/bitmap.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
118118
(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
119119
#define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
120120

121-
/*
122-
* on-disk bitmap:
123-
*
124-
* Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
125-
* file a page at a time. There's a superblock at the start of the file.
126-
*/
127-
128-
/* map chunks (bits) to file pages - offset by the size of the superblock */
129-
#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
130-
131121
#endif
132122

133123
/*
@@ -250,6 +240,7 @@ struct bitmap {
250240
wait_queue_head_t write_wait;
251241
wait_queue_head_t overflow_wait;
252242

243+
struct sysfs_dirent *sysfs_can_clear;
253244
};
254245

255246
/* the bitmap API */

drivers/md/md.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ struct mddev_s
296296
unsigned long chunksize;
297297
unsigned long daemon_sleep; /* how many seconds between updates? */
298298
unsigned long max_write_behind; /* write-behind mode */
299+
int external;
299300
} bitmap_info;
300301

301302
struct list_head all_mddevs;

0 commit comments

Comments
 (0)