
Commit 8e71022

Coly Li authored and Jens Axboe committed
bcache: make bch_btree_check() to be multithreaded
When registering a cache device, bch_btree_check() is called to check all btree nodes, to make sure the btree is consistent and not corrupted.

bch_btree_check() is recursively executed in a single thread. When a lot of data is cached and the btree is huge, it may take a very long time to check all the btree nodes. In my testing, I observed it took around 50 minutes to finish bch_btree_check().

When checking the bcache btree nodes, the cache set is not running yet and the whole tree is in a read-only state, so it is safe to create multiple threads to check the btree in parallel.

This patch creates multiple threads, and each thread checks, one by one, the sub-trees indexed by keys from the btree root node. The number of parallel threads depends on how many keys are in the btree root node: at most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in practice it should be min(cpu-number/2, root-node-keys-number).

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent feac1a7 commit 8e71022
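The work-distribution pattern this patch uses is simple enough to sketch outside the kernel. Below is a minimal userspace analogue (hypothetical, not part of the patch: it uses pthreads and C11 atomics where the kernel code uses kthreads, a spinlock-protected key_idx, and a wait queue): each worker atomically claims the next unclaimed root-key index and checks the sub-tree behind it, so no two workers ever visit the same sub-tree.

/* Hypothetical userspace sketch, not kernel code. Build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NKEYS    16	/* stand-in for the number of root-node keys */
#define NTHREADS  4	/* stand-in for min(cpu-number/2, BCH_BTR_CHKTHREAD_MAX) */

static atomic_int key_idx;	/* next unclaimed root-key index, like check_state->key_idx */

/* stand-in for bcache_btree(check_recurse, p, c->root, &op) */
static void check_subtree(int idx, int worker)
{
	printf("worker %d checks sub-tree %d\n", worker, idx);
}

static void *check_thread(void *arg)
{
	int worker = (int)(long)arg;

	for (;;) {
		/* claim the next index; plays the role of the idx_lock'd key_idx++ */
		int cur = atomic_fetch_add(&key_idx, 1);

		if (cur >= NKEYS)	/* no keys left: like setting ->enough and exiting */
			break;
		check_subtree(cur, worker);
	}
	return NULL;
}

int main(void)
{
	pthread_t tid[NTHREADS];
	long i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&tid[i], NULL, check_thread, (void *)i);
	for (i = 0; i < NTHREADS; i++)	/* plays the role of wait_event_interruptible() */
		pthread_join(tid[i], NULL);
	return 0;
}

The kernel version differs mainly in bookkeeping: threads are created lazily until either total_threads are running or the root keys run out, and completion is signalled through atomic_dec_and_test() plus wake_up() rather than pthread_join().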

2 files changed: +188 −3 lines changed


drivers/md/bcache/btree.c

Lines changed: 166 additions & 3 deletions
@@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
 	return ret;
 }
 
+
+static int bch_btree_check_thread(void *arg)
+{
+	int ret;
+	struct btree_check_info *info = arg;
+	struct btree_check_state *check_state = info->state;
+	struct cache_set *c = check_state->c;
+	struct btree_iter iter;
+	struct bkey *k, *p;
+	int cur_idx, prev_idx, skip_nr;
+	int i, n;
+
+	k = p = NULL;
+	i = n = 0;
+	cur_idx = prev_idx = 0;
+	ret = 0;
+
+	/* root node keys are checked before thread created */
+	bch_btree_iter_init(&c->root->keys, &iter, NULL);
+	k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+	BUG_ON(!k);
+
+	p = k;
+	while (k) {
+		/*
+		 * Fetch a root node key index, skip the keys which
+		 * should be fetched by other threads, then check the
+		 * sub-tree indexed by the fetched key.
+		 */
+		spin_lock(&check_state->idx_lock);
+		cur_idx = check_state->key_idx;
+		check_state->key_idx++;
+		spin_unlock(&check_state->idx_lock);
+
+		skip_nr = cur_idx - prev_idx;
+
+		while (skip_nr) {
+			k = bch_btree_iter_next_filter(&iter,
+						       &c->root->keys,
+						       bch_ptr_bad);
+			if (k)
+				p = k;
+			else {
+				/*
+				 * No more keys to check in root node,
+				 * current checking threads are enough,
+				 * stop creating more.
+				 */
+				atomic_set(&check_state->enough, 1);
+				/* Update check_state->enough earlier */
+				smp_mb();
+				goto out;
+			}
+			skip_nr--;
+			cond_resched();
+		}
+
+		if (p) {
+			struct btree_op op;
+
+			btree_node_prefetch(c->root, p);
+			c->gc_stats.nodes++;
+			bch_btree_op_init(&op, 0);
+			ret = bcache_btree(check_recurse, p, c->root, &op);
+			if (ret)
+				goto out;
+		}
+		p = NULL;
+		prev_idx = cur_idx;
+		cond_resched();
+	}
+
+out:
+	info->result = ret;
+	/* update check_state->started among all CPUs */
+	smp_mb();
+	if (atomic_dec_and_test(&check_state->started))
+		wake_up(&check_state->wait);
+
+	return ret;
+}
+
+
+
+static int bch_btree_chkthread_nr(void)
+{
+	int n = num_online_cpus()/2;
+
+	if (n == 0)
+		n = 1;
+	else if (n > BCH_BTR_CHKTHREAD_MAX)
+		n = BCH_BTR_CHKTHREAD_MAX;
+
+	return n;
+}
+
 int bch_btree_check(struct cache_set *c)
 {
-	struct btree_op op;
+	int ret = 0;
+	int i;
+	struct bkey *k = NULL;
+	struct btree_iter iter;
+	struct btree_check_state *check_state;
+	char name[32];
 
-	bch_btree_op_init(&op, SHRT_MAX);
+	/* check and mark root node keys */
+	for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+		bch_initial_mark_key(c, c->root->level, k);
+
+	bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
+
+	if (c->root->level == 0)
+		return 0;
+
+	check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+	if (!check_state)
+		return -ENOMEM;
+
+	check_state->c = c;
+	check_state->total_threads = bch_btree_chkthread_nr();
+	check_state->key_idx = 0;
+	spin_lock_init(&check_state->idx_lock);
+	atomic_set(&check_state->started, 0);
+	atomic_set(&check_state->enough, 0);
+	init_waitqueue_head(&check_state->wait);
 
-	return bcache_btree_root(check_recurse, c, &op);
+	/*
+	 * Run multiple threads to check btree nodes in parallel,
+	 * if check_state->enough is non-zero, it means current
+	 * running check threads are enough, unncessary to create
+	 * more.
+	 */
+	for (i = 0; i < check_state->total_threads; i++) {
+		/* fetch latest check_state->enough earlier */
+		smp_mb();
+		if (atomic_read(&check_state->enough))
+			break;
+
+		check_state->infos[i].result = 0;
+		check_state->infos[i].state = check_state;
+		snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+		atomic_inc(&check_state->started);
+
+		check_state->infos[i].thread =
+			kthread_run(bch_btree_check_thread,
+				    &check_state->infos[i],
+				    name);
+		if (IS_ERR(check_state->infos[i].thread)) {
+			pr_err("fails to run thread bch_btrchk[%d]", i);
+			for (--i; i >= 0; i--)
+				kthread_stop(check_state->infos[i].thread);
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	wait_event_interruptible(check_state->wait,
+				 atomic_read(&check_state->started) == 0 ||
+				 test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+	for (i = 0; i < check_state->total_threads; i++) {
+		if (check_state->infos[i].result) {
+			ret = check_state->infos[i].result;
+			goto out;
+		}
+	}
+
+out:
+	kfree(check_state);
+	return ret;
 }
 
 void bch_initial_gc_finish(struct cache_set *c)
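For concreteness, the sizing logic above works out as follows: bch_btree_chkthread_nr() halves num_online_cpus() and clamps the result to the range [1, BCH_BTR_CHKTHREAD_MAX], so an 8-CPU machine gets 4 checker threads while a 256-CPU machine still gets only 64. If the root node holds fewer keys than that, the surplus threads find the iterator exhausted, set check_state->enough to stop further thread creation, and exit; that is how the effective concurrency becomes the min(cpu-number/2, root-node-keys-number) described in the commit message.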

drivers/md/bcache/btree.h

Lines changed: 22 additions & 0 deletions
@@ -145,6 +145,9 @@ struct btree {
 	struct bio		*bio;
 };
 
+
+
+
 #define BTREE_FLAG(flag)					\
 static inline bool btree_node_ ## flag(struct btree *b)	\
 {	return test_bit(BTREE_NODE_ ## flag, &b->flags); }	\
@@ -216,6 +219,25 @@ struct btree_op {
 	unsigned int		insert_collision:1;
 };
 
+struct btree_check_state;
+struct btree_check_info {
+	struct btree_check_state	*state;
+	struct task_struct		*thread;
+	int				result;
+};
+
+#define BCH_BTR_CHKTHREAD_MAX	64
+struct btree_check_state {
+	struct cache_set		*c;
+	int				total_threads;
+	int				key_idx;
+	spinlock_t			idx_lock;
+	atomic_t			started;
+	atomic_t			enough;
+	wait_queue_head_t		wait;
+	struct btree_check_info	infos[BCH_BTR_CHKTHREAD_MAX];
+};
+
 static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
 {
 	memset(op, 0, sizeof(struct btree_op));
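Reading the two new structs together: each checker thread owns a btree_check_info carrying its task_struct and a per-thread result, plus a back-pointer to the shared btree_check_state. The shared state holds the key_idx cursor (protected by idx_lock) from which threads claim root-key indexes, the started counter paired with the wait queue that bch_btree_check() sleeps on, and the enough flag that prevents creating threads for which no keys remain.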
