Skip to content

Commit

Permalink
Merge tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/ker…
Browse files Browse the repository at this point in the history
…nel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - several fixes for handling directory item (inserting, removing,
   iteration, error handling)

 - fix transaction commit stalls when auto relocation is running and
   blocks other tasks that want to commit

 - fix a build error when DEBUG is enabled

 - fix lockdep warning in inode number lookup ioctl

 - fix race when finishing block group creation

 - remove link to obsolete wiki in several files

* tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  MAINTAINERS: remove links to obsolete btrfs.wiki.kernel.org
  btrfs: assert delayed node locked when removing delayed item
  btrfs: remove BUG() after failure to insert delayed dir index item
  btrfs: improve error message after failure to add delayed dir index item
  btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio
  btrfs: check for BTRFS_FS_ERROR in pending ordered assert
  btrfs: fix lockdep splat and potential deadlock after failure running delayed items
  btrfs: do not block starts waiting on previous transaction commit
  btrfs: release path before inode lookup during the ino lookup ioctl
  btrfs: fix race between finishing block group creation and its item update
  • Loading branch information
torvalds committed Sep 12, 2023
2 parents 2c758ce + 5facccc commit 3669558
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 66 deletions.
1 change: 0 additions & 1 deletion Documentation/filesystems/btrfs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki

https://btrfs.readthedocs.io

https://btrfs.wiki.kernel.org

that maintains information about administration tasks, frequently asked
questions, use cases, mount options, comprehensible changelogs, features,
Expand Down
1 change: 0 additions & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -4378,7 +4378,6 @@ M: David Sterba <dsterba@suse.com>
L: linux-btrfs@vger.kernel.org
S: Maintained
W: https://btrfs.readthedocs.io
W: https://btrfs.wiki.kernel.org/
Q: https://patchwork.kernel.org/project/linux-btrfs/list/
C: irc://irc.libera.chat/btrfs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ config BTRFS_FS
continue to be mountable and usable by newer kernels.

For more information, please see the web pages at
http://btrfs.wiki.kernel.org.
https://btrfs.readthedocs.io

To compile this file system support as a module, choose M here. The
module will be called btrfs.
Expand Down
12 changes: 10 additions & 2 deletions fs/btrfs/block-group.c
Original file line number Diff line number Diff line change
Expand Up @@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
fail:
btrfs_release_path(path);
/* We didn't update the block group item, need to revert @commit_used. */
if (ret < 0) {
/*
* We didn't update the block group item, need to revert commit_used
* unless the block group item didn't exist yet - this is to prevent a
* race with a concurrent insertion of the block group item, with
* insert_block_group_item(), that happened just after we attempted to
* update. In that case we would reset commit_used to 0 just after the
* insertion set it to a value greater than 0 - if the block group later
* becomes with 0 used bytes, we would incorrectly skip its update.
*/
if (ret < 0 && ret != -ENOENT) {
spin_lock(&cache->lock);
cache->commit_used = old_commit_used;
spin_unlock(&cache->lock);
Expand Down
104 changes: 71 additions & 33 deletions fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,25 +412,29 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)

static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
struct rb_root_cached *root;
struct btrfs_delayed_root *delayed_root;

/* Not inserted, ignore it. */
if (RB_EMPTY_NODE(&delayed_item->rb_node))
return;

delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
/* If it's in a rbtree, then we need to have delayed node locked. */
lockdep_assert_held(&delayed_node->mutex);

delayed_root = delayed_node->root->fs_info->delayed_root;

BUG_ON(!delayed_root);

if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_item->delayed_node->ins_root;
root = &delayed_node->ins_root;
else
root = &delayed_item->delayed_node->del_root;
root = &delayed_node->del_root;

rb_erase_cached(&delayed_item->rb_node, root);
RB_CLEAR_NODE(&delayed_item->rb_node);
delayed_item->delayed_node->count--;
delayed_node->count--;

finish_one_item(delayed_root);
}
Expand Down Expand Up @@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
btrfs_abort_transaction(trans, ret);
break;
}

prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
/*
* See the comment below about releasing path before releasing
* node. If the commit of delayed items was successful the path
* should always be released, but in case of an error, it may
* point to locked extent buffers (a leaf at the very least).
*/
ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
}

/*
* Release the path to avoid a potential deadlock and lockdep splat when
* releasing the delayed node, as that requires taking the delayed node's
* mutex. If another task starts running delayed items before we take
* the mutex, it will first lock the mutex and then it may try to lock
* the same btree path (leaf).
*/
btrfs_free_path(path);

if (curr_node)
btrfs_release_delayed_node(curr_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;

return ret;
Expand Down Expand Up @@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
}

/* Will return 0 or -ENOMEM */
static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
return;

/*
* Adding the new dir index item does not require touching another
* leaf, so we can release 1 unit of metadata that was previously
* reserved when starting the transaction. This applies only to
* the case where we had a transaction start and excludes the
* transaction join case (when replaying log trees).
*/
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, bytes, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
ASSERT(trans->bytes_reserved >= bytes);
trans->bytes_reserved -= bytes;
}

/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len,
struct btrfs_inode *dir,
Expand Down Expand Up @@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,

mutex_lock(&delayed_node->mutex);

/*
* First attempt to insert the delayed item. This is to make the error
* handling path simpler in case we fail (-EEXIST). There's no risk of
* any other task coming in and running the delayed item before we do
* the metadata space reservation below, because we are holding the
* delayed node's mutex and that mutex must also be locked before the
* node's delayed items can be run.
*/
ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
name_len, name, index, btrfs_root_id(delayed_node->root),
delayed_node->inode_id, dir->index_cnt,
delayed_node->index_cnt, ret);
btrfs_release_delayed_item(delayed_item);
btrfs_release_dir_index_item_space(trans);
mutex_unlock(&delayed_node->mutex);
goto release_node;
}

if (delayed_node->index_item_leaves == 0 ||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
delayed_node->curr_index_batch_size = data_len;
Expand All @@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
* impossible.
*/
if (WARN_ON(ret)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_item(delayed_item);
mutex_unlock(&delayed_node->mutex);
goto release_node;
}

delayed_node->index_item_leaves++;
} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) {
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);

/*
* Adding the new dir index item does not require touching another
* leaf, so we can release 1 unit of metadata that was previously
* reserved when starting the transaction. This applies only to
* the case where we had a transaction start and excludes the
* transaction join case (when replaying log trees).
*/
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, bytes, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
ASSERT(trans->bytes_reserved >= bytes);
trans->bytes_reserved -= bytes;
}

ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
name_len, name, delayed_node->root->root_key.objectid,
delayed_node->inode_id, ret);
BUG();
} else {
btrfs_release_dir_index_item_space(trans);
}
mutex_unlock(&delayed_node->mutex);

Expand Down
22 changes: 12 additions & 10 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
struct folio *folio)
{
struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
struct btrfs_subpage_info *spi = fs_info->subpage_info;
struct btrfs_subpage *subpage;
struct extent_buffer *eb;
int cur_bit = 0;
Expand All @@ -533,18 +534,19 @@ static bool btree_dirty_folio(struct address_space *mapping,
btrfs_assert_tree_write_locked(eb);
return filemap_dirty_folio(mapping, folio);
}

ASSERT(spi);
subpage = folio_get_private(folio);

ASSERT(subpage->dirty_bitmap);
while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
for (cur_bit = spi->dirty_offset;
cur_bit < spi->dirty_offset + spi->bitmap_nr_bits;
cur_bit++) {
unsigned long flags;
u64 cur;
u16 tmp = (1 << cur_bit);

spin_lock_irqsave(&subpage->lock, flags);
if (!(tmp & subpage->dirty_bitmap)) {
if (!test_bit(cur_bit, subpage->bitmaps)) {
spin_unlock_irqrestore(&subpage->lock, flags);
cur_bit++;
continue;
}
spin_unlock_irqrestore(&subpage->lock, flags);
Expand All @@ -557,7 +559,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
btrfs_assert_tree_write_locked(eb);
free_extent_buffer(eb);

cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1;
}
return filemap_dirty_folio(mapping, folio);
}
Expand Down Expand Up @@ -1547,7 +1549,7 @@ static int transaction_kthread(void *arg)

delta = ktime_get_seconds() - cur->start_time;
if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
cur->state < TRANS_STATE_COMMIT_START &&
cur->state < TRANS_STATE_COMMIT_PREP &&
delta < fs_info->commit_interval) {
spin_unlock(&fs_info->trans_lock);
delay -= msecs_to_jiffies((delta - 1) * 1000);
Expand Down Expand Up @@ -2682,8 +2684,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
BTRFS_LOCKDEP_TRANS_COMMIT_START);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep,
BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
BTRFS_LOCKDEP_TRANS_UNBLOCKED);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
Expand Down Expand Up @@ -4870,7 +4872,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
while (!list_empty(&fs_info->trans_list)) {
t = list_first_entry(&fs_info->trans_list,
struct btrfs_transaction, list);
if (t->state >= TRANS_STATE_COMMIT_START) {
if (t->state >= TRANS_STATE_COMMIT_PREP) {
refcount_inc(&t->use_count);
spin_unlock(&fs_info->trans_lock);
btrfs_wait_for_commit(fs_info, t->transid);
Expand Down
8 changes: 7 additions & 1 deletion fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1958,6 +1958,13 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put;
}

/*
* We don't need the path anymore, so release it and
* avoid deadlocks and lockdep warnings in case
* btrfs_iget() needs to lookup the inode from its root
* btree and lock the same leaf.
*/
btrfs_release_path(path);
temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
Expand All @@ -1978,7 +1985,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put;
}

btrfs_release_path(path);
key.objectid = key.offset;
key.offset = (u64)-1;
dirid = key.objectid;
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/locking.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ enum btrfs_lock_nesting {
};

enum btrfs_lockdep_trans_states {
BTRFS_LOCKDEP_TRANS_COMMIT_START,
BTRFS_LOCKDEP_TRANS_COMMIT_PREP,
BTRFS_LOCKDEP_TRANS_UNBLOCKED,
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
BTRFS_LOCKDEP_TRANS_COMPLETED,
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/ordered-data.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
refcount_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);

ASSERT(trans);
ASSERT(trans || BTRFS_FS_ERROR(fs_info));
if (trans) {
if (atomic_dec_and_test(&trans->pending_ordered))
wake_up(&trans->pending_wait);
Expand Down

0 comments on commit 3669558

Please sign in to comment.