Skip to content

Commit 761d7ac

Browse files
committed
Allow zfs_purgedir() to skip inodes undergoing eviction
When destroying a file which contains xattrs, the xattr directory inode and its child inodes may be acquired with zfs_zget(). This can result in a deadlock if these inodes are part of the same disposal list. This is only possible in zfs_purgedir() because it is called from evict() while processing this disposal list.

Prevent this deadlock by allowing the zfs_zget() lookup to fail in zfs_purgedir(): it now calls zfs_zget_retry() with retry set to B_FALSE, which returns EAGAIN instead of retrying when the inode is already being evicted. The object will be left in the unlinked set and processing of it will be deferred via the existing mechanisms.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue openzfs#4816
1 parent 5c27b29 commit 761d7ac

File tree

3 files changed

+31
-21
lines changed

3 files changed

+31
-21
lines changed

include/sys/zfs_znode.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ extern void zfs_znode_init(void);
305305
extern void zfs_znode_fini(void);
306306
extern int zfs_znode_hold_compare(const void *, const void *);
307307
extern int zfs_zget(zfs_sb_t *, uint64_t, znode_t **);
308+
extern int zfs_zget_retry(zfs_sb_t *, uint64_t, znode_t **, boolean_t);
308309
extern int zfs_rezget(znode_t *);
309310
extern void zfs_zinactive(znode_t *);
310311
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);

module/zfs/zfs_dir.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ zfs_unlinked_drain(zfs_sb_t *zsb)
518518
* We need to re-mark these list entries for deletion,
519519
* so we pull them back into core and set zp->z_unlinked.
520520
*/
521-
error = zfs_zget(zsb, zap.za_first_integer, &zp);
521+
error = zfs_zget_retry(zsb, zap.za_first_integer, &zp, B_FALSE);
522522

523523
/*
524524
* We may pick up znodes that are already marked for deletion.
@@ -561,8 +561,8 @@ zfs_purgedir(znode_t *dzp)
561561
for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
562562
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
563563
zap_cursor_advance(&zc)) {
564-
error = zfs_zget(zsb,
565-
ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
564+
error = zfs_zget_retry(zsb,
565+
ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp, B_FALSE);
566566
if (error) {
567567
skipped += 1;
568568
continue;

module/zfs/zfs_znode.c

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,8 +1061,8 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
10611061
}
10621062
}
10631063

1064-
int
1065-
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
1064+
static int
1065+
zfs_zget_impl(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp, boolean_t retry)
10661066
{
10671067
dmu_object_info_t doi;
10681068
dmu_buf_t *db;
@@ -1116,28 +1116,25 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
11161116
* called iput_final() to start the eviction process.
11171117
* The SA handle is still valid but because the VFS
11181118
* requires that the eviction succeed we must drop
1119-
* our locks and references to allow the eviction to
1120-
* complete. The zfs_zget() may then be retried.
1121-
*
1122-
* This unlikely case could be optimized by registering
1123-
* a sops->drop_inode() callback. The callback would
1124-
* need to detect the active SA hold thereby informing
1125-
* the VFS that this inode should not be evicted.
1119+
* our locks and references and attempt to allow the
1120+
* eviction to complete.
11261121
*/
1127-
if (igrab(ZTOI(zp)) == NULL) {
1128-
mutex_exit(&zp->z_lock);
1129-
sa_buf_rele(db, NULL);
1130-
zfs_znode_hold_exit(zsb, zh);
1131-
/* inode might need this to finish evict */
1132-
cond_resched();
1133-
goto again;
1122+
if (igrab(ZTOI(zp)) != NULL) {
1123+
*zpp = zp;
1124+
err = 0;
1125+
} else {
1126+
err = SET_ERROR(EAGAIN);
11341127
}
1135-
*zpp = zp;
1136-
err = 0;
11371128
}
11381129
mutex_exit(&zp->z_lock);
11391130
sa_buf_rele(db, NULL);
11401131
zfs_znode_hold_exit(zsb, zh);
1132+
1133+
if (err == EAGAIN && retry == B_TRUE) {
1134+
cond_resched();
1135+
goto again;
1136+
}
1137+
11411138
return (err);
11421139
}
11431140

@@ -1162,6 +1159,18 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
11621159
return (err);
11631160
}
11641161

1162+
int
1163+
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
1164+
{
1165+
return (zfs_zget_impl(zsb, obj_num, zpp, B_TRUE));
1166+
}
1167+
1168+
int
1169+
zfs_zget_retry(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp, boolean_t retry)
1170+
{
1171+
return (zfs_zget_impl(zsb, obj_num, zpp, retry));
1172+
}
1173+
11651174
int
11661175
zfs_rezget(znode_t *zp)
11671176
{

0 commit comments

Comments
 (0)