Skip to content

Commit f859e71

Browse files
author
Dan McDonald
committed
14424 tmpfs can be induced to deadlock
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Andy Fiddaman <andy@omnios.org>
Reviewed by: Mike Zeller <mike.zeller@joyent.com>
Approved by: Robert Mustacchi <rm@fingolfin.org>
1 parent d278f1c commit f859e71

File tree

1 file changed

+64
-2
lines changed

1 file changed

+64
-2
lines changed

Diff for: usr/src/uts/common/fs/tmpfs/tmp_dir.c

+64-2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,11 @@ static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
5555
#define T_HASH_SIZE 8192 /* must be power of 2 */
5656
#define T_MUTEX_SIZE 64
5757

/*
 * Diagnostic tunables for the rename/link lock-backoff loop in
 * tdirenter().  Deliberately non-static so compilers won't
 * constant-fold them away and so they stay visible and tunable
 * from a kernel debugger:
 *
 *   tmpfs_rename_backoff_delay - ticks to delay() between attempts.
 *   tmpfs_rename_backoff_tries - if nonzero, give up (EBUSY) after
 *                                this many failed rw_tryenter()s.
 *   tmpfs_rename_loops         - running count of failed attempts
 *                                (diagnostic only; not atomic).
 */
clock_t tmpfs_rename_backoff_delay = 1;
unsigned int tmpfs_rename_backoff_tries = 0;
unsigned long tmpfs_rename_loops = 0;
5863
static struct tdirent *t_hashtable[T_HASH_SIZE];
5964
static kmutex_t t_hashmutex[T_MUTEX_SIZE];
6065

@@ -267,8 +272,65 @@ tdirenter(
267272
* to see if it has been removed while it was unlocked.
268273
*/
269274
if (op == DE_LINK || op == DE_RENAME) {
270-
if (tp != dir)
271-
rw_enter(&tp->tn_rwlock, RW_WRITER);
275+
if (tp != dir) {
276+
unsigned int tries = 0;
277+
278+
/*
279+
* If we are acquiring tp->tn_rwlock (for SOURCE)
280+
* inside here, we must consider the following:
281+
*
282+
* - dir->tn_rwlock (TARGET) is already HELD (see
283+
* above ASSERT()).
284+
*
285+
* - It is possible our SOURCE is a parent of our
286+
* TARGET. Yes it's unusual, but it will return an
287+
* error below via tdircheckpath().
288+
*
289+
* - It is also possible that another thread,
290+
* concurrent to this one, is performing
291+
* rmdir(TARGET), which means it will first acquire
292+
* SOURCE's lock, THEN acquire TARGET's lock, which
293+
* could result in this thread holding TARGET and
294+
* trying for SOURCE, but the other thread holding
295+
* SOURCE and trying for TARGET. This is deadlock,
296+
* and it's inducible.
297+
*
298+
* To prevent this, we borrow some techniques from UFS
299+
* and rw_tryenter(), delaying if we fail, and
300+
* if someone tweaks the number of backoff tries to be
301+
* nonzero, return EBUSY after that number of tries.
302+
*/
303+
while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
304+
/*
305+
* Sloppy, but this is a diagnostic so atomic
306+
* increment would be overkill.
307+
*/
308+
tmpfs_rename_loops++;
309+
310+
if (tmpfs_rename_backoff_tries != 0) {
311+
if (tries > tmpfs_rename_backoff_tries)
312+
return (EBUSY);
313+
tries++;
314+
}
315+
/*
316+
* NOTE: We're still holding dir->tn_rwlock,
317+
* so drop it over the delay, so any other
318+
* thread can get its business done.
319+
*
320+
* No state change or state inspection happens
321+
* prior to here, so it is not wholly dangerous
322+
* to release-and-reacquire dir->tn_rwlock.
323+
*
324+
* Hold the vnode of dir in case it gets
325+
* released by another thread, though.
326+
*/
327+
VN_HOLD(TNTOV(dir));
328+
rw_exit(&dir->tn_rwlock);
329+
delay(tmpfs_rename_backoff_delay);
330+
rw_enter(&dir->tn_rwlock, RW_WRITER);
331+
VN_RELE(TNTOV(dir));
332+
}
333+
}
272334
mutex_enter(&tp->tn_tlock);
273335
if (tp->tn_nlink == 0) {
274336
mutex_exit(&tp->tn_tlock);

0 commit comments

Comments
 (0)