@@ -55,6 +55,11 @@ static int tdiraddentry(struct tmpnode *, struct tmpnode *, char *,
 #define	T_HASH_SIZE	8192		/* must be power of 2 */
 #define	T_MUTEX_SIZE	64
 
+/* Non-static so compilers won't constant-fold these away. */
+clock_t tmpfs_rename_backoff_delay = 1;
+unsigned int tmpfs_rename_backoff_tries = 0;
+unsigned long tmpfs_rename_loops = 0;
+
 static struct tdirent	*t_hashtable[T_HASH_SIZE];
 static kmutex_t		t_hashmutex[T_MUTEX_SIZE];
 
@@ -267,8 +272,65 @@ tdirenter(
 	 * to see if it has been removed while it was unlocked.
 	 */
 	if (op == DE_LINK || op == DE_RENAME) {
-		if (tp != dir)
-			rw_enter(&tp->tn_rwlock, RW_WRITER);
+		if (tp != dir) {
+			unsigned int tries = 0;
+
+			/*
+			 * If we are acquiring tp->tn_rwlock (for SOURCE)
+			 * here, we must consider the following:
+			 *
+			 * - dir->tn_rwlock (TARGET) is already HELD (see
+			 *   the ASSERT() above).
+			 *
+			 * - It is possible that our SOURCE is a parent of
+			 *   our TARGET.  Yes, it's unusual, but it will
+			 *   return an error below via tdircheckpath().
+			 *
+			 * - It is also possible that another thread,
+			 *   concurrent with this one, is performing
+			 *   rmdir(TARGET).  That thread first acquires
+			 *   SOURCE's lock and THEN TARGET's lock, which can
+			 *   leave this thread holding TARGET and trying for
+			 *   SOURCE while the other thread holds SOURCE and
+			 *   tries for TARGET.  That is a deadlock, and it is
+			 *   inducible.
+			 *
+			 * To prevent this, we borrow a technique from UFS:
+			 * use rw_tryenter() and delay if it fails.  If
+			 * someone has tweaked the number of backoff tries to
+			 * be nonzero, return EBUSY after that many tries.
+			 */
+			while (!rw_tryenter(&tp->tn_rwlock, RW_WRITER)) {
+				/*
+				 * Sloppy, but this is a diagnostic, so an
+				 * atomic increment would be overkill.
+				 */
+				tmpfs_rename_loops++;
+
+				if (tmpfs_rename_backoff_tries != 0) {
+					if (tries > tmpfs_rename_backoff_tries)
+						return (EBUSY);
+					tries++;
+				}
+				/*
+				 * NOTE: We are still holding dir->tn_rwlock,
+				 * so drop it across the delay so any other
+				 * thread can get its business done.
+				 *
+				 * No state change or state inspection happens
+				 * before this point, so it is not wholly
+				 * dangerous to release and reacquire
+				 * dir->tn_rwlock.
+				 *
+				 * Hold the vnode of dir, though, in case it
+				 * gets released by another thread.
+				 */
+				VN_HOLD(TNTOV(dir));
+				rw_exit(&dir->tn_rwlock);
+				delay(tmpfs_rename_backoff_delay);
+				rw_enter(&dir->tn_rwlock, RW_WRITER);
+				VN_RELE(TNTOV(dir));
+			}
+		}
 		mutex_enter(&tp->tn_tlock);
 		if (tp->tn_nlink == 0) {
 			mutex_exit(&tp->tn_tlock);
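
The loop added above is a try-lock-with-backoff: the thread keeps the lock it already holds (TARGET) only as long as the trylock on SOURCE can succeed immediately, and otherwise drops TARGET for the duration of a short delay so a thread taking the two locks in the opposite order can finish. Below is a minimal user-space sketch of the same idea, assuming POSIX threads; the function name lock_pair, the backoff_tries parameter, and the 1 ms delay are invented for illustration, and the VN_HOLD()/VN_RELE() vnode pinning from the tmpfs code has no analogue here.

/*
 * Hypothetical user-space analogue of the backoff loop above.
 * On entry, "target" is already write-locked by the caller; on a zero
 * return both locks are held, and on EBUSY only "target" is still held
 * (mirroring the kernel code, where the caller cleans up).
 */
#include <errno.h>
#include <pthread.h>
#include <unistd.h>

int
lock_pair(pthread_rwlock_t *target, pthread_rwlock_t *source,
    unsigned int backoff_tries)
{
	unsigned int tries = 0;

	while (pthread_rwlock_trywrlock(source) != 0) {
		/* Optionally give up after a bounded number of tries. */
		if (backoff_tries != 0) {
			if (tries > backoff_tries)
				return (EBUSY);
			tries++;
		}
		/*
		 * Drop "target" across the delay so a thread that takes
		 * the locks in the opposite order can make progress.
		 */
		pthread_rwlock_unlock(target);
		usleep(1000);
		pthread_rwlock_wrlock(target);
	}
	return (0);
}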