@@ -18,7 +18,6 @@ prototypes:
1818 char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
1919
2020locking rules:
21- none have BKL
2221 dcache_lock rename_lock ->d_lock may block
2322d_revalidate: no no no yes
2423d_hash no no no yes
@@ -42,18 +41,23 @@ ata *);
4241 int (*rename) (struct inode *, struct dentry *,
4342 struct inode *, struct dentry *);
4443 int (*readlink) (struct dentry *, char __user *,int);
45- int (*follow_link) (struct dentry *, struct nameidata *);
44+ void * (*follow_link) (struct dentry *, struct nameidata *);
45+ void (*put_link) (struct dentry *, struct nameidata *, void *);
4646 void (*truncate) (struct inode *);
4747 int (*permission) (struct inode *, int, struct nameidata *);
48+ int (*check_acl)(struct inode *, int);
4849 int (*setattr) (struct dentry *, struct iattr *);
4950 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
5051 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
5152 ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
5253 ssize_t (*listxattr) (struct dentry *, char *, size_t);
5354 int (*removexattr) (struct dentry *, const char *);
55+ void (*truncate_range)(struct inode *, loff_t, loff_t);
56+ long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
57+ int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
5458
5559locking rules:
56- all may block, none have BKL
60+ all may block
5761 i_mutex(inode)
5862lookup: yes
5963create: yes
@@ -66,19 +70,24 @@ rmdir: yes (both) (see below)
6670rename: yes (all) (see below)
6771readlink: no
6872follow_link: no
73+ put_link: no
6974truncate: yes (see below)
7075setattr: yes
7176permission: no
77+ check_acl: no
7278getattr: no
7379setxattr: yes
7480getxattr: no
7581listxattr: no
7682removexattr: yes
83+ truncate_range: yes
84+ fallocate: no
85+ fiemap: no
7786 Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
7887victim.
7988 cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
8089 ->truncate() is never called directly - it's a callback, not a
81- method. It's called by vmtruncate() - library function normally used by
90+ method. It's called by vmtruncate() - a deprecated library function used by
8291->setattr(). Locking information above applies to that call (i.e. is
8392inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been
8493passed).
@@ -91,7 +100,7 @@ prototypes:
91100 struct inode *(*alloc_inode)(struct super_block *sb);
92101 void (*destroy_inode)(struct inode *);
93102 void (*dirty_inode) (struct inode *);
94- int (*write_inode) (struct inode *, int );
103+ int (*write_inode) (struct inode *, struct writeback_control *wbc);
95104 int (*drop_inode) (struct inode *);
96105 void (*evict_inode) (struct inode *);
97106 void (*put_super) (struct super_block *);
@@ -105,10 +114,10 @@ prototypes:
105114 int (*show_options)(struct seq_file *, struct vfsmount *);
106115 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
107116 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
117+ int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
108118
109119locking rules:
110120 All may block [not true, see below]
111- None have BKL
112121 s_umount
113122alloc_inode:
114123destroy_inode:
@@ -127,6 +136,7 @@ umount_begin: no
127136show_options: no (namespace_sem)
128137quota_read: no (see below)
129138quota_write: no (see below)
139+ bdev_try_to_free_page: no (see below)
130140
131141->statfs() has s_umount (shared) when called by ustat(2) (native or
132142compat), but that's an accident of bad API; s_umount is used to pin
@@ -139,19 +149,25 @@ be the only ones operating on the quota file by the quota code (via
139149dqio_sem) (unless an admin really wants to screw up something and
140150writes to quota files with quotas on). For other details about locking
141151see also dquot_operations section.
152+ ->bdev_try_to_free_page is called from the ->releasepage handler of
153+ the block device inode. See there for more details.
142154
143155--------------------------- file_system_type ---------------------------
144156prototypes:
145157 int (*get_sb) (struct file_system_type *, int,
146158 const char *, void *, struct vfsmount *);
159+ struct dentry *(*mount) (struct file_system_type *, int,
160+ const char *, void *);
147161 void (*kill_sb) (struct super_block *);
148162locking rules:
149- may block BKL
150- get_sb yes no
151- kill_sb yes no
163+ may block
164+ get_sb yes
165+ mount yes
166+ kill_sb yes
152167
153168->get_sb() returns error or 0 with locked superblock attached to the vfsmount
154169(exclusive on ->s_umount).
170+ ->mount() returns ERR_PTR or the root dentry.
155171->kill_sb() takes a write-locked superblock, does all shutdown work on it,
156172unlocks and drops the reference.
157173
@@ -176,27 +192,35 @@ prototypes:
176192 void (*freepage)(struct page *);
177193 int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
178194 loff_t offset, unsigned long nr_segs);
179- int (*launder_page) (struct page *);
195+ int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
196+ unsigned long *);
197+ int (*migratepage)(struct address_space *, struct page *, struct page *);
198+ int (*launder_page)(struct page *);
199+ int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
200+ int (*error_remove_page)(struct address_space *, struct page *);
180201
181202locking rules:
182203 All except set_page_dirty and freepage may block
183204
184- BKL PageLocked(page) i_mutex
185- writepage: no yes, unlocks (see below)
186- readpage: no yes, unlocks
187- sync_page: no maybe
188- writepages: no
189- set_page_dirty no no
190- readpages: no
191- write_begin: no locks the page yes
192- write_end: no yes, unlocks yes
193- perform_write: no n/a yes
194- bmap: no
195- invalidatepage: no yes
196- releasepage: no yes
197- freepage: no yes
198- direct_IO: no
199- launder_page: no yes
205+ PageLocked(page) i_mutex
206+ writepage: yes, unlocks (see below)
207+ readpage: yes, unlocks
208+ sync_page: maybe
209+ writepages:
210+ set_page_dirty no
211+ readpages:
212+ write_begin: locks the page yes
213+ write_end: yes, unlocks yes
214+ bmap:
215+ invalidatepage: yes
216+ releasepage: yes
217+ freepage: yes
218+ direct_IO:
219+ get_xip_mem: maybe
220+ migratepage: yes (both)
221+ launder_page: yes
222+ is_partially_uptodate: yes
223+ error_remove_page: yes
200224
201225 ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
202226may be called from the request handler (/dev/loop).
@@ -276,9 +300,8 @@ under spinlock (it cannot block) and is sometimes called with the page
276300not locked.
277301
278302 ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
279- filesystems and by the swapper. The latter will eventually go away. All
280- instances do not actually need the BKL. Please, keep it that way and don't
281- breed new callers.
303+ filesystems and by the swapper. The latter will eventually go away. Please,
304+ don't breed new callers.
282305
283306 ->invalidatepage() is called when the filesystem must attempt to drop
284307some or all of the buffers from the page when it is being truncated. It
@@ -299,47 +322,37 @@ cleaned, or an error value if not. Note that in order to prevent the page
299322getting mapped back in and redirtied, it needs to be kept locked
300323across the entire operation.
301324
302- Note: currently almost all instances of address_space methods are
303- using BKL for internal serialization and that's one of the worst sources
304- of contention. Normally they are calling library functions (in fs/buffer.c)
305- and pass foo_get_block() as a callback (on local block-based filesystems,
306- indeed). BKL is not needed for library stuff and is usually taken by
307- foo_get_block(). It's an overkill, since block bitmaps can be protected by
308- internal fs locking and real critical areas are much smaller than the areas
309- filesystems protect now.
310-
311325----------------------- file_lock_operations ------------------------------
312326prototypes:
313- void (*fl_insert)(struct file_lock *); /* lock insertion callback */
314- void (*fl_remove)(struct file_lock *); /* lock removal callback */
315327 void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
316328 void (*fl_release_private)(struct file_lock *);
317329
318330
319331locking rules:
320- BKL may block
321- fl_insert: yes no
322- fl_remove: yes no
323- fl_copy_lock: yes no
324- fl_release_private: yes yes
332+ file_lock_lock may block
333+ fl_copy_lock: yes no
334+ fl_release_private: maybe no
325335
326336----------------------- lock_manager_operations ---------------------------
327337prototypes:
328338 int (*fl_compare_owner)(struct file_lock *, struct file_lock *);
329339 void (*fl_notify)(struct file_lock *); /* unblock callback */
340+ int (*fl_grant)(struct file_lock *, struct file_lock *, int);
330341 void (*fl_release_private)(struct file_lock *);
331342 void (*fl_break)(struct file_lock *); /* break_lease callback */
343+ int (*fl_mylease)(struct file_lock *, struct file_lock *);
344+ int (*fl_change)(struct file_lock **, int);
332345
333346locking rules:
334- BKL may block
335- fl_compare_owner: yes no
336- fl_notify: yes no
337- fl_release_private: yes yes
338- fl_break: yes no
339-
340- Currently only NFSD and NLM provide instances of this class. None of the
341- them block. If you have out-of-tree instances - please, show up. Locking
342- in that area will change.
347+ file_lock_lock may block
348+ fl_compare_owner: yes no
349+ fl_notify: yes no
350+ fl_grant: no no
351+ fl_release_private: maybe no
352+ fl_break: yes no
353+ fl_mylease: yes no
354+ fl_change: yes no
355+
343356--------------------------- buffer_head -----------------------------------
344357prototypes:
345358 void (*b_end_io)(struct buffer_head *bh, int uptodate);
@@ -364,17 +377,17 @@ prototypes:
364377 void (*swap_slot_free_notify) (struct block_device *, unsigned long);
365378
366379locking rules:
367- BKL bd_mutex
368- open: no yes
369- release: no yes
370- ioctl: no no
371- compat_ioctl: no no
372- direct_access: no no
373- media_changed: no no
374- unlock_native_capacity: no no
375- revalidate_disk: no no
376- getgeo: no no
377- swap_slot_free_notify: no no (see below)
380+ bd_mutex
381+ open: yes
382+ release: yes
383+ ioctl: no
384+ compat_ioctl: no
385+ direct_access: no
386+ media_changed: no
387+ unlock_native_capacity: no
388+ revalidate_disk: no
389+ getgeo: no
390+ swap_slot_free_notify: no (see below)
378391
379392media_changed, unlock_native_capacity and revalidate_disk are called only from
380393check_disk_change().
@@ -413,34 +426,21 @@ prototypes:
413426 unsigned long (*get_unmapped_area)(struct file *, unsigned long,
414427 unsigned long, unsigned long, unsigned long);
415428 int (*check_flags)(int);
429+ int (*flock) (struct file *, int, struct file_lock *);
430+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
431+ size_t, unsigned int);
432+ ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
433+ size_t, unsigned int);
434+ int (*setlease)(struct file *, long, struct file_lock **);
416435};
417436
418437locking rules:
419- All may block.
420- BKL
421- llseek: no (see below)
422- read: no
423- aio_read: no
424- write: no
425- aio_write: no
426- readdir: no
427- poll: no
428- unlocked_ioctl: no
429- compat_ioctl: no
430- mmap: no
431- open: no
432- flush: no
433- release: no
434- fsync: no (see below)
435- aio_fsync: no
436- fasync: no
437- lock: yes
438- readv: no
439- writev: no
440- sendfile: no
441- sendpage: no
442- get_unmapped_area: no
443- check_flags: no
438+ All may block except for ->setlease.
439+ No VFS locks held on entry except for ->fsync and ->setlease.
440+
441+ ->fsync() has i_mutex on inode.
442+
443+ ->setlease has the file_lock_lock held and must not sleep.
444444
445445->llseek() locking has moved from llseek to the individual llseek
446446implementations. If your fs is not using generic_file_llseek, you
@@ -450,17 +450,10 @@ mutex or just to use i_size_read() instead.
450450Note: this does not protect the file->f_pos against concurrent modifications
451451since this is something the userspace has to take care about.
452452
453- Note: ext2_release() was *the* source of contention on fs-intensive
454- loads and dropping BKL on ->release() helps to get rid of that (we still
455- grab BKL for cases when we close a file that had been opened r/w, but that
456- can and should be done using the internal locking with smaller critical areas).
457- Current worst offender is ext2_get_block()...
458-
459- ->fasync() is called without BKL protection, and is responsible for
460- maintaining the FASYNC bit in filp->f_flags. Most instances call
461- fasync_helper(), which does that maintenance, so it's not normally
462- something one needs to worry about. Return values > 0 will be mapped to
463- zero in the VFS layer.
453+ ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
454+ Most instances call fasync_helper(), which does that maintenance, so it's
455+ not normally something one needs to worry about. Return values > 0 will be
456+ mapped to zero in the VFS layer.
464457
465458->readdir() and ->ioctl() on directories must be changed. Ideally we would
466459move ->readdir() to inode_operations and use a separate method for directory
@@ -471,8 +464,6 @@ components. And there are other reasons why the current interface is a mess...
471464->read on directories probably must go away - we should just enforce -EISDIR
472465in sys_read() and friends.
473466
474- ->fsync() has i_mutex on inode.
475-
476467--------------------------- dquot_operations -------------------------------
477468prototypes:
478469 int (*write_dquot) (struct dquot *);
@@ -507,12 +498,12 @@ prototypes:
507498 int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
508499
509500locking rules:
510- BKL mmap_sem PageLocked(page)
511- open: no yes
512- close: no yes
513- fault: no yes can return with page locked
514- page_mkwrite: no yes can return with page locked
515- access: no yes
501+ mmap_sem PageLocked(page)
502+ open: yes
503+ close: yes
504+ fault: yes can return with page locked
505+ page_mkwrite: yes can return with page locked
506+ access: yes
516507
517508 ->fault() is called when a previously not present pte is about
518509to be faulted in. The filesystem must find and return the page associated
@@ -539,6 +530,3 @@ VM_IO | VM_PFNMAP VMAs.
539530
540531(if you break something or notice that it is broken and do not fix it yourself
541532- at least put it here)
542-
543- ipc/shm.c::shm_delete() - may need BKL.
544- ->read() and ->write() in many drivers are (probably) missing BKL.
0 commit comments