Skip to content

Commit f006540

Browse files
committed
Merge tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache
Pull folio updates from Matthew Wilcox: - Fix an accounting bug that made NR_FILE_DIRTY grow without limit when running xfstests - Convert more of mpage to use folios - Remove add_to_page_cache() and add_to_page_cache_locked() - Convert find_get_pages_range() to filemap_get_folios() - Improvements to the read_cache_page() family of functions - Remove a few unnecessary checks of PageError - Some straightforward filesystem conversions to use folios - Split PageMovable users out from address_space_operations into their own movable_operations - Convert aops->migratepage to aops->migrate_folio - Remove nobh support (Christoph Hellwig) * tag 'folio-6.0' of git://git.infradead.org/users/willy/pagecache: (78 commits) fs: remove the NULL get_block case in mpage_writepages fs: don't call ->writepage from __mpage_writepage fs: remove the nobh helpers jfs: stop using the nobh helper ext2: remove nobh support ntfs3: refactor ntfs_writepages mm/folio-compat: Remove migration compatibility functions fs: Remove aops->migratepage() secretmem: Convert to migrate_folio hugetlb: Convert to migrate_folio aio: Convert to migrate_folio f2fs: Convert to filemap_migrate_folio() ubifs: Convert to filemap_migrate_folio() btrfs: Convert btrfs_migratepage to migrate_folio mm/migrate: Add filemap_migrate_folio() mm/migrate: Convert migrate_page() to migrate_folio() nfs: Convert to migrate_folio btrfs: Convert btree_migratepage to migrate_folio mm/migrate: Convert expected_page_refs() to folio_expected_refs() mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio() ...
2 parents e087437 + cf5e7a6 commit f006540

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+830
-1887
lines changed

Documentation/admin-guide/cgroup-v1/memcg_test.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
9797
=============
9898

9999
Page Cache is charged at
100-
- add_to_page_cache_locked().
100+
- filemap_add_folio().
101101

102102
The logic is very clear. (About migration, see below)
103103

Documentation/filesystems/ext2.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,6 @@ acl Enable POSIX Access Control Lists support
5959
(requires CONFIG_EXT2_FS_POSIX_ACL).
6060
noacl Don't support POSIX ACLs.
6161

62-
nobh Do not attach buffer_heads to file pagecache.
63-
6462
quota, usrquota Enable user disk quota support
6563
(requires CONFIG_QUOTA).
6664

Documentation/filesystems/locking.rst

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -252,9 +252,8 @@ prototypes::
252252
bool (*release_folio)(struct folio *, gfp_t);
253253
void (*free_folio)(struct folio *);
254254
int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
255-
bool (*isolate_page) (struct page *, isolate_mode_t);
256-
int (*migratepage)(struct address_space *, struct page *, struct page *);
257-
void (*putback_page) (struct page *);
255+
int (*migrate_folio)(struct address_space *, struct folio *dst,
256+
struct folio *src, enum migrate_mode);
258257
int (*launder_folio)(struct folio *);
259258
bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count);
260259
int (*error_remove_page)(struct address_space *, struct page *);
@@ -280,9 +279,7 @@ invalidate_folio: yes exclusive
280279
release_folio: yes
281280
free_folio: yes
282281
direct_IO:
283-
isolate_page: yes
284-
migratepage: yes (both)
285-
putback_page: yes
282+
migrate_folio: yes (both)
286283
launder_folio: yes
287284
is_partially_uptodate: yes
288285
error_remove_page: yes

Documentation/filesystems/vfs.rst

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -737,12 +737,8 @@ cache in your filesystem. The following members are defined:
737737
bool (*release_folio)(struct folio *, gfp_t);
738738
void (*free_folio)(struct folio *);
739739
ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
740-
/* isolate a page for migration */
741-
bool (*isolate_page) (struct page *, isolate_mode_t);
742-
/* migrate the contents of a page to the specified target */
743-
int (*migratepage) (struct page *, struct page *);
744-
/* put migration-failed page back to right list */
745-
void (*putback_page) (struct page *);
740+
int (*migrate_folio)(struct address_space *, struct folio *dst,
741+
struct folio *src, enum migrate_mode);
746742
int (*launder_folio) (struct folio *);
747743
748744
bool (*is_partially_uptodate) (struct folio *, size_t from,
@@ -774,13 +770,38 @@ cache in your filesystem. The following members are defined:
774770
See the file "Locking" for more details.
775771

776772
``read_folio``
777-
called by the VM to read a folio from backing store. The folio
778-
will be locked when read_folio is called, and should be unlocked
779-
and marked uptodate once the read completes. If ->read_folio
780-
discovers that it cannot perform the I/O at this time, it can
781-
unlock the folio and return AOP_TRUNCATED_PAGE. In this case,
782-
the folio will be looked up again, relocked and if that all succeeds,
783-
->read_folio will be called again.
773+
Called by the page cache to read a folio from the backing store.
774+
The 'file' argument supplies authentication information to network
775+
filesystems, and is generally not used by block based filesystems.
776+
It may be NULL if the caller does not have an open file (e.g. if
777+
the kernel is performing a read for itself rather than on behalf
778+
of a userspace process with an open file).
779+
780+
If the mapping does not support large folios, the folio will
781+
contain a single page. The folio will be locked when read_folio
782+
is called. If the read completes successfully, the folio should
783+
be marked uptodate. The filesystem should unlock the folio
784+
once the read has completed, whether it was successful or not.
785+
The filesystem does not need to modify the refcount on the folio;
786+
the page cache holds a reference count and that will not be
787+
released until the folio is unlocked.
788+
789+
Filesystems may implement ->read_folio() synchronously.
790+
In normal operation, folios are read through the ->readahead()
791+
method. Only if this fails, or if the caller needs to wait for
792+
the read to complete, will the page cache call ->read_folio().
793+
Filesystems should not attempt to perform their own readahead
794+
in the ->read_folio() operation.
795+
796+
If the filesystem cannot perform the read at this time, it can
797+
unlock the folio, do whatever action it needs to ensure that the
798+
read will succeed in the future and return AOP_TRUNCATED_PAGE.
799+
In this case, the caller should look up the folio, lock it,
800+
and call ->read_folio again.
801+
802+
Callers may invoke the ->read_folio() method directly, but using
803+
read_mapping_folio() will take care of locking, waiting for the
804+
read to complete and handling cases such as AOP_TRUNCATED_PAGE.
784805

785806
``writepages``
786807
called by the VM to write out pages associated with the
@@ -905,20 +926,12 @@ cache in your filesystem. The following members are defined:
905926
data directly between the storage and the application's address
906927
space.
907928

908-
``isolate_page``
909-
Called by the VM when isolating a movable non-lru page. If page
910-
is successfully isolated, VM marks the page as PG_isolated via
911-
__SetPageIsolated.
912-
913-
``migrate_page``
929+
``migrate_folio``
914930
This is used to compact the physical memory usage. If the VM
915-
wants to relocate a page (maybe off a memory card that is
916-
signalling imminent failure) it will pass a new page and an old
917-
page to this function. migrate_page should transfer any private
918-
data across and update any references that it has to the page.
919-
920-
``putback_page``
921-
Called by the VM when isolated page's migration fails.
931+
wants to relocate a folio (maybe from a memory device that is
932+
signalling imminent failure) it will pass a new folio and an old
933+
folio to this function. migrate_folio should transfer any private
934+
data across and update any references that it has to the folio.
922935

923936
``launder_folio``
924937
Called before freeing a folio - it writes back the dirty folio.

Documentation/vm/page_migration.rst

Lines changed: 10 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -152,110 +152,15 @@ Steps:
152152
Non-LRU page migration
153153
======================
154154

155-
Although migration originally aimed for reducing the latency of memory accesses
156-
for NUMA, compaction also uses migration to create high-order pages.
155+
Although migration was originally aimed at reducing the latency of memory
156+
accesses for NUMA, compaction also uses migration to create high-order
157+
pages. For compaction purposes, it is also useful to be able to move
158+
non-LRU pages, such as zsmalloc and virtio-balloon pages.
157159

158-
Current problem of the implementation is that it is designed to migrate only
159-
*LRU* pages. However, there are potential non-LRU pages which can be migrated
160-
in drivers, for example, zsmalloc, virtio-balloon pages.
161-
162-
For virtio-balloon pages, some parts of migration code path have been hooked
163-
up and added virtio-balloon specific functions to intercept migration logics.
164-
It's too specific to a driver so other drivers who want to make their pages
165-
movable would have to add their own specific hooks in the migration path.
166-
167-
To overcome the problem, VM supports non-LRU page migration which provides
168-
generic functions for non-LRU movable pages without driver specific hooks
169-
in the migration path.
170-
171-
If a driver wants to make its pages movable, it should define three functions
172-
which are function pointers of struct address_space_operations.
173-
174-
1. ``bool (*isolate_page) (struct page *page, isolate_mode_t mode);``
175-
176-
What VM expects from isolate_page() function of driver is to return *true*
177-
if driver isolates the page successfully. On returning true, VM marks the page
178-
as PG_isolated so concurrent isolation in several CPUs skip the page
179-
for isolation. If a driver cannot isolate the page, it should return *false*.
180-
181-
Once page is successfully isolated, VM uses page.lru fields so driver
182-
shouldn't expect to preserve values in those fields.
183-
184-
2. ``int (*migratepage) (struct address_space *mapping,``
185-
| ``struct page *newpage, struct page *oldpage, enum migrate_mode);``
186-
187-
After isolation, VM calls migratepage() of driver with the isolated page.
188-
The function of migratepage() is to move the contents of the old page to the
189-
new page
190-
and set up fields of struct page newpage. Keep in mind that you should
191-
indicate to the VM the oldpage is no longer movable via __ClearPageMovable()
192-
under page_lock if you migrated the oldpage successfully and returned
193-
MIGRATEPAGE_SUCCESS. If driver cannot migrate the page at the moment, driver
194-
can return -EAGAIN. On -EAGAIN, VM will retry page migration in a short time
195-
because VM interprets -EAGAIN as "temporary migration failure". On returning
196-
any error except -EAGAIN, VM will give up the page migration without
197-
retrying.
198-
199-
Driver shouldn't touch the page.lru field while in the migratepage() function.
200-
201-
3. ``void (*putback_page)(struct page *);``
202-
203-
If migration fails on the isolated page, VM should return the isolated page
204-
to the driver so VM calls the driver's putback_page() with the isolated page.
205-
In this function, the driver should put the isolated page back into its own data
206-
structure.
207-
208-
Non-LRU movable page flags
209-
210-
There are two page flags for supporting non-LRU movable page.
211-
212-
* PG_movable
213-
214-
Driver should use the function below to make page movable under page_lock::
215-
216-
void __SetPageMovable(struct page *page, struct address_space *mapping)
217-
218-
It needs argument of address_space for registering migration
219-
family functions which will be called by VM. Exactly speaking,
220-
PG_movable is not a real flag of struct page. Rather, VM
221-
reuses the page->mapping's lower bits to represent it::
222-
223-
#define PAGE_MAPPING_MOVABLE 0x2
224-
page->mapping = page->mapping | PAGE_MAPPING_MOVABLE;
225-
226-
so driver shouldn't access page->mapping directly. Instead, driver should
227-
use page_mapping() which masks off the low two bits of page->mapping under
228-
page lock so it can get the right struct address_space.
229-
230-
For testing of non-LRU movable pages, VM supports __PageMovable() function.
231-
However, it doesn't guarantee to identify non-LRU movable pages because
232-
the page->mapping field is unified with other variables in struct page.
233-
If the driver releases the page after isolation by VM, page->mapping
234-
doesn't have a stable value although it has PAGE_MAPPING_MOVABLE set
235-
(look at __ClearPageMovable). But __PageMovable() is cheap to call whether
236-
page is LRU or non-LRU movable once the page has been isolated because LRU
237-
pages can never have PAGE_MAPPING_MOVABLE set in page->mapping. It is also
238-
good for just peeking to test non-LRU movable pages before more expensive
239-
checking with lock_page() in pfn scanning to select a victim.
240-
241-
For guaranteeing non-LRU movable page, VM provides PageMovable() function.
242-
Unlike __PageMovable(), PageMovable() validates page->mapping and
243-
mapping->a_ops->isolate_page under lock_page(). The lock_page() prevents
244-
sudden destroying of page->mapping.
245-
246-
Drivers using __SetPageMovable() should clear the flag via
247-
__ClearMovablePage() under page_lock() before the releasing the page.
248-
249-
* PG_isolated
250-
251-
To prevent concurrent isolation among several CPUs, VM marks isolated page
252-
as PG_isolated under lock_page(). So if a CPU encounters PG_isolated
253-
non-LRU movable page, it can skip it. Driver doesn't need to manipulate the
254-
flag because VM will set/clear it automatically. Keep in mind that if the
255-
driver sees a PG_isolated page, it means the page has been isolated by the
256-
VM so it shouldn't touch the page.lru field.
257-
The PG_isolated flag is aliased with the PG_reclaim flag so drivers
258-
shouldn't use PG_isolated for its own purposes.
160+
If a driver wants to make its pages movable, it should define a struct
161+
movable_operations. It then needs to call __SetPageMovable() on each
162+
page that it may be able to move. This uses the ``page->mapping`` field,
163+
so this field is not available for the driver to use for other purposes.
259164

260165
Monitoring Migration
261166
=====================
@@ -286,3 +191,5 @@ THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase.
286191

287192
Christoph Lameter, May 8, 2006.
288193
Minchan Kim, Mar 28, 2016.
194+
195+
.. kernel-doc:: include/linux/migrate.h

arch/powerpc/platforms/pseries/cmm.c

Lines changed: 3 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
#include <linux/stringify.h>
2020
#include <linux/swap.h>
2121
#include <linux/device.h>
22-
#include <linux/mount.h>
23-
#include <linux/pseudo_fs.h>
24-
#include <linux/magic.h>
2522
#include <linux/balloon_compaction.h>
2623
#include <asm/firmware.h>
2724
#include <asm/hvcall.h>
@@ -500,19 +497,6 @@ static struct notifier_block cmm_mem_nb = {
500497
};
501498

502499
#ifdef CONFIG_BALLOON_COMPACTION
503-
static struct vfsmount *balloon_mnt;
504-
505-
static int cmm_init_fs_context(struct fs_context *fc)
506-
{
507-
return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
508-
}
509-
510-
static struct file_system_type balloon_fs = {
511-
.name = "ppc-cmm",
512-
.init_fs_context = cmm_init_fs_context,
513-
.kill_sb = kill_anon_super,
514-
};
515-
516500
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
517501
struct page *newpage, struct page *page,
518502
enum migrate_mode mode)
@@ -564,47 +548,13 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
564548
return MIGRATEPAGE_SUCCESS;
565549
}
566550

567-
static int cmm_balloon_compaction_init(void)
551+
static void cmm_balloon_compaction_init(void)
568552
{
569-
int rc;
570-
571553
balloon_devinfo_init(&b_dev_info);
572554
b_dev_info.migratepage = cmm_migratepage;
573-
574-
balloon_mnt = kern_mount(&balloon_fs);
575-
if (IS_ERR(balloon_mnt)) {
576-
rc = PTR_ERR(balloon_mnt);
577-
balloon_mnt = NULL;
578-
return rc;
579-
}
580-
581-
b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
582-
if (IS_ERR(b_dev_info.inode)) {
583-
rc = PTR_ERR(b_dev_info.inode);
584-
b_dev_info.inode = NULL;
585-
kern_unmount(balloon_mnt);
586-
balloon_mnt = NULL;
587-
return rc;
588-
}
589-
590-
b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
591-
return 0;
592-
}
593-
static void cmm_balloon_compaction_deinit(void)
594-
{
595-
if (b_dev_info.inode)
596-
iput(b_dev_info.inode);
597-
b_dev_info.inode = NULL;
598-
kern_unmount(balloon_mnt);
599-
balloon_mnt = NULL;
600555
}
601556
#else /* CONFIG_BALLOON_COMPACTION */
602-
static int cmm_balloon_compaction_init(void)
603-
{
604-
return 0;
605-
}
606-
607-
static void cmm_balloon_compaction_deinit(void)
557+
static void cmm_balloon_compaction_init(void)
608558
{
609559
}
610560
#endif /* CONFIG_BALLOON_COMPACTION */
@@ -622,9 +572,7 @@ static int cmm_init(void)
622572
if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
623573
return -EOPNOTSUPP;
624574

625-
rc = cmm_balloon_compaction_init();
626-
if (rc)
627-
return rc;
575+
cmm_balloon_compaction_init();
628576

629577
rc = register_oom_notifier(&cmm_oom_nb);
630578
if (rc < 0)
@@ -658,7 +606,6 @@ static int cmm_init(void)
658606
out_oom_notifier:
659607
unregister_oom_notifier(&cmm_oom_nb);
660608
out_balloon_compaction:
661-
cmm_balloon_compaction_deinit();
662609
return rc;
663610
}
664611

@@ -677,7 +624,6 @@ static void cmm_exit(void)
677624
unregister_memory_notifier(&cmm_mem_nb);
678625
cmm_free_pages(atomic_long_read(&loaned_pages));
679626
cmm_unregister_sysfs(&cmm_dev);
680-
cmm_balloon_compaction_deinit();
681627
}
682628

683629
/**

block/fops.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ const struct address_space_operations def_blk_aops = {
421421
.write_end = blkdev_write_end,
422422
.writepages = blkdev_writepages,
423423
.direct_IO = blkdev_direct_IO,
424-
.migratepage = buffer_migrate_page_norefs,
424+
.migrate_folio = buffer_migrate_folio_norefs,
425425
.is_dirty_writeback = buffer_check_dirty_writeback,
426426
};
427427

block/partitions/check.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@ struct parsed_partitions {
2424
};
2525

2626
typedef struct {
27-
struct page *v;
27+
struct folio *v;
2828
} Sector;
2929

3030
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p);
3131
static inline void put_dev_sector(Sector p)
3232
{
33-
put_page(p.v);
33+
folio_put(p.v);
3434
}
3535

3636
static inline void

0 commit comments

Comments
 (0)