@@ -470,28 +470,6 @@ static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
 	return vma->vm_next;
 }
 
-/*
- * munmap_vma_range() - munmap VMAs that overlap a range.
- * @mm: The mm struct
- * @start: The start of the range.
- * @len: The length of the range.
- * @pprev: pointer to the pointer that will be set to previous vm_area_struct
- *
- * Find all the vm_area_struct that overlap from @start to
- * @end and munmap them.  Set @pprev to the previous vm_area_struct.
- *
- * Returns: -ENOMEM on munmap failure or 0 on success.
- */
-static inline int
-munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
-		 struct vm_area_struct **pprev, struct list_head *uf)
-{
-	while (range_has_overlap(mm, start, start + len, pprev))
-		if (do_munmap(mm, start, len, uf))
-			return -ENOMEM;
-	return 0;
-}
-
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
 					unsigned long addr, unsigned long end)
 {
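
The helper removed above looped do_munmap() over each overlapping VMA, using range_has_overlap() to find them one at a time. Later in this patch, mmap_region() simply calls do_munmap() once over the whole range, which already removes every VMA it intersects. The same contract is visible from userspace; a minimal, runnable demo (illustrative only, not part of the patch):

    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
    	long page = sysconf(_SC_PAGESIZE);
    	/* One 2-page anonymous mapping... */
    	char *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
    		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    	if (p == MAP_FAILED)
    		return 1;
    	/* ...split into two VMAs by giving the halves different protections. */
    	if (mprotect(p, page, PROT_READ))
    		return 1;
    	/* A single munmap() call over the range removes both VMAs. */
    	if (munmap(p, 2 * page))
    		perror("munmap");
    	return 0;
    }
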
@@ -618,6 +596,129 @@ static void __insert_vm_struct(struct mm_struct *mm, struct ma_state *mas,
 	mm->map_count++;
 }
 
+/*
+ * vma_expand - Expand an existing VMA
+ *
+ * @mas: The maple state
+ * @vma: The vma to expand
+ * @start: The start of the vma
+ * @end: The exclusive end of the vma
+ * @pgoff: The page offset of vma
+ * @next: The vma following @vma, which may be expanded over.
+ *
+ * Expand @vma to @start and @end.  Can expand off the start and end.  Will
+ * expand over @next if it's different from @vma and @end == @next->vm_end.
+ * Checking if the @vma can expand and merge with @next needs to be handled by
+ * the caller.
+ *
+ * Returns: 0 on success
+ */
+inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
+		      unsigned long start, unsigned long end, pgoff_t pgoff,
+		      struct vm_area_struct *next)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct address_space *mapping = NULL;
+	struct rb_root_cached *root = NULL;
+	struct anon_vma *anon_vma = vma->anon_vma;
+	struct file *file = vma->vm_file;
+	bool remove_next = false;
+
+	if (next && (vma != next) && (end == next->vm_end)) {
+		remove_next = true;
+		if (next->anon_vma && !vma->anon_vma) {
+			int error;
+
+			anon_vma = next->anon_vma;
+			vma->anon_vma = anon_vma;
+			error = anon_vma_clone(vma, next);
+			if (error)
+				return error;
+		}
+	}
+
+	/* Not merging but overwriting any part of next is not handled. */
+	VM_BUG_ON(next && !remove_next && next != vma && end > next->vm_start);
+	/* Only handles expanding */
+	VM_BUG_ON(vma->vm_start < start || vma->vm_end > end);
+
+	if (mas_preallocate(mas, vma, GFP_KERNEL))
+		goto nomem;
+
+	vma_adjust_trans_huge(vma, start, end, 0);
+
+	if (file) {
+		mapping = file->f_mapping;
+		root = &mapping->i_mmap;
+		uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+		i_mmap_lock_write(mapping);
+	}
+
+	if (anon_vma) {
+		anon_vma_lock_write(anon_vma);
+		anon_vma_interval_tree_pre_update_vma(vma);
+	}
+
+	if (file) {
+		flush_dcache_mmap_lock(mapping);
+		vma_interval_tree_remove(vma, root);
+	}
+
+	vma->vm_start = start;
+	vma->vm_end = end;
+	vma->vm_pgoff = pgoff;
+	/* Note: mas must be pointing to the expanding VMA */
+	vma_mas_store(vma, mas);
+
+	if (file) {
+		vma_interval_tree_insert(vma, root);
+		flush_dcache_mmap_unlock(mapping);
+	}
+
+	/* Expanding over the next vma */
+	if (remove_next) {
+		/* Remove from mm linked list - also updates highest_vm_end */
+		__vma_unlink_list(mm, next);
+
+		/* Kill the cache */
+		vmacache_invalidate(mm);
+
+		if (file)
+			__remove_shared_vm_struct(next, file, mapping);
+
+	} else if (!next) {
+		mm->highest_vm_end = vm_end_gap(vma);
+	}
+
+	if (anon_vma) {
+		anon_vma_interval_tree_post_update_vma(vma);
+		anon_vma_unlock_write(anon_vma);
+	}
+
+	if (file) {
+		i_mmap_unlock_write(mapping);
+		uprobe_mmap(vma);
+	}
+
+	if (remove_next) {
+		if (file) {
+			uprobe_munmap(next, next->vm_start, next->vm_end);
+			fput(file);
+		}
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
+		mm->map_count--;
+		mpol_put(vma_policy(next));
+		vm_area_free(next);
+	}
+
+	validate_mm(mm);
+	return 0;
+
+nomem:
+	return -ENOMEM;
+}
+
 /*
  * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
  * is already present in an i_mmap tree without adjusting the tree.
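
For orientation, here is a hedged sketch of how a caller might drive the new vma_expand(); the maple state must already point at the VMA being expanded (see the comment above vma_mas_store()), and new_end is an illustrative name, not something this patch defines:

    	/* Sketch only: grow vma in place so it ends at new_end. */
    	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end - 1);
    	struct vm_area_struct *next = __vma_next(mm, vma);
    	int error;

    	error = vma_expand(&mas, vma, vma->vm_start, new_end,
    			   vma->vm_pgoff, next);
    	if (error)
    		return error;	/* -ENOMEM from preallocation, or anon_vma_clone() */
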
@@ -1630,9 +1731,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma, *prev, *merge;
-	int error;
+	struct vm_area_struct *vma = NULL;
+	struct vm_area_struct *next, *prev, *merge;
+	pgoff_t pglen = len >> PAGE_SHIFT;
 	unsigned long charged = 0;
+	unsigned long end = addr + len;
+	unsigned long merge_start = addr, merge_end = end;
+	pgoff_t vm_pgoff;
+	int error;
+	MA_STATE(mas, &mm->mm_mt, addr, end - 1);
 
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
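
One detail worth flagging here, because end - 1 recurs below when the state is rewound (mas.index / mas.last): maple tree ranges are inclusive at both ends, so the half-open VMA range [addr, end) is stored under the closed range [addr, end - 1]. Restating the declaration above with that spelled out:

    	MA_STATE(mas, &mm->mm_mt, addr, end - 1);	/* closed [addr, end - 1] == half-open [addr, end) */
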
@@ -1642,16 +1749,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 * MAP_FIXED may remove pages of mappings that intersect with the
 	 * requested mapping.  Account for the pages it would unmap.
 	 */
-		nr_pages = count_vma_pages_range(mm, addr, addr + len);
+		nr_pages = count_vma_pages_range(mm, addr, end);
 
 		if (!may_expand_vm(mm, vm_flags,
 					(len >> PAGE_SHIFT) - nr_pages))
 			return -ENOMEM;
 	}
 
-	/* Clear old maps, set up prev and uf */
-	if (munmap_vma_range(mm, addr, len, &prev, uf))
+	/* Unmap any existing mapping in the area */
+	if (do_munmap(mm, addr, len, uf))
 		return -ENOMEM;
+
 	/*
 	 * Private writable mapping: check memory availability
 	 */
@@ -1662,14 +1770,43 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vm_flags |= VM_ACCOUNT;
 	}
 
-	/*
-	 * Can we just expand an old mapping?
-	 */
-	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-	if (vma)
-		goto out;
+	next = mas_next(&mas, ULONG_MAX);
+	prev = mas_prev(&mas, 0);
+	if (vm_flags & VM_SPECIAL)
+		goto cannot_expand;
+
+	/* Attempt to expand an old mapping */
+	/* Check next */
+	if (next && next->vm_start == end && !vma_policy(next) &&
+	    can_vma_merge_before(next, vm_flags, NULL, file, pgoff + pglen,
+				 NULL_VM_UFFD_CTX, NULL)) {
+		merge_end = next->vm_end;
+		vma = next;
+		vm_pgoff = next->vm_pgoff - pglen;
+	}
+
+	/* Check prev */
+	if (prev && prev->vm_end == addr && !vma_policy(prev) &&
+	    (vma ? can_vma_merge_after(prev, vm_flags, vma->anon_vma, file,
+				       pgoff, vma->vm_userfaultfd_ctx, NULL) :
+		   can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
+				       NULL_VM_UFFD_CTX, NULL))) {
+		merge_start = prev->vm_start;
+		vma = prev;
+		vm_pgoff = prev->vm_pgoff;
+	}
+
+
+	/* Actually expand, if possible */
+	if (vma &&
+	    !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
+		khugepaged_enter_vma(vma, vm_flags);
+		goto expanded;
+	}
 
+	mas.index = addr;
+	mas.last = end - 1;
+cannot_expand:
 	/*
 	 * Determine the object being mapped and call the appropriate
 	 * specific mapper. the address has already been validated, but
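
The vm_pgoff arithmetic in the "Check next" branch is the subtle part: merging is only coherent if the combined VMA remains one linear file mapping, so the candidate offset is next's offset stepped back by the request's length in pages. A worked example with made-up numbers:

    	/* Suppose next begins at end and maps the file from page 16
    	 * (next->vm_pgoff == 16), and the request is 4 pages (pglen == 4).
    	 * For the combined VMA to stay linear, the new pages must map
    	 * file pages 12..15, hence:
    	 */
    	vm_pgoff = next->vm_pgoff - pglen;	/* 16 - 4 == 12 */
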
@@ -1682,7 +1819,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	}
 
 	vma->vm_start = addr;
-	vma->vm_end = addr + len;
+	vma->vm_end = end;
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
@@ -1703,28 +1840,32 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 *
 	 * Answer: Yes, several device drivers can do it in their
 	 * f_op->mmap method. -DaveM
-	 * Bug: If addr is changed, prev, rb_link, rb_parent should
-	 *      be updated for vma_link()
 	 */
 	WARN_ON_ONCE(addr != vma->vm_start);
 
 	addr = vma->vm_start;
+	mas_reset(&mas);
 
-	/* If vm_flags changed after call_mmap(), we should try merge vma again
-	 * as we may succeed this time.
+	/*
+	 * If vm_flags changed after call_mmap(), we should try merge
+	 * vma again as we may succeed this time.
 	 */
 	if (unlikely(vm_flags != vma->vm_flags && prev)) {
 		merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
 			NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
 		if (merge) {
-			/* ->mmap() can change vma->vm_file and fput the original file. So
-			 * fput the vma->vm_file here or we would add an extra fput for file
-			 * and cause general protection fault ultimately.
+			/*
+			 * ->mmap() can change vma->vm_file and fput
+			 * the original file.  So fput the vma->vm_file
+			 * here or we would add an extra fput for file
+			 * and cause general protection fault
+			 * ultimately.
 			 */
 			fput(vma->vm_file);
 			vm_area_free(vma);
 			vma = merge;
 			/* Update vm_flags to pick up the change. */
+			addr = vma->vm_start;
 			vm_flags = vma->vm_flags;
 			goto unmap_writable;
 		}
@@ -1748,14 +1889,30 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			goto free_vma;
 	}
 
-	if (vma_link(mm, vma, prev)) {
+	if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
 		error = -ENOMEM;
 		if (file)
 			goto unmap_and_free_vma;
 		else
 			goto free_vma;
 	}
 
+	if (vma->vm_file)
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+
+	vma_mas_store(vma, &mas);
+	__vma_link_list(mm, vma, prev);
+	mm->map_count++;
+	if (vma->vm_file) {
+		if (vma->vm_flags & VM_SHARED)
+			mapping_allow_writable(vma->vm_file->f_mapping);
+
+		flush_dcache_mmap_lock(vma->vm_file->f_mapping);
+		vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
+		flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
+
17591916 /*
17601917 * vma_merge() calls khugepaged_enter_vma() either, the below
17611918 * call covers the non-merge case.
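
The added block above is essentially vma_link() open-coded, so the already-preallocated maple state can be reused and the tree write cannot fail at this point. A hypothetical helper condensing the same sequence (vma_mas_link() is made up here, not in the patch) may make the ordering easier to see:

    static void vma_mas_link(struct mm_struct *mm, struct vm_area_struct *vma,
    			 struct ma_state *mas, struct vm_area_struct *prev)
    {
    	struct address_space *mapping = NULL;

    	if (vma->vm_file) {
    		mapping = vma->vm_file->f_mapping;
    		i_mmap_lock_write(mapping);
    	}

    	vma_mas_store(vma, mas);	/* publish in the maple tree first */
    	__vma_link_list(mm, vma, prev);	/* keep the legacy list coherent */
    	mm->map_count++;

    	if (mapping) {
    		if (vma->vm_flags & VM_SHARED)
    			mapping_allow_writable(mapping);
    		flush_dcache_mmap_lock(mapping);
    		vma_interval_tree_insert(vma, &mapping->i_mmap);
    		flush_dcache_mmap_unlock(mapping);
    		i_mmap_unlock_write(mapping);
    	}
    }
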
@@ -1767,7 +1924,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
-out:
+expanded:
 	perf_event_mmap(vma);
 
 	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
@@ -1794,6 +1951,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_set_page_prot(vma);
 
+	validate_mm(mm);
 	return addr;
 
 unmap_and_free_vma:
@@ -1809,6 +1967,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+	validate_mm(mm);
 	return error;
 }
 
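The validate_mm() calls added to both the success and error paths are debug-only consistency checks; with the debug option off they compile away. Roughly, paraphrasing the surrounding file rather than quoting this diff:

    #ifdef CONFIG_DEBUG_VM_RB
    static void validate_mm(struct mm_struct *mm)
    {
    	/* Walk the VMA list/tree and BUG on any inconsistency. */
    }
    #else
    #define validate_mm(mm) do { } while (0)
    #endif
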
@@ -2632,10 +2791,6 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	prev = vma->vm_prev;
 	/* we have start < vma->vm_end  */
 
-	/* if it doesn't overlap, we have nothing.. */
-	if (vma->vm_start >= end)
-		return 0;
-
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *
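
Presumably the early return removed above is dead code once the lookup feeding it comes from the maple tree: a range query only hands back a VMA that actually intersects the requested range, so vma->vm_start >= end can no longer occur here. A sketch of the assumed lookup (the exact call in the updated __do_munmap() sits outside this hunk):

    	/* Assumed context, not shown in this hunk: */
    	MA_STATE(mas, &mm->mm_mt, start, start);

    	vma = mas_find(&mas, end - 1);	/* NULL, or a VMA overlapping [start, end) */
    	if (!vma)
    		return 0;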