@@ -1895,6 +1895,111 @@ static void buffer_tree_clear_mark(const struct extent_buffer *eb, xa_mark_t mark)
 	xas_unlock_irqrestore(&xas, flags);
 }
 
+static void buffer_tree_tag_for_writeback(struct btrfs_fs_info *fs_info,
+					  unsigned long start, unsigned long end)
+{
+	XA_STATE(xas, &fs_info->buffer_tree, start);
+	unsigned int tagged = 0;
+	void *eb;
+
+	xas_lock_irq(&xas);
+	xas_for_each_marked(&xas, eb, end, PAGECACHE_TAG_DIRTY) {
+		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
+		if (++tagged % XA_CHECK_SCHED)
+			continue;
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
+		cond_resched();
+		xas_lock_irq(&xas);
+	}
+	xas_unlock_irq(&xas);
+}
+
+struct eb_batch {
+	unsigned int nr;
+	unsigned int cur;
+	struct extent_buffer *ebs[PAGEVEC_SIZE];
+};
+
+static inline bool eb_batch_add(struct eb_batch *batch, struct extent_buffer *eb)
+{
+	batch->ebs[batch->nr++] = eb;
+	return (batch->nr < PAGEVEC_SIZE);
+}
+
+static inline void eb_batch_init(struct eb_batch *batch)
+{
+	batch->nr = 0;
+	batch->cur = 0;
+}
+
+static inline struct extent_buffer *eb_batch_next(struct eb_batch *batch)
+{
+	if (batch->cur >= batch->nr)
+		return NULL;
+	return batch->ebs[batch->cur++];
+}
+
+static inline void eb_batch_release(struct eb_batch *batch)
+{
+	for (unsigned int i = 0; i < batch->nr; i++)
+		free_extent_buffer(batch->ebs[i]);
+	eb_batch_init(batch);
+}
+
+static inline struct extent_buffer *find_get_eb(struct xa_state *xas, unsigned long max,
+						xa_mark_t mark)
+{
+	struct extent_buffer *eb;
+
+retry:
+	eb = xas_find_marked(xas, max, mark);
+
+	if (xas_retry(xas, eb))
+		goto retry;
+
+	if (!eb)
+		return NULL;
+
+	if (!atomic_inc_not_zero(&eb->refs)) {
+		xas_reset(xas);
+		goto retry;
+	}
+
+	if (unlikely(eb != xas_reload(xas))) {
+		free_extent_buffer(eb);
+		xas_reset(xas);
+		goto retry;
+	}
+
+	return eb;
+}
+
+static unsigned int buffer_tree_get_ebs_tag(struct btrfs_fs_info *fs_info,
+					    unsigned long *start,
+					    unsigned long end, xa_mark_t tag,
+					    struct eb_batch *batch)
+{
+	XA_STATE(xas, &fs_info->buffer_tree, *start);
+	struct extent_buffer *eb;
+
+	rcu_read_lock();
+	while ((eb = find_get_eb(&xas, end, tag)) != NULL) {
+		if (!eb_batch_add(batch, eb)) {
+			*start = ((eb->start + eb->len) >> fs_info->sectorsize_bits);
+			goto out;
+		}
+	}
+	if (end == ULONG_MAX)
+		*start = ULONG_MAX;
+	else
+		*start = end + 1;
+out:
+	rcu_read_unlock();
+
+	return batch->nr;
+}
+
 /*
  * The endio specific version which won't touch any unsafe spinlock in endio
  * context.
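The four pieces added above (the TOWRITE tagging walk, the eb_batch container, the RCU-safe lookup, and the batched getter) compose into one lookup loop. Below is a minimal sketch of a consumer, assuming a hypothetical function name and an empty loop body; the real consumers are btrfs_btree_wait_writeback_range() and btree_write_cache_pages() further down in the patch.

/*
 * Illustrative sketch, not part of the patch: visit every extent
 * buffer carrying @tag in a byte range.  Each eb returned by
 * buffer_tree_get_ebs_tag() has a reference held on it (taken in
 * find_get_eb() via atomic_inc_not_zero()); eb_batch_release() drops
 * all of them, so the loop body must not call free_extent_buffer().
 */
static void for_each_tagged_eb(struct btrfs_fs_info *fs_info, u64 start,
                               u64 end, xa_mark_t tag)
{
        struct eb_batch batch;
        unsigned long index = (start >> fs_info->sectorsize_bits);
        unsigned long end_index = (end >> fs_info->sectorsize_bits);

        eb_batch_init(&batch);
        while (index <= end_index &&
               buffer_tree_get_ebs_tag(fs_info, &index, end_index, tag, &batch)) {
                struct extent_buffer *eb;

                while ((eb = eb_batch_next(&batch)) != NULL) {
                        /* ... operate on eb here ... */
                }
                eb_batch_release(&batch);       /* puts refs, resets batch */
                cond_resched();
        }
}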
@@ -1997,163 +2102,36 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
 }
 
 /*
- * Submit one subpage btree page.
+ * Wait for all eb writeback in the given range to finish.
  *
- * The main difference to submit_eb_page() is:
- * - Page locking
- *   For subpage, we don't rely on page locking at all.
- *
- * - Flush write bio
- *   We only flush bio if we may be unable to fit current extent buffers into
- *   current bio.
- *
- * Return >=0 for the number of submitted extent buffers.
- * Return <0 for fatal error.
+ * @fs_info: The fs_info for this file system.
+ * @start:   The offset of the range to start waiting on writeback.
+ * @end:     The end of the range, inclusive. This is meant to be used in
+ *           conjunction with wait_marked_extents, so this will usually be
+ *           the_next_eb->start - 1.
  */
-static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
+void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
+                                      u64 end)
 {
-	struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
-	int submitted = 0;
-	u64 folio_start = folio_pos(folio);
-	int bit_start = 0;
-	int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
-	const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
+	struct eb_batch batch;
+	unsigned long start_index = (start >> fs_info->sectorsize_bits);
+	unsigned long end_index = (end >> fs_info->sectorsize_bits);
 
-	/* Lock and write each dirty extent buffers in the range */
-	while (bit_start < blocks_per_folio) {
-		struct btrfs_subpage *subpage = folio_get_private(folio);
+	eb_batch_init(&batch);
+	while (start_index <= end_index) {
 		struct extent_buffer *eb;
-		unsigned long flags;
-		u64 start;
+		unsigned int nr_ebs;
 
-		/*
-		 * Take private lock to ensure the subpage won't be detached
-		 * in the meantime.
-		 */
-		spin_lock(&folio->mapping->i_private_lock);
-		if (!folio_test_private(folio)) {
-			spin_unlock(&folio->mapping->i_private_lock);
+		nr_ebs = buffer_tree_get_ebs_tag(fs_info, &start_index, end_index,
+						 PAGECACHE_TAG_WRITEBACK, &batch);
+		if (!nr_ebs)
 			break;
-		}
-		spin_lock_irqsave(&subpage->lock, flags);
-		if (!test_bit(bit_start + btrfs_bitmap_nr_dirty * blocks_per_folio,
-			      subpage->bitmaps)) {
-			spin_unlock_irqrestore(&subpage->lock, flags);
-			spin_unlock(&folio->mapping->i_private_lock);
-			bit_start += sectors_per_node;
-			continue;
-		}
-
-		start = folio_start + bit_start * fs_info->sectorsize;
-		bit_start += sectors_per_node;
-
-		/*
-		 * Here we just want to grab the eb without touching extra
-		 * spin locks, so call find_extent_buffer_nolock().
-		 */
-		eb = find_extent_buffer_nolock(fs_info, start);
-		spin_unlock_irqrestore(&subpage->lock, flags);
-		spin_unlock(&folio->mapping->i_private_lock);
-
-		/*
-		 * The eb has already reached 0 refs thus find_extent_buffer()
-		 * doesn't return it. We don't need to write back such eb
-		 * anyway.
-		 */
-		if (!eb)
-			continue;
-
-		if (lock_extent_buffer_for_io(eb, wbc)) {
-			write_one_eb(eb, wbc);
-			submitted++;
-		}
-		free_extent_buffer(eb);
-	}
-	return submitted;
-}
-
-/*
- * Submit all page(s) of one extent buffer.
- *
- * @page:	the page of one extent buffer
- * @eb_context:	to determine if we need to submit this page, if current page
- *		belongs to this eb, we don't need to submit
- *
- * The caller should pass each page in their bytenr order, and here we use
- * @eb_context to determine if we have submitted pages of one extent buffer.
- *
- * If we have, we just skip until we hit a new page that doesn't belong to
- * current @eb_context.
- *
- * If not, we submit all the page(s) of the extent buffer.
- *
- * Return >0 if we have submitted the extent buffer successfully.
- * Return 0 if we don't need to submit the page, as it's already submitted by
- * previous call.
- * Return <0 for fatal error.
- */
-static int submit_eb_page(struct folio *folio, struct btrfs_eb_write_context *ctx)
-{
-	struct writeback_control *wbc = ctx->wbc;
-	struct address_space *mapping = folio->mapping;
-	struct extent_buffer *eb;
-	int ret;
-
-	if (!folio_test_private(folio))
-		return 0;
-
-	if (btrfs_meta_is_subpage(folio_to_fs_info(folio)))
-		return submit_eb_subpage(folio, wbc);
-
-	spin_lock(&mapping->i_private_lock);
-	if (!folio_test_private(folio)) {
-		spin_unlock(&mapping->i_private_lock);
-		return 0;
-	}
-
-	eb = folio_get_private(folio);
-
-	/*
-	 * Shouldn't happen and normally this would be a BUG_ON but no point
-	 * crashing the machine for something we can survive anyway.
-	 */
-	if (WARN_ON(!eb)) {
-		spin_unlock(&mapping->i_private_lock);
-		return 0;
-	}
-
-	if (eb == ctx->eb) {
-		spin_unlock(&mapping->i_private_lock);
-		return 0;
-	}
-	ret = atomic_inc_not_zero(&eb->refs);
-	spin_unlock(&mapping->i_private_lock);
-	if (!ret)
-		return 0;
 
-	ctx->eb = eb;
-
-	ret = btrfs_check_meta_write_pointer(eb->fs_info, ctx);
-	if (ret) {
-		if (ret == -EBUSY)
-			ret = 0;
-		free_extent_buffer(eb);
-		return ret;
-	}
-
-	if (!lock_extent_buffer_for_io(eb, wbc)) {
-		free_extent_buffer(eb);
-		return 0;
-	}
-	/* Implies write in zoned mode. */
-	if (ctx->zoned_bg) {
-		/* Mark the last eb in the block group. */
-		btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
-		ctx->zoned_bg->meta_write_pointer += eb->len;
+		while ((eb = eb_batch_next(&batch)) != NULL)
+			wait_on_extent_buffer_writeback(eb);
+		eb_batch_release(&batch);
+		cond_resched();
 	}
-	write_one_eb(eb, wbc);
-	free_extent_buffer(eb);
-	return 1;
 }
 
 int btree_write_cache_pages(struct address_space *mapping,
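As the new comment says, the wait side is meant to pair with writing marked extents. A hedged sketch of such a pairing follows; only btrfs_btree_wait_writeback_range() is from this patch, and the helper name and elided submit step are placeholders.

/*
 * Illustrative pairing: after the ebs in [start, start + len) have
 * been submitted for writeback, wait for that whole range.  The end
 * argument is inclusive, hence the "- 1".
 */
static void wait_one_marked_extent(struct btrfs_fs_info *fs_info,
                                   u64 start, u64 len)
{
        /* ... submit the range for writeback here ... */
        btrfs_btree_wait_writeback_range(fs_info, start, start + len - 1);
}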
@@ -2164,25 +2142,27 @@ int btree_write_cache_pages(struct address_space *mapping,
 	int ret = 0;
 	int done = 0;
 	int nr_to_write_done = 0;
-	struct folio_batch fbatch;
-	unsigned int nr_folios;
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
+	struct eb_batch batch;
+	unsigned int nr_ebs;
+	unsigned long index;
+	unsigned long end;
 	int scanned = 0;
 	xa_mark_t tag;
 
-	folio_batch_init(&fbatch);
+	eb_batch_init(&batch);
 	if (wbc->range_cyclic) {
-		index = mapping->writeback_index; /* Start from prev offset */
+		index = ((mapping->writeback_index << PAGE_SHIFT) >> fs_info->sectorsize_bits);
 		end = -1;
+
 		/*
 		 * Start from the beginning does not need to cycle over the
 		 * range, mark it as scanned.
 		 */
 		scanned = (index == 0);
 	} else {
-		index = wbc->range_start >> PAGE_SHIFT;
-		end = wbc->range_end >> PAGE_SHIFT;
+		index = (wbc->range_start >> fs_info->sectorsize_bits);
+		end = (wbc->range_end >> fs_info->sectorsize_bits);
+
 		scanned = 1;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
@@ -2192,31 +2172,40 @@ int btree_write_cache_pages(struct address_space *mapping,
 	btrfs_zoned_meta_io_lock(fs_info);
 retry:
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		tag_pages_for_writeback(mapping, index, end);
+		buffer_tree_tag_for_writeback(fs_info, index, end);
 	while (!done && !nr_to_write_done && (index <= end) &&
-	       (nr_folios = filemap_get_folios_tag(mapping, &index, end,
-					    tag, &fbatch))) {
-		unsigned i;
+	       (nr_ebs = buffer_tree_get_ebs_tag(fs_info, &index, end, tag, &batch))) {
+		struct extent_buffer *eb;
 
-		for (i = 0; i < nr_folios; i++) {
-			struct folio *folio = fbatch.folios[i];
+		while ((eb = eb_batch_next(&batch)) != NULL) {
+			ctx.eb = eb;
+
+			ret = btrfs_check_meta_write_pointer(eb->fs_info, &ctx);
+			if (ret) {
+				if (ret == -EBUSY)
+					ret = 0;
 
-			ret = submit_eb_page(folio, &ctx);
-			if (ret == 0)
+				if (ret) {
+					done = 1;
+					break;
+				}
+				free_extent_buffer(eb);
 				continue;
-			if (ret < 0) {
-				done = 1;
-				break;
 			}
 
-			/*
-			 * the filesystem may choose to bump up nr_to_write.
-			 * We have to make sure to honor the new nr_to_write
-			 * at any time
-			 */
-			nr_to_write_done = wbc->nr_to_write <= 0;
+			if (!lock_extent_buffer_for_io(eb, wbc))
+				continue;
+
+			/* Implies write in zoned mode. */
+			if (ctx.zoned_bg) {
+				/* Mark the last eb in the block group. */
+				btrfs_schedule_zone_finish_bg(ctx.zoned_bg, eb);
+				ctx.zoned_bg->meta_write_pointer += eb->len;
+			}
+			write_one_eb(eb, wbc);
 		}
-		folio_batch_release(&fbatch);
+		nr_to_write_done = (wbc->nr_to_write <= 0);
+		eb_batch_release(&batch);
 		cond_resched();
 	}
 	if (!scanned && !done) {
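One consequence of keying the buffer_tree by sector rather than by page is visible in the cyclic case above: mapping->writeback_index is a page index, so it has to be rescaled before it can seed the xarray walk. A worked example of the conversion, using an illustrative helper and numbers that are not from the patch:

/*
 * Rescale a page-based writeback cursor to a buffer_tree (sector)
 * index, mirroring the expression used in the hunk above:
 *	((mapping->writeback_index << PAGE_SHIFT) >> sectorsize_bits)
 *
 * With 4 KiB pages and 4 KiB sectors the index is unchanged:
 *	(256UL << 12) >> 12 == 256
 * With 64 KiB pages and 4 KiB sectors one page spans 16 sectors:
 *	(256UL << 16) >> 12 == 4096
 */
static unsigned long page_to_sector_index(unsigned long page_index,
                                          unsigned int sectorsize_bits)
{
        return (page_index << PAGE_SHIFT) >> sectorsize_bits;
}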
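Finally, a note on find_get_eb() from the first hunk: the find / atomic_inc_not_zero() / xas_reload() dance is the standard RCU-safe way to pin a refcounted object found in an xarray (the page cache folio lookup follows the same shape). A generic sketch of the pattern, with a hypothetical object type and put helper:

struct obj {
        atomic_t refs;
        /* ... payload ... */
};

static void obj_put(struct obj *o);     /* hypothetical release helper */

/* Caller holds rcu_read_lock(); @xas walks an xarray of struct obj. */
static struct obj *obj_find_get(struct xa_state *xas, unsigned long max)
{
        struct obj *o;

retry:
        o = xas_find(xas, max);
        if (xas_retry(xas, o))          /* skip internal retry entries */
                goto retry;
        if (!o)
                return NULL;
        /*
         * Speculative grab: a refcount that already hit zero means the
         * object is being torn down, so restart the walk.
         */
        if (!atomic_inc_not_zero(&o->refs)) {
                xas_reset(xas);
                goto retry;
        }
        /*
         * The slot may have been reused while we took the reference;
         * verify we pinned the object that is still in the tree,
         * otherwise drop our ref and retry.
         */
        if (unlikely(o != xas_reload(xas))) {
                obj_put(o);
                xas_reset(xas);
                goto retry;
        }
        return o;
}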