@@ -112,65 +112,72 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
112112 return kaddr ? 1 : 0 ;
113113}
114114
115- static void * z_erofs_lz4_handle_overlap (struct z_erofs_decompress_req * rq ,
115+ static void * z_erofs_lz4_handle_overlap (const struct z_erofs_decompress_req * rq ,
116116 void * inpage , void * out , unsigned int * inputmargin ,
117117 int * maptype , bool may_inplace )
118118{
119- unsigned int oend , omargin , total , i ;
119+ unsigned int oend , omargin , cnt , i ;
120120 struct page * * in ;
121- void * src , * tmp ;
122-
123- if (rq -> inplace_io ) {
124- oend = rq -> pageofs_out + rq -> outputsize ;
125- omargin = PAGE_ALIGN (oend ) - oend ;
126- if (rq -> partial_decoding || !may_inplace ||
127- omargin < LZ4_DECOMPRESS_INPLACE_MARGIN (rq -> inputsize ))
128- goto docopy ;
121+ void * src ;
129122
123+ /*
124+ * If in-place I/O isn't used, for example, the bounce compressed cache
125+ * can hold data for incomplete read requests. Just map the compressed
126+ * buffer as well and decompress directly.
127+ */
128+ if (!rq -> inplace_io ) {
129+ if (rq -> inpages <= 1 ) {
130+ * maptype = 0 ;
131+ return inpage ;
132+ }
133+ kunmap_local (inpage );
134+ src = erofs_vm_map_ram (rq -> in , rq -> inpages );
135+ if (!src )
136+ return ERR_PTR (- ENOMEM );
137+ * maptype = 1 ;
138+ return src ;
139+ }
140+ /*
141+ * Then, deal with in-place I/Os. The reasons why in-place I/O is useful
142+ * are: (1) It minimizes memory footprint during the I/O submission,
143+ * which is useful for slow storage (including network devices and
144+ * low-end HDDs/eMMCs) but with a lot of inflight I/Os; (2) If in-place
145+ * decompression can also be applied, it will reuse the unique buffer so
146+ * that no extra CPU D-cache is polluted with temporary compressed data
147+ * for extreme performance.
148+ */
149+ oend = rq -> pageofs_out + rq -> outputsize ;
150+ omargin = PAGE_ALIGN (oend ) - oend ;
151+ if (!rq -> partial_decoding && may_inplace &&
152+ omargin >= LZ4_DECOMPRESS_INPLACE_MARGIN (rq -> inputsize )) {
130153 for (i = 0 ; i < rq -> inpages ; ++ i )
131154 if (rq -> out [rq -> outpages - rq -> inpages + i ] !=
132155 rq -> in [i ])
133- goto docopy ;
134- kunmap_local (inpage );
135- * maptype = 3 ;
136- return out + ((rq -> outpages - rq -> inpages ) << PAGE_SHIFT );
137- }
138-
139- if (rq -> inpages <= 1 ) {
140- * maptype = 0 ;
141- return inpage ;
156+ break ;
157+ if (i >= rq -> inpages ) {
158+ kunmap_local (inpage );
159+ * maptype = 3 ;
160+ return out + ((rq -> outpages - rq -> inpages ) << PAGE_SHIFT );
161+ }
142162 }
143- kunmap_local (inpage );
144- src = erofs_vm_map_ram (rq -> in , rq -> inpages );
145- if (!src )
146- return ERR_PTR (- ENOMEM );
147- * maptype = 1 ;
148- return src ;
149-
150- docopy :
151- /* Or copy compressed data which can be overlapped to per-CPU buffer */
152- in = rq -> in ;
163+ /*
164+ * If in-place decompression can't be applied, copy compressed data that
165+ * may potentially overlap during decompression to a per-CPU buffer.
166+ */
153167 src = z_erofs_get_gbuf (rq -> inpages );
154168 if (!src ) {
155169 DBG_BUGON (1 );
156170 kunmap_local (inpage );
157171 return ERR_PTR (- EFAULT );
158172 }
159173
160- tmp = src ;
161- total = rq -> inputsize ;
162- while (total ) {
163- unsigned int page_copycnt =
164- min_t (unsigned int , total , PAGE_SIZE - * inputmargin );
165-
174+ for (i = 0 , in = rq -> in ; i < rq -> inputsize ; i += cnt , ++ in ) {
175+ cnt = min_t (u32 , rq -> inputsize - i , PAGE_SIZE - * inputmargin );
166176 if (!inpage )
167177 inpage = kmap_local_page (* in );
168- memcpy (tmp , inpage + * inputmargin , page_copycnt );
178+ memcpy (src + i , inpage + * inputmargin , cnt );
169179 kunmap_local (inpage );
170180 inpage = NULL ;
171- tmp += page_copycnt ;
172- total -= page_copycnt ;
173- ++ in ;
174181 * inputmargin = 0 ;
175182 }
176183 * maptype = 2 ;