@@ -45,7 +45,7 @@ struct bucket {
4545};
4646
4747/**
48- * calc_shannon_entropy () - Compute Shannon entropy of the sampled data.
48+ * has_low_entropy () - Compute Shannon entropy of the sampled data.
4949 * @bkt: Bytes counts of the sample.
5050 * @slen: Size of the sample.
5151 *
@@ -60,7 +60,7 @@ struct bucket {
6060 * Also Shannon entropy is the last computed heuristic; if we got this far and ended up
6161 * with uncertainty, just stay on the safe side and call it uncompressible.
6262 */
63- static bool calc_shannon_entropy (struct bucket * bkt , size_t slen )
63+ static bool has_low_entropy (struct bucket * bkt , size_t slen )
6464{
6565 const size_t threshold = 65 , max_entropy = 8 * ilog2 (16 );
6666 size_t i , p , p2 , len , sum = 0 ;
@@ -79,17 +79,21 @@ static bool calc_shannon_entropy(struct bucket *bkt, size_t slen)
7979 return ((sum * 100 / max_entropy ) <= threshold );
8080}
8181
82+ #define BYTE_DIST_BAD 0
83+ #define BYTE_DIST_GOOD 1
84+ #define BYTE_DIST_MAYBE 2
8285/**
8386 * calc_byte_distribution() - Compute byte distribution on the sampled data.
8487 * @bkt: Byte counts of the sample.
8588 * @slen: Size of the sample.
8689 *
8790 * Return:
88- * 1: High probability (normal (Gaussian) distribution) of the data being compressible.
89- * 0: A "hard no" for compression -- either a computed uniform distribution of the bytes (e.g.
90- * random or encrypted data), or calc_shannon_entropy() returned false (see above).
91- * 2: When computed byte distribution resulted in "low > n < high" grounds.
92- * calc_shannon_entropy() should be used for a final decision.
91+ * BYTE_DIST_BAD: A "hard no" for compression -- a computed uniform distribution of
92+ * the bytes (e.g. random or encrypted data).
93+ * BYTE_DIST_GOOD: High probability (normal (Gaussian) distribution) of the data being
94+ * compressible.
95+ * BYTE_DIST_MAYBE: When computed byte distribution resulted in "low < n < high"
96+ * grounds. has_low_entropy() should be used for a final decision.
9397 */
9498static int calc_byte_distribution (struct bucket * bkt , size_t slen )
9599{
@@ -101,7 +105,7 @@ static int calc_byte_distribution(struct bucket *bkt, size_t slen)
101105 sum += bkt [i ].count ;
102106
103107 if (sum > threshold )
104- return i ;
108+ return BYTE_DIST_BAD ;
105109
106110 for (; i < high && bkt [i ].count > 0 ; i ++ ) {
107111 sum += bkt [i ].count ;
@@ -110,36 +114,29 @@ static int calc_byte_distribution(struct bucket *bkt, size_t slen)
110114 }
111115
112116 if (i <= low )
113- return 1 ;
117+ return BYTE_DIST_GOOD ;
114118
115119 if (i >= high )
116- return 0 ;
120+ return BYTE_DIST_BAD ;
117121
118- return 2 ;
122+ return BYTE_DIST_MAYBE ;
119123}
120124
121- static bool check_ascii_bytes (const struct bucket * bkt )
125+ static bool is_mostly_ascii (const struct bucket * bkt )
122126{
123- const size_t threshold = 64 ;
124127 size_t count = 0 ;
125128 int i ;
126129
127- for (i = 0 ; i < threshold ; i ++ )
130+ for (i = 0 ; i < 256 ; i ++ )
128131 if (bkt [i ].count > 0 )
129- count ++ ;
132+ /* More than 64 distinct byte values present in the sample. */
133+ if (++ count > 64 )
134+ return false;
130135
131- for (; i < 256 ; i ++ ) {
132- if (bkt [i ].count > 0 ) {
133- count ++ ;
134- if (count > threshold )
135- break ;
136- }
137- }
138-
139- return (count < threshold );
136+ return true;
140137}
141138
142- static bool check_repeated_data (const u8 * sample , size_t len )
139+ static bool has_repeated_data (const u8 * sample , size_t len )
143140{
144141 size_t s = len / 2 ;
145142
@@ -222,71 +219,79 @@ static int collect_sample(const struct iov_iter *iter, ssize_t max, u8 *sample)
222219 * is_compressible() - Determines if a chunk of data is compressible.
223220 * @data: Iterator containing uncompressed data.
224221 *
225- * Return:
226- * 0: @data is not compressible
227- * 1: @data is compressible
228- * -ENOMEM: failed to allocate memory for sample buffer
222+ * Return: true if @data is compressible, false otherwise.
229223 *
230224 * Tests show that this function is quite reliable in predicting data compressibility,
231225 * matching close to 1:1 with the behaviour of LZ77 compression success and failures.
232226 */
233- static int is_compressible (const struct iov_iter * data )
227+ static bool is_compressible (const struct iov_iter * data )
234228{
235229 const size_t read_size = SZ_2K , bkt_size = 256 , max = SZ_4M ;
236230 struct bucket * bkt = NULL ;
237- int i = 0 , ret = 0 ;
238231 size_t len ;
239232 u8 * sample ;
233+ bool ret = false;
234+ int i ;
240235
236+ /* Preventive double check -- already checked in should_compress(). */
241237 len = iov_iter_count (data );
242- if (len < read_size )
243- return 0 ;
238+ if (unlikely ( len < read_size ) )
239+ return ret ;
244240
245241 if (len - read_size > max )
246242 len = max ;
247243
248244 sample = kvzalloc (len , GFP_KERNEL );
249- if (!sample )
250- return - ENOMEM ;
245+ if (!sample ) {
246+ WARN_ON_ONCE (1 );
247+
248+ return ret ;
249+ }
251250
252251 /* Sample 2K bytes per page of the uncompressed data. */
253- ret = collect_sample (data , len , sample );
254- if (ret < 0 )
252+ i = collect_sample (data , len , sample );
253+ if (i <= 0 ) {
254+ WARN_ON_ONCE (1 );
255+
255256 goto out ;
257+ }
256258
257- len = ret ;
258- ret = 1 ;
259+ len = i ;
260+ ret = true ;
259261
260- if (check_repeated_data (sample , len ))
262+ if (has_repeated_data (sample , len ))
261263 goto out ;
262264
263265 bkt = kcalloc (bkt_size , sizeof (* bkt ), GFP_KERNEL );
264266 if (!bkt ) {
265- kvfree (sample );
266- return - ENOMEM ;
267+ WARN_ON_ONCE (1 );
268+ ret = false;
269+
270+ goto out ;
267271 }
268272
269273 for (i = 0 ; i < len ; i ++ )
270274 bkt [sample [i ]].count ++ ;
271275
272- if (check_ascii_bytes (bkt ))
276+ if (is_mostly_ascii (bkt ))
273277 goto out ;
274278
275279 /* Sort in descending order */
276280 sort (bkt , bkt_size , sizeof (* bkt ), cmp_bkt , NULL );
277281
278- ret = calc_byte_distribution (bkt , len );
279- if (ret != 2 )
282+ i = calc_byte_distribution (bkt , len );
283+ if (i != BYTE_DIST_MAYBE ) {
284+ ret = !!i ;
285+
280286 goto out ;
287+ }
281288
282- ret = calc_shannon_entropy (bkt , len );
289+ ret = has_low_entropy (bkt , len );
283290out :
284291 kvfree (sample );
285292 kfree (bkt );
286293
287- WARN (ret < 0 , "%s: ret=%d\n" , __func__ , ret );
288-
289- return !!ret ;
294+ return ret ;
290295}
291296
292297bool should_compress (const struct cifs_tcon * tcon , const struct smb_rqst * rq )
@@ -305,7 +310,7 @@ bool should_compress(const struct cifs_tcon *tcon, const struct smb_rqst *rq)
305310 if (shdr -> Command == SMB2_WRITE ) {
306311 const struct smb2_write_req * wreq = rq -> rq_iov -> iov_base ;
307312
308- if (wreq -> Length < SMB_COMPRESS_MIN_LEN )
313+ if (le32_to_cpu ( wreq -> Length ) < SMB_COMPRESS_MIN_LEN )
309314 return false;
310315
311316 return is_compressible (& rq -> rq_iter );
0 commit comments