-
Notifications
You must be signed in to change notification settings - Fork 144
/
compress.py
171 lines (151 loc) · 4.98 KB
/
compress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import gzip
import os
import re
from io import BytesIO
try:
import brotli
brotli_installed = True
except ImportError:
brotli_installed = False
class Compressor(object):
    """Write pre-compressed ``.gz`` and ``.br`` copies of files next to the originals.

    A compressed copy is only kept when it is at most 95% of the original
    size; files whose extension is in the skip list are reported as not
    worth compressing by :meth:`should_compress`.
    """

    # Extensions that it's not worth trying to compress
    SKIP_COMPRESS_EXTENSIONS = (
        # Images
        "jpg",
        "jpeg",
        "png",
        "gif",
        "webp",
        # Compressed files
        "zip",
        "gz",
        "tgz",
        "bz2",
        "tbz",
        "xz",
        "br",
        # Flash
        "swf",
        "flv",
        # Fonts
        "woff",
        "woff2",
    )

    def __init__(
        self, extensions=None, use_gzip=True, use_brotli=True, log=print, quiet=False
    ):
        """Configure which files are skipped and which encodings are produced.

        :param extensions: extensions to skip (with or without a leading
            dot); ``None`` selects ``SKIP_COMPRESS_EXTENSIONS``.
        :param use_gzip: produce ``.gz`` files.
        :param use_brotli: produce ``.br`` files; only honoured when the
            optional ``brotli`` module could be imported.
        :param log: callable invoked with a single message string.
        :param quiet: when true, ``log`` is ignored and nothing is reported.
        """
        if extensions is None:
            extensions = self.SKIP_COMPRESS_EXTENSIONS
        self.extension_re = self.get_extension_re(extensions)
        self.use_gzip = use_gzip
        self.use_brotli = use_brotli and brotli_installed
        # When quiet, the no-op ``log`` method defined on the class stays in
        # effect; otherwise it is shadowed by the supplied callable.
        if not quiet:
            self.log = log

    @staticmethod
    def get_extension_re(extensions):
        """Return a compiled, case-insensitive regex matching skipped filenames.

        Extensions may be given as ``"jpg"`` or ``".jpg"``. An empty
        collection yields a pattern that matches only the empty string, so
        every real filename is considered compressible.
        """
        if not extensions:
            return re.compile("^$")
        # Strip leading dots: the pattern already supplies the separator, so
        # ".jpg" would otherwise only match names ending in "..jpg".
        alternatives = "|".join(re.escape(ext.lstrip(".")) for ext in extensions)
        return re.compile(r"\.({0})$".format(alternatives), re.IGNORECASE)

    def should_compress(self, filename):
        """Return ``True`` unless ``filename`` ends with a skipped extension."""
        return not self.extension_re.search(filename)

    def log(self, message):
        """Discard ``message``; replaced per instance unless ``quiet`` was set."""
        pass

    def compress(self, path):
        """Compress the file at ``path`` and yield each compressed filename written.

        This is a generator: nothing is read or written until it is
        iterated. The whole file is read into memory. Brotli is attempted
        first (when enabled); if it is not effective, gzip is not attempted.
        """
        with open(path, "rb") as f:
            stat_result = os.fstat(f.fileno())
            data = f.read()
        size = len(data)
        if self.use_brotli:
            compressed = self.compress_brotli(data)
            if self.is_compressed_effectively("Brotli", path, size, compressed):
                yield self.write_data(path, compressed, ".br", stat_result)
            else:
                # If Brotli compression wasn't effective gzip won't be either
                return
        if self.use_gzip:
            compressed = self.compress_gzip(data)
            if self.is_compressed_effectively("Gzip", path, size, compressed):
                yield self.write_data(path, compressed, ".gz", stat_result)

    @staticmethod
    def compress_gzip(data):
        """Return ``data`` gzip-compressed at level 9 as ``bytes``."""
        output = BytesIO()
        # Explicitly set mtime to 0 so gzip content is fully determined
        # by file content (0 = "no timestamp" according to gzip spec)
        with gzip.GzipFile(
            filename="", mode="wb", fileobj=output, compresslevel=9, mtime=0
        ) as gz_file:
            gz_file.write(data)
        return output.getvalue()

    @staticmethod
    def compress_brotli(data):
        """Return ``data`` compressed with ``brotli.compress``."""
        return brotli.compress(data)

    def is_compressed_effectively(self, encoding_name, path, orig_size, data):
        """Log and return whether ``data`` is small enough to be worth keeping.

        Compression counts as effective when the compressed size is at most
        95% of ``orig_size``; an empty original is never effective.
        """
        compressed_size = len(data)
        if orig_size == 0:
            is_effective = False
        else:
            ratio = compressed_size / orig_size
            is_effective = ratio <= 0.95
        if is_effective:
            self.log(
                "{0} compressed {1} ({2}K -> {3}K)".format(
                    encoding_name, path, orig_size // 1024, compressed_size // 1024
                )
            )
        else:
            self.log(
                "Skipping {0} ({1} compression not effective)".format(
                    path, encoding_name
                )
            )
        return is_effective

    def write_data(self, path, data, suffix, stat_result):
        """Write ``data`` to ``path + suffix`` and return that filename.

        The new file receives the access and modification times recorded in
        ``stat_result`` (an ``os.stat_result`` for the original file).
        """
        filename = path + suffix
        with open(filename, "wb") as f:
            f.write(data)
        # Copy the integer nanosecond fields rather than the float ones: a
        # float cannot represent current timestamps to the nanosecond, so the
        # copy could end up with a different mtime than the original.
        os.utime(filename, ns=(stat_result.st_atime_ns, stat_result.st_mtime_ns))
        return filename
def main(root, **kwargs):
    """Compress every eligible file found anywhere beneath ``root``.

    ``kwargs`` are forwarded unchanged to the ``Compressor`` constructor.
    """
    compressor = Compressor(**kwargs)
    for directory, _subdirs, names in os.walk(root):
        for name in names:
            if not compressor.should_compress(name):
                continue
            target = os.path.join(directory, name)
            # ``compress`` is a generator, so it has to be exhausted for the
            # compressed files to actually be written.
            for _written in compressor.compress(target):
                pass
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the options and hand them to main().
    skipped_by_default = Compressor.SKIP_COMPRESS_EXTENSIONS
    description = (
        "Search for all files inside <root> *not* matching "
        "<extensions> and produce compressed versions with "
        "'.gz' and '.br' suffixes (as long as this results in a "
        "smaller file)"
    )
    extensions_help = (
        "File extensions to exclude from compression "
        "(default: {})".format(", ".join(skipped_by_default))
    )

    cli = argparse.ArgumentParser(description=description)
    cli.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Don't produce log output",
    )
    # The --no-* switches store False into the matching use_* keyword.
    cli.add_argument(
        "--no-gzip",
        dest="use_gzip",
        action="store_false",
        help="Don't produce gzip '.gz' files",
    )
    cli.add_argument(
        "--no-brotli",
        dest="use_brotli",
        action="store_false",
        help="Don't produce brotli '.br' files",
    )
    cli.add_argument("root", help="Path root from which to search for files")
    cli.add_argument(
        "extensions",
        nargs="*",
        default=skipped_by_default,
        help=extensions_help,
    )

    options = cli.parse_args()
    # The parsed attribute names match main()'s parameters one-to-one.
    main(**vars(options))