/
utils.py
427 lines (354 loc) · 13.9 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
# Copyright (C) 2015-2020 Chris Lalancette <clalancette@gmail.com>
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation;
# version 2.1 of the License.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''
Various utilities for PyCdlib.
'''
from __future__ import absolute_import
try:
import cStringIO # pylint: disable=import-error
except ImportError:
pass
import io
import os
import re
import sys
import time
from pycdlib import pycdlibexception
# For mypy annotations
if False: # pylint: disable=using-constant-test
from typing import BinaryIO, List, Tuple # NOQA pylint: disable=unused-import
def swab_32bit(x):
    # type: (int) -> int
    '''
    Reverse the byte order of an unsigned 32-bit integer.

    Parameters:
     x - The unsigned 32-bit integer whose bytes should be reversed.
    Returns:
     The integer formed by the same four bytes in the opposite order.
    '''
    if not 0 <= x <= 0xFFFFFFFF:
        raise pycdlibexception.PyCdlibInternalError('Invalid integer passed to swab; must be unsigned 32-bits!')

    # Pull the four bytes apart (least significant first), then put each one
    # back in the mirrored position.
    lowest = x & 0xFF
    low_mid = (x >> 8) & 0xFF
    high_mid = (x >> 16) & 0xFF
    highest = (x >> 24) & 0xFF
    return (lowest << 24) | (low_mid << 16) | (high_mid << 8) | highest
def swab_16bit(x):
    # type: (int) -> int
    '''
    Reverse the byte order of an unsigned 16-bit integer.

    Parameters:
     x - The unsigned 16-bit integer whose bytes should be reversed.
    Returns:
     The integer formed by exchanging the high and low bytes.
    '''
    if not 0 <= x <= 0xFFFF:
        raise pycdlibexception.PyCdlibInternalError('Invalid integer passed to swab; must be unsigned 16-bits!')

    # Split into the two bytes and exchange their positions.
    low_byte = x & 0xFF
    high_byte = (x >> 8) & 0xFF
    return (low_byte << 8) | high_byte
def ceiling_div(numer, denom):
    # type: (int, int) -> int
    '''
    Divide the numerator by the denominator and round the result up to the
    nearest integer.

    Parameters:
     numer - The numerator for the division.
     denom - The denominator for the division.
    Returns:
     The smallest integer that is greater than or equal to numer / denom.
    '''
    # divmod() floors the quotient; whenever something is left over, the
    # ceiling is exactly one more than that floor.
    quotient, remainder = divmod(numer, denom)
    if remainder:
        return quotient + 1
    return quotient
def copy_data(data_length, blocksize, infp, outfp):
    # type: (int, int, BinaryIO, BinaryIO) -> None
    '''
    Copy up to data_length bytes from one file object to another, reading
    and writing at most blocksize bytes at a time.

    Parameters:
     data_length - The amount of data to copy.
     blocksize - How much data to copy per iteration.
     infp - The file object to copy data from.
     outfp - The file object to copy data to.
    Returns:
     Nothing.
    '''
    remaining = data_length
    chunk_size = blocksize
    while remaining > 0:
        # Never ask for more than what is still owed.
        chunk_size = min(chunk_size, remaining)
        chunk = infp.read(chunk_size)
        outfp.write(chunk)
        if len(chunk) != chunk_size:
            # Some ISOs in the wild (Tribes Vengeance 1of4.iso) claim files
            # are bigger than they are, so the read comes up short at EOF.
            # Whatever was read has been written; stop quietly.
            break
        remaining -= chunk_size
def encode_space_pad(instr, length, encoding):
    # type: (bytes, int, str) -> bytes
    '''
    Re-encode a UTF-8 byte string into the given encoding and pad it on the
    right with encoded spaces until it is exactly 'length' bytes long.

    Parameters:
     instr - The input string to encode and pad.
     length - The length to pad the input string to.
     encoding - The encoding to use.
    Returns:
     The input string encoded in the encoding and padded with encoded spaces.
    '''
    encoded = instr.decode('utf-8').encode(encoding)
    shortfall = length - len(encoded)
    if shortfall < 0:
        raise pycdlibexception.PyCdlibInvalidInput('Input string too long!')

    space = ' '.encode(encoding)
    # An encoded space may be wider than one byte, so round the number of
    # spaces up and then trim any overshoot back to the requested length.
    repeats = -(-shortfall // len(space))
    return (encoded + space * repeats)[:length]
def normpath(path):
    # type: (str) -> bytes
    '''
    Normalize the given path, eliminating double slashes, '.' components and
    resolvable '..' components. This function is a copy of the built-in
    python normpath, except we do *not* allow double slashes at the start.
    An empty input is returned as '.'; any other input that does not start
    with a slash is rejected with PyCdlibInvalidInput.

    Parameters:
     path - The path to normalize.
    Returns:
     The normalized path, encoded as UTF-8 bytes.
    '''
    sep = '/'
    empty = ''
    dot = '.'
    dotdot = '..'

    if path == empty:
        return dot.encode('utf-8')

    initial_slashes = path.startswith(sep)
    new_comps = []  # type: List[str]
    for comp in path.split(sep):
        if comp in (empty, dot):
            continue
        # Keep the component unless it is a '..' that can cancel out the
        # previous (non-'..') component.  A '..' directly under the root of
        # an absolute path is simply dropped.
        if comp != dotdot or (not initial_slashes and not new_comps) or (new_comps and new_comps[-1] == dotdot):
            new_comps.append(comp)
        elif new_comps:
            new_comps.pop()

    newpath = (sep if initial_slashes else empty) + sep.join(new_comps)
    if sys.version_info >= (3, 0):
        newpath_bytes = newpath.encode('utf-8')
    else:
        newpath_bytes = newpath.decode('utf-8').encode('utf-8')

    # The normalized path begins with a slash exactly when the input did, and
    # no component can contain one.  Testing the flag instead of the first
    # byte of the result avoids indexing into an empty result, which is what
    # relative inputs such as '.' or 'a/..' collapse to.
    if not initial_slashes:
        raise pycdlibexception.PyCdlibInvalidInput('Must be a path starting with /')

    return newpath_bytes
def gmtoffset_from_tm(tm, local):
    # type: (float, time.struct_time) -> int
    '''
    A function to compute the GMT offset from the time in seconds since the
    epoch and the local time object. The offset is expressed in 15-minute
    intervals and is positive when the local time is ahead of GMT.

    Parameters:
     tm - The time in seconds since the epoch.
     local - The struct_time object representing the local time.
    Returns:
     The gmtoffset, in units of 15 minutes.
    '''
    gmtime = time.gmtime(tm)
    year_diff = gmtime.tm_year - local.tm_year
    day_diff = gmtime.tm_yday - local.tm_yday
    hour_diff = gmtime.tm_hour - local.tm_hour
    min_diff = gmtime.tm_min - local.tm_min

    # Reduce the day difference to -1, 0 or +1.  When GMT and local time fall
    # in different years the day-of-year difference is meaningless (for
    # example 1 - 365), so the sign of the year difference decides which side
    # is a day ahead.
    if year_diff > 0:
        day_diff = 1
    elif year_diff < 0:
        day_diff = -1
    elif day_diff < 0:
        day_diff = -1

    # The differences above are GMT minus local, so negate the total number
    # of minutes before converting to 15-minute intervals.
    return -(min_diff + 60 * (hour_diff + 24 * day_diff)) // 15
def zero_pad(fp, data_size, pad_size):
    # type: (BinaryIO, int, int) -> None
    '''
    Pad the file object out from data_size to the next multiple of pad_size.
    Rather than writing every padding byte, this seeks forward from the
    current position and writes a single zero byte at the last padding
    position.

    Parameters:
     fp - The file object to use to write padding out to.
     data_size - The current size of the data.
     pad_size - The boundary size of data to pad out to.
    Returns:
     Nothing.
    '''
    overhang = data_size % pad_size
    if overhang == 0:
        # Already sitting on a boundary; nothing to pad.
        return

    gap = pad_size - overhang
    # Skip over all but the final padding byte, then write that one byte.
    fp.seek(gap - 1, os.SEEK_CUR)
    fp.write(b'\x00')
def starts_with_slash(path):
    # type: (bytes) -> bool
    '''
    A function to determine if a path starts with a slash. This is somewhat
    difficult to do portably between Python2 and Python3 and with performance,
    so we have a dedicated function for it. An empty path does not start
    with a slash.

    Parameters:
     path - The path to determine if it starts with a slash
    Returns:
     Whether the path starts with a slash.
    '''
    # Indexing bytes yields a str on Python 2 and an int on Python 3, so the
    # comparison goes through a bytearray.  Only the first byte is converted,
    # which keeps the cost independent of the path length and makes an empty
    # path compare unequal instead of raising IndexError.
    return bytearray(path[:1]) == b'/'
def split_path(iso_path):
    # type: (bytes) -> List[bytes]
    '''
    A function to take a fully-qualified iso path and split it into components.
    A path that is empty or does not start with a slash is rejected with
    PyCdlibInvalidInput.

    Parameters:
     iso_path - The path to split.
    Returns:
     The components of the path as a list.
    '''
    # Check for emptiness first so that an empty path is reported as invalid
    # input rather than failing while its first byte is inspected.
    if not iso_path or not starts_with_slash(iso_path):
        raise pycdlibexception.PyCdlibInvalidInput('Must be a path starting with /')

    # Split the path along the slashes.  Since our paths are always absolute,
    # the front is blank.
    return iso_path.split(b'/')[1:]
def file_object_supports_binary(fp):
    # type: (BinaryIO) -> bool
    '''
    A function to check whether a file-like object supports binary mode.
    If the object has a 'mode' attribute that can be searched for 'b', that
    decides the answer; otherwise the decision is based on the type of the
    object.

    Parameters:
     fp - The file-like object to check for binary mode support.
    Returns:
     True if the file-like object supports binary mode, False otherwise.
    '''
    if hasattr(fp, 'mode'):
        try:
            return 'b' in fp.mode
        except TypeError:
            # The mode is not a string-like container (for instance an
            # integer constant or None), so it cannot answer the question.
            # Fall back to the type-based checks below.
            pass

    # Python 3
    if sys.version_info >= (3, 0):
        return isinstance(fp, (io.RawIOBase, io.BufferedIOBase))

    # Python 2
    return isinstance(fp, (cStringIO.OutputType, cStringIO.InputType, io.RawIOBase, io.BufferedIOBase))
def truncate_basename(basename, iso_level, is_dir):
    # type: (str, int, bool) -> str
    '''
    A function to truncate a basename and make it conformant to the passed-in
    ISO interchange level.

    Parameters:
     basename - The initial basename to truncate and translate
     iso_level - The ISO interchange level to follow when truncating/translating
     is_dir - Whether this is a directory or a file
    Returns:
     The truncated and translated name suitable for the ISO interchange level
     specified.
    '''
    if iso_level == 4:
        # ISO level 4 allows "anything", so just return the original.
        return basename

    if iso_level == 1:
        maxlen = 8
    else:
        maxlen = 31 if is_dir else 30

    # For performance reasons, we first truncate the string to the length
    # allowed.  Second, ISO9660 Levels 1, 2, and 3 require all uppercase names,
    # so we uppercase it.  Uppercasing can make a unicode string longer (a
    # single character may map to several uppercase characters), so the
    # result is truncated once more to guarantee the length limit holds.
    valid_base = basename[:maxlen].upper()[:maxlen]

    # Finally, ISO9660 requires only uppercase letters, 0-9, and underscore.
    # Translate any non-compliant characters to underscore and return that.
    return re.sub('[^A-Z0-9_]', '_', valid_base)
def mangle_file_for_iso9660(orig, iso_level):
    # type: (str, int) -> Tuple[str, str]
    '''
    Split a regular Unix-style filename (including extension) into a basename
    and an extension that are suitable for the given ISO interchange level.

    Parameters:
     orig - The original filename
     iso_level - The ISO interchange level to conform to
    Returns:
     A tuple where the first entry is the basename and the second entry is
     the extension.  For interchange levels other than 4, the basename has
     been passed through truncate_basename and the extension ends in the
     version suffix ';1'.
    '''
    # ISO9660 filenames look like "<filename>.<extension>;<version>", where
    # filename and extension are built from the characters 0-9, A-Z and _.
    # At interchange level 1 the filename is limited to 8 characters and the
    # extension to 3; at levels 2 and 3 filename plus extension is limited
    # to 30.
    #
    # Everything after the last '.' is the candidate extension; everything
    # before it is the candidate basename.
    stem, dot, suffix = orig.rpartition('.')

    if iso_level == 4:
        # A level 4 ISO allows 'anything', so only split on the last dot (if
        # there is one) and return the pieces untouched.
        if not dot:
            return orig, ''
        return stem, suffix

    # Start from "no usable extension": the whole original name is the
    # basename.  Only a non-empty extension of at most 3 characters that is
    # made up entirely of legal characters (after uppercasing) is split off.
    basename = orig
    valid_ext = ''
    if dot and 0 < len(suffix) <= 3:
        candidate = suffix.upper()
        if re.search('[^A-Z0-9_]{1}', candidate) is None:
            basename = stem
            valid_ext = candidate

    # The basename still needs truncating and translating; the extension only
    # needs the version suffix appended.
    return truncate_basename(basename, iso_level, False), valid_ext + ';1'
def mangle_dir_for_iso9660(orig, iso_level):
    # type: (str, int) -> str
    '''
    Convert a regular Unix-style directory name into a directory name that
    is suitable for the given ISO interchange level.

    Parameters:
     orig - The original directory name
     iso_level - The ISO interchange level to conform to
    Returns:
     The directory name as produced by truncate_basename for a directory.
    '''
    # Directory names carry no extension or version, so the whole job is the
    # same truncation and character translation applied to file basenames,
    # with the directory flag set so the directory length limit is used.
    mangled = truncate_basename(orig, iso_level, True)
    return mangled