diff --git a/README.md b/README.md index 0fe102d6f..9130c370e 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ Format-specific Requirements `sudo pip3 install python-lzo` -- **Reading from Babylon BGL**: Python 3.4 or 3.5 is recommended +- **Reading from Babylon BGL**: Python 3.4 to 3.6 is recommended **Other Requirements for Mac OS X** diff --git a/pyglossary/plugin_lib/py36/__init__.py b/pyglossary/plugin_lib/py36/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pyglossary/plugin_lib/py36/gzip_no_crc.py b/pyglossary/plugin_lib/py36/gzip_no_crc.py new file mode 100644 index 000000000..30feb0fc0 --- /dev/null +++ b/pyglossary/plugin_lib/py36/gzip_no_crc.py @@ -0,0 +1,577 @@ +"""Functions that read and write gzipped files. + +The user of the file doesn't have to worry about the compression, +but random access is not allowed.""" + +# based on Andrew Kuchling's minigzip.py distributed with the zlib module + +import logging +log = logging.getLogger('root') + +import struct, sys, time, os +import zlib +import builtins +import io +import _compression + +__all__ = ["GzipFile", "open", "compress", "decompress"] + +FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 + +READ, WRITE = 1, 2 + +def open(filename, mode="rb", compresslevel=9, + encoding=None, errors=None, newline=None): + """Open a gzip-compressed file in binary or text mode. + + The filename argument can be an actual filename (a str or bytes object), or + an existing file object to read from or write to. + + The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for + binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is + "rb", and the default compresslevel is 9. + + For binary mode, this function is equivalent to the GzipFile constructor: + GzipFile(filename, mode, compresslevel). In this case, the encoding, errors + and newline arguments must not be provided. 
+
+    For text mode, a GzipFile object is created, and wrapped in an
+    io.TextIOWrapper instance with the specified encoding, error handling
+    behavior, and line ending(s).
+
+    """
+    # "t" and "b" are mutually exclusive; the text-only arguments are
+    # rejected whenever "t" is absent.
+    if "t" in mode:
+        if "b" in mode:
+            raise ValueError("Invalid mode: %r" % (mode,))
+    else:
+        if encoding is not None:
+            raise ValueError("Argument 'encoding' not supported in binary mode")
+        if errors is not None:
+            raise ValueError("Argument 'errors' not supported in binary mode")
+        if newline is not None:
+            raise ValueError("Argument 'newline' not supported in binary mode")
+
+    # GzipFile is always given the mode without the "t" flag.
+    gz_mode = mode.replace("t", "")
+    if isinstance(filename, (str, bytes, os.PathLike)):
+        # A path: pass it as the first argument.
+        binary_file = GzipFile(filename, gz_mode, compresslevel)
+    elif hasattr(filename, "read") or hasattr(filename, "write"):
+        # An existing file object: pass it as the fourth positional argument.
+        binary_file = GzipFile(None, gz_mode, compresslevel, filename)
+    else:
+        raise TypeError("filename must be a str or bytes object, or a file")
+
+    if "t" in mode:
+        return io.TextIOWrapper(binary_file, encoding, errors, newline)
+    else:
+        return binary_file
+
+def write32u(output, value):
+    # The L format writes the bit pattern correctly whether signed
+    # or unsigned.
+    # NOTE(review): the rest of this statement, and everything that followed
+    # it up to the stray quote character below, is missing from this copy of
+    # the patch.
+    output.write(struct.pack("'
+
+    def _init_write(self, filename):
+        """Reset the write-side state.
+
+        Stores *filename* as self.name, restarts the running CRC from the
+        CRC of the empty byte string, and zeroes the size, buffer and offset
+        bookkeeping attributes.
+        """
+        self.name = filename
+        self.crc = zlib.crc32(b"")
+        self.size = 0
+        self.writebuf = []
+        self.bufsize = 0
+        self.offset = 0  # Current file offset for seek(), tell(), etc
+
+    def _write_gzip_header(self):
+        """Write the gzip member header to self.fileobj.
+
+        Emits, in order: the magic bytes, the compression method byte, the
+        flag byte, the modification time (via write32u), two fixed
+        single-byte fields and, when a usable name exists, the file name
+        followed by a NUL byte.
+        """
+        self.fileobj.write(b'\037\213')  # magic header
+        self.fileobj.write(b'\010')  # compression method
+        try:
+            # RFC 1952 requires the FNAME field to be Latin-1. Do not
+            # include filenames that cannot be represented that way.
+            fname = os.path.basename(self.name)
+            if not isinstance(fname, bytes):
+                fname = fname.encode('latin-1')
+            # Drop a trailing ".gz" from the stored name.
+            if fname.endswith(b'.gz'):
+                fname = fname[:-3]
+        except UnicodeEncodeError:
+            fname = b''
+        # The FNAME flag is set only when a name will actually be written.
+        flags = 0
+        if fname:
+            flags = FNAME
+        self.fileobj.write(chr(flags).encode('latin-1'))
+        # Use the explicitly configured mtime if there is one, otherwise the
+        # current time.
+        mtime = self._write_mtime
+        if mtime is None:
+            mtime = time.time()
+        write32u(self.fileobj, int(mtime))
+        self.fileobj.write(b'\002')
+        self.fileobj.write(b'\377')
+        if fname:
+            self.fileobj.write(fname + b'\000')
+
+    def write(self,data):
+        """Compress *data* and write it to the underlying file object.
+
+        *data* may be a bytes object or any object supporting the buffer
+        protocol. The running size, CRC and offset are updated by the number
+        of uncompressed bytes, and that number is returned.
+
+        Raises OSError (EBADF) if the object is not in WRITE mode and
+        ValueError if self.fileobj is None.
+        """
+        self._check_not_closed()
+        if self.mode != WRITE:
+            import errno
+            raise OSError(errno.EBADF, "write() on read-only GzipFile object")
+
+        if self.fileobj is None:
+            raise ValueError("write() on closed GzipFile object")
+
+        if isinstance(data, bytes):
+            length = len(data)
+        else:
+            # accept any data that supports the buffer protocol
+            data = memoryview(data)
+            length = data.nbytes
+
+        # Nothing is written or accounted for when the input is empty.
+        if length > 0:
+            self.fileobj.write(self.compress.compress(data))
+            self.size += length
+            self.crc = zlib.crc32(data, self.crc)
+            self.offset += length
+
+        return length
+
+    def read(self, size=-1):
+        """Return self._buffer.read(size).
+
+        Raises OSError (EBADF) if the object is not in READ mode.
+        """
+        self._check_not_closed()
+        if self.mode != READ:
+            import errno
+            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
+        return self._buffer.read(size)
+
+    def read1(self, size=-1):
+        """Implements BufferedIOBase.read1()
+
+        Reads up to a buffer's worth of data if size is negative."""
+        self._check_not_closed()
+        if self.mode != READ:
+            import errno
+            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")
+
+        # A negative size is replaced by the default buffer size.
+        if size < 0:
+            size = io.DEFAULT_BUFFER_SIZE
+        return self._buffer.read1(size)
+
+    def peek(self, n):
+        """Return self._buffer.peek(n).
+
+        Raises OSError (EBADF) if the object is not in READ mode.
+        """
+        self._check_not_closed()
+        if self.mode != READ:
+            import errno
+            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
+        return self._buffer.peek(n)
+
+    @property
+    def closed(self):
+        """True once self.fileobj has been set to None."""
+        return self.fileobj is None
+
+    def close(self):
+        """Close the GzipFile; does nothing if it is already closed.
+
+        In WRITE mode the compressor is flushed and the CRC and the low 32
+        bits of the size are appended. In READ mode self._buffer is closed.
+        In both cases self.myfileobj, if set, is closed afterwards.
+        """
+        fileobj = self.fileobj
+        if fileobj is None:
+            return
+        # Mark the object as closed before doing any work.
+        self.fileobj = None
+
try:
+            if self.mode == WRITE:
+                # Write out whatever the compressor still holds, then the
+                # CRC and size values.
+                fileobj.write(self.compress.flush())
+                write32u(fileobj, self.crc)
+                # self.size may exceed 2GB, or even 4GB
+                write32u(fileobj, self.size & 0xffffffff)
+            elif self.mode == READ:
+                self._buffer.close()
+        finally:
+            # Close self.myfileobj, if set, even when the code above raised.
+            myfileobj = self.myfileobj
+            if myfileobj:
+                self.myfileobj = None
+                myfileobj.close()
+
+    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
+        """Flush pending compressed data to the underlying file object.
+
+        Only acts in WRITE mode: the compressor is flushed with *zlib_mode*,
+        the result is written, and the underlying file object is flushed.
+        """
+        self._check_not_closed()
+        if self.mode == WRITE:
+            # Ensure the compressor's buffer is flushed
+            self.fileobj.write(self.compress.flush(zlib_mode))
+            self.fileobj.flush()
+
+    def fileno(self):
+        """Invoke the underlying file object's fileno() method.
+
+        This will raise AttributeError if the underlying file object
+        doesn't support fileno().
+        """
+        return self.fileobj.fileno()
+
+    def rewind(self):
+        '''Return the uncompressed stream file position indicator to the
+        beginning of the file'''
+        if self.mode != READ:
+            raise OSError("Can't rewind in write mode")
+        self._buffer.seek(0)
+
+    def readable(self):
+        """Return True if the object is in READ mode."""
+        return self.mode == READ
+
+    def writable(self):
+        """Return True if the object is in WRITE mode."""
+        return self.mode == WRITE
+
+    def seekable(self):
+        """Always return True."""
+        return True
+
+    def seek(self, offset, whence=io.SEEK_SET):
+        """Change the position in the uncompressed stream.
+
+        In WRITE mode only forward seeks relative to the start or to the
+        current offset are possible; the gap is filled by writing zero
+        bytes, and self.offset is returned. Seeking from the end raises
+        ValueError and seeking backwards raises OSError.
+
+        In READ mode the call is delegated to self._buffer.seek() and its
+        result is returned.
+        """
+        if self.mode == WRITE:
+            if whence != io.SEEK_SET:
+                if whence == io.SEEK_CUR:
+                    # Convert the relative offset to an absolute one.
+                    offset = self.offset + offset
+                else:
+                    raise ValueError('Seek from end not supported')
+            if offset < self.offset:
+                raise OSError('Negative seek in write mode')
+            # Pad with zero bytes, 1024 at a time, then the remainder.
+            count = offset - self.offset
+            chunk = b'\0' * 1024
+            for i in range(count // 1024):
+                self.write(chunk)
+            self.write(b'\0' * (count % 1024))
+        elif self.mode == READ:
+            self._check_not_closed()
+            return self._buffer.seek(offset, whence)
+
+        return self.offset
+
+    def readline(self, size=-1):
+        """Return self._buffer.readline(size)."""
+        self._check_not_closed()
+        return self._buffer.readline(size)
+
+
+class _GzipReader(_compression.DecompressReader):
+    def __init__(self, fp):
+        # *fp* is wrapped in _PaddedFile before being handed to the base
+        # class, together with the zlib decompressor factory and its wbits.
+        super().__init__(_PaddedFile(fp), zlib.decompressobj,
+                         wbits=-zlib.MAX_WBITS)
+        # Set flag indicating start of a new member
+        self._new_member =
True
+        self._last_mtime = None
+
+    def _init_read(self):
+        """Reset the running CRC and the decompressed-size counter."""
+        self._crc = zlib.crc32(b"")
+        self._stream_size = 0  # Decompressed size of unconcatenated stream
+
+    def _read_exact(self, n):
+        '''Read exactly *n* bytes from `self._fp`
+
+        This method is required because self._fp may be unbuffered,
+        i.e. return short reads.
+
+        Raises EOFError if the file ends before *n* bytes were read.
+        '''
+
+        data = self._fp.read(n)
+        # Keep reading until the requested amount has been collected; an
+        # empty read is treated as end of file.
+        while len(data) < n:
+            b = self._fp.read(n - len(data))
+            if not b:
+                raise EOFError("Compressed file ended before the "
+                               "end-of-stream marker was reached")
+            data += b
+        return data
+
+    def _read_gzip_header(self):
+        magic = self._fp.read(2)
+        if magic == b'':
+            return False
+
+        if magic != b'\037\213':
+            raise OSError('Not a gzipped file (%r)' % magic)
+
+        (method, flag,
+         self._last_mtime) = struct.unpack("