From 09d6574c57f64e96dc360bd67af2bdad418fc97f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sun, 17 May 2020 20:08:26 +0100 Subject: [PATCH 01/15] add files.GetContentIOBuffer Fixes #38 --- pydrive2/files.py | 95 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 92 insertions(+), 3 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 49ceb454..be58b42a 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -5,6 +5,7 @@ from googleapiclient import errors from googleapiclient.http import MediaIoBaseUpload from googleapiclient.http import MediaIoBaseDownload +from googleapiclient.http import DEFAULT_CHUNK_SIZE from functools import wraps from .apiattr import ApiAttribute @@ -97,6 +98,48 @@ def _GetList(self): return result +class FakeWriteable(object): + def write(self, chunk): + self.chunk = chunk + + +class MediaIoReadable(object): + def __init__(self, request, chunksize=DEFAULT_CHUNK_SIZE, pre_buffer=True): + """File-like wrapper around MediaIoBaseDownload. + + :param pre_buffer: Whether to read one chunk into an internal buffer + immediately in order to raise any potential errors. + :raises: HttpError + """ + self.done = False + self.fd = FakeWriteable() + self.downloader = MediaIoBaseDownload( + self.fd, request, chunksize=chunksize + ) + self._buffer = None + if pre_buffer: + self._buffer = self.read() + + def read(self, chunksize=DEFAULT_CHUNK_SIZE): + """ + :returns: str -- chunk or None if done + :raises: ApiRequestError + """ + if self._buffer: + buffer = self._buffer + self._buffer = None + return buffer + if self.done: + return None + if chunksize: + self.downloader._chunksize = chunksize + try: + _, self.done = self.downloader.next_chunk() + except errors.HttpError as error: + raise ApiRequestError(error) + return self.fd.chunk + + class GoogleDriveFile(ApiAttributeMixin, ApiResource): """Google Drive File instance. @@ -260,9 +303,10 @@ def download(fd, request): callback(status.resumable_progress, status.total_size) with open(filename, mode="w+b") as fd: - # Ideally would use files.export_media instead if - # metadata.get("mimeType").startswith("application/vnd.google-apps.") - # but that would first require a slow call to FetchMetadata() + # Should use files.export_media instead of files.get_media if + # metadata["mimeType"].startswith("application/vnd.google-apps."). + # But that would first require a slow call to FetchMetadata(). + # We prefer to try-except for speed. try: download(fd, files.get_media(fileId=file_id)) except errors.HttpError as error: @@ -292,6 +336,51 @@ def download(fd, request): if boms: self._RemovePrefix(fd, boms[0]) + @LoadAuth + def GetContentIOBuffer(self, mimetype=None, chunksize=DEFAULT_CHUNK_SIZE): + """Get a file-like object which has a buffered read() method. + + :param mimetype: mimeType of the file. + :type mimetype: str + :param chunksize: default read() chunksize. + :type chunksize: int + :returns: MediaIoReadable -- file-like object. + :raises: ApiRequestError, FileNotUploadedError + """ + files = self.auth.service.files() + file_id = self.metadata.get("id") or self.get("id") + if not file_id: + raise FileNotUploadedError() + + # Should use files.export_media instead of files.get_media if + # metadata["mimeType"].startswith("application/vnd.google-apps."). + # But that would first require a slow call to FetchMetadata(). + # We prefer to try-except for speed. + try: + request = files.get_media(fileId=file_id) + # Ensures thread safety. Similar to other places where we call + # `.execute(http=self.http)` to pass a client from the thread + # local storage. + if self.http: + request.http = self.http + + return MediaIoReadable(request, chunksize=chunksize) + except errors.HttpError as error: + exc = ApiRequestError(error) + if ( + exc.error["code"] != 403 + or exc.GetField("reason") != "fileNotDownloadable" + ): + raise exc + mimetype = mimetype or "text/plain" + try: + request = files.export_media(fileId=file_id, mimeType=mimetype) + if self.http: + request.http = self.http + return MediaIoReadable(request, chunksize=chunksize) + except errors.HttpError as error: + raise ApiRequestError(error) + @LoadAuth def FetchMetadata(self, fields=None, fetch_all=False): """Download file's metadata from id using Files.get(). From 61dbf8ffb302ffe13ff81c2fa1712a663d50189e Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 25 May 2020 01:03:40 +0100 Subject: [PATCH 02/15] files: tidy and abstract common wrapper --- pydrive2/files.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index be58b42a..5e2ee504 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -98,7 +98,9 @@ def _GetList(self): return result -class FakeWriteable(object): +class IoBuffer(object): + """Lightweight retention of one chunk.""" + def write(self, chunk): self.chunk = chunk @@ -112,7 +114,7 @@ def __init__(self, request, chunksize=DEFAULT_CHUNK_SIZE, pre_buffer=True): :raises: HttpError """ self.done = False - self.fd = FakeWriteable() + self.fd = IoBuffer() self.downloader = MediaIoBaseDownload( self.fd, request, chunksize=chunksize ) @@ -290,12 +292,7 @@ def GetContentFile( raise FileNotUploadedError() def download(fd, request): - # Ensures thread safety. Similar to other places where we call - # `.execute(http=self.http)` to pass a client from the thread - # local storage. - if self.http: - request.http = self.http - downloader = MediaIoBaseDownload(fd, request) + downloader = MediaIoBaseDownload(fd, self._WrapRequest(request)) done = False while done is False: status, done = downloader.next_chunk() @@ -357,13 +354,7 @@ def GetContentIOBuffer(self, mimetype=None, chunksize=DEFAULT_CHUNK_SIZE): # But that would first require a slow call to FetchMetadata(). # We prefer to try-except for speed. try: - request = files.get_media(fileId=file_id) - # Ensures thread safety. Similar to other places where we call - # `.execute(http=self.http)` to pass a client from the thread - # local storage. - if self.http: - request.http = self.http - + request = self._WrapRequest(files.get_media(fileId=file_id)) return MediaIoReadable(request, chunksize=chunksize) except errors.HttpError as error: exc = ApiRequestError(error) @@ -374,9 +365,9 @@ def GetContentIOBuffer(self, mimetype=None, chunksize=DEFAULT_CHUNK_SIZE): raise exc mimetype = mimetype or "text/plain" try: - request = files.export_media(fileId=file_id, mimeType=mimetype) - if self.http: - request.http = self.http + request = self._WrapRequest( + files.export_media(fileId=file_id, mimeType=mimetype) + ) return MediaIoReadable(request, chunksize=chunksize) except errors.HttpError as error: raise ApiRequestError(error) @@ -535,6 +526,16 @@ def DeletePermission(self, permission_id): """ return self._DeletePermission(permission_id) + def _WrapRequest(self, request): + """Replaces request.http with self.http. + + Ensures thread safety. Similar to other places where we call + `.execute(http=self.http)` to pass a client from the thread local storage. + """ + if self.http: + request.http = self.http + return request + @LoadAuth def _FilesInsert(self, param=None): """Upload a new file using Files.insert(). From fdfd90bf320ef82db9ae1e3fa8beea69be8e6d6c Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Mon, 25 May 2020 01:18:35 +0100 Subject: [PATCH 03/15] add encoding param --- pydrive2/files.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 5e2ee504..91399bf7 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -106,7 +106,13 @@ def write(self, chunk): class MediaIoReadable(object): - def __init__(self, request, chunksize=DEFAULT_CHUNK_SIZE, pre_buffer=True): + def __init__( + self, + request, + encoding=None, + chunksize=DEFAULT_CHUNK_SIZE, + pre_buffer=True, + ): """File-like wrapper around MediaIoBaseDownload. :param pre_buffer: Whether to read one chunk into an internal buffer @@ -114,6 +120,7 @@ def __init__(self, request, chunksize=DEFAULT_CHUNK_SIZE, pre_buffer=True): :raises: HttpError """ self.done = False + self.encoding = encoding self.fd = IoBuffer() self.downloader = MediaIoBaseDownload( self.fd, request, chunksize=chunksize @@ -122,6 +129,9 @@ def __init__(self, request, chunksize=DEFAULT_CHUNK_SIZE, pre_buffer=True): if pre_buffer: self._buffer = self.read() + def decode(self, raw_bytes): + return raw_bytes.decode(self.encoding) if self.encoding else raw_bytes + def read(self, chunksize=DEFAULT_CHUNK_SIZE): """ :returns: str -- chunk or None if done @@ -130,7 +140,7 @@ def read(self, chunksize=DEFAULT_CHUNK_SIZE): if self._buffer: buffer = self._buffer self._buffer = None - return buffer + return self.decode(buffer) if self.done: return None if chunksize: @@ -139,7 +149,7 @@ def read(self, chunksize=DEFAULT_CHUNK_SIZE): _, self.done = self.downloader.next_chunk() except errors.HttpError as error: raise ApiRequestError(error) - return self.fd.chunk + return self.decode(self.fd.chunk) class GoogleDriveFile(ApiAttributeMixin, ApiResource): @@ -334,11 +344,15 @@ def download(fd, request): self._RemovePrefix(fd, boms[0]) @LoadAuth - def GetContentIOBuffer(self, mimetype=None, chunksize=DEFAULT_CHUNK_SIZE): + def GetContentIOBuffer( + self, mimetype=None, encoding=None, chunksize=DEFAULT_CHUNK_SIZE + ): """Get a file-like object which has a buffered read() method. :param mimetype: mimeType of the file. :type mimetype: str + :param encoding: The encoding to use when decoding the byte string. + :type encoding: str :param chunksize: default read() chunksize. :type chunksize: int :returns: MediaIoReadable -- file-like object. @@ -355,7 +369,9 @@ def GetContentIOBuffer(self, mimetype=None, chunksize=DEFAULT_CHUNK_SIZE): # We prefer to try-except for speed. try: request = self._WrapRequest(files.get_media(fileId=file_id)) - return MediaIoReadable(request, chunksize=chunksize) + return MediaIoReadable( + request, encoding=encoding, chunksize=chunksize + ) except errors.HttpError as error: exc = ApiRequestError(error) if ( @@ -368,7 +384,9 @@ def GetContentIOBuffer(self, mimetype=None, chunksize=DEFAULT_CHUNK_SIZE): request = self._WrapRequest( files.export_media(fileId=file_id, mimeType=mimetype) ) - return MediaIoReadable(request, chunksize=chunksize) + return MediaIoReadable( + request, encoding=encoding, chunksize=chunksize + ) except errors.HttpError as error: raise ApiRequestError(error) From da33215c3ab6657ccf23b754c0785531d070bcf4 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Tue, 26 May 2020 10:55:43 +0100 Subject: [PATCH 04/15] mode decode to IoBuffer --- pydrive2/files.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 91399bf7..42878c46 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -101,9 +101,20 @@ def _GetList(self): class IoBuffer(object): """Lightweight retention of one chunk.""" + def __init__(self, encoding): + self.encoding = encoding + self.chunk = None + def write(self, chunk): self.chunk = chunk + def read(self): + return ( + self.chunk.decode(self.encoding) + if self.chunk and self.encoding + else self.chunk + ) + class MediaIoReadable(object): def __init__( @@ -120,27 +131,23 @@ def __init__( :raises: HttpError """ self.done = False - self.encoding = encoding - self.fd = IoBuffer() + self._fd = IoBuffer(encoding) self.downloader = MediaIoBaseDownload( - self.fd, request, chunksize=chunksize + self._fd, request, chunksize=chunksize ) - self._buffer = None + self._pre_buffer = False if pre_buffer: - self._buffer = self.read() - - def decode(self, raw_bytes): - return raw_bytes.decode(self.encoding) if self.encoding else raw_bytes + self.read() + self._pre_buffer = True def read(self, chunksize=DEFAULT_CHUNK_SIZE): """ :returns: str -- chunk or None if done :raises: ApiRequestError """ - if self._buffer: - buffer = self._buffer - self._buffer = None - return self.decode(buffer) + if self._pre_buffer: + self._pre_buffer = False + return self._fd.read() if self.done: return None if chunksize: @@ -149,7 +156,7 @@ def read(self, chunksize=DEFAULT_CHUNK_SIZE): _, self.done = self.downloader.next_chunk() except errors.HttpError as error: raise ApiRequestError(error) - return self.decode(self.fd.chunk) + return self._fd.read() class GoogleDriveFile(ApiAttributeMixin, ApiResource): From 0b085694af3413b650b1d4c483d386048b0e3ce3 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Wed, 27 May 2020 16:46:29 +0100 Subject: [PATCH 05/15] remove `read(chunksize)` for now - see https://github.com/iterative/PyDrive2/pull/39#discussion_r430827441 --- pydrive2/files.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 42878c46..560ea807 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -140,7 +140,7 @@ def __init__( self.read() self._pre_buffer = True - def read(self, chunksize=DEFAULT_CHUNK_SIZE): + def read(self): """ :returns: str -- chunk or None if done :raises: ApiRequestError @@ -150,8 +150,6 @@ def read(self, chunksize=DEFAULT_CHUNK_SIZE): return self._fd.read() if self.done: return None - if chunksize: - self.downloader._chunksize = chunksize try: _, self.done = self.downloader.next_chunk() except errors.HttpError as error: From 23cb534d562c91fe6f124da8d10b763d68060c5d Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 19:42:17 +0100 Subject: [PATCH 06/15] add files.MediaIoReadable.__iter__ --- pydrive2/files.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 560ea807..20b25b05 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -142,7 +142,7 @@ def __init__( def read(self): """ - :returns: str -- chunk or None if done + :returns: bytes or str -- chunk (or None if done) :raises: ApiRequestError """ if self._pre_buffer: @@ -156,6 +156,13 @@ def read(self): raise ApiRequestError(error) return self._fd.read() + def __iter__(self): + while True: + chunk = self.read() + if chunk is None: + break + yield chunk + class GoogleDriveFile(ApiAttributeMixin, ApiResource): """Google Drive File instance. From 73f0c815fd2361e6a4be7de0a59acce2297cb80f Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 20:29:13 +0100 Subject: [PATCH 07/15] add files.GetContentIOBuffer(remove_bom) --- pydrive2/files.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 20b25b05..db6bade3 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -121,6 +121,7 @@ def __init__( self, request, encoding=None, + remove_prefix=b"", chunksize=DEFAULT_CHUNK_SIZE, pre_buffer=True, ): @@ -128,6 +129,7 @@ def __init__( :param pre_buffer: Whether to read one chunk into an internal buffer immediately in order to raise any potential errors. + :param remove_prefix: Bytes prefix to remove from internal pre buffer. :raises: HttpError """ self.done = False @@ -138,6 +140,10 @@ def __init__( self._pre_buffer = False if pre_buffer: self.read() + if remove_prefix: + chunk = io.BytesIO(self._fd.chunk) + GoogleDriveFile._RemovePrefix(chunk, remove_prefix) + self._fd.chunk = chunk.getvalue() self._pre_buffer = True def read(self): @@ -357,7 +363,11 @@ def download(fd, request): @LoadAuth def GetContentIOBuffer( - self, mimetype=None, encoding=None, chunksize=DEFAULT_CHUNK_SIZE + self, + mimetype=None, + encoding=None, + remove_bom=False, + chunksize=DEFAULT_CHUNK_SIZE, ): """Get a file-like object which has a buffered read() method. @@ -365,7 +375,9 @@ def GetContentIOBuffer( :type mimetype: str :param encoding: The encoding to use when decoding the byte string. :type encoding: str - :param chunksize: default read() chunksize. + :param remove_bom: Whether to remove the byte order marking. + :type remove_bom: bool + :param chunksize: default read()/iter() chunksize. :type chunksize: int :returns: MediaIoReadable -- file-like object. :raises: ApiRequestError, FileNotUploadedError @@ -396,8 +408,20 @@ def GetContentIOBuffer( request = self._WrapRequest( files.export_media(fileId=file_id, mimeType=mimetype) ) + remove_prefix = b"" + if mimetype == "text/plain" and remove_bom: + boms = [ + bom[mimetype] + for bom in MIME_TYPE_TO_BOM.values() + if mimetype in bom + ] + if boms: + remove_prefix = boms[0] return MediaIoReadable( - request, encoding=encoding, chunksize=chunksize + request, + encoding=encoding, + remove_prefix=remove_prefix, + chunksize=chunksize, ) except errors.HttpError as error: raise ApiRequestError(error) From dbfa5d9ef25f18c7d08834e01ebd0a90e3a1fc7b Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 20:38:14 +0100 Subject: [PATCH 08/15] tidy remove_bom --- pydrive2/files.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index db6bade3..19f1a047 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -23,6 +23,13 @@ } +def GetBom(mimetype): + """Based on download mime type (ignores Google Drive mime type)""" + for bom in MIME_TYPE_TO_BOM.values(): + if mimetype in bom: + return bom[mimetype] + + class FileNotUploadedError(RuntimeError): """Error trying to access metadata of file that is not uploaded.""" @@ -353,11 +360,7 @@ def download(fd, request): if mimetype == "text/plain" and remove_bom: fd.seek(0) - boms = [ - bom[mimetype] - for bom in MIME_TYPE_TO_BOM.values() - if mimetype in bom - ] + boms = GetBom(mimetype) if boms: self._RemovePrefix(fd, boms[0]) @@ -410,11 +413,7 @@ def GetContentIOBuffer( ) remove_prefix = b"" if mimetype == "text/plain" and remove_bom: - boms = [ - bom[mimetype] - for bom in MIME_TYPE_TO_BOM.values() - if mimetype in bom - ] + boms = GetBom(mimetype) if boms: remove_prefix = boms[0] return MediaIoReadable( From c550cd0cfaf02f8e816838dfb95b3bae90b75161 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 21:02:47 +0100 Subject: [PATCH 09/15] test GetContentIOBuffer --- pydrive2/test/test_file.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pydrive2/test/test_file.py b/pydrive2/test/test_file.py index f246080a..4c3dee84 100644 --- a/pydrive2/test/test_file.py +++ b/pydrive2/test/test_file.py @@ -280,6 +280,24 @@ def test_10_Files_Download_Service(self): self.DeleteUploadedFiles(drive, [file1["id"]]) + def test_11_Files_Get_Content_Buffer(self): + drive = GoogleDrive(self.ga) + file1 = drive.CreateFile() + filename = self.getTempFile() + content = "hello world!\ngoodbye, cruel world!" + file1["title"] = filename + file1.SetContentString(content) + pydrive_retry(file1.Upload) # Files.insert + + buffer1 = pydrive_retry(file1.GetContentIOBuffer) + self.assertEqual(file1.metadata["title"], filename) + self.assertEqual(b"".join(iter(buffer1)).decode("ascii"), content) + + buffer2 = pydrive_retry(file1.GetContentIOBuffer, encoding="ascii") + self.assertEqual("".join(iter(buffer2)), content) + + self.DeleteUploadedFiles(drive, [file1["id"]]) + # Tests for Trash/UnTrash/Delete. # =============================== From 51014258e486e57d25a2e71f7a79b6ee8acddc45 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 21:18:26 +0100 Subject: [PATCH 10/15] fix GetBOM usage --- pydrive2/files.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 19f1a047..12d01360 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -23,7 +23,7 @@ } -def GetBom(mimetype): +def GetBOM(mimetype): """Based on download mime type (ignores Google Drive mime type)""" for bom in MIME_TYPE_TO_BOM.values(): if mimetype in bom: @@ -360,9 +360,9 @@ def download(fd, request): if mimetype == "text/plain" and remove_bom: fd.seek(0) - boms = GetBom(mimetype) - if boms: - self._RemovePrefix(fd, boms[0]) + bom = GetBOM(mimetype) + if bom: + self._RemovePrefix(fd, bom) @LoadAuth def GetContentIOBuffer( @@ -413,9 +413,9 @@ def GetContentIOBuffer( ) remove_prefix = b"" if mimetype == "text/plain" and remove_bom: - boms = GetBom(mimetype) - if boms: - remove_prefix = boms[0] + bom = GetBOM(mimetype) + if bom: + remove_prefix = bom return MediaIoReadable( request, encoding=encoding, From b561de25f52b65c46027ae520c1ccdefda8bebf7 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 21:25:37 +0100 Subject: [PATCH 11/15] misc tidy --- pydrive2/files.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index 12d01360..b5d65102 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -23,13 +23,6 @@ } -def GetBOM(mimetype): - """Based on download mime type (ignores Google Drive mime type)""" - for bom in MIME_TYPE_TO_BOM.values(): - if mimetype in bom: - return bom[mimetype] - - class FileNotUploadedError(RuntimeError): """Error trying to access metadata of file that is not uploaded.""" @@ -128,15 +121,15 @@ def __init__( self, request, encoding=None, + pre_buffer=True, remove_prefix=b"", chunksize=DEFAULT_CHUNK_SIZE, - pre_buffer=True, ): """File-like wrapper around MediaIoBaseDownload. :param pre_buffer: Whether to read one chunk into an internal buffer immediately in order to raise any potential errors. - :param remove_prefix: Bytes prefix to remove from internal pre buffer. + :param remove_prefix: Bytes prefix to remove from internal pre_buffer. :raises: HttpError """ self.done = False @@ -360,7 +353,7 @@ def download(fd, request): if mimetype == "text/plain" and remove_bom: fd.seek(0) - bom = GetBOM(mimetype) + bom = self._GetBOM(mimetype) if bom: self._RemovePrefix(fd, bom) @@ -411,11 +404,11 @@ def GetContentIOBuffer( request = self._WrapRequest( files.export_media(fileId=file_id, mimeType=mimetype) ) - remove_prefix = b"" - if mimetype == "text/plain" and remove_bom: - bom = GetBOM(mimetype) - if bom: - remove_prefix = bom + remove_prefix = ( + self._GetBOM(mimetype) + if mimetype == "text/plain" and remove_bom + else b"" + ) return MediaIoReadable( request, encoding=encoding, @@ -806,6 +799,13 @@ def _DeletePermission(self, permission_id): self.metadata["permissions"] = permissions return True + @staticmethod + def _GetBOM(mimetype): + """Based on download mime type (ignores Google Drive mime type)""" + for bom in MIME_TYPE_TO_BOM.values(): + if mimetype in bom: + return bom[mimetype] + @staticmethod def _RemovePrefix(file_object, prefix, block_size=BLOCK_SIZE): """Deletes passed prefix by shifting content of passed file object by to From 3bff25ae74c5f3a42c6f29ff6062b26f82dfb509 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 21:36:32 +0100 Subject: [PATCH 12/15] test GetContentIOBuffer(remove_bom) --- pydrive2/test/test_file.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pydrive2/test/test_file.py b/pydrive2/test/test_file.py index 4c3dee84..1c8dbef5 100644 --- a/pydrive2/test/test_file.py +++ b/pydrive2/test/test_file.py @@ -628,6 +628,23 @@ def test_Gfile_Conversion_Add_Remove_BOM(self): self.assertNotEqual(content_bom, content_no_bom) self.assertTrue(len(content_bom) > len(content_no_bom)) + buffer_bom = pydrive_retry( + file1.GetContentIOBuffer, + mimetype="text/plain", + encoding="ascii", + ) + buffer_bom = "".join(iter(buffer_bom)) + buffer_no_bom = pydrive_retry( + file1.GetContentIOBuffer, + mimetype="text/plain", + remove_bom=True, + encoding="ascii", + ) + buffer_no_bom = "".join(iter(buffer_no_bom)) + + self.assertEqual(content_bom, buffer_bom) + self.assertNotEqual(content_no_bom, buffer_no_bom) + finally: self.cleanup_gfile_conversion_test( file1, file_name, downloaded_file_name From e7ee0787810acfb1c7ea9e44ea73368407f1a71c Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 22:02:52 +0100 Subject: [PATCH 13/15] fix error handling --- pydrive2/files.py | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pydrive2/files.py b/pydrive2/files.py index b5d65102..c9e8a0b7 100644 --- a/pydrive2/files.py +++ b/pydrive2/files.py @@ -130,7 +130,7 @@ def __init__( :param pre_buffer: Whether to read one chunk into an internal buffer immediately in order to raise any potential errors. :param remove_prefix: Bytes prefix to remove from internal pre_buffer. - :raises: HttpError + :raises: ApiRequestError """ self.done = False self._fd = IoBuffer(encoding) @@ -163,6 +163,9 @@ def read(self): return self._fd.read() def __iter__(self): + """ + :raises: ApiRequestError + """ while True: chunk = self.read() if chunk is None: @@ -392,31 +395,27 @@ def GetContentIOBuffer( return MediaIoReadable( request, encoding=encoding, chunksize=chunksize ) - except errors.HttpError as error: - exc = ApiRequestError(error) + except ApiRequestError as exc: if ( exc.error["code"] != 403 or exc.GetField("reason") != "fileNotDownloadable" ): raise exc mimetype = mimetype or "text/plain" - try: - request = self._WrapRequest( - files.export_media(fileId=file_id, mimeType=mimetype) - ) - remove_prefix = ( - self._GetBOM(mimetype) - if mimetype == "text/plain" and remove_bom - else b"" - ) - return MediaIoReadable( - request, - encoding=encoding, - remove_prefix=remove_prefix, - chunksize=chunksize, - ) - except errors.HttpError as error: - raise ApiRequestError(error) + request = self._WrapRequest( + files.export_media(fileId=file_id, mimeType=mimetype) + ) + remove_prefix = ( + self._GetBOM(mimetype) + if mimetype == "text/plain" and remove_bom + else b"" + ) + return MediaIoReadable( + request, + encoding=encoding, + remove_prefix=remove_prefix, + chunksize=chunksize, + ) @LoadAuth def FetchMetadata(self, fields=None, fetch_all=False): From 3547268e87b9db57fefc192728d8d3f0c1957c7d Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 22:15:02 +0100 Subject: [PATCH 14/15] fix tests encoding --- pydrive2/test/test_file.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydrive2/test/test_file.py b/pydrive2/test/test_file.py index 1c8dbef5..0a8a9d03 100644 --- a/pydrive2/test/test_file.py +++ b/pydrive2/test/test_file.py @@ -631,16 +631,16 @@ def test_Gfile_Conversion_Add_Remove_BOM(self): buffer_bom = pydrive_retry( file1.GetContentIOBuffer, mimetype="text/plain", - encoding="ascii", + encoding="utf-8", ) - buffer_bom = "".join(iter(buffer_bom)) + buffer_bom = u"".join(iter(buffer_bom)) buffer_no_bom = pydrive_retry( file1.GetContentIOBuffer, mimetype="text/plain", remove_bom=True, - encoding="ascii", + encoding="utf-8", ) - buffer_no_bom = "".join(iter(buffer_no_bom)) + buffer_no_bom = u"".join(iter(buffer_no_bom)) self.assertEqual(content_bom, buffer_bom) self.assertNotEqual(content_no_bom, buffer_no_bom) From 712c571cad069c39a89bc306a0dcbef1f5512a59 Mon Sep 17 00:00:00 2001 From: Casper da Costa-Luis Date: Sat, 30 May 2020 22:26:40 +0100 Subject: [PATCH 15/15] silly assertion fix --- pydrive2/test/test_file.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydrive2/test/test_file.py b/pydrive2/test/test_file.py index 0a8a9d03..fe6aab27 100644 --- a/pydrive2/test/test_file.py +++ b/pydrive2/test/test_file.py @@ -634,6 +634,8 @@ def test_Gfile_Conversion_Add_Remove_BOM(self): encoding="utf-8", ) buffer_bom = u"".join(iter(buffer_bom)) + self.assertEqual(content_bom, buffer_bom) + buffer_no_bom = pydrive_retry( file1.GetContentIOBuffer, mimetype="text/plain", @@ -641,9 +643,7 @@ def test_Gfile_Conversion_Add_Remove_BOM(self): encoding="utf-8", ) buffer_no_bom = u"".join(iter(buffer_no_bom)) - - self.assertEqual(content_bom, buffer_bom) - self.assertNotEqual(content_no_bom, buffer_no_bom) + self.assertEqual(content_no_bom, buffer_no_bom) finally: self.cleanup_gfile_conversion_test(