This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Merge 1cb2d56 into 04323f0
roll committed May 27, 2020
2 parents 04323f0 + 1cb2d56 commit 2dd2812
Showing 6 changed files with 40 additions and 10 deletions.
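In short, this merge gives the AWS and remote loaders a `remote` flag and adds a `workbook_cache` option to the XLSX parser, so a remote workbook can be downloaded once and reused when reading several of its sheets.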
Binary file removed data/special/sheets.xlsx
1 change: 1 addition & 0 deletions tabulator/loaders/aws.py
@@ -22,6 +22,7 @@ class AWSLoader(Loader):

    # Public

    remote = True
    options = [
        's3_endpoint_url',
    ]
1 change: 1 addition & 0 deletions tabulator/loaders/remote.py
@@ -21,6 +21,7 @@ class RemoteLoader(Loader):

    # Public

    remote = True
    options = [
        'http_session',
        'http_stream',
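Both loaders now expose `remote = True` as a class attribute, which parsers can inspect to decide whether the incoming stream needs to be copied to local disk before parsing (the XLSX parser below checks it via `getattr(self.__loader, 'remote', False)`). A minimal sketch of that pattern, assuming a loader with tabulator's `load(source, mode, encoding)` interface; `open_binary` is an illustrative helper, not part of the library:

import shutil
from tempfile import TemporaryFile

def open_binary(loader, source):
    # Illustrative helper (not tabulator API): spool remote streams to disk
    # so the parser can seek and re-read them like a local file.
    data = loader.load(source, mode='b')
    if getattr(loader, 'remote', False):
        local = TemporaryFile()
        shutil.copyfileobj(data, local)
        data.close()
        local.seek(0)
        return local
    return data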
38 changes: 29 additions & 9 deletions tabulator/parsers/xlsx.py
@@ -4,12 +4,15 @@
from __future__ import absolute_import
from __future__ import unicode_literals

import os
import io
import six
import shutil
import atexit
import openpyxl
import datetime
from itertools import chain
from tempfile import TemporaryFile
from tempfile import NamedTemporaryFile
from ..parser import Parser
from .. import exceptions
from .. import helpers
@@ -25,16 +28,18 @@ class XLSXParser(Parser):

    options = [
        'sheet',
        'workbook_cache',
        'fill_merged_cells',
        'preserve_formatting',
        'adjust_floating_point_error',
    ]

    def __init__(self, loader, force_parse=False, sheet=1,
    def __init__(self, loader, force_parse=False, sheet=1, workbook_cache=None,
                 fill_merged_cells=False, preserve_formatting=False,
                 adjust_floating_point_error=False):
        self.__loader = loader
        self.__sheet_pointer = sheet
        self.__workbook_cache = workbook_cache
        self.__fill_merged_cells = fill_merged_cells
        self.__preserve_formatting = preserve_formatting
        self.__adjust_floating_point_error = adjust_floating_point_error
@@ -51,17 +56,32 @@ def closed(self):
    def open(self, source, encoding=None):
        self.close()
        self.__encoding = encoding
        self.__bytes = self.__loader.load(source, mode='b', encoding=encoding)

        # Remote
        # Create copy for remote source
        # For remote stream we need local copy (will be deleted on close by Python)
        # https://docs.python.org/3.5/library/tempfile.html#tempfile.TemporaryFile
        if getattr(self.__bytes, 'remote', False):
            new_bytes = TemporaryFile()
            shutil.copyfileobj(self.__bytes, new_bytes)
            self.__bytes.close()
            self.__bytes = new_bytes
            self.__bytes.seek(0)
        if getattr(self.__loader, 'remote', False):
            # Cached
            if self.__workbook_cache is not None and source in self.__workbook_cache:
                self.__bytes = io.open(self.__workbook_cache[source], 'rb')
            # Not cached
            else:
                prefix = 'tabulator-'
                delete = self.__workbook_cache is None
                source_bytes = self.__loader.load(source, mode='b', encoding=encoding)
                target_bytes = NamedTemporaryFile(prefix=prefix, delete=delete)
                shutil.copyfileobj(source_bytes, target_bytes)
                source_bytes.close()
                target_bytes.seek(0)
                self.__bytes = target_bytes
                if self.__workbook_cache is not None:
                    self.__workbook_cache[source] = target_bytes.name
                    atexit.register(os.remove, target_bytes.name)

        # Local
        else:
            self.__bytes = self.__loader.load(source, mode='b', encoding=encoding)

        # Get book
        # To fill merged cells we can't use read-only because
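For remote sources the parser now spools the workbook into a `NamedTemporaryFile`. Without a `workbook_cache` the temporary file is deleted as soon as it is closed; with a cache dict the file is kept (`delete=False`), its path is stored under the source key, and removal is deferred to interpreter exit via `atexit`, so later opens of the same source reuse the local copy instead of downloading the workbook again. A condensed sketch of that flow, assuming the same loader interface (the `fetch_workbook` name and signature are illustrative, not part of tabulator):

import atexit
import io
import os
import shutil
from tempfile import NamedTemporaryFile

def fetch_workbook(loader, source, encoding=None, cache=None):
    # Reuse a previously downloaded copy if the cache already knows the source
    if cache is not None and source in cache:
        return io.open(cache[source], 'rb')
    source_bytes = loader.load(source, mode='b', encoding=encoding)
    # Keep the temp file around (delete=False) only when a cache is supplied
    target_bytes = NamedTemporaryFile(prefix='tabulator-', delete=cache is None)
    shutil.copyfileobj(source_bytes, target_bytes)
    source_bytes.close()
    target_bytes.seek(0)
    if cache is not None:
        cache[source] = target_bytes.name
        # Deferred cleanup: cached files are removed when the process exits
        atexit.register(os.remove, target_bytes.name)
    return target_bytes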
9 changes: 9 additions & 0 deletions tests/formats/test_xlsx.py
@@ -113,6 +113,15 @@ def test_stream_xlsx_preserve_formatting():
}]


def test_stream_xlsx_workbook_cache():
    workbook_cache = {}
    source = BASE_URL % 'data/special/sheets.xlsx'
    for sheet in ['Sheet1', 'Sheet2', 'Sheet3']:
        with Stream(source, sheet=sheet, workbook_cache=workbook_cache) as stream:
            assert len(workbook_cache) == 1
            assert stream.read()
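Because all three sheets live in the same remote workbook, the cache gains exactly one entry on the first open, and the later `Stream` calls reuse that local copy instead of downloading the file again.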


# Write

def test_stream_save_xlsx(tmpdir):
1 change: 0 additions & 1 deletion tests/test_stream.py
@@ -108,7 +108,6 @@ def test_stream_headers_inline_keyed_headers_is_none():
def test_stream_headers_xls_multiline():
    source = 'data/special/multiline-headers.xlsx'
    with Stream(source, headers=[1, 5], fill_merged_cells=True) as stream:
        print(stream.headers)
        assert stream.headers == [
            'Region',
            'Caloric contribution (%)',
