diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py
index cc7f0889..23aad5c2 100644
--- a/src/tablib/formats/_xlsx.py
+++ b/src/tablib/formats/_xlsx.py
@@ -85,13 +85,8 @@ def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape
         return stream.getvalue()
 
     @classmethod
-    def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0):
-        """Returns databook from XLS stream."""
-
-        dset.wipe()
-
-        xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
-        sheet = xls_book.active
+    def import_sheet(cls, dset, sheet, headers=True, skip_lines=0):
+        """Populates dataset with the sheet's title and rows, padding short rows with ''."""
 
         dset.title = sheet.title
 
@@ -102,8 +97,20 @@ def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0)
             if i == skip_lines and headers:
                 dset.headers = row_vals
             else:
+                if i > skip_lines and len(row_vals) < dset.width:
+                    row_vals += [''] * (dset.width - len(row_vals))
                 dset.append(row_vals)
 
+    @classmethod
+    def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0):
+        """Populates dataset from the active sheet of an XLSX stream."""
+
+        dset.wipe()
+
+        xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
+        sheet = xls_book.active
+        cls.import_sheet(dset, sheet, headers, skip_lines)
+
     @classmethod
     def import_book(cls, dbook, in_stream, headers=True, read_only=True):
         """Returns databook from XLS stream."""
@@ -113,19 +120,9 @@ def import_book(cls, dbook, in_stream, headers=True, read_only=True):
         xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
 
         for sheet in xls_book.worksheets:
-            data = tablib.Dataset()
-            data.title = sheet.title
-
-            for i, row in enumerate(sheet.rows):
-                row_vals = [c.value for c in row]
-                if (i == 0) and (headers):
-                    data.headers = row_vals
-                else:
-                    if i > 0 and len(row_vals) < data.width:
-                        row_vals += [''] * (data.width - len(row_vals))
-                    data.append(row_vals)
-
-            dbook.add_sheet(data)
+            dset = tablib.Dataset()
+            cls.import_sheet(dset, sheet, headers)
+            dbook.add_sheet(dset)
 
     @classmethod
     def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False):
diff --git a/tests/test_tablib.py b/tests/test_tablib.py
index d5eda8ce..6360a2b7 100755
--- a/tests/test_tablib.py
+++ b/tests/test_tablib.py
@@ -1096,13 +1096,20 @@ def test_xlsx_bad_chars_sheet_name(self):
         new_data = tablib.Databook().load(_xlsx, 'xlsx')
         self.assertEqual(new_data.sheets()[0].title, 'bad name -------qwertyuiopasdfg')
 
-    def test_xlsx_import_set_ragged(self):
-        """Import XLSX file when not all rows have the same length."""
+    def test_xlsx_import_book_ragged(self):
+        """Import XLSX file through databook when not all rows have the same length."""
         xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
         with open(str(xlsx_source), mode='rb') as fh:
             book = tablib.Databook().load(fh, 'xlsx')
         self.assertEqual(book.sheets()[0].pop(), (1.0, ''))
 
+    def test_xlsx_import_set_ragged(self):
+        """Import XLSX file through dataset when not all rows have the same length."""
+        xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx'
+        with open(str(xlsx_source), mode='rb') as fh:
+            dataset = tablib.Dataset().load(fh, 'xlsx')
+        self.assertEqual(dataset.pop(), (1.0, ''))
+
     def test_xlsx_wrong_char(self):
         """Bad characters are not silently ignored. We let the exception bubble up."""
         from openpyxl.utils.exceptions import IllegalCharacterError