Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace MarkupPy by ElementTree for html conversion #554

Merged
merged 1 commit into from
Jul 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# History

## Unreleased

### Changes

- The html export format no longer depends on MarkupPy; as a result, the
`tablib[html]` install target has also been removed.

## 3.5.0 (2023-06-11)

### Improvements
Expand Down
3 changes: 0 additions & 3 deletions docs/formats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@ The ``html`` format is currently export-only. The exports produce an HTML page
with the data in a ``<table>``. If headers have been set, they will be used as
table headers.

This format is optional, install Tablib with ``pip install "tablib[html]"`` to
make the format available.

jira
====

Expand Down
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ dynamic = ["version"]

[project.optional-dependencies]
all = [
"markuppy",
"odfpy",
"openpyxl>=2.6.0",
"pandas",
Expand All @@ -44,7 +43,6 @@ all = [
"xlwt",
]
cli = ["tabulate"]
html = ["markuppy"]
ods = ["odfpy"]
pandas = ["pandas"]
xls = ["xlrd", "xlwt"]
Expand Down
4 changes: 1 addition & 3 deletions src/tablib/formats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
uninstalled_format_messages = {
"cli": {"package_name": "tabulate package", "extras_name": "cli"},
"df": {"package_name": "pandas package", "extras_name": "pandas"},
"html": {"package_name": "MarkupPy package", "extras_name": "html"},
"ods": {"package_name": "odfpy package", "extras_name": "ods"},
"xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"},
"xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"},
Expand Down Expand Up @@ -101,8 +100,7 @@ def register_builtins(self):
if find_spec('odf'):
self.register('ods', 'tablib.formats._ods.ODSFormat')
self.register('dbf', 'tablib.formats._dbf.DBFFormat')
if find_spec('MarkupPy'):
self.register('html', 'tablib.formats._html.HTMLFormat')
self.register('html', 'tablib.formats._html.HTMLFormat')
self.register('jira', 'tablib.formats._jira.JIRAFormat')
self.register('latex', 'tablib.formats._latex.LATEXFormat')
if find_spec('pandas'):
Expand Down
66 changes: 26 additions & 40 deletions src/tablib/formats/_html.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
""" Tablib - HTML export support.
"""

import codecs
from io import BytesIO

from MarkupPy import markup
from xml.etree import ElementTree as ET


class HTMLFormat:
Expand All @@ -17,48 +13,38 @@ class HTMLFormat:
def export_set(cls, dataset):
"""HTML representation of a Dataset."""

stream = BytesIO()

page = markup.page()
page.table.open()

table = ET.Element('table')
if dataset.headers is not None:
new_header = [item if item is not None else '' for item in dataset.headers]

page.thead.open()
headers = markup.oneliner.th(new_header)
page.tr(headers)
page.thead.close()

page.tbody.open()
head = ET.Element('thead')
tr = ET.Element('tr')
for header in dataset.headers:
th = ET.Element('th')
th.text = str(header) if header is not None else ''
tr.append(th)
head.append(tr)
table.append(head)

body = ET.Element('tbody')
for row in dataset:
new_row = [item if item is not None else '' for item in row]
tr = ET.Element('tr')
for item in row:
td = ET.Element('td')
td.text = str(item) if item is not None else ''
tr.append(td)
body.append(tr)
table.append(body)

html_row = markup.oneliner.td(new_row)
page.tr(html_row)
page.tbody.close()

page.table.close()

# Allow unicode characters in output
wrapper = codecs.getwriter("utf8")(stream)
wrapper.writelines(str(page))

return stream.getvalue().decode('utf-8')
return ET.tostring(table, method='html', encoding='unicode')

@classmethod
def export_book(cls, databook):
"""HTML representation of a Databook."""

stream = BytesIO()

# Allow unicode characters in output
wrapper = codecs.getwriter("utf8")(stream)

result = ''
for i, dset in enumerate(databook._datasets):
title = (dset.title if dset.title else 'Set %s' % (i))
wrapper.write(f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n')
wrapper.write(dset.html)
wrapper.write('\n')
title = dset.title if dset.title else f'Set {i}'
result += f'<{cls.BOOK_ENDINGS}>{title}</{cls.BOOK_ENDINGS}>\n'
result += dset.html
result += '\n'

return stream.getvalue().decode('utf-8')
return result
1 change: 0 additions & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pytest
pytest-cov
MarkupPy
odfpy
openpyxl>=2.6.0
pyyaml
Expand Down
71 changes: 36 additions & 35 deletions tests/test_tablib.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from uuid import uuid4

import pytest
from MarkupPy import markup
from openpyxl.reader.excel import load_workbook

import tablib
Expand Down Expand Up @@ -624,47 +623,49 @@ def test_row_has_tags(self):


class HTMLTests(BaseTestCase):
def test_html_export(self):
founders_html = (
"<table>"
"<thead>"
"<tr><th>first_name</th><th>last_name</th><th>gpa</th></tr>"
"</thead>"
"<tbody>"
"<tr><td>John</td><td>Adams</td><td>90</td></tr>"
"<tr><td>George</td><td>Washington</td><td>67</td></tr>"
"<tr><td>Thomas</td><td>Jefferson</td><td>50</td></tr>"
"</tbody>"
"</table>"
)

def test_html_dataset_export(self):
"""HTML export"""

html = markup.page()
html.table.open()
html.thead.open()

html.tr(markup.oneliner.th(self.founders.headers))
html.thead.close()

html.tbody.open()
for founder in self.founders:
html.tr(markup.oneliner.td(founder))
html.tbody.close()

html.table.close()
html = str(html)

self.assertEqual(html, self.founders.html)
self.assertEqual(self.founders_html, self.founders.html.replace('\n', ''))

def test_html_export_none_value(self):
"""HTML export"""

html = markup.page()
html.table.open()
html.thead.open()

html.tr(markup.oneliner.th(['foo', '', 'bar']))
html.thead.close()

html.tbody.open()
html.tr(markup.oneliner.td(['foo', '', 'bar']))
html.tbody.close()

html.table.close()
html = str(html)

headers = ['foo', None, 'bar']
d = tablib.Dataset(['foo', None, 'bar'], headers=headers)
d = tablib.Dataset(['foø', None, 'bar'], headers=headers)
expected = (
"<table>"
"<thead>"
"<tr><th>foo</th><th></th><th>bar</th></tr>"
"</thead>"
"<tbody>"
"<tr><td>foø</td><td></td><td>bar</td></tr>"
"</tbody>"
"</table>"
)
self.assertEqual(expected, d.html.replace('\n', ''))

self.assertEqual(html, d.html)
def test_html_databook_export(self):
book = tablib.Databook()
book.add_sheet(self.founders)
book.add_sheet(self.founders)
self.maxDiff = None
self.assertEqual(
book.html.replace('\n', ''),
f"<h3>Founders</h3>{self.founders_html}<h3>Founders</h3>{self.founders_html}"
)


class RSTTests(BaseTestCase):
Expand Down