Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Use nh3 lib instead of bleach #23862

Merged
merged 4 commits into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ bcrypt==4.0.1
# via paramiko
billiard==3.6.4.0
# via celery
bleach==3.3.1
# via apache-superset
brotli==1.0.9
# via flask-compress
cachelib==0.4.1
Expand Down Expand Up @@ -182,6 +180,8 @@ mdurl==0.1.2
# via markdown-it-py
msgpack==1.0.2
# via apache-superset
nh3==0.2.11
# via apache-superset
numpy==1.23.5
# via
# apache-superset
Expand All @@ -191,7 +191,6 @@ ordered-set==4.1.0
# via flask-limiter
packaging==21.3
# via
# bleach
# deprecation
# limits
pandas==1.5.3
Expand Down Expand Up @@ -265,7 +264,6 @@ simplejson==3.17.3
# via apache-superset
six==1.16.0
# via
# bleach
# click-repl
# isodate
# jsonschema
Expand Down Expand Up @@ -308,8 +306,6 @@ vine==5.0.0
# kombu
wcwidth==0.2.5
# via prompt-toolkit
webencodings==0.5.1
# via bleach
werkzeug==2.1.2
# via
# flask
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,sqlalchemy_bigquery,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,nh3,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,sqlalchemy_bigquery,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def get_git_sha() -> str:
},
install_requires=[
"backoff>=1.8.0",
"bleach>=3.0.2, <4.0.0",
"cachelib>=0.4.1,<0.5",
"celery>=5.2.2, <6.0.0",
"click>=8.0.3",
Expand Down Expand Up @@ -101,6 +100,7 @@ def get_git_sha() -> str:
"isodate",
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
"nh3>=0.2.11, <0.3",
"numpy==1.23.5",
"pandas>=1.5.3, <1.6",
"parsedatetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -459,10 +459,10 @@ class Chart extends React.Component {

{/*
This usage of dangerouslySetInnerHTML is safe since it is being used to render
markdown that is sanitized with bleach. See:
markdown that is sanitized with nh3. See:
https://github.com/apache/superset/pull/4390
and
https://github.com/apache/superset/commit/b6fcc22d5a2cb7a5e92599ed5795a0169385a825
https://github.com/apache/superset/pull/23862
*/}
{isExpanded && slice.description_markeddown && (
<div
Expand Down
27 changes: 15 additions & 12 deletions superset/reports/notifications/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from email.utils import make_msgid, parseaddr
from typing import Any, Dict, Optional

import bleach
import nh3
from flask_babel import gettext as __

from superset import app
Expand All @@ -35,10 +35,10 @@

logger = logging.getLogger(__name__)

TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"]
TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"]
TABLE_TAGS = {"table", "th", "tr", "td", "thead", "tbody", "tfoot"}
TABLE_ATTRIBUTES = {"colspan", "rowspan", "halign", "border", "class"}

ALLOWED_TAGS = [
ALLOWED_TAGS = {
"a",
"abbr",
"acronym",
Expand All @@ -54,13 +54,14 @@
"p",
"strong",
"ul",
] + TABLE_TAGS
}.union(TABLE_TAGS)

ALLOWED_TABLE_ATTRIBUTES = {tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS}
ALLOWED_ATTRIBUTES = {
"a": ["href", "title"],
"abbr": ["title"],
"acronym": ["title"],
**{tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS},
"a": {"href", "title"},
"abbr": {"title"},
"acronym": {"title"},
**ALLOWED_TABLE_ATTRIBUTES,
}


Expand Down Expand Up @@ -108,7 +109,8 @@ def _get_content(self) -> EmailContent:
}

# Strip any malicious HTML from the description
description = bleach.clean(
# pylint: disable=no-member
description = nh3.clean(
self._content.description or "",
tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
Expand All @@ -117,12 +119,13 @@ def _get_content(self) -> EmailContent:
# Strip malicious HTML from embedded data, allowing only table elements
if self._content.embedded_data is not None:
df = self._content.embedded_data
html_table = bleach.clean(
# pylint: disable=no-member
html_table = nh3.clean(
df.to_html(na_rep="", index=True, escape=True),
# pandas will escape the HTML in cells already, so passing
# more allowed tags here will not work
tags=TABLE_TAGS,
attributes=TABLE_ATTRIBUTES,
attributes=ALLOWED_TABLE_ATTRIBUTES,
)
else:
html_table = ""
Expand Down
13 changes: 7 additions & 6 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@
from urllib.parse import unquote_plus
from zipfile import ZipFile

import bleach
import markdown as md
import nh3
import numpy as np
import pandas as pd
import sqlalchemy as sa
Expand Down Expand Up @@ -664,7 +664,7 @@ def error_msg_from_exception(ex: Exception) -> str:


def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
safe_markdown_tags = [
safe_markdown_tags = {
"h1",
"h2",
"h3",
Expand All @@ -690,10 +690,10 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
"dt",
"img",
"a",
]
}
safe_markdown_attrs = {
"img": ["src", "alt", "title"],
"a": ["href", "alt", "title"],
"img": {"src", "alt", "title"},
"a": {"href", "alt", "title"},
}
safe = md.markdown(
raw or "",
Expand All @@ -703,7 +703,8 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
"markdown.extensions.codehilite",
],
)
safe = bleach.clean(safe, safe_markdown_tags, safe_markdown_attrs)
# pylint: disable=no-member
safe = nh3.clean(safe, tags=safe_markdown_tags, attributes=safe_markdown_attrs)
if markup_wrap:
safe = Markup(safe)
return safe
Expand Down
5 changes: 4 additions & 1 deletion tests/unit_tests/notifications/email_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,8 @@ def test_render_description_with_html() -> None:
._get_content()
.body
)
assert '<p>This is <a href="#">a test</a> alert</p><br>' in email_body
assert (
'<p>This is <a href="#" rel="noopener noreferrer">a test</a> alert</p><br>'
in email_body
)
assert '<td>&lt;a href="http://www.example.com"&gt;333&lt;/a&gt;</td>' in email_body
Loading