Skip to content

Commit

Permalink
chore: Use nh3 lib instead of bleach (#23862)
Browse files Browse the repository at this point in the history
  • Loading branch information
EugeneTorap authored and eschutho committed Dec 2, 2023
1 parent dabf7cb commit b258f0b
Show file tree
Hide file tree
Showing 10 changed files with 46 additions and 37 deletions.
8 changes: 2 additions & 6 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@ bcrypt==4.0.1
# via paramiko
billiard==3.6.4.0
# via celery
bleach==3.3.1
# via apache-superset
brotli==1.0.9
# via flask-compress
cachelib==0.4.1
Expand Down Expand Up @@ -171,14 +169,15 @@ marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
msgpack==1.0.2
# via apache-superset
nh3==0.2.11
# via apache-superset
numpy==1.23.5
# via
# apache-superset
# pandas
# pyarrow
packaging==21.3
# via
# bleach
# deprecation
pandas==1.5.3
# via apache-superset
Expand Down Expand Up @@ -249,7 +248,6 @@ simplejson==3.17.3
# via apache-superset
six==1.16.0
# via
# bleach
# click-repl
# isodate
# jsonschema
Expand Down Expand Up @@ -294,8 +292,6 @@ vine==5.0.0
# kombu
wcwidth==0.2.5
# via prompt-toolkit
webencodings==0.5.1
# via bleach
werkzeug==2.3.3
# via
# apache-superset
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,sqlalchemy_bigquery,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml
known_third_party =alembic,apispec,backoff,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,nh3,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources,polyline,prison,progress,pyarrow,sqlalchemy_bigquery,pyhive,pyparsing,pytest,pytest_mock,pytz,redis,requests,selenium,setuptools,simplejson,slack,sqlalchemy,sqlalchemy_utils,sqlparse,typing_extensions,urllib3,werkzeug,wtforms,wtforms_json,yaml
multi_line_output = 3
order_by_type = false

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ def get_git_sha() -> str:
},
install_requires=[
"backoff>=1.8.0",
"bleach>=3.0.2, <4.0.0",
"cachelib>=0.4.1,<0.5",
"celery>=5.2.2, <6.0.0",
"click>=8.0.3",
Expand All @@ -100,6 +99,7 @@ def get_git_sha() -> str:
"isodate",
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
"nh3>=0.2.11, <0.3",
"numpy==1.23.5",
"pandas>=1.5.3, <1.6",
"parsedatetime",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -486,10 +486,10 @@ class Chart extends React.Component {

{/*
This usage of dangerouslySetInnerHTML is safe since it is being used to render
markdown that is sanitized with bleach. See:
markdown that is sanitized with nh3. See:
https://github.com/apache/superset/pull/4390
and
https://github.com/apache/superset/commit/b6fcc22d5a2cb7a5e92599ed5795a0169385a825
https://github.com/apache/superset/pull/23862
*/}
{isExpanded && slice.description_markeddown && (
<div
Expand Down
27 changes: 15 additions & 12 deletions superset/reports/notifications/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from email.utils import make_msgid, parseaddr
from typing import Any, Dict, Optional

import bleach
import nh3
from flask_babel import gettext as __

from superset import app
Expand All @@ -35,10 +35,10 @@

logger = logging.getLogger(__name__)

TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"]
TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"]
TABLE_TAGS = {"table", "th", "tr", "td", "thead", "tbody", "tfoot"}
TABLE_ATTRIBUTES = {"colspan", "rowspan", "halign", "border", "class"}

ALLOWED_TAGS = [
ALLOWED_TAGS = {
"a",
"abbr",
"acronym",
Expand All @@ -54,13 +54,14 @@
"p",
"strong",
"ul",
] + TABLE_TAGS
}.union(TABLE_TAGS)

ALLOWED_TABLE_ATTRIBUTES = {tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS}
ALLOWED_ATTRIBUTES = {
"a": ["href", "title"],
"abbr": ["title"],
"acronym": ["title"],
**{tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS},
"a": {"href", "title"},
"abbr": {"title"},
"acronym": {"title"},
**ALLOWED_TABLE_ATTRIBUTES,
}


Expand Down Expand Up @@ -108,7 +109,8 @@ def _get_content(self) -> EmailContent:
}

# Strip any malicious HTML from the description
description = bleach.clean(
# pylint: disable=no-member
description = nh3.clean(
self._content.description or "",
tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
Expand All @@ -117,12 +119,13 @@ def _get_content(self) -> EmailContent:
# Strip malicious HTML from embedded data, allowing only table elements
if self._content.embedded_data is not None:
df = self._content.embedded_data
html_table = bleach.clean(
# pylint: disable=no-member
html_table = nh3.clean(
df.to_html(na_rep="", index=True, escape=True),
# pandas will escape the HTML in cells already, so passing
# more allowed tags here will not work
tags=TABLE_TAGS,
attributes=TABLE_ATTRIBUTES,
attributes=ALLOWED_TABLE_ATTRIBUTES,
)
else:
html_table = ""
Expand Down
2 changes: 1 addition & 1 deletion superset/sqllab/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class SqlLabException(SupersetException):
failed_reason_msg: str
suggestion_help_msg: Optional[str]

def __init__(
def __init__( # pylint: disable=too-many-arguments
self,
sql_json_execution_context: SqlJsonExecutionContext,
error_type: Optional[SupersetErrorType] = None,
Expand Down
12 changes: 8 additions & 4 deletions superset/tasks/async_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,10 @@ def load_chart_data_into_cache(
except Exception as ex:
# TODO: QueryContext should support SIP-40 style errors
error = (
ex.message if hasattr(ex, "message") else str(ex)
) # pylint: disable=no-member
ex.message # pylint: disable=no-member
if hasattr(ex, "message")
else str(ex)
)
errors = [{"message": error}]
async_query_manager.update_job(
job_metadata, async_query_manager.STATUS_ERROR, errors=errors
Expand Down Expand Up @@ -160,8 +162,10 @@ def load_explore_json_into_cache( # pylint: disable=too-many-locals
errors = ex.errors # pylint: disable=no-member
else:
error = (
ex.message if hasattr(ex, "message") else str(ex)
) # pylint: disable=no-member
ex.message # pylint: disable=no-member
if hasattr(ex, "message")
else str(ex)
)
errors = [error]

async_query_manager.update_job(
Expand Down
4 changes: 2 additions & 2 deletions superset/utils/async_query_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,5 +192,5 @@ def update_job(
logger.debug("********** logging event data to stream %s", scoped_stream_name)
logger.debug(event_data)

self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit)
self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose)
self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) # type: ignore
self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) # type: ignore

Check warning on line 196 in superset/utils/async_query_manager.py

View check run for this annotation

Codecov / codecov/patch

superset/utils/async_query_manager.py#L195-L196

Added lines #L195 - L196 were not covered by tests
17 changes: 10 additions & 7 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@
from urllib.parse import unquote_plus
from zipfile import ZipFile

import bleach
import markdown as md
import nh3
import numpy as np
import pandas as pd
import sqlalchemy as sa
Expand Down Expand Up @@ -664,7 +664,7 @@ def error_msg_from_exception(ex: Exception) -> str:


def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
safe_markdown_tags = [
safe_markdown_tags = {
"h1",
"h2",
"h3",
Expand All @@ -690,10 +690,10 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
"dt",
"img",
"a",
]
}
safe_markdown_attrs = {
"img": ["src", "alt", "title"],
"a": ["href", "alt", "title"],
"img": {"src", "alt", "title"},
"a": {"href", "alt", "title"},
}
safe = md.markdown(
raw or "",
Expand All @@ -703,7 +703,8 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
"markdown.extensions.codehilite",
],
)
safe = bleach.clean(safe, safe_markdown_tags, safe_markdown_attrs)
# pylint: disable=no-member
safe = nh3.clean(safe, tags=safe_markdown_tags, attributes=safe_markdown_attrs)
if markup_wrap:
safe = Markup(safe)
return safe
Expand Down Expand Up @@ -1149,7 +1150,9 @@ def merge_extra_form_data(form_data: Dict[str, Any]) -> None:
append_adhoc_filters: List[AdhocFilterClause] = extra_form_data.get(
"adhoc_filters", []
)
adhoc_filters.extend({"isExtra": True, **fltr} for fltr in append_adhoc_filters) # type: ignore
adhoc_filters.extend(
{"isExtra": True, **fltr} for fltr in append_adhoc_filters # type: ignore
)
if append_filters:
for key, value in form_data.items():
if re.match("adhoc_filter.*", key):
Expand Down
5 changes: 4 additions & 1 deletion tests/unit_tests/notifications/email_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,8 @@ def test_render_description_with_html() -> None:
._get_content()
.body
)
assert '<p>This is <a href="#">a test</a> alert</p><br>' in email_body
assert (
'<p>This is <a href="#" rel="noopener noreferrer">a test</a> alert</p><br>'
in email_body
)
assert '<td>&lt;a href="http://www.example.com"&gt;333&lt;/a&gt;</td>' in email_body

0 comments on commit b258f0b

Please sign in to comment.