Skip to content

Commit

Permalink
[AIRFLOW-3768] Escape search parameter in pagination controls
Browse files Browse the repository at this point in the history
The "minidom" we were using from lxml didn't cope with the > etc
entities (because it is an XML parser, not an HTML parser): rather than
special casing each one I have instead swapped out lxml-based parser for
BeautifulSoup which 1) handles these, and 2) is pure-python so is easier
to install :)
  • Loading branch information
ashb committed Mar 15, 2019
1 parent a3889c8 commit a7d8fe6
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 52 deletions.
45 changes: 20 additions & 25 deletions airflow/www/utils.py
Expand Up @@ -23,7 +23,6 @@
import inspect
import json
import time
import wtforms
import markdown
import re
import zipfile
Expand All @@ -39,6 +38,8 @@
from flask_appbuilder.models.sqla.interface import SQLAInterface
import flask_appbuilder.models.sqla.filters as fab_sqlafilters
import sqlalchemy as sqla
from six.moves.urllib.parse import urlencode

from airflow import configuration
from airflow.models import BaseOperator
from airflow.operators.subdag_operator import SubDagOperator
Expand Down Expand Up @@ -68,17 +69,11 @@ def should_hide_value_for_key(key_name):


def get_params(**kwargs):
params = []
for k, v in kwargs.items():
if k == 'showPaused':
# True is default or None
if v or v is None:
continue
params.append('{}={}'.format(k, v))
elif v:
params.append('{}={}'.format(k, v))
params = sorted(params, key=lambda x: x.split('=')[0])
return '&'.join(params)
if 'showPaused' in kwargs:
v = kwargs['showPaused']
if v or v is None:
kwargs.pop('showPaused')
return urlencode({d: v if v is not None else '' for d, v in kwargs.items()})


def generate_pages(current_page, num_of_pages,
Expand Down Expand Up @@ -109,27 +104,27 @@ def generate_pages(current_page, num_of_pages,
"""

void_link = 'javascript:void(0)'
first_node = """<li class="paginate_button {disabled}" id="dags_first">
first_node = Markup("""<li class="paginate_button {disabled}" id="dags_first">
<a href="{href_link}" aria-controls="dags" data-dt-idx="0" tabindex="0">&laquo;</a>
</li>"""
</li>""")

previous_node = """<li class="paginate_button previous {disabled}" id="dags_previous">
previous_node = Markup("""<li class="paginate_button previous {disabled}" id="dags_previous">
<a href="{href_link}" aria-controls="dags" data-dt-idx="0" tabindex="0">&lt;</a>
</li>"""
</li>""")

next_node = """<li class="paginate_button next {disabled}" id="dags_next">
next_node = Markup("""<li class="paginate_button next {disabled}" id="dags_next">
<a href="{href_link}" aria-controls="dags" data-dt-idx="3" tabindex="0">&gt;</a>
</li>"""
</li>""")

last_node = """<li class="paginate_button {disabled}" id="dags_last">
last_node = Markup("""<li class="paginate_button {disabled}" id="dags_last">
<a href="{href_link}" aria-controls="dags" data-dt-idx="3" tabindex="0">&raquo;</a>
</li>"""
</li>""")

page_node = """<li class="paginate_button {is_active}">
page_node = Markup("""<li class="paginate_button {is_active}">
<a href="{href_link}" aria-controls="dags" data-dt-idx="2" tabindex="0">{page_num}</a>
</li>"""
</li>""")

output = ['<ul class="pagination" style="margin-top:0px;">']
output = [Markup('<ul class="pagination" style="margin-top:0px;">')]

is_disabled = 'disabled' if current_page <= 0 else ''
output.append(first_node.format(href_link="?{}"
Expand Down Expand Up @@ -185,9 +180,9 @@ def is_current(current, page):
showPaused=showPaused)),
disabled=is_disabled))

output.append('</ul>')
output.append(Markup('</ul>'))

return wtforms.widgets.core.HTMLString('\n'.join(output))
return Markup('\n'.join(output))


def epoch(dttm):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -235,10 +235,10 @@ def write_version(filename=os.path.join(*['airflow',
+ cassandra + mongo

devel = [
'beautifulsoup4~=4.7.1',
'click==6.7',
'freezegun',
'jira',
'lxml>=4.0.0',
'mock',
'mongomock',
'moto==1.3.5',
Expand Down
78 changes: 52 additions & 26 deletions tests/www/test_utils.py
Expand Up @@ -18,12 +18,20 @@
# under the License.

from datetime import datetime
import unittest

from bs4 import BeautifulSoup
import mock
from xml.dom import minidom
import six
from six.moves.urllib.parse import parse_qs

from airflow.www import utils

if six.PY2:
# Need `assertRegex` back-ported from unittest2
import unittest2 as unittest
else:
import unittest


class UtilsTest(unittest.TestCase):
def test_empty_variable_should_not_be_hidden(self):
Expand All @@ -42,39 +50,43 @@ def test_sensitive_variable_should_be_hidden_ic(self):
def check_generate_pages_html(self, current_page, total_pages,
window=7, check_middle=False):
extra_links = 4 # first, prev, next, last
html_str = utils.generate_pages(current_page, total_pages)
search = "'>\"/><img src=x onerror=alert(1)>"
html_str = utils.generate_pages(current_page, total_pages,
search=search)

self.assertNotIn(search, html_str,
"The raw search string shouldn't appear in the output")
self.assertIn('search=%27%3E%22%2F%3E%3Cimg+src%3Dx+onerror%3Dalert%281%29%3E',
html_str)

self.assertTrue(
callable(html_str.__html__),
"Should return something that is HTML-escaping aware"
)

# dom parser has issues with special &laquo; and &raquo;
html_str = html_str.replace('&laquo;', '')
html_str = html_str.replace('&raquo;', '')
dom = minidom.parseString(html_str)
dom = BeautifulSoup(html_str, 'html.parser')
self.assertIsNotNone(dom)

ulist = dom.getElementsByTagName('ul')[0]
ulist_items = ulist.getElementsByTagName('li')
ulist = dom.ul
ulist_items = ulist.find_all('li')
self.assertEqual(min(window, total_pages) + extra_links, len(ulist_items))

def get_text(nodelist):
rc = []
for node in nodelist:
if node.nodeType == node.TEXT_NODE:
rc.append(node.data)
return ''.join(rc)

page_items = ulist_items[2:-2]
mid = int(len(page_items) / 2)
for i, item in enumerate(page_items):
a_node = item.getElementsByTagName('a')[0]
href_link = a_node.getAttribute('href')
node_text = get_text(a_node.childNodes)
a_node = item.a
href_link = a_node['href']
node_text = a_node.string
if node_text == str(current_page + 1):
if check_middle:
self.assertEqual(mid, i)
self.assertEqual('javascript:void(0)', a_node.getAttribute('href'))
self.assertIn('active', item.getAttribute('class'))
self.assertEqual('javascript:void(0)', href_link)
self.assertIn('active', item['class'])
else:
link_str = '?page=' + str(int(node_text) - 1)
self.assertEqual(link_str, href_link)
self.assertRegex(href_link, r'^\?', 'Link is page-relative')
query = parse_qs(href_link[1:])
self.assertListEqual(query['page'], [str(int(node_text) - 1)])
self.assertListEqual(query['search'], [search])

def test_generate_pager_current_start(self):
self.check_generate_pages_html(current_page=0,
Expand Down Expand Up @@ -106,10 +118,24 @@ def test_params_showPaused_false(self):
self.assertEqual('showPaused=False',
utils.get_params(showPaused=False))

def test_params_none_and_zero(self):
qs = utils.get_params(a=0, b=None)
# The order won't be consistent, but that doesn't affect behaviour of a browser
pairs = list(sorted(qs.split('&')))
self.assertListEqual(['a=0', 'b='], pairs)

def test_params_all(self):
"""Should return params string ordered by param key"""
self.assertEqual('page=3&search=bash_&showPaused=False',
utils.get_params(showPaused=False, page=3, search='bash_'))
query = utils.get_params(showPaused=False, page=3, search='bash_')
self.assertEqual(
{'page': ['3'],
'search': ['bash_'],
'showPaused': ['False']},
parse_qs(query)
)

def test_params_escape(self):
self.assertEqual('search=%27%3E%22%2F%3E%3Cimg+src%3Dx+onerror%3Dalert%281%29%3E',
utils.get_params(search="'>\"/><img src=x onerror=alert(1)>"))

def test_open_maybe_zipped_normal_file(self):
with mock.patch(
Expand Down

0 comments on commit a7d8fe6

Please sign in to comment.