Skip to content

Commit

Permalink
Release #302 and #313.
Browse files Browse the repository at this point in the history
  • Loading branch information
danielballan committed Nov 8, 2018
2 parents 6928782 + 541ff72 commit 446ae83
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 18 deletions.
25 changes: 23 additions & 2 deletions web_monitoring/diffing_server.py
Expand Up @@ -307,16 +307,37 @@ def _extract_encoding(headers, content):
prolog_match = XML_PROLOG_PATTERN.search(content, endpos=2048)
if prolog_match:
encoding = prolog_match.group(1).decode('ascii', errors='ignore')
# Handle common mistakes and errors in encoding names
if encoding == 'iso-8559-1':
encoding = 'iso-8859-1'
# Windows-1252 content is so commonly mislabeled as ISO-8859-1 that WHATWG
# recommends treating an ISO-8859-1 label as Windows-1252:
# https://encoding.spec.whatwg.org/#names-and-labels
if encoding == 'iso-8859-1' and 'html' in content_type:
encoding = 'windows-1252'
return encoding


def _decode_body(response, name, raise_if_binary=True):
    """
    Decode the body of a response into text.

    The encoding is taken from ``_extract_encoding`` (given the response's
    headers and body); if that yields nothing, UTF-8 is assumed. Bytes that
    cannot be decoded are replaced with U+FFFD rather than raising.

    Parameters
    ----------
    response
        Object with ``headers`` and ``body`` attributes; ``body`` must
        support ``.decode(encoding, errors=...)``.
    name : str
        Label for this response, used only in the error message.
    raise_if_binary : bool, optional
        If True (the default), raise when more than 25% of the decoded
        characters are U+FFFD replacement characters.

    Returns
    -------
    str
        The decoded text, with null characters replaced by U+FFFD.

    Raises
    ------
    UndecodableContentError
        If ``raise_if_binary`` is set and more than a quarter of the decoded
        text is replacement characters.
    """
    encoding = _extract_encoding(response.headers, response.body) or 'UTF-8'
    try:
        text = response.body.decode(encoding, errors='replace')
    except LookupError:
        # If the encoding we found isn't known, fall back to ascii
        text = response.body.decode('ascii', errors='replace')

    # Measure once, before any replacement below; an empty body is returned
    # early so the ratio check never divides by zero.
    text_length = len(text)
    if text_length == 0:
        return text

    # Replace null terminators; some differs (especially those written in C)
    # don't handle them well in the middle of a string.
    text = text.replace('\u0000', '\ufffd')

    # If a significantly large portion of the document was totally undecodable,
    # it's likely this wasn't text at all, but binary data.
    if raise_if_binary and text.count('\ufffd') / text_length > 0.25:
        raise UndecodableContentError(f'The response body of `{name}` could not be decoded as {encoding}.')

    return text


Expand Down
27 changes: 15 additions & 12 deletions web_monitoring/html_diff_render.py
Expand Up @@ -299,10 +299,13 @@ def html_diff_render(a_text, b_text, a_headers=None, b_headers=None,

color_palette = get_color_palette()
change_styles.string = f'''
ins, ins > * {{text-decoration: none; background-color:
{color_palette['differ_insertion']};}}
del, del > * {{text-decoration: none; background-color:
{color_palette['differ_deletion']};}}'''
ins.wm-diff, ins.wm-diff > * {{background-color:
{color_palette['differ_insertion']} !important;
all: unset;}}
del.wm-diff, del.wm-diff > * {{background-color:
{color_palette['differ_deletion']} !important;
all: unset;}}
script {{display: none !important;}}'''
soup.head.append(change_styles)

soup.body.replace_with(diff_body)
Expand Down Expand Up @@ -351,9 +354,9 @@ def _html_for_dmp_operation(operation):
"Convert a diff-match-patch operation to an HTML string."
html_value = html.escape(operation[1])
if operation[0] == -1:
return f'<del>{html_value}</del>'
return f'<del class="wm-diff">{html_value}</del>'
elif operation[0] == 1:
return f'<ins>{html_value}</ins>'
return f'<ins class="wm-diff">{html_value}</ins>'
else:
return html_value

Expand Down Expand Up @@ -1118,7 +1121,7 @@ def merge_changes(change_chunks, doc, tag_type='ins'):

doc.append(f'</{tag_type}>')
doc.append(chunk)
doc.append(f'<{tag_type}>')
doc.append(f'<{tag_type} class="wm-diff">')

# other side of the malformed document case from above
current_content.reverse()
Expand All @@ -1144,7 +1147,7 @@ def merge_changes(change_chunks, doc, tag_type='ins'):
inline_tag_name = name

if depth == 0:
doc.append(f'<{tag_type}>')
doc.append(f'<{tag_type} class="wm-diff">')
depth += 1
current_content = []

Expand Down Expand Up @@ -1389,7 +1392,7 @@ def merge_change_groups(change_chunks, doc, tag_type=None):
# doc.append(group)
# <end> not sure if we should break the group
if tag_type:
group.append(f'<{tag_type}>')
group.append(f'<{tag_type} class="wm-diff">')

# other side of the malformed document case from above
current_content.reverse()
Expand Down Expand Up @@ -1420,7 +1423,7 @@ def merge_change_groups(change_chunks, doc, tag_type=None):
group = []
doc.append(group)
if tag_type:
group.append(f'<{tag_type}>')
group.append(f'<{tag_type} class="wm-diff">')
depth += 1
current_content = []

Expand Down Expand Up @@ -1582,7 +1585,7 @@ def tag_info(token):
# if we have a hanging delete buffer (with content, not just HTML
# DOM structure), clean it up and insert it before moving on.
# FIXME: this should not look explicitly for `<del>`
if '<del>' in delete_buffer:
if '<del class="wm-diff">' in delete_buffer:
for tag in delete_tag_stack:
delete_buffer.append(f'</{tag[0]}>')
document.extend(delete_buffer)
Expand Down Expand Up @@ -1627,7 +1630,7 @@ def tag_info(token):
# Add any hanging buffer of deletes that never got completed, but only if
# it has salient changes in it.
# FIXME: this should not look explicitly for `<del>`
if '<del>' in delete_buffer:
if '<del class="wm-diff">' in delete_buffer:
for tag in delete_tag_stack:
delete_buffer.append(f'</{tag[0]}>')
document.extend(delete_buffer)
Expand Down
9 changes: 5 additions & 4 deletions web_monitoring/links_diff.py
Expand Up @@ -125,10 +125,11 @@ def links_diff_html(a_text, b_text, a_headers=None, b_headers=None,
background-color: {color_palette['differ_insertion']};}}
[wm-deleted] > td {{
background-color: {color_palette['differ_deletion']};}}
ins {{ text-decoration: none;
background-color: {color_palette['differ_insertion']};}}
del {{ text-decoration: none;
background-color: {color_palette['differ_deletion']};}}"""
ins.wm-diff {{ background-color: {color_palette['differ_insertion']}
!important; all: unset;}}
del.wm-diff {{ background-color: {color_palette['differ_deletion']}
!important; all: unset;}}
script {{display: none !important;}}"""

soup.head.append(change_styles)
soup.title.string = get_title(diff['b_parsed'])
Expand Down
Empty file.
Binary file added web_monitoring/tests/fixtures/has_null_byte.txt
Binary file not shown.
25 changes: 25 additions & 0 deletions web_monitoring/tests/fixtures/unknown_encoding.html
@@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<html>
<head>
<meta http-equiv="refresh" content="60; url=http://radar2pub.bldr.ncep.noaa.gov">
<meta http-equiv="Content-Type" content="text/html; charset=who-knows-what-i-am">
<meta name="Author" content="David S Alden (GMSI)">
<title>NWS Level II Radar Receive Status</title>
<link rel="stylesheet" type="text/css" href="mon.css">
</head>
<body>
<font face="arial,helvetica" size=+2><b>NWS Level II Radar Receive Status as of Tue Jan 17 14:14:06 UTC 2017</b></font><br>
<br>Key: Green=Up (Lvl2&lt;5 min); Yellow=Warning (5&lt;=Lvl2&lt;30 min); Orange=Down (Lvl2&amp;Lvl3&gt;10 min); Red=Down (Lvl2&gt;=30 min)<br>
SiteID: Black=Ok (-1&lt;=Latency&lt;=60 sec); White=Anomaly (-1&gt;Latency&gt;60 sec)<br>
SiteCodes: 01=Legacy Msg1; 02=Legacy Msg31; 03=Super-Res; 04=Recombined; 05=DP w/o SuperRes; 06=DP w Super Res; 07=DP Recombined<br><br>
<p><b><font face="arial,helvetica">Eastern Region Radar Sites - Last receipt of data</font></b>
<table BORDER=0 CELLSPACING=4><tr>
<td ALIGN=CENTER BGCOLOR="#33FF33" class=black id=blacklink TITLE="Wakefield VA" VALIGN=middle><b><A HREF="site/kakq.html" TARGET="_blank">KAKQ</a></b><span class=black>06<br>14:13:02</span></td>
<td ALIGN=CENTER BGCOLOR="#33FF33" class=black id=blacklink TITLE="Binghamton NY" VALIGN=middle><b><A HREF="site/kbgm.html" TARGET="_blank">KBGM</a></b><span class=black>06<br>14:13:03</span></td>
<td ALIGN=CENTER BGCOLOR="#FF0000" class=black id=blacklink TITLE="Boston MA" VALIGN=middle><b><A HREF="site/kbox.html" TARGET="_blank">KBOX</a></b><span class=black>06<br>13:12:47</span></td>
<td ALIGN=CENTER BGCOLOR="#33FF33" class=black id=blacklink TITLE="Buffalo NY" VALIGN=middle><b><A HREF="site/kbuf.html" TARGET="_blank">KBUF</a></b><span class=black>06<br>14:13:04</span></td>
</tr></table>
<font face="Arial,Helvetica">44 sites up (97.8%) of 45 radar sites monitored</font>
<p><font face="arial,helvetica"> 151 sites up (96%) of 158 total radar sites monitored</font>
</body>
</html>
14 changes: 14 additions & 0 deletions web_monitoring/tests/test_diffing_server_exc_handling.py
Expand Up @@ -188,6 +188,10 @@ def test_cors_origin_header(self):
'Origin': 'http://two.com'})
assert response.headers.get('Access-Control-Allow-Origin') == 'http://two.com'

def test_decode_empty_bodies(self):
    """Decoding the `empty.txt` fixture must complete without raising."""
    empty_response = mock_tornado_request('empty.txt')
    df._decode_body(empty_response, 'a')

def test_poorly_encoded_content(self):
response = mock_tornado_request('poorly_encoded_utf8.txt')
df._decode_body(response, 'a')
Expand All @@ -204,6 +208,16 @@ def test_fetch_undecodable_content(self):
self.json_check(response)
assert response.code == 422

def test_treats_unknown_encoding_as_ascii(self):
    """Decoding the `unknown_encoding.html` fixture must not raise."""
    unknown_response = mock_tornado_request('unknown_encoding.html')
    df._decode_body(unknown_response, 'a')

def test_diff_content_with_null_bytes(self):
    """Diffing the `has_null_byte.txt` fixture against itself returns 200."""
    # Both sides of the diff point at the same fixture file.
    url = ('/html_source_dmp?format=json&'
           f'a=file://{fixture_path("has_null_byte.txt")}&'
           f'b=file://{fixture_path("has_null_byte.txt")}')
    result = self.fetch(url)
    assert result.code == 200


def mock_diffing_method(c_body):
return
Expand Down

0 comments on commit 446ae83

Please sign in to comment.