Skip to content

Commit

Permalink
Refactor testing to use unittest. 100% code coverage. Bugfixes.
Browse files Browse the repository at this point in the history
- HTMLSyntaxError is now raised when the input ends before an attribute
value's closing quote is reached
- fix: classes are now kept in the order that they were specified
- fix: generated styles no longer contain extraneous whitespace
  • Loading branch information
dcollien committed Apr 28, 2015
1 parent 3d4b1ab commit cfb83ca
Show file tree
Hide file tree
Showing 18 changed files with 558 additions and 350 deletions.
22 changes: 5 additions & 17 deletions .gitignore
Expand Up @@ -3,23 +3,9 @@

*.pyc

FilterHTML.egg-info/PKG-INFO

FilterHTML.egg-info/SOURCES.txt

FilterHTML.egg-info/top_level.txt

FilterHTML.egg-info/dependency_links.txt

build/lib/FilterHTML.py

dist/FilterHTML-0.0.2-py2.7.egg

dist/FilterHTML-0.0.3-py2.7.egg

dist/FilterHTML-0.0.3.macosx-10.8-intel.exe

dist/FilterHTML-0.0.3.tar.gz
FilterHTML.egg-info/
build/
dist/

*.egg

Expand All @@ -28,3 +14,5 @@ dist/FilterHTML-0.0.3.tar.gz
*.exe

npm-debug.log

.coverage
16 changes: 8 additions & 8 deletions FilterHTML.py
Expand Up @@ -193,7 +193,7 @@

MEASUREMENT_MATCH = re.compile(r'^(-?\d+(px|cm|pt|em|ex|pc|mm|in)?|\d+%)$')

# states for navigating script tags (with pesky less-than "<" signs)
# states for navigating script tags (tag body contains "<" signs)
"""
data
skip-data
Expand Down Expand Up @@ -495,9 +495,6 @@ def __filter_attribute(self, tag_name):
value = self.__filter_value(tag_name, attribute_name)
if value is None:
is_allowed = False

elif is_allowed and None not in self.spec[tag_name][attribute_name]:
is_allowed = False

elif self.curr_char not in self.attr_chars and self.curr_char != '>':
self.__next() # skip invalid characters
Expand All @@ -517,6 +514,7 @@ def __filter_value(self, tag_name, attribute_name):

while self.__next() != quote:
if self.curr_char == '':
raise HTMLSyntaxError('Attribute quote not closed: <' + tag_name + ' ' + attribute_name + '>')
break

value_chars.append(self.curr_char)
Expand Down Expand Up @@ -562,7 +560,8 @@ def __purify_attribute(self, attribute_name, value, rules):
if not purified:
if attribute_name == "class" and isinstance(rules, list):
candidate_values = value.split(' ')
allowed_values = set()
allowed_values_set = set()
allowed_values = []

for candidate in candidate_values:
for rule in rules:
Expand All @@ -574,8 +573,9 @@ def __purify_attribute(self, attribute_name, value, rules):
elif candidate == rule:
new_class_value = candidate

if new_class_value:
allowed_values.add(new_class_value)
if new_class_value and new_class_value not in allowed_values_set:
allowed_values_set.add(new_class_value)
allowed_values.append(new_class_value)


value = ' '.join(allowed_values)
Expand Down Expand Up @@ -652,7 +652,7 @@ def purify_style(self, style, rules):
else:
return None

return ': '.join([name, value])
return ':'.join([name, value])

def purify_color(self, value):
value = value.lower()
Expand Down
2 changes: 2 additions & 0 deletions BootstrapSpec.py → examples/bootstrap-2.spec.py
@@ -1,3 +1,5 @@
import re

HTML_WHITELIST = {
'div': {
'class': [
Expand Down
2 changes: 1 addition & 1 deletion lib/Bootstrap3Spec.js → examples/bootstrap-3.spec.js
@@ -1,4 +1,4 @@
var HTML_SPEC = {
var HTML_WHITELIST = {
'span': {
'class': [
'pull-left',
Expand Down
17 changes: 8 additions & 9 deletions lib/FilterHTML.js
Expand Up @@ -582,6 +582,10 @@ var FilterHTML = (function() {

while (this.next() !== quote) {
if (this.curr_char === '') {
throw {
name: 'HTML Syntax Error',
message: 'Attribute quote not closed: <' + tag_name + ' ' + attribute_name + '>'
};
break;
}

Expand Down Expand Up @@ -630,6 +634,7 @@ var FilterHTML = (function() {
if (attribute_name === "class" && Object.prototype.toString.call(rules) == '[object Array]') {
candidate_values = value.split(' ');
allowed_values_set = {};
allowed_values = [];

for (i = 0; i != candidate_values.length; ++i) {
for (rule_index = 0; rule_index != rules.length; ++rule_index) {
Expand All @@ -642,18 +647,12 @@ var FilterHTML = (function() {
new_class_value = candidate_values[i];
}

if (new_class_value) {
if (new_class_value && !allowed_values_set[new_class_value]) {
allowed_values_set[new_class_value] = true;
allowed_values.push(new_class_value);
}
}
}

allowed_values = [];
for (new_class_value in allowed_values_set) {
if (allowed_values_set.hasOwnProperty(new_class_value)) {
allowed_values.push(new_class_value);
}
}

value = allowed_values.join(' ');
} else if (attribute_name === "style" && Object.prototype.toString.call(rules) == '[object Object]') {
Expand Down Expand Up @@ -745,7 +744,7 @@ var FilterHTML = (function() {
return null;
}

return name + ': ' + value;
return name + ':' + value;
};

HTMLFilter.prototype.purify_color = function(value) {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -2,7 +2,7 @@
"author": "David Collien",
"name": "filterhtml",
"description": "FilterHTML: A whitelisting HTML filter for Python and JavaScript",
"version": "0.3.5",
"version": "0.4.0",
"repository": {
"url": "https://github.com/dcollien/FilterHTML"
},
Expand Down

0 comments on commit cfb83ca

Please sign in to comment.