Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

added mswordy test for whitewash fragment

  • Loading branch information...
commit cc48a05da89879c9f09d306fd34e4a704467d4b6 1 parent b380843
@flavorjones flavorjones authored
Showing with 73 additions and 68 deletions.
  1. +1 −1  lib/dryopteris/sanitize.rb
  2. +72 −67 test/test_basic.rb
View
2  lib/dryopteris/sanitize.rb
@@ -23,7 +23,7 @@ def whitewash(string, encoding=nil)
return nil if string.nil?
return "" if string.strip.size == 0
- string = "<html><body>" + string + "</body></html>"
+ string = "<html><body>" + string + "</body></html>"
doc = Nokogiri::HTML.parse(string, nil, encoding)
body = doc.xpath("/html/body").first
return "" if body.nil?
View
139 test/test_basic.rb
@@ -8,6 +8,72 @@
class TestBasic < Test::Unit::TestCase
+ MSWORD_HTML = <<-EOHTML
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
+<w:WordDocument>
+ <w:View>Normal</w:View>
+ <w:Zoom>0</w:Zoom>
+ <w:PunctuationKerning/>
+ <w:ValidateAgainstSchemas/>
+ <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
+ <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
+ <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
+ <w:Compatibility>
+ <w:BreakWrappedTables/>
+ <w:SnapToGridInCell/>
+ <w:WrapTextWithPunct/>
+ <w:UseAsianBreakRules/>
+ <w:DontGrowAutofit/>
+ </w:Compatibility>
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
+</w:WordDocument>
+</xml><![endif]--><!--[if gte mso 9]><xml>
+<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
+</w:LatentStyles>
+</xml><![endif]--><style>
+<!--
+/* Style Definitions */
+p.MsoNormal, li.MsoNormal, div.MsoNormal
+{mso-style-parent:"";
+margin:0in;
+margin-bottom:.0001pt;
+mso-pagination:widow-orphan;
+font-size:12.0pt;
+font-family:"Times New Roman";
+mso-fareast-font-family:"Times New Roman";}
+@page Section1
+{size:8.5in 11.0in;
+margin:1.0in 1.25in 1.0in 1.25in;
+mso-header-margin:.5in;
+mso-footer-margin:.5in;
+mso-paper-source:0;}
+div.Section1
+{page:Section1;}
+-->
+</style><!--[if gte mso 10]>
+<style>
+/* Style Definitions */
+table.MsoNormalTable
+{mso-style-name:"Table Normal";
+mso-tstyle-rowband-size:0;
+mso-tstyle-colband-size:0;
+mso-style-noshow:yes;
+mso-style-parent:"";
+mso-padding-alt:0in 5.4pt 0in 5.4pt;
+mso-para-margin:0in;
+mso-para-margin-bottom:.0001pt;
+mso-pagination:widow-orphan;
+font-size:10.0pt;
+font-family:"Times New Roman";
+mso-ansi-language:#0400;
+mso-fareast-language:#0400;
+mso-bidi-language:#0400;}
+</style>
+<![endif]-->
+
+<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
+ EOHTML
+
def test_nil
assert_nil Dryopteris.sanitize(nil)
end
@@ -79,74 +145,13 @@ def test_whitewash_on_fragment
assert_equal "<p>safe</p><b>description</b>", whitewashed
end
- def test_whitewash_on_microsofty_markup
- html = <<-EOHTML
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
-<w:WordDocument>
- <w:View>Normal</w:View>
- <w:Zoom>0</w:Zoom>
- <w:PunctuationKerning/>
- <w:ValidateAgainstSchemas/>
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
- <w:Compatibility>
- <w:BreakWrappedTables/>
- <w:SnapToGridInCell/>
- <w:WrapTextWithPunct/>
- <w:UseAsianBreakRules/>
- <w:DontGrowAutofit/>
- </w:Compatibility>
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
-</w:WordDocument>
-</xml><![endif]--><!--[if gte mso 9]><xml>
-<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
-</w:LatentStyles>
-</xml><![endif]--><style>
-<!--
-/* Style Definitions */
-p.MsoNormal, li.MsoNormal, div.MsoNormal
-{mso-style-parent:"";
-margin:0in;
-margin-bottom:.0001pt;
-mso-pagination:widow-orphan;
-font-size:12.0pt;
-font-family:"Times New Roman";
-mso-fareast-font-family:"Times New Roman";}
-@page Section1
-{size:8.5in 11.0in;
-margin:1.0in 1.25in 1.0in 1.25in;
-mso-header-margin:.5in;
-mso-footer-margin:.5in;
-mso-paper-source:0;}
-div.Section1
-{page:Section1;}
--->
-</style><!--[if gte mso 10]>
-<style>
-/* Style Definitions */
-table.MsoNormalTable
-{mso-style-name:"Table Normal";
-mso-tstyle-rowband-size:0;
-mso-tstyle-colband-size:0;
-mso-style-noshow:yes;
-mso-style-parent:"";
-mso-padding-alt:0in 5.4pt 0in 5.4pt;
-mso-para-margin:0in;
-mso-para-margin-bottom:.0001pt;
-mso-pagination:widow-orphan;
-font-size:10.0pt;
-font-family:"Times New Roman";
-mso-ansi-language:#0400;
-mso-fareast-language:#0400;
-mso-bidi-language:#0400;}
-</style>
-<![endif]-->
-
-<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
- EOHTML
+ def test_whitewash_fragment_on_microsofty_markup
+ whitewashed = Dryopteris.whitewash(MSWORD_HTML.chomp)
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
+ end
- whitewashed = Dryopteris.whitewash_document(html)
+ def test_whitewash_on_microsofty_markup
+ whitewashed = Dryopteris.whitewash_document(MSWORD_HTML)
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed
end
Please sign in to comment.
Something went wrong with that request. Please try again.