Permalink
Browse files

r3610@pinealgland: sven | 2006-04-02 19:46:06 +0200

 added htree cleaning of html if htree is available


git-svn-id: svn://78.47.249.61/ruby-mediawiki/trunk@55 ba9c31aa-a806-0410-9a81-9f13d15ee83b
  • Loading branch information...
1 parent dc45b40 commit af22ff7f80f0b8e5c557cb76a50034d6a966489a sven committed Apr 2, 2006
Showing with 21 additions and 5 deletions.
  1. +20 −4 lib/mediawiki/article.rb
  2. +1 −1 lib/mediawiki/specialpage.rb
@@ -1,4 +1,9 @@
-require 'rexml/document'
+begin
+ require 'htree'
+rescue LoadError
+ STDERR.puts( 'htree library missing. Cannot sanitize HTML.' )
+ require 'rexml/document'
+end
module MediaWiki
##
@@ -57,7 +62,7 @@ def xhtml
# will be automatically done by Article#xhtml if not already cached.
def xhtml_reload
html = @wiki.browser.get_content("#{@wiki.article_url(full_name, @section)}")
- @xhtml = REXML::Document.new(html).root
+ @xhtml = to_rexml( html )
@xhtml_cached = true
end
@@ -71,7 +76,7 @@ def reload
end
def parse(html)
- doc = REXML::Document.new(html).root
+ doc = to_rexml( html )
# does not work for MediaWiki 1.4.x and is always the same name you ask for under 1.5.x
# @name = doc.elements['//span[@class="editHelp"]/a'].attributes['title']
form = doc.elements['//form[@name="editform"]']
@@ -149,12 +154,23 @@ def unprotect(reason)
# result:: [Array] of [String] Article names
def what_links_here
res = []
- links = REXML::Document.new(@wiki.browser.get_content(@wiki.article_url("Spezial:Whatlinkshere/#{full_name}"))).root
+ links = to_rexml(@wiki.browser.get_content(@wiki.article_url("Spezial:Whatlinkshere/#{full_name}")))
links.each_element('//div[@id="bodyContent"]//ul/li/a') { |a|
res << a.attributes['title']
}
res
end
+
+ protected
# Parses a markup string and returns the root element of the resulting
# REXML document.
#
# If the HTree() parser function is available (i.e. the optional htree
# library was loaded), the markup is parsed by HTree and then converted to
# REXML. Otherwise the string is handed to REXML::Document directly.
#
# html:: [String] markup to parse
# result:: root element of the parsed document (the value of <tt>#root</tt>)
def to_rexml( html )
  # Probe for the HTree() function itself rather than searching $" for the
  # literal 'htree.rb': since Ruby 1.9 $" holds expanded absolute paths, so
  # that lookup never matches and the HTree branch would never be taken.
  # The second argument makes respond_to? see private methods as well.
  rexml = if respond_to?( :HTree, true )
    HTree( html ).to_rexml
  else
    REXML::Document.new( html )
  end
  rexml.root
end
+
end
end
@@ -11,7 +11,7 @@ class SpecialPage < Article
def xhtml_reload
html = @wiki.browser.get_content("#{@wiki.article_url(@name, @section)}")
html.scan(/<!-- start content -->(.+)<!-- end content -->/m) { |content,|
- @xhtml = REXML::Document.new("<xhtml>#{content}</xhtml>").root
+ @xhtml = to_rexml( "<xhtml>#{content}</xhtml>" )
}
@xhtml_cached = true
end

0 comments on commit af22ff7

Please sign in to comment.