Permalink
Browse files

Initial checkin of gem based source

  • Loading branch information...
0 parents commit a7f6f131f10cb29aa062f614f85f341e9122a346 @UnderpantsGnome committed Mar 4, 2007
No changes.
No changes.
@@ -0,0 +1,11 @@
+Rakefile
+README.txt
+CHANGELOG.txt
+Manifest.txt
+setup.rb
+lib/hpricot_scrub/version.rb
+lib/hpricot_scrub.rb
+test/test_helper.rb
+test/scrubber_data.rb
+test/hpricot_scrub_test.rb
+examples/config.yml
@@ -0,0 +1,9 @@
+README for hpricot_scrub
+========================
+
+HpricotScrub is a wrapper around Hpricot that allows you to easily scrub HTML
+of tags and attributes you don't want in the final output.
+
+See examples/config.yml for a sample config file, or visit:
+
+http://underpantsgnome.com/2007/01/20/hpricot-scrub/
@@ -0,0 +1,54 @@
+require 'rubygems'
+require 'rake'
+require 'rake/clean'
+require 'rake/testtask'
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+require 'rake/rdoctask'
+require 'rake/contrib/rubyforgepublisher'
+require 'fileutils'
+require 'hoe'
+include FileUtils
+require File.join(File.dirname(__FILE__), 'lib', 'hpricot_scrub', 'version')
+
+AUTHOR = "UnderpantsGnome" # can also be an array of Authors
+EMAIL = "michael@underpantsgnome.com"
+DESCRIPTION = "Scrub HTML with Hpricot"
+GEM_NAME = "hpricot_scrub" # what people will type to install your gem
+RUBYFORGE_PROJECT = "hpricot_scrub" # The unix name for your project
+HOMEPATH = "http://trac.underpantsgnome.com/hpricot_scrub/"
+
+
+NAME = "hpricot_scrub"
+REV = nil # UNCOMMENT IF REQUIRED: File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
+VERS = ENV['VERSION'] || (HpricotScrub::VERSION::STRING + (REV ? ".#{REV}" : "")) # ENV['VERSION'] wins; otherwise library version, plus ".REV" when REV is set
+ CLEAN.include ['**/.*.sw?', '*.gem', '.config'] # extra patterns for the clean list (passed to hoe as clean_globs below)
+RDOC_OPTS = ['--quiet', '--title', "hpricot_scrub documentation",
+ "--opname", "index.html",
+ "--line-numbers",
+ "--main", "README",
+ "--inline-source"]
+
+class Hoe # reopened to override extra_deps
+ def extra_deps # returns @extra_deps without any entry whose name (first element) is 'hoe'
+ @extra_deps.reject { |x| Array(x).first == 'hoe' }
+ end
+end
+
+# Generate all the Rake tasks
+# Run 'rake -T' to see list of generated tasks (from gem root directory)
+hoe = Hoe.new(GEM_NAME, VERS) do |p|
+ p.author = AUTHOR
+ p.description = DESCRIPTION
+ p.email = EMAIL
+ p.summary = DESCRIPTION
+ p.url = HOMEPATH
+ p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
+ p.test_globs = ["test/**/*_test.rb"]
+ p.clean_globs = CLEAN #An array of file patterns to delete on clean.
+
+ # == Optional
+ #p.changes - A description of the release's latest changes.
+ p.extra_deps = ['hpricot', '>= 0.5']
+ #p.spec_extras - A hash of extra values to set in the gemspec.
+end
@@ -0,0 +1,47 @@
+
+---
+  :allow_tags: # these tags stay in the output; their attributes are filtered by :allow_attributes
+    - 'b'
+    - 'blockquote'
+    - 'br'
+    - 'div'
+    - 'h1'
+    - 'h2'
+    - 'h3'
+    - 'h4'
+    - 'h5'
+    - 'h6'
+    - 'hr'
+    - 'i'
+    - 'em'
+    - 'img'
+    - 'li'
+    - 'ol'
+    - 'p'
+    - 'pre'
+    - 'small'
+    - 'span'
+    - 'strike'
+    - 'strong'
+    - 'sub'
+    - 'sup'
+    - 'table'
+    - 'tbody'
+    - 'td'
+    - 'tfoot'
+    - 'thead'
+    - 'tr'
+    - 'u'
+    - 'ul'
+
+  :remove_tags: # completely removes everything between open and close tag
+    - 'form'
+    - 'script'
+
+  :allow_attributes: # let these attributes stay, strip all others
+    - 'src'
+    - 'font'
+    - 'alt'
+    - 'style'
+    - 'align'
@@ -0,0 +1 @@
+Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib } # require every .rb under hpricot_scrub/ next to this file, in sorted path order
@@ -0,0 +1,79 @@
+require 'rubygems'
+
+if defined?(Kernel::gem) # use Kernel#gem when the loaded RubyGems defines it
+ gem('hpricot', '>= 0.5')
+else # otherwise fall back to require_gem with the same version constraint
+ require_gem('hpricot', '>= 0.5')
+end
+
+require 'hpricot'
+
+module Hpricot
+ module Scrubable
+ def scrubable?
+ ! [Hpricot::Text, Hpricot::BogusETag].include?(self.class)
+ end
+ end
+
+ class Elements
+ def strip
+ each { |x| x.strip }
+ end
+
+ def strip_attributes(safe=[])
+ each { |x| x.strip_attributes(safe) }
+ end
+ end
+
+ class BaseEle
+ include Scrubable
+ end
+
+ class Elem
+ include Scrubable
+
+ def remove
+ parent.children.delete(self)
+ end
+
+ def strip
+ children.each { |c| c.strip if c.scrubable? }
+
+ if strip_removes?
+ remove
+ else
+ parent.replace_child self, Hpricot.make(inner_html) unless parent.nil?
+ end
+ end
+
+ def strip_attributes(safe=[])
+ attributes.each {|atr|
+ remove_attribute(atr[0]) unless safe.include?(atr[0])
+ } unless attributes.nil?
+ end
+
+ def strip_removes?
+ # I'm sure there are others that shuould be ripped instead of stripped
+ attributes && attributes['type'] =~ /script|css/
+ end
+ end
+
+ class Doc
+ def scrub(config={})
+ config = {
+ :remove_tags => [],
+ :allow_tags => [],
+ :allow_attributes => []
+ }.merge(config)
+
+ config[:remove_tags].each { |tag| (self/tag).remove }
+ config[:allow_tags].each { |tag|
+ (self/tag).strip_attributes(config[:allow_attributes])
+ }
+ children.reverse.each {|e|
+ e.strip if e.scrubable? && ! config[:allow_tags].include?(e.name)
+ }
+ self
+ end
+ end
+end
@@ -0,0 +1,9 @@
+module HpricotScrub #:nodoc:
+ module VERSION #:nodoc:
+ MAJOR = 0
+ MINOR = 1
+ TINY = 0
+
+ STRING = [MAJOR, MINOR, TINY].join('.') # dotted "MAJOR.MINOR.TINY" string, here "0.1.0"
+ end
+end
Oops, something went wrong.

0 comments on commit a7f6f13

Please sign in to comment.