Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

cleanup and finish gemifying

  • Loading branch information...
commit 6ce929e99bd81d219e9c239c46a7bcd6452b1246 1 parent a422918
@cantino cantino authored
View
2  README
@@ -6,4 +6,4 @@ http://lab.arc90.com/experiments/readability/
Given a html document, it pulls out the main body text and cleans it up.
-Ruby port by starrhorne and iterationlabs. Gemification by fizx.
+Ruby port by starrhorne, libc, and iterationlabs. Gemification by fizx.
View
10 Rakefile
@@ -5,11 +5,11 @@ begin
require 'jeweler'
Jeweler::Tasks.new do |gem|
gem.name = "ruby-readability"
- gem.summary = %Q{ruby-readability}
- gem.description = %Q{ruby-readability}
- gem.email = "kmaxwell@twitter.com"
- gem.homepage = "http://github.com/fizx/ruby-readability"
- gem.authors = ["Kyle Maxwell"]
+ gem.summary = %Q{Port of arc90's readability project to ruby}
+ gem.description = %Q{Port of arc90's readability project to ruby}
+ gem.email = "andrew@iterationlabs.com"
+ gem.homepage = "http://github.com/iterationlabs/ruby-readability"
+ gem.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
gem.add_development_dependency "rspec", ">= 1.2.9"
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
end
View
74 lib/readability_old.rb
@@ -1,74 +0,0 @@
-require 'rubygems'
-require 'nokogiri'
-
-module Readability
- class Document
-
- def initialize(input, options = {})
- @options = options
- @html = Nokogiri::HTML(input, nil, 'UTF-8')
- end
-
-
- def content
-
- # Get all parent elements containing a <p> tag
- @parents = @html.css("p").map { |p| p.parent }.compact.uniq
-
- sanitize(@parents.map { |p| [p, score(p)] }.max { |a, b| a[1] <=> b[1] }[0])
-
- end
-
- def score(parent)
- s = 0
-
- # Adjust score based on parent's "class" attribute
- s -= 50 if parent[:class] =~ /(comment|meta|footer|footnote)/i
- s += 25 if parent[:class] =~ /((^|\s)(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)(\s|$))/i
-
- # Adjust score based on parent id
- s -= 50 if parent[:id] =~ /(comment|meta|footer|footnote)/i
- s += 25 if parent[:id] =~ /^(post|hentry|entry[-]?(content|text|body)?|article[-]?(content|text|body)?)$/i
-
- # Adjust score based on # of <p> elements inside parent
- s += parent.css("p").size
-
- # Adjust score based on # of commas inside parent
- s += parent.text.count ","
-
- s
- end
-
- def sanitize(node)
-
- # Get rid of divs full of non-text items
- node.css("div").each do |el|
- counts = %w[p img li a embed].inject({}) { |m, kind| m[kind] = el.css(kind).length; m }
- el.remove if (el.text.count(",") < 10) && (counts["p"] == 0 || counts["embed"] > 0 || counts["a"] > counts["p"] || counts["li"] > counts["p"] || counts["img"] > counts["p"])
- end
-
- # We'll sanitize all elements using a whitelist
- whitelist = @options[:tags] || %w[div p]
-
- # Use a hash for speed (don't want to make a million calls to include?)
- whitelist = Hash[ whitelist.zip([true] * whitelist.size) ]
-
- ([node] + node.css("*")).each do |el|
-
- # If element is in whitelist, delete all its attributes
- if whitelist[el.node_name]
- el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
-
- # Otherwise, replace the element with its contents
- else
- el.swap(el.text)
- end
-
- end
-
- # Get rid of duplicate whitespace
- node.to_html.gsub(/[\r\n\f]+/, "\n" ).gsub(/[\t ]+/, " ").gsub(/&nbsp;/, " ")
- end
-
- end
-end
View
72 ruby-readability.gemspec
@@ -0,0 +1,72 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
+# -*- encoding: utf-8 -*-
+
+Gem::Specification.new do |s|
+ s.name = %q{ruby-readability}
+ s.version = "0.2.0"
+
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+ s.authors = ["Andrew Cantino", "starrhorne", "libc", "Kyle Maxwell"]
+ s.date = %q{2010-10-01}
+ s.default_executable = %q{readability}
+ s.description = %q{Port of arc90's readability project to ruby}
+ s.email = %q{andrew@iterationlabs.com}
+ s.executables = ["readability"]
+ s.extra_rdoc_files = [
+ "README"
+ ]
+ s.files = [
+ ".document",
+ ".gitignore",
+ "README",
+ "Rakefile",
+ "VERSION",
+ "bin/readability",
+ "lib/readability.rb",
+ "lib/readability_old.rb",
+ "ruby-readability.gemspec",
+ "spec/fixtures/cant_read.html",
+ "spec/fixtures/sample.html",
+ "spec/fixtures/samples/blogpost_with_links-fragments.rb",
+ "spec/fixtures/samples/blogpost_with_links.html",
+ "spec/fixtures/samples/channel4-1-fragments.rb",
+ "spec/fixtures/samples/channel4-1.html",
+ "spec/fixtures/samples/foxnews-india1-fragments.rb",
+ "spec/fixtures/samples/foxnews-india1.html",
+ "spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb",
+ "spec/fixtures/samples/globemail-ottawa-cuts.html",
+ "spec/fixtures/should_not_truncate.txt",
+ "spec/readability_spec.rb",
+ "spec/spec.opts",
+ "spec/spec_helper.rb"
+ ]
+ s.homepage = %q{http://github.com/iterationlabs/ruby-readability}
+ s.rdoc_options = ["--charset=UTF-8"]
+ s.require_paths = ["lib"]
+ s.rubygems_version = %q{1.3.7}
+ s.summary = %q{Port of arc90's readability project to ruby}
+ s.test_files = [
+ "spec/fixtures/samples/blogpost_with_links-fragments.rb",
+ "spec/fixtures/samples/channel4-1-fragments.rb",
+ "spec/fixtures/samples/foxnews-india1-fragments.rb",
+ "spec/fixtures/samples/globemail-ottawa-cuts-fragments.rb",
+ "spec/readability_spec.rb",
+ "spec/spec_helper.rb"
+ ]
+
+ if s.respond_to? :specification_version then
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+ s.specification_version = 3
+
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
+ else
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
+ end
+ else
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
+ end
+end
+
Please sign in to comment.
Something went wrong with that request. Please try again.