Permalink
Browse files

ScrAPI 2.0.0 adds support for Ruby 1.9.2 using Tidy FFI, thanks to

the great work of Christoph Lupprich.
  • Loading branch information...
assaf committed Nov 10, 2010
1 parent 22d4901 commit 08f207ed740660bdf65730dd6bd3cb4df64e6d4b
Showing with 68 additions and 9 deletions.
  1. +2 −1 .gitignore
  2. +4 −0 CHANGELOG
  3. +2 −0 Gemfile
  4. +21 −0 Gemfile.lock
  5. +8 −2 README.rdoc
  6. +23 −4 Rakefile
  7. +3 −2 scrapi.gemspec
  8. +5 −0 test/scraper_test.rb
View
@@ -1 +1,2 @@
-pkg
+.bundle
+*.gem
View
@@ -1,3 +1,7 @@
+Version 2.0.0 (November 10, 2010)
+
+* Ruby 1.9.2 support using Tidy FFI, by Christoph Lupprich.
+
Version 1.2.1 (Upcoming)
* Added: Cheat sheets.
View
@@ -0,0 +1,2 @@
+source :rubygems
+gemspec
View
@@ -0,0 +1,21 @@
+PATH
+ remote: .
+ specs:
+ scrapi (1.2.2)
+ tidy_ffi (>= 0.1.2)
+
+GEM
+ remote: http://rubygems.org/
+ specs:
+ ffi (0.6.3)
+ rake (>= 0.8.7)
+ rake (0.8.7)
+ tidy_ffi (0.1.3)
+ ffi (>= 0.3.5)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ scrapi!
+ tidy_ffi (>= 0.1.2)
View
@@ -42,7 +42,13 @@ svn co http://labnotes.org/svn/public/ruby/scrapi
== Version of Ruby
-Currently ScrAPI does not run with Ruby 1.9.2, but with the dev versions of Ruby 1.9.3. This is due to a bug in Ruby's visibility context handling (see changelog #29578 and bug #3406 on the official Ruby page). Using the most recent dev version of Ruby is easy with RVM (http://rvm.beginrescueend.com/).
+ScrAPI 1.2.x was tested with Ruby 1.8.6 and 1.8.7, but will not work on Ruby 1.9.x.
+
+ScrAPI 2.0.x switches to TidyFFI to run on Ruby 1.9.2 and newer.
+
+Due to a bug in Ruby's visibility context handling (see changelog #29578 and bug
+#3406 on the official Ruby page), you need to declare all result attributes
+explicitly, using the result method or attr_reader/attr_accessor.
== Using TIDY
@@ -90,4 +96,4 @@ HTML DOM extracted from Rails, Copyright (c) 2004 David Heinemeier Hansson. Unde
HTML parser by Takahiro Maebashi and Katsuyuki Komatsu, Ruby license.
http://www.jin.gr.jp/~nahi/Ruby/html-parser/README.html
-Porting to Ruby 1.9.x by Christoph Lupprich, http://lupprich.info
+Porting to Ruby 1.9.x by Christoph Lupprich, http://lupprich.info
View
@@ -3,7 +3,6 @@ require "rubygems"
require "rake"
require "rake/testtask"
require "rake/rdoctask"
-require "rake/gempackagetask"
spec = Gem::Specification.load(File.join(File.dirname(__FILE__), 'scrapi.gemspec'))
@@ -25,8 +24,28 @@ Rake::TestTask.new(:test) do |test|
test.pattern = "test/**/*_test.rb"
test.verbose = true
end
+task :default=>:test
-gem = Rake::GemPackageTask.new(spec) do |pkg|
- pkg.need_tar = true
- pkg.need_zip = true
+
+spec = Gem::Specification.load(Dir["*.gemspec"].first)
+
+desc "Build the Gem"
+task :build do
+ sh "gem build #{spec.name}.gemspec"
+end
+
+desc "Install #{spec.name} locally"
+task :install=>:build do
+ sudo = "sudo" unless File.writable?( Gem::ConfigMap[:bindir])
+ sh "#{sudo} gem install #{spec.name}-#{spec.version}.gem"
+end
+
+desc "Push new release to gemcutter and git tag"
+task :push=>["test", "build"] do
+ sh "git push"
+ puts "Tagging version #{spec.version} .."
+ sh "git tag v#{spec.version}"
+ sh "git push --tag"
+ puts "Building and pushing gem .."
+ sh "gem push #{spec.name}-#{spec.version}.gem"
end
View
@@ -1,6 +1,6 @@
Gem::Specification.new do |spec|
spec.name = 'scrapi'
- spec.version = '1.2.2'
+ spec.version = '2.0.0'
spec.summary = "scrAPI toolkit for Ruby. Uses CSS selectors to write easy, maintainable HTML scraping rules."
spec.description = <<-EOF
scrAPI is an HTML scraping toolkit for Ruby. It uses CSS selectors to write easy, maintainable scraping rules to select, extract and store data from HTML content.
@@ -14,9 +14,10 @@ EOF
spec.require_path = 'lib'
spec.autorequire = 'scrapi.rb'
spec.requirements << 'Tidy_ffi'
+ spec.required_ruby_version = '>= 1.9.1'
spec.has_rdoc = true
spec.rdoc_options << '--main' << 'README.rdoc' << '--title' << "scrAPI toolkit for Ruby" << '--line-numbers'
spec.extra_rdoc_files = ['README.rdoc']
- spec.add_dependency 'tidy_ffy', '>=0.1.2'
+ spec.add_dependency 'tidy_ffi', '>=0.1.2'
end
View
@@ -287,6 +287,7 @@ def test_skip_from_extractor
scraper = new_scraper(html) do
process "#1", :this1=>:text
process "#1", :this2=>:text
+ attr_reader :this1, :this2
end
scraper.scrape
assert_equal "this", scraper.this1
@@ -295,6 +296,7 @@ def test_skip_from_extractor
scraper = new_scraper(html) do
process "#1", :this1=>:text, :skip=>false
process "#1", :this2=>:text
+ attr_reader :this1, :this2
end
scraper.scrape
assert_equal "this", scraper.this1
@@ -305,6 +307,7 @@ def test_skip_from_extractor
element
end
process "#1", :this2=>:text
+ attr_reader :this1, :this2
end
scraper.scrape
assert_equal "this", scraper.this1
@@ -563,6 +566,7 @@ def test_multi_value_extractors
process "h1", [:text, :kls]=>Scraper.define {
process "*", :text=>:text, :kls=>"@class"
}
+ attr_reader :text, :kls
end
result = scraper.scrape
assert "first", result.text
@@ -618,6 +622,7 @@ def test_accessors_from_extractor
scraper = new_scraper(DIVS_ST_ND) do
process_first "div", :div_id=>"@id", :div_text=>:text
+ attr_reader :div_id, :div_text
end
value = scraper.scrape
assert_equal "1", value.div_id

0 comments on commit 08f207e

Please sign in to comment.