Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

ScrAPI 2.0.0 adds support for Ruby 1.9.2 using Tidy FFI, thanks to the great work of Christoph Lupprich.
  • Loading branch information...
commit 08f207ed740660bdf65730dd6bd3cb4df64e6d4b 1 parent 22d4901
@assaf authored
View
3  .gitignore
@@ -1 +1,2 @@
-pkg
+.bundle
+*.gem
View
4 CHANGELOG
@@ -1,3 +1,7 @@
+Version 2.0.0 (November 10, 2010)
+
+* Ruby 1.9.2 support using Tidy FFI, by Christoph Lupprich.
+
Version 1.2.1 (Upcoming)
* Added: Cheat sheets.
View
2  Gemfile
@@ -0,0 +1,2 @@
+source :rubygems
+gemspec
View
21 Gemfile.lock
@@ -0,0 +1,21 @@
+PATH
+ remote: .
+ specs:
+ scrapi (1.2.2)
+ tidy_ffi (>= 0.1.2)
+
+GEM
+ remote: http://rubygems.org/
+ specs:
+ ffi (0.6.3)
+ rake (>= 0.8.7)
+ rake (0.8.7)
+ tidy_ffi (0.1.3)
+ ffi (>= 0.3.5)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ scrapi!
+ tidy_ffi (>= 0.1.2)
View
10 README.rdoc
@@ -42,7 +42,13 @@ svn co http://labnotes.org/svn/public/ruby/scrapi
== Version of Ruby
-Currently ScrAPI does not run with Ruby 1.9.2, but with the dev versions of Ruby 1.9.3. This is due to a bug in Ruby's visibility context handling (see changelog #29578 and bug #3406 on the official Ruby page). Using the most recent dev version of Ruby is easy with RVM (http://rvm.beginrescueend.com/).
+ScrAPI 1.2.x is tested with Ruby 1.8.6 and 1.8.7, but will not work on Ruby 1.9.x.
+
+ScrAPI 2.0.x switches to TidyFFI to run on Ruby 1.9.2 and newer.
+
+Due to a bug in Ruby's visibility context handling (see changelog #29578 and bug
+#3406 on the official Ruby page), you need to declare all result attributes
+explicitly, using the result method or attr_reader/attr_accessor.
== Using TIDY
@@ -90,4 +96,4 @@ HTML DOM extracted from Rails, Copyright (c) 2004 David Heinemeier Hansson. Unde
HTML parser by Takahiro Maebashi and Katsuyuki Komatsu, Ruby license.
http://www.jin.gr.jp/~nahi/Ruby/html-parser/README.html
-Porting to Ruby 1.9.x by Christoph Lupprich, http://lupprich.info
+Porting to Ruby 1.9.x by Christoph Lupprich, http://lupprich.info
View
27 Rakefile
@@ -3,7 +3,6 @@ require "rubygems"
require "rake"
require "rake/testtask"
require "rake/rdoctask"
-require "rake/gempackagetask"
spec = Gem::Specification.load(File.join(File.dirname(__FILE__), 'scrapi.gemspec'))
@@ -25,8 +24,28 @@ Rake::TestTask.new(:test) do |test|
test.pattern = "test/**/*_test.rb"
test.verbose = true
end
+task :default=>:test
-gem = Rake::GemPackageTask.new(spec) do |pkg|
- pkg.need_tar = true
- pkg.need_zip = true
+
+spec = Gem::Specification.load(Dir["*.gemspec"].first)
+
+desc "Build the Gem"
+task :build do
+ sh "gem build #{spec.name}.gemspec"
+end
+
+desc "Install #{spec.name} locally"
+task :install=>:build do
+ sudo = "sudo" unless File.writable?( Gem::ConfigMap[:bindir])
+ sh "#{sudo} gem install #{spec.name}-#{spec.version}.gem"
+end
+
+desc "Push new release to gemcutter and git tag"
+task :push=>["test", "build"] do
+ sh "git push"
+ puts "Tagging version #{spec.version} .."
+ sh "git tag v#{spec.version}"
+ sh "git push --tag"
+ puts "Building and pushing gem .."
+ sh "gem push #{spec.name}-#{spec.version}.gem"
end
View
5 scrapi.gemspec
@@ -1,6 +1,6 @@
Gem::Specification.new do |spec|
spec.name = 'scrapi'
- spec.version = '1.2.2'
+ spec.version = '2.0.0'
spec.summary = "scrAPI toolkit for Ruby. Uses CSS selectors to write easy, maintainable HTML scraping rules."
spec.description = <<-EOF
scrAPI is an HTML scraping toolkit for Ruby. It uses CSS selectors to write easy, maintainable scraping rules to select, extract and store data from HTML content.
@@ -14,9 +14,10 @@ EOF
spec.require_path = 'lib'
spec.autorequire = 'scrapi.rb'
spec.requirements << 'Tidy_ffi'
+ spec.required_ruby_version = '>= 1.9.1'
spec.has_rdoc = true
spec.rdoc_options << '--main' << 'README.rdoc' << '--title' << "scrAPI toolkit for Ruby" << '--line-numbers'
spec.extra_rdoc_files = ['README.rdoc']
- spec.add_dependency 'tidy_ffy', '>=0.1.2'
+ spec.add_dependency 'tidy_ffi', '>=0.1.2'
end
View
5 test/scraper_test.rb
@@ -287,6 +287,7 @@ def test_skip_from_extractor
scraper = new_scraper(html) do
process "#1", :this1=>:text
process "#1", :this2=>:text
+ attr_reader :this1, :this2
end
scraper.scrape
assert_equal "this", scraper.this1
@@ -295,6 +296,7 @@ def test_skip_from_extractor
scraper = new_scraper(html) do
process "#1", :this1=>:text, :skip=>false
process "#1", :this2=>:text
+ attr_reader :this1, :this2
end
scraper.scrape
assert_equal "this", scraper.this1
@@ -305,6 +307,7 @@ def test_skip_from_extractor
element
end
process "#1", :this2=>:text
+ attr_reader :this1, :this2
end
scraper.scrape
assert_equal "this", scraper.this1
@@ -563,6 +566,7 @@ def test_multi_value_extractors
process "h1", [:text, :kls]=>Scraper.define {
process "*", :text=>:text, :kls=>"@class"
}
+ attr_reader :text, :kls
end
result = scraper.scrape
assert "first", result.text
@@ -618,6 +622,7 @@ def test_accessors_from_extractor
scraper = new_scraper(DIVS_ST_ND) do
process_first "div", :div_id=>"@id", :div_text=>:text
+ attr_reader :div_id, :div_text
end
value = scraper.scrape
assert_equal "1", value.div_id
Please sign in to comment.
Something went wrong with that request. Please try again.