Skip to content

Commit

Permalink
add github_projects.rb example
Browse files Browse the repository at this point in the history
  • Loading branch information
flyerhzm committed Sep 12, 2009
1 parent 9c663d9 commit dd8ef2e
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions example/github_projects.rb
@@ -0,0 +1,19 @@
require 'rubygems'
require 'regexp_crawler'

# Example: crawl the project pages linked from http://github.com/flyerhzm and
# print each project's title, description and the start of its body.

# Callback handed to the crawler as :save_method. Prints a separator, the
# page, the captured title and description, and the first 101 characters of
# the captured body followed by "...".
# NOTE(review): `page` is presumably the URL of the crawled page -- confirm
# against the regexp_crawler documentation.
report_project = Proc.new do |result, page|
  puts '============================='
  puts page
  puts result[:title]
  puts result[:description]
  excerpt = result[:body][0..100]
  puts excerpt + "..."
end

# Callback handed to the crawler as :need_parse. Returns false when the
# response body contains the text "Fork of", true otherwise.
# NOTE(review): presumably this makes the crawler skip forked repositories --
# confirm against the regexp_crawler documentation.
not_a_fork = Proc.new do |_page, response_body|
  !response_body.include?("Fork of")
end

crawler = RegexpCrawler::Crawler.new(
  :start_page => "http://github.com/flyerhzm",
  :continue_regexp => %r{<div class="title"><b><a href="(/flyerhzm/.*?/tree)">}m,
  :capture_regexp => %r{<a href="http://github.com/flyerhzm/.*?/tree">(.*?)</a>.*<span id="repository_description".*?>(.*?)</span>.*(<div class="(?:wikistyle|plain)">.*?</div>)</div>}m,
  # Names for the three capture groups of :capture_regexp, in order.
  :named_captures => ['title', 'description', 'body'],
  :save_method => report_project,
  :need_parse => not_a_fork
)
crawler.start

0 comments on commit dd8ef2e

Please sign in to comment.