Skip to content

Commit

Permalink
Switch to MongoDB
Browse files Browse the repository at this point in the history
  • Loading branch information
changs committed Oct 20, 2011
1 parent bcd9097 commit e789e6b
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Expand Up @@ -10,3 +10,5 @@ gem 'dm-sqlite-adapter' , "1.1.0"
gem 'haml'
gem 'json', "~> 1.4.6"
gem 'thin'
gem 'mongodb'
gem 'bson_ext'
8 changes: 8 additions & 0 deletions Gemfile.lock
Expand Up @@ -6,6 +6,8 @@ GEM
nokogiri (>= 1.3.0)
robots (>= 0.7.2)
bcrypt-ruby (2.1.4)
bson (1.4.1)
bson_ext (1.4.1)
daemons (1.1.4)
data_mapper (1.1.0)
dm-aggregates (= 1.1.0)
Expand Down Expand Up @@ -57,6 +59,10 @@ GEM
haml (3.1.3)
json (1.4.6)
mime-types (1.16)
mongo (1.4.1)
bson (= 1.4.1)
mongodb (0.0.13)
mongo (~> 1.3)
nokogiri (1.5.0)
rack (1.3.4)
rack-protection (1.1.4)
Expand All @@ -81,10 +87,12 @@ PLATFORMS

DEPENDENCIES
anemone
bson_ext
data_mapper (= 1.1.0)
dm-sqlite-adapter (= 1.1.0)
haml
json (~> 1.4.6)
mongodb
nokogiri
rack (= 1.3.4)
rest-client
Expand Down
9 changes: 5 additions & 4 deletions crawler/crawler.rb
Expand Up @@ -17,7 +17,7 @@ def meta_refresh?(page)
end

# POSTs +params+ to +server_url+ as a JSON document using RestClient.
#
# server_url - target URL; callers in this file append a path such as
#              '/email' or '/link' before passing it in.
# params     - object responding to +to_json+ (callers pass a Hash); its JSON
#              form is sent as the request body.
#
# The request is sent with JSON :content_type and :accept options.
# Returns whatever RestClient.post returns.
#
# NOTE(review): this text is a rendered diff without +/- markers. The two
# RestClient.post lines below differ only by a space after the first comma and
# appear to be the removed and added versions of the same statement, not two
# separate requests -- confirm against the actual crawler/crawler.rb.
def send_to_server(server_url, params)
RestClient.post server_url,params.to_json, :content_type => :json, :accept => :json
RestClient.post server_url, params.to_json, :content_type => :json, :accept => :json
end

email_regex = /[\w+\-.]+@[a-z\d\-.]+\.[a-z]+/i
Expand Down Expand Up @@ -51,6 +51,7 @@ def send_to_server(server_url, params)
puts "Crawling on #{domain}"

Anemone.crawl(domain) do |anemone|
anemone.storage = Anemone::Storage.MongoDB
anemone.focus_crawl do |page|
page.links.select do |x|
x.to_s.downcase.include? domain.downcase
Expand Down Expand Up @@ -81,9 +82,9 @@ def send_to_server(server_url, params)
end
end

puts "Links: #{links.to_a}"
puts "Emails found in #{domain}"
p arr_mails.to_a
#puts "Links: #{links.to_a}"
#puts "Emails found in #{domain}"
#p arr_mails.to_a

send_to_server(server_url + '/email', { 'emails' => arr_mails.to_a, 'domain' => domain })
send_to_server(server_url + '/link', { 'url' => links.to_a })
Expand Down

0 comments on commit e789e6b

Please sign in to comment.