Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
197 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,6 @@ class Link | |
field :depth, :type => Integer | ||
|
||
|
||
embeds_many :pages | ||
has_many :pages | ||
|
||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
require 'logger' | ||
|
||
LOG = Logger.new(STDOUT) | ||
|
||
### AUX classes ### | ||
# Small stopwatch for measuring elapsed time, plus helpers that render the
# current Unix timestamp as strings.
#
# Elapsed time is measured with the monotonic clock so that wall-clock
# adjustments (NTP, manual changes) cannot produce negative or skewed
# durations.
class Stopwatch
  # Creates a stopwatch that is already running.
  def initialize
    start
  end

  # (Re)starts the stopwatch.
  #
  # @return [Float] the monotonic clock reading taken as the start mark
  def start
    @t0 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # Takes the end mark and reports how long the stopwatch has been running.
  # The stopwatch keeps its start mark, so this may be called repeatedly.
  #
  # @return [Float] seconds elapsed since the last call to #start
  def end
    @t1 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    @t1 - @t0
  end

  # @return [String] the current Unix timestamp (whole seconds) as a string
  def self.ts
    Time.now.to_i.to_s
  end

  # Splits the current Unix timestamp string into two parts.
  #
  # @return [Array<String>] the first seven digits, then the remaining digits
  def self.ts2
    t = ts
    [t.slice(0..6), t.slice(7..-1)]
  end
end
|
||
require 'digest'

# Grab-bag of hashing, logging and process helpers.
# Logging goes through the file-level LOG logger.
class Util
  # @param str [String] data to hash
  # @return [String] SHA-512 digest of +str+ as a lowercase hex string
  def self.hexsha512(str)
    Digest::SHA512.hexdigest(str)
  end

  # @param str [String] data to hash
  # @return [String] raw (binary) SHA-512 digest of +str+
  def self.sha512(str)
    Digest::SHA512.digest(str)
  end

  # @param str [String] data to hash
  # @return [String] SHA-384 digest of +str+ as a lowercase hex string
  def self.hexsha384(str)
    Digest::SHA384.hexdigest(str)
  end

  # @param str [String] data to hash
  # @return [String] raw (binary) SHA-384 digest of +str+
  def self.sha384(str)
    Digest::SHA384.digest(str)
  end

  # Writes one log line of the form "<time>|<sequence number>|<message>".
  #
  # @param str [#to_s, nil] message; nil is logged as an empty message
  # @param error_level [Integer] values above 1 are logged as warnings,
  #   everything else as info
  def self.log(str, error_level = 1)
    @@counter ||= 0
    @@counter += 1
    # Interpolation (rather than String#+) lets callers pass exceptions,
    # numbers or nil without triggering a TypeError.
    line = "#{Time.now}|#{@@counter}|#{str}"
    if error_level > 1
      LOG.warn line
    else
      LOG.info line
    end
  end

  # Logs a farewell message and terminates the process with status 0.
  def self.exit
    log "exiting..."
    # Must be Kernel.exit: a bare `exit 0` here would resolve to this very
    # method and fail with ArgumentError instead of exiting.
    Kernel.exit 0
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
require_relative 'db.rb' | ||
require_relative 'models.rb' | ||
require_relative 'conf.rb' | ||
|
||
|
||
# Seed the database with 1000 links that have random 8-letter uppercase URLs
# and state 0.
1000.times do |i|
  link = Link.new
  # 26 letters: 65 ('A') through 90 ('Z'). rand(25) could never yield 'Z'.
  link.url = Array.new(8) { (65 + rand(26)).chr }.join
  link.state = 0
  # Only report success when the record was actually persisted.
  # NOTE(review): assumes Link#save returns a falsy value on failure (as
  # Mongoid/ActiveModel-style models do) - confirm against models.rb.
  if link.save
    puts "#{i} saved!"
  else
    warn "#{i} NOT saved!"
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
require_relative 'db.rb' | ||
require_relative 'models.rb' | ||
require_relative 'conf.rb' | ||
require_relative 'aux.rb' | ||
require 'bunny' | ||
require 'yaml' | ||
require 'logger' | ||
|
||
|
||
require 'open-uri'
require 'time'
require 'timeout'

# Worker that pops link jobs from the "links" queue, downloads each link and
# hands the resulting page to #report.
class Linkworker
  # Connects to the message broker on +host+ and binds the "links" queue to
  # the "links" exchange.
  #
  # @param host [String] broker host name
  def initialize(host)
    @cnt = 0
    @bunny = Bunny.new(:logging => false, :host => host)
    @bunny.start
    @exch = @bunny.exchange("links")
    @queue = @bunny.queue("links")
    @queue.bind(@exch, :key => "links")

    # @bunny2 = Bunny.new(:host=>host)
    # @exch2 = @bunny.exchange("pages")
    # @queue2 = @bunny.queue("pages")
    # @queue2 = @queue2.bind(@exch2, :key=>"pages")
  end

  # Processes links forever.
  def run
    loop { process_one_link }
  end

  # Placeholder for publishing a downloaded page; currently does nothing.
  #
  # @param page [Page] the downloaded page
  def report(page)
  end

  # Takes one job from the queue and downloads it. When the queue has no job
  # the worker logs that and sleeps for five seconds.
  def process_one_link
    link = get_a_job
    if link.nil?
      Util.log "no more job, sleep for a while"
      sleep 5
      return
    end

    page = dl link
    # Only report pages that were actually downloaded.
    report page unless page.nil?
  end

  # Downloads +link+, trying up to +remain_times+ times.
  #
  # @param link [#url] the link to download
  # @param remain_times [Integer] maximum number of attempts
  # @return [Page, nil] the populated page, or nil when every attempt failed
  def dl(link, remain_times = 3)
    attempts = [remain_times, 0].max
    attempts.times do |attempt|
      begin
        return fetch_page(link)
      rescue => e
        Util.log "Error in dl|#{link.url}|RETRYING#{attempts - attempt - 1}|#{e}", 2
      end
    end
    Util.log "Error in DL #{link.url} really failed after #{attempts} times", 2
    nil
  end

  # Pops one item from the queue.
  #
  # @return [Object, nil] the deserialized payload when the payload is a
  #   String, nil otherwise
  def get_a_job
    item = @queue.pop
    return nil unless item[:payload].is_a? String

    # NOTE(review): YAML.load deserializes arbitrary objects on older
    # Ruby/Psych versions; only safe if everything publishing to this queue
    # is trusted. Consider a restricted loader if that is not guaranteed.
    YAML.load item[:payload]
  end

  private

  # Performs a single download attempt. Any failure (network error, timeout,
  # unsupported URL) propagates to the caller.
  #
  # @param link [#url] the link to download
  # @return [Page] page populated from the HTTP response
  def fetch_page(link)
    page = Page.new
    page.link = link
    Util.log "DL #{link.url}"
    sw = Stopwatch.new
    Timeout.timeout(Conf.time(:network)) do
      URI.parse(link.url).open do |f|
        @doc = f.read
        page.charset = f.charset
        page.mime = f.content_type
        page.code = f.status[0].to_i

        expires_at = parse_http_time(f.meta["expires"])
        page.expires_at = expires_at unless expires_at.nil?

        page.etag = f.meta["etag"]
        page.lm_at = f.last_modified unless f.last_modified.nil?
      end
      page.response_time = (sw.end * 1000).floor
    end
    page
  end

  # Best-effort parsing of a time-valued header.
  #
  # @param value [String, nil] raw header value
  # @return [Time, nil] the parsed time, or nil when absent or unparseable
  def parse_http_time(value)
    return nil if value.nil? || value.empty?

    Time.parse(value)
  rescue StandardError
    nil
  end
end
|
||
# Start a worker against the local broker and process links until killed.
Linkworker.new("localhost").run
|