-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawlable_pages.rb
55 lines (45 loc) · 1.18 KB
/
crawlable_pages.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# Loads a crawl-target definition file from DIR and turns it into a
# Crawlable. Also namespaces the Crawlable and CrawledPage value holders.
class CrawlablePages
  # Prefix prepended verbatim to every definition filename. It is a relative
  # path, so it is resolved against the process's current working directory.
  DIR = 'crawlables/'.freeze

  # Plain attribute holder describing one crawl target.
  class Crawlable
    attr_accessor :url, :links, :ignore_links, :main_divs, :score_divs

    # Every argument is optional and is stored as given, without validation.
    #
    # @param url          defaults to ''
    # @param links        defaults to nil
    # @param ignore_links defaults to nil
    # @param main_divs    defaults to a new empty Array
    # @param score_divs   defaults to a new empty Hash
    def initialize(url = '', links = nil, ignore_links = nil, main_divs = [],
                   score_divs = {})
      @url = url
      @links = links
      @ignore_links = ignore_links
      @main_divs = main_divs
      @score_divs = score_divs
    end
  end

  # Plain attribute holder for the result of crawling one page.
  class CrawledPage
    attr_accessor :url, :title, :page_html, :page_scores

    # Every argument is optional, defaults to '', and is stored as given.
    def initialize(url = '', title = '', page_html = '', page_scores = '')
      @url = url
      @title = title
      @page_html = page_html
      @page_scores = page_scores
    end
  end

  # @param filename [String] name of the definition file; appended to DIR
  # @param alt_url  when not nil, replaces the :url read from the file
  def initialize(filename, alt_url)
    @path = DIR + filename
    @alt_url = alt_url
  end

  # Reads the definition file, evaluates it as Ruby, and wraps the resulting
  # hash in a Crawlable.
  #
  # The :url, :links, :ignore_links, :main_divs and :score_divs entries are
  # passed positionally. A key that is absent from the hash is therefore
  # passed as an explicit nil, so Crawlable's own parameter defaults do NOT
  # apply to it.
  #
  # @return [Crawlable]
  # @raise [Errno::ENOENT] if the definition file does not exist
  def get_crawlable
    # SECURITY: the file's content is executed as arbitrary Ruby with this
    # object's binding. Only load definition files from a trusted location;
    # consider migrating to a data-only format parsed by a safe loader.
    attributes = eval(definition_source)
    attributes[:url] = @alt_url unless @alt_url.nil?

    Crawlable.new(attributes[:url], attributes[:links],
                  attributes[:ignore_links], attributes[:main_divs],
                  attributes[:score_divs])
  end

  private

  # Returns the definition file's text with every line whose first character
  # is '#' removed. Lines with whitespace before the '#' are kept.
  def definition_source
    File.foreach(@path).reject { |line| line.start_with?('#') }.join
  end
end