diff --git a/lib/parse_source.rb b/lib/parse_source.rb
index 3ab3707..6898e5d 100755
--- a/lib/parse_source.rb
+++ b/lib/parse_source.rb
@@ -2,7 +2,7 @@
require 'hpricot'
require 'yaml'
require 'logger'
-log = Logger.new(LOG_FILE, 5, 10*1024)
+log = Logger.new(File.join(File.dirname(__FILE__), '../tmp/parse_source.log'), 5, 10*1024)
exceptions = []
log.info "\n"
@@ -29,12 +29,12 @@
original_html = content.to_original_html.sub("
", '')[0..-7]
dictionary['review_id'] = review_id.strip.to_i
[
- ['catalog_id', 'h1'],
- ['authors', 'h4'],
- ['review_title', 'h2'],
- ['bibliography', 'p.biblio'],
- ['reviewer', 'p strong'],
- ].each do |key, selector|
+ ['catalog_id', 'h1', nil],
+ ['authors', 'h4', nil],
+ ['review_title', 'h2', nil],
+ ['bibliography', 'p.biblio', nil],
+ ['reviewer', 'p strong', /\A *Reviewed by(.*)\Z/],
+ ].each do |key, selector, regex|
node = (content/"#{selector}:first-of-type")
dictionary[key] = node[0].inner_html.strip
if key == 'reviewer'
@@ -42,6 +42,9 @@
else
original_html.sub!(node[0].to_original_html,'')
end
+ if regex
+ dictionary[key] = dictionary[key].sub(regex, '\1').strip
+ end
end
(content/"div#hr:first-of-type").each {|n| original_html.sub!(n.to_original_html, '')}
@@ -57,7 +60,7 @@
# dictionary['bibliography'] = (content/"p.biblio").first.inner_html.strip
# dictionary['reviewer'] = (content/"p strong").first.inner_html.strip
# review_content = (content/"div#hr").first.following_siblings.collect{|sib| sib.to_original_html}.join("\n")
- dictionary['content'] = original_html
+ dictionary['content'] = original_html.strip
end
File.open(File.join(File.dirname(__FILE__), "../src/yml/review-#{review_id}.yml"), 'w+') do |file|
file.puts YAML.dump(dictionary)
diff --git a/lib/render.rb b/lib/render.rb
index 11f5782..81c6057 100755
--- a/lib/render.rb
+++ b/lib/render.rb
@@ -4,7 +4,6 @@
require 'yaml'
Dir.glob(File.join(File.dirname(__FILE__), "../src/yml/*.yml")).each do |filename|
- puts "Processing #{filename}"
object = YAML.load_file(filename)
buffer = ERB.new(File.read(File.join(File.dirname(__FILE__), '../src/template.erb.html'))).result(binding)
target_filename = File.join(File.dirname(__FILE__), "../src/output/review-#{object['review_id']}.html")
diff --git a/src/template.erb.html b/src/template.erb.html
index 4b92d5c..f24e739 100644
--- a/src/template.erb.html
+++ b/src/template.erb.html
@@ -54,7 +54,7 @@
<%= object['authors'] %>
<%= object['review_title'] %>
<%= object['bibliography'] %>
-
<%= object['reviewer'] %>
+
Reviewed by <%= object['reviewer'] %>
<%= object['content'] %>