diff --git a/lib/parse_source.rb b/lib/parse_source.rb index 3ab3707..6898e5d 100755 --- a/lib/parse_source.rb +++ b/lib/parse_source.rb @@ -2,7 +2,7 @@ require 'hpricot' require 'yaml' require 'logger' -log = Logger.new(LOG_FILE, 5, 10*1024) +log = Logger.new(File.join(File.dirname(__FILE__), '../tmp/parse_source.log'), 5, 10*1024) exceptions = [] log.info "\n" @@ -29,12 +29,12 @@ original_html = content.to_original_html.sub("
", '')[0..-7] dictionary['review_id'] = review_id.strip.to_i [ - ['catalog_id', 'h1'], - ['authors', 'h4'], - ['review_title', 'h2'], - ['bibliography', 'p.biblio'], - ['reviewer', 'p strong'], - ].each do |key, selector| + ['catalog_id', 'h1', nil], + ['authors', 'h4', nil], + ['review_title', 'h2', nil], + ['bibliography', 'p.biblio', nil], + ['reviewer', 'p strong', /\A *Reviewed by(.*)\Z/], + ].each do |key, selector, regex| node = (content/"#{selector}:first-of-type") dictionary[key] = node[0].inner_html.strip if key == 'reviewer' @@ -42,6 +42,9 @@ else original_html.sub!(node[0].to_original_html,'') end + if regex + dictionary[key] = dictionary[key].sub(regex, '\1').strip + end end (content/"div#hr:first-of-type").each {|n| original_html.sub!(n.to_original_html, '')} @@ -57,7 +60,7 @@ # dictionary['bibliography'] = (content/"p.biblio").first.inner_html.strip # dictionary['reviewer'] = (content/"p strong").first.inner_html.strip # review_content = (content/"div#hr").first.following_siblings.collect{|sib| sib.to_original_html}.join("\n") - dictionary['content'] = original_html + dictionary['content'] = original_html.strip end File.open(File.join(File.dirname(__FILE__), "../src/yml/review-#{review_id}.yml"), 'w+') do |file| file.puts YAML.dump(dictionary) diff --git a/lib/render.rb b/lib/render.rb index 11f5782..81c6057 100755 --- a/lib/render.rb +++ b/lib/render.rb @@ -4,7 +4,6 @@ require 'yaml' Dir.glob(File.join(File.dirname(__FILE__), "../src/yml/*.yml")).each do |filename| - puts "Processing #{filename}" object = YAML.load_file(filename) buffer = ERB.new(File.read(File.join(File.dirname(__FILE__), '../src/template.erb.html'))).result(binding) target_filename = File.join(File.dirname(__FILE__), "../src/output/review-#{object['review_id']}.html") diff --git a/src/template.erb.html b/src/template.erb.html index 4b92d5c..f24e739 100644 --- a/src/template.erb.html +++ b/src/template.erb.html @@ -54,7 +54,7 @@

<%= object['authors'] %>

<%= object['review_title'] %>

<%= object['bibliography'] %>

-

<%= object['reviewer'] %>

+

Reviewed by <%= object['reviewer'] %>


<%= object['content'] %>