finish trader joes

commit ab7f4955925c0aea8920b3cbec6acf3392aa1314 (1 parent: 97416f9)
@danchoi authored
Showing 3 changed files with 28 additions and 13 deletions.
  1. +11 −0 css/kindle.css
  2. +1 −1  lib/kindlefodder.rb
  3. +16 −12 recipes/trader_joes.rb
css/kindle.css
@@ -24,3 +24,14 @@ dd {
#toc .document {
text-indent: 2em;
}
+
+img.float-left {
+ float:left;
+ margin-right: 10px;
+}
+
+img {
+ text-align: center;
+ margin-right: auto;
+ margin-left: auto;
+}
lib/kindlefodder.rb
@@ -136,7 +136,7 @@ def download_images! doc
grayscale_image_path = "grayscale_images/#{img_file.gsub('%20', '_').sub(/(\.\w+)$/, "-grayscale.gif")}"
sleep 0.1
unless File.size?(grayscale_image_path)
- run_shell_command "convert images/#{img_file} -compose over -background white -flatten -type Grayscale -resize '300x300>' -alpha off #{grayscale_image_path}"
+ run_shell_command "convert images/#{img_file} -compose over -background white -flatten -type Grayscale -resize '300x200>' -alpha off #{grayscale_image_path}"
end
img['src'] = [Dir.pwd, grayscale_image_path].join("/")
}
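
The only functional change above tightens the ImageMagick bounding box from 300x300 to 300x200; the trailing '>' in the geometry means images are only shrunk to fit, never enlarged. A minimal standalone sketch of that conversion step, where the run_shell_command body is a hypothetical stand-in (the gem's helper is assumed to behave like a logged backtick call) and the filename is illustrative:

# Hypothetical stand-in for the gem's run_shell_command helper.
def run_shell_command(cmd)
  $stderr.puts cmd
  `#{cmd}`
end

img_file = 'sample%20image.png'   # illustrative filename, not from the recipe
grayscale_image_path =
  "grayscale_images/#{img_file.gsub('%20', '_').sub(/(\.\w+)$/, '-grayscale.gif')}"

unless File.size?(grayscale_image_path)
  # Flatten any transparency onto white, force grayscale, and shrink
  # (never enlarge, because of the '>') to fit within 300x200 pixels.
  run_shell_command "convert images/#{img_file} -compose over -background white " \
    "-flatten -type Grayscale -resize '300x200>' -alpha off #{grayscale_image_path}"
end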
recipes/trader_joes.rb
@@ -39,7 +39,9 @@ def document
def extract_sections
- @start_doc.search('ul#category-list > li').map {|x|
+ @start_doc.search('ul#category-list > li').
+ select {|x| x.at("h3.category-title").inner_text == 'Beverages' }.
+ map {|x|
#puts x
title = x.at("h3.category-title").inner_text
$stderr.puts title
@@ -55,12 +57,6 @@ def extract_sections
}
}
puts articles_list.inspect
-
-# articles_list.unshift({
-# title: title, # section title
-# path: save_article_and_return_path(a[:href]) # section href
-# })
-#
{
title: title,
articles: articles_list
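
The select added above narrows the scrape to the <li> entries whose h3.category-title text is exactly 'Beverages' before the map builds each section hash. A minimal illustration of that Nokogiri pattern, with made-up markup standing in for the real category list:

require 'nokogiri'

start_doc = Nokogiri::HTML(<<~HTML)
  <ul id="category-list">
    <li><h3 class="category-title">Beverages</h3></li>
    <li><h3 class="category-title">Cheese</h3></li>
  </ul>
HTML

sections = start_doc.search('ul#category-list > li').
  select { |x| x.at('h3.category-title').inner_text == 'Beverages' }.
  map    { |x| { title: x.at('h3.category-title').inner_text, articles: [] } }

p sections   # => one section hash, for Beverages only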
@@ -70,23 +66,31 @@ def extract_sections
def save_article_and_return_path href, filename=nil
path = filename || "articles/" + href.sub(/^\//, '').sub(/\/$/, '').gsub('/', '.')
- if File.size?("#{output_dir}/#{path}")
- puts "#{path} already saved"
- return path
- end
full_url = @start_url + '/' + href.sub(/^\//, '')
html = run_shell_command "curl -s #{full_url}"
-
article_doc = Nokogiri::HTML html
article_doc = article_doc.at(".post")
+ # article_doc = Nokogiri::HTML File.read("#{output_dir}/#{path}")
+
+
# images have relative paths, so fix them
+ article_doc.search("h2.title").each {|h2|
+ h2.swap "<h3>#{h2.inner_text}</h3>"
+
+ }
article_doc.search("img[@src]").each {|img|
if img['src'] =~ %r{^/}
img['src'] = "http://www.traderjoes.com" + img['src']
+ img['class'] = 'float-left'
end
+ if (p = img.parent.parent.parent) && p.name == 'p'
+ puts "unnesting image: #{img['src']}"
+ p.swap img
+ end
+
}
description = article_doc.at("p").inner_text.strip.split(/\s+/)[0, 10].join(' ')
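
The additions in this last hunk can be read as a self-contained Nokogiri cleanup pass over each article: swap h2.title headings for h3, absolutize relative img src attributes, tag those images with the new float-left class from css/kindle.css, and unnest images wrapped three levels deep inside a <p>. A minimal sketch under those assumptions (the sample HTML is illustrative, not actual traderjoes.com markup):

require 'nokogiri'

html = <<~HTML
  <div class="post">
    <h2 class="title">Sparkling Lemonade</h2>
    <p><span><a href="#"><img src="/images/lemonade.jpg"></a></span></p>
    <p>First sentence of the article body.</p>
  </div>
HTML

article_doc = Nokogiri::HTML(html).at('.post')

# Swap each h2.title heading for an h3, as the recipe does.
article_doc.search('h2.title').each { |h2| h2.swap "<h3>#{h2.inner_text}</h3>" }

article_doc.search('img[@src]').each do |img|
  if img['src'] =~ %r{^/}
    img['src'] = 'http://www.traderjoes.com' + img['src']
    img['class'] = 'float-left'
  end
  # Replace a triply nested <p> ancestor with the image itself.
  if (p = img.parent.parent.parent) && p.name == 'p'
    puts "unnesting image: #{img['src']}"
    p.swap img
  end
end

puts article_doc.to_html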