Skip to content

Commit

Permalink
finish trader joes
Browse files: browse the repository at this point in the history
  • Loading branch information
Daniel Choi committed May 1, 2012
1 parent 97416f9 commit ab7f495
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 13 deletions.
11 changes: 11 additions & 0 deletions css/kindle.css
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -24,3 +24,14 @@ dd {
#toc .document { #toc .document {
text-indent: 2em; text-indent: 2em;
} }

img.float-left {
float:left;
margin-right: 10px;
}

img {
text-align: center;
margin-right: auto;
margin-left: auto;
}
2 changes: 1 addition & 1 deletion lib/kindlefodder.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def download_images! doc
grayscale_image_path = "grayscale_images/#{img_file.gsub('%20', '_').sub(/(\.\w+)$/, "-grayscale.gif")}" grayscale_image_path = "grayscale_images/#{img_file.gsub('%20', '_').sub(/(\.\w+)$/, "-grayscale.gif")}"
sleep 0.1 sleep 0.1
unless File.size?(grayscale_image_path) unless File.size?(grayscale_image_path)
run_shell_command "convert images/#{img_file} -compose over -background white -flatten -type Grayscale -resize '300x300>' -alpha off #{grayscale_image_path}" run_shell_command "convert images/#{img_file} -compose over -background white -flatten -type Grayscale -resize '300x200>' -alpha off #{grayscale_image_path}"
end end
img['src'] = [Dir.pwd, grayscale_image_path].join("/") img['src'] = [Dir.pwd, grayscale_image_path].join("/")
} }
Expand Down
28 changes: 16 additions & 12 deletions recipes/trader_joes.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ def document




def extract_sections def extract_sections
@start_doc.search('ul#category-list > li').map {|x| @start_doc.search('ul#category-list > li').
select {|x| x.at("h3.category-title").inner_text == 'Beverages' }.
map {|x|
#puts x #puts x
title = x.at("h3.category-title").inner_text title = x.at("h3.category-title").inner_text
$stderr.puts title $stderr.puts title
Expand All @@ -55,12 +57,6 @@ def extract_sections
} }
} }
puts articles_list.inspect puts articles_list.inspect

# articles_list.unshift({
# title: title, # section title
# path: save_article_and_return_path(a[:href]) # section href
# })
#
{ {
title: title, title: title,
articles: articles_list articles: articles_list
Expand All @@ -70,23 +66,31 @@ def extract_sections


def save_article_and_return_path href, filename=nil def save_article_and_return_path href, filename=nil
path = filename || "articles/" + href.sub(/^\//, '').sub(/\/$/, '').gsub('/', '.') path = filename || "articles/" + href.sub(/^\//, '').sub(/\/$/, '').gsub('/', '.')
if File.size?("#{output_dir}/#{path}")
puts "#{path} already saved"
return path
end


full_url = @start_url + '/' + href.sub(/^\//, '') full_url = @start_url + '/' + href.sub(/^\//, '')


html = run_shell_command "curl -s #{full_url}" html = run_shell_command "curl -s #{full_url}"

article_doc = Nokogiri::HTML html article_doc = Nokogiri::HTML html
article_doc = article_doc.at(".post") article_doc = article_doc.at(".post")


# article_doc = Nokogiri::HTML File.read("#{output_dir}/#{path}")


# images have relative paths, so fix them # images have relative paths, so fix them
article_doc.search("h2.title").each {|h2|
h2.swap "<h3>#{h2.inner_text}</h3>"

}
article_doc.search("img[@src]").each {|img| article_doc.search("img[@src]").each {|img|
if img['src'] =~ %r{^/} if img['src'] =~ %r{^/}
img['src'] = "http://www.traderjoes.com" + img['src'] img['src'] = "http://www.traderjoes.com" + img['src']
img['class'] = 'float-left'
end end
if (p = img.parent.parent.parent) && p.name == 'p'
puts "unnesting image: #{img['src']}"
p.swap img
end

} }


description = article_doc.at("p").inner_text.strip.split(/\s+/)[0, 10].join(' ') description = article_doc.at("p").inner_text.strip.split(/\s+/)[0, 10].join(' ')
Expand Down

0 comments on commit ab7f495

Please sign in to comment.