Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Daniel Choi
committed
Jan 20, 2012
0 parents
commit 7a268bb
Showing
5 changed files
with
206 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
src/ | ||
run.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# docs_on_kindle | ||
|
||
This project aims to put web documentation for popular software tools on the | ||
Kindle. | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/* Stylesheet linked into every generated article page. */

/* No first-line indent on paragraphs. */
p { text-indent: 0; }

/* One line of space above block-level content.
   HTML only defines heading levels 1 through 6. */
p, H1, H2, H3, H4, H5, H6, table, pre { margin-top: 1em; }

/* Noted by the author as apparently not taking effect; the Ruby helper
   fixup_html! inserts a <br> at the start of each dt as a workaround. */
dt {
  display: block;
  margin-top: 1em;
}

/* Start the marked element on a new page. */
.pagebreak { page-break-before: always; }

/* Table of contents: indent section headings, and document entries one
   level further. */
#toc H3 {
  text-indent: 1em;
}
#toc .document {
  text-indent: 2em;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
=begin | ||
Require this file and include this module into each recipe. | ||
Your recipe class is responsible for getting all the source HTML necessary to | ||
build the ebook. | ||
=end | ||
|
||
require 'fileutils' | ||
require 'nokogiri' | ||
require 'fileutils' | ||
require 'yaml' | ||
require 'date' | ||
|
||
require 'shellwords'

# Helpers shared by every recipe. A recipe class includes this module and must
# itself provide a +document+ method returning the metadata hash that mobi!
# writes to _document.yml.
module DocsOnKindle

  # Absolute path of the Kindle stylesheet. "css/kindle.css" is resolved
  # against the working directory in effect when this file is loaded.
  STYLESHEET = File.absolute_path "css/kindle.css"

  # Builds a <head> holding a <title> and a stylesheet <link>, and inserts it
  # immediately before the document's <body>.
  #
  # doc   - parsed Nokogiri document; it must contain a <body> element.
  # title - text for the <title> element.
  def add_head_section(doc, title)
    head = Nokogiri::XML::Node.new "head", doc
    title_node = Nokogiri::XML::Node.new "title", doc
    title_node.content = title
    title_node.parent = head
    css = Nokogiri::XML::Node.new "link", doc
    css['rel'] = 'stylesheet'
    css['type'] = 'text/css'
    css['href'] = STYLESHEET
    css.parent = head
    doc.at("body").before head
  end

  # Echoes a shell command, runs it, and returns its standard output.
  def run cmd
    puts " #{cmd}"
    `#{cmd}`
  end

  # Downloads every image referenced by doc into images/, converts it to a
  # grayscale GIF under grayscale_images/, and points the <img> at the
  # converted copy by absolute path. Both directories are relative to the
  # current working directory, and files that already exist with a non-zero
  # size are reused rather than fetched or converted again.
  #
  # Images with no usable src (missing, blank, data: URI, or no file name in
  # the path) are left untouched.
  def download_images! doc
    doc.search('img').each do |img|
      src = img[:src].to_s.strip
      next if src.empty? || src =~ /\Adata:/i

      # Name the local copy after the last path segment, ignoring any query
      # string or fragment so they cannot leak into the file name.
      img_file = src.sub(/[?#].*\z/m, '')[/[^\/]+\z/]
      next unless img_file

      FileUtils::mkdir_p 'images'
      FileUtils::mkdir_p 'grayscale_images'

      image_path = "images/#{img_file}"
      unless File.size?(image_path)
        # The URL comes from scraped HTML, so escape it: a bare "&" in a query
        # string would otherwise background the command.
        run "curl -Ls #{Shellwords.escape(src)} > #{Shellwords.escape(image_path)}"
      end

      grayscale_image_path = "grayscale_images/#{img_file.sub(/\.\w+\z/, '')}-grayscale.gif"
      unless File.size?(grayscale_image_path)
        # ImageMagick's "[0]" suffix selects the first frame of the input.
        first_frame = "#{image_path}[0]"
        run "convert #{Shellwords.escape(first_frame)} -type Grayscale -depth 8 -resize '400x300>' #{Shellwords.escape(grayscale_image_path)}"
      end

      img['src'] = File.join(Dir.pwd, grayscale_image_path)
    end
  end

  # Adjusts markup in place so it renders better on the device.
  def fixup_html! doc

    # Sort of a hack to improve dt elements spacing
    # Using a css rule margin-top doesn't work
    doc.search('dt').each do |dt|
      br = Nokogiri::XML::Node.new("br", doc)
      first_child = dt.children.first
      if first_child
        first_child.before br
      else
        # An empty dt has no child to anchor on.
        dt.add_child br
      end
    end

    # We want to remove nested 'p' tags in 'li' tags, because these introduce an undesirable
    # blank line after the bullet. The expected CSS fix doesn't work.
    doc.search('li').each do |li|
      li.search("p").each do |para|
        # Unwrap in place: moving the children to just before the paragraph
        # keeps them in their original position and inside their own list item.
        para.children.each {|child| para.add_previous_sibling child }
        para.remove
      end
    end

  end

  # Writes the including class's +document+ metadata to _document.yml in the
  # current directory, then hands the process over to kindlerb. Kernel#exec
  # replaces the running process, so nothing after this call is executed.
  def mobi!
    File.open("_document.yml", 'w'){|f| f.puts document.to_yaml}
    exec 'kindlerb'
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
#!/usr/bin/env ruby | ||
|
||
require 'docs_on_kindle' | ||
|
||
require 'shellwords'

# Recipe that scrapes the Heroku Dev Center and lays the result out as a
# kindlerb source tree under OUTPUT_DIR.
#
# get_source_files downloads the section index and every article;
# build_kindlerb_tree turns those downloads into sections/NNN/NNN.html files
# and then runs kindlerb through DocsOnKindle#mobi!.
class HerokuDocs
  include ::DocsOnKindle

  OUTPUT_DIR = "src/heroku"
  # Created as soon as the class is loaded, so later writes can rely on it.
  FileUtils::mkdir_p OUTPUT_DIR

  # Scrapes the category index, writes the section/article listing to
  # OUTPUT_DIR/sections.yml, and downloads each listed article once.
  def get_source_files
    start_url = "http://devcenter.heroku.com/categories/add-on-documentation"
    @start_doc = Nokogiri::HTML fetch(start_url)
    sections = extract_sections
    File.open("#{OUTPUT_DIR}/sections.yml", 'w') {|f| f.puts sections.to_yaml}

    # build_kindlerb_tree reads the articles from disk, so fetch them now.
    # Fragments are dropped first so each page is downloaded only once.
    urls = sections.flat_map {|s| s[:articles].map {|a| a[:url].sub(/#.*\z/, '') } }.uniq
    urls.each {|url| article url }
    sections
  end

  # Metadata hash that DocsOnKindle#mobi! serializes to _document.yml.
  def document
    today = Date.today.to_s
    {
      'doc_uuid' => "heroku-docs-#{today}",
      'title' => "Heroku Documentation",
      'publisher' => "Heroku",
      'author' => "Heroku",
      'subject' => 'Reference',
      'date' => today,
      'cover' => nil,
      'masthead' => nil,
      'mobi_outfile' => "heroku-guide.#{today}.mobi"
    }
  end

  # Builds sections/NNN/_section.txt and sections/NNN/NNN.html inside
  # OUTPUT_DIR from sections.yml and the downloaded articles, then calls mobi!.
  # Articles with no downloaded copy are skipped with a warning, and sections
  # left without any article are dropped, so numbering stays contiguous.
  def build_kindlerb_tree
    sections = load_sections
    Dir.chdir OUTPUT_DIR do
      books = sections.map {|s| [s[:title], downloaded_articles(s)] }
      books = books.reject {|_title, entries| entries.empty? }

      books.each_with_index do |(title, entries), section_idx|
        section_dir = "sections/%03d" % section_idx
        FileUtils::mkdir_p section_dir
        File.open("#{section_dir}/_section.txt", 'w') {|f| f.puts title}
        # Interpolate rather than format: a "%" in a title must not be
        # treated as a format directive.
        puts "#{section_dir} -> #{title}"

        # save articles
        entries.each_with_index do |(a, path), item_idx|
          article_title = a[:title]
          item = Nokogiri::HTML(File.read(path))

          download_images! item
          fixup_html! item

          item_path = "%s/%03d.html" % [section_dir, item_idx]
          add_head_section item, article_title
          File.open(item_path, 'w'){|f| f.puts item.to_html}
          puts " #{item_path} -> #{article_title}"
        end
      end
      mobi!
    end
  end

  # Returns one { title:, articles: } hash per option of the quicknav
  # <select> on the start page, fetching each option's category page.
  # Requires @start_doc, which get_source_files sets.
  def extract_sections
    @start_doc.search('select[@id=quicknav] option').map do |o|
      title = o.inner_text
      $stderr.puts title
      {
        title: title,
        articles: articles(fetch("http://devcenter.heroku.com#{o[:value]}"))
      }
    end
  end

  # Parses a category page and returns one { title:, url: } hash per link in
  # "ul.articles". Relative hrefs are made absolute against the Dev Center host.
  def articles html
    category_page = Nokogiri::HTML html
    category_page.search("ul.articles a").map do |x|
      title = x.inner_text.strip
      href = x[:href] =~ /^http/ ? x[:href] : "http://devcenter.heroku.com#{x[:href]}"
      $stderr.puts "- #{title}"
      {
        title: title,
        url: href
      }
    end
  end

  # Downloads one article page and saves the inner HTML of its <article>
  # element to OUTPUT_DIR/articles/<last path segment>. Any "#fragment" is
  # ignored so the file name matches what build_kindlerb_tree looks up.
  # Warns and returns nil when the URL has no usable file name or the page has
  # no <article> element.
  def article href
    url = href.to_s.sub(/#.*\z/, '')
    filename = url[/[\w-]+\z/]
    unless filename
      $stderr.puts "Skipping #{href}: cannot derive a file name"
      return
    end

    content = Nokogiri::HTML(fetch(url)).at('article')
    unless content
      $stderr.puts "Skipping #{href}: no <article> element found"
      return
    end

    FileUtils::mkdir_p "#{OUTPUT_DIR}/articles"
    path = "#{OUTPUT_DIR}/articles/#{filename}"
    File.open(path, 'w') {|f| f.puts(content.inner_html)}
  end

  private

  # Returns the body of url as fetched by curl. The URL is shell-escaped
  # because it may come from scraped markup.
  def fetch url
    `curl -s #{Shellwords.escape(url)}`
  end

  # Loads the listing written by get_source_files. The file uses Symbol keys,
  # which the safe loader behind YAML.load_file in Psych 4+ refuses; the file
  # is generated by this script, so the unsafe loader is used where it exists.
  def load_sections
    path = "#{OUTPUT_DIR}/sections.yml"
    if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file path
    else
      YAML::load_file path
    end
  end

  # Path, relative to OUTPUT_DIR, of the downloaded copy of an article URL
  # ("articles/<name>"), or nil when the URL does not look like an article.
  def article_path url
    match = /(?<path>articles\/[\w-]+)(#\w+|)$/.match(url.to_s)
    match && match[:path]
  end

  # Pairs each article of a section with its downloaded file, keeping only
  # those whose file exists. Must be called with OUTPUT_DIR as the working
  # directory, since article paths are relative to it.
  def downloaded_articles section
    pairs = section[:articles].map {|a| [a, article_path(a[:url])] }
    pairs.select do |a, path|
      found = path && File.file?(path)
      $stderr.puts "Skipping #{a[:url]}: no downloaded copy" unless found
      found
    end
  end
end
|
||
|
||
# Scrape the Dev Center first, then assemble the kindlerb tree and build the book.
recipe = HerokuDocs.new
recipe.get_source_files
recipe.build_kindlerb_tree