Skip to content

Commit

Permalink
initial
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Choi committed Jan 20, 2012
0 parents commit 7a268bb
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -0,0 +1,2 @@
src/
run.sh
6 changes: 6 additions & 0 deletions README.md
@@ -0,0 +1,6 @@
# docs_on_kindle

This project aims to put web documentation for popular software tools on the
Kindle.


17 changes: 17 additions & 0 deletions css/kindle.css
@@ -0,0 +1,17 @@
/* Paragraphs start flush left, with no first-line indent. */
p {
  text-indent: 0;
}

/* One line of space above each paragraph, heading, table and code block. */
p,
H1,
H2,
H3,
H4,
H5,
H6,
H7,
H8,
table,
pre {
  margin-top: 1em;
}

/* doesn't work apparently: */
dt {
  display: block;
  margin-top: 1em;
}

/* Elements marked .pagebreak begin on a new page. */
.pagebreak {
  page-break-before: always;
}

/* Inside #toc, H3 headings are indented one step... */
#toc H3 {
  text-indent: 1em;
}

/* ...and .document entries two steps. */
#toc .document {
  text-indent: 2em;
}
82 changes: 82 additions & 0 deletions lib/docs_on_kindle.rb
@@ -0,0 +1,82 @@
=begin
Require this file and include this module into each recipe.
Your recipe class is responsible for getting all the source HTML necessary to
build the ebook.
=end

require 'date'
require 'fileutils'
require 'shellwords'
require 'yaml'

require 'nokogiri'

# Shared helpers for docs_on_kindle recipes.
#
# Include this module into a recipe class. The recipe is responsible for
# fetching the source HTML and must define a +document+ method returning the
# metadata hash that mobi! writes to _document.yml.
module DocsOnKindle

  # Absolute path of the Kindle stylesheet. Resolved relative to this file
  # (lib/../css/kindle.css) instead of the current working directory, so the
  # link stays valid no matter where the recipe is launched from.
  STYLESHEET = File.expand_path("../../css/kindle.css", __FILE__)

  # Inserts a <head> element, holding a <title> and a stylesheet <link> that
  # points at STYLESHEET, immediately before the document's <body>.
  #
  # doc   - parsed Nokogiri document; it must contain a <body> element.
  # title - text for the <title> element.
  def add_head_section(doc, title)
    head = Nokogiri::XML::Node.new "head", doc

    title_node = Nokogiri::XML::Node.new "title", doc
    title_node.content = title
    title_node.parent = head

    css = Nokogiri::XML::Node.new "link", doc
    css['rel'] = 'stylesheet'
    css['type'] = 'text/css'
    css['href'] = STYLESHEET
    css.parent = head

    doc.at("body").before head
  end

  # Echoes a shell command to stdout, runs it through the shell and returns
  # whatever the command wrote to its standard output.
  def run(cmd)
    puts " #{cmd}"
    `#{cmd}`
  end

  # Downloads every image referenced by the document into images/, converts
  # it to a grayscale GIF (at most 400x300) under grayscale_images/, and
  # rewrites the img's src to the absolute path of the converted file.
  #
  # Both directories are relative to the current working directory. Files
  # that already exist with a non-zero size are reused, so repeated runs do
  # not download or convert again. img elements without a usable src are
  # left untouched.
  def download_images!(doc)
    FileUtils.mkdir_p 'images'
    FileUtils.mkdir_p 'grayscale_images'

    doc.search('img').each do |img|
      src = img['src']
      # Last path segment of the URL; nil when src is missing or ends in "/".
      img_file = src.to_s[/[^\/]+$/]
      next unless img_file

      original_path = "images/#{img_file}"
      unless File.size?(original_path)
        # The URL and filename come from scraped HTML, so escape them before
        # handing them to the shell.
        run "curl -Ls #{Shellwords.escape(src)} > #{Shellwords.escape(original_path)}"
      end

      grayscale_image_path = "grayscale_images/#{img_file.sub(/\.\w+$/, '-grayscale.gif')}"
      unless File.size?(grayscale_image_path)
        # "[0]" makes ImageMagick use only the first frame of the source image.
        first_frame = Shellwords.escape("#{original_path}[0]")
        run "convert #{first_frame} -type Grayscale -depth 8 -resize '400x300>' #{Shellwords.escape(grayscale_image_path)}"
      end

      img['src'] = File.join(Dir.pwd, grayscale_image_path)
    end
  end

  # Applies markup tweaks to the parsed document, in place.
  def fixup_html!(doc)
    # Sort of a hack to improve dt elements spacing
    # Using a css rule margin-top doesn't work
    doc.search('dt').each do |dt|
      br = Nokogiri::XML::Node.new("br", doc)
      first_child = dt.children.first
      if first_child
        first_child.before(br)
      else
        # An empty dt has no child to insert before.
        dt.add_child(br)
      end
    end

    # We want to remove nested 'p' tags in 'li' tags, because these introduce an undesirable
    # blank line after the bullet. The expected CSS fix doesn't work.
    # Each paragraph is unwrapped where it stands, so its content keeps its
    # position relative to siblings (e.g. text stays ahead of a nested list
    # and inside the list item it belonged to).
    doc.search('li p').each do |para|
      para.children.each { |child| para.before(child) }
      para.remove
    end
  end

  # Writes the recipe's +document+ metadata to _document.yml in the current
  # directory and then replaces the running process with kindlerb. Nothing
  # after this call is executed.
  def mobi!
    File.open("_document.yml", 'w') { |f| f.puts document.to_yaml }
    exec 'kindlerb'
  end
end
99 changes: 99 additions & 0 deletions recipes/heroku.rb
@@ -0,0 +1,99 @@
#!/usr/bin/env ruby

require 'docs_on_kindle'

# Recipe that scrapes the Heroku Dev Center and lays the pages out as a
# kindlerb source tree under OUTPUT_DIR.
class HerokuDocs
  include ::DocsOnKindle

  # Directory (relative to where the recipe is launched) that receives the
  # downloaded sources and the generated kindlerb tree.
  OUTPUT_DIR = "src/heroku"
  # Site root that relative links and category paths are resolved against.
  BASE_URL = "http://devcenter.heroku.com"

  FileUtils.mkdir_p OUTPUT_DIR

  # Downloads everything build_kindlerb_tree needs: the section/article index
  # (saved as sections.yml) and the body of every article listed in it.
  def get_source_files
    start_url = "#{BASE_URL}/categories/add-on-documentation"
    @start_doc = Nokogiri::HTML fetch(start_url)
    sections = extract_sections
    File.open("#{OUTPUT_DIR}/sections.yml", 'w') { |f| f.puts sections.to_yaml }
    # build_kindlerb_tree reads the article bodies from disk, so fetch them now.
    sections.each do |section|
      section[:articles].each { |entry| article entry[:url] }
    end
    nil
  end

  # Metadata hash serialized to _document.yml by DocsOnKindle#mobi!.
  def document
    today = Date.today.to_s
    {
      'doc_uuid' => "heroku-docs-#{today}",
      'title' => "Heroku Documentation",
      'publisher' => "Heroku",
      'author' => "Heroku",
      'subject' => 'Reference',
      'date' => today,
      'cover' => nil,
      'masthead' => nil,
      'mobi_outfile' => "heroku-guide.#{today}.mobi"
    }
  end

  # Builds sections/NNN/_section.txt and sections/NNN/MMM.html inside
  # OUTPUT_DIR from sections.yml and the downloaded articles, then hands over
  # to kindlerb via mobi! (which replaces the current process).
  # Sections without articles are dropped; articles whose body was never
  # downloaded are skipped with a warning on stderr.
  def build_kindlerb_tree
    sections = YAML::load_file "#{OUTPUT_DIR}/sections.yml"
    sections.reject! { |s| s[:articles].empty? }
    Dir.chdir OUTPUT_DIR do
      sections.each_with_index do |s, section_idx|
        title = s[:title]
        section_dir = "sections/%03d" % section_idx
        FileUtils::mkdir_p(section_dir)
        File.open("#{section_dir}/_section.txt", 'w') { |f| f.puts title }
        # The title is interpolated after formatting so that a "%" in it
        # cannot be mistaken for a format directive.
        puts "#{section_dir} -> #{title}"

        # save articles
        s[:articles].each_with_index do |a, item_idx|
          article_title = a[:title]
          path = article_path(a[:url])
          unless path && File.file?(path)
            $stderr.puts "  skipping #{a[:url]}: no downloaded article body"
            next
          end
          puts a[:url], path
          item = Nokogiri::HTML(File.read(path))

          # download_images! also rewrites every img src to an absolute path.
          download_images! item
          fixup_html! item

          item_path = "%s/%03d.html" % [section_dir, item_idx]
          add_head_section item, article_title
          File.open(item_path, 'w') { |f| f.puts item.to_html }
          puts " #{item_path} -> #{article_title}"
        end
      end
      mobi!
    end
  end

  # Returns one { title:, articles: } hash per option of the start page's
  # "quicknav" select, fetching each category page to list its articles.
  # Requires @start_doc, which get_source_files sets.
  def extract_sections
    @start_doc.search('select[@id=quicknav] option').map do |option|
      title = option.inner_text
      $stderr.puts title
      {
        title: title,
        articles: articles(fetch("#{BASE_URL}#{option[:value]}"))
      }
    end
  end

  # Parses a category page and returns a { title:, url: } hash for every link
  # inside ul.articles. Relative links are made absolute against BASE_URL.
  def articles(html)
    category_page = Nokogiri::HTML html
    category_page.search("ul.articles a").map do |link|
      title = link.inner_text.strip
      href = link[:href] =~ /^http/ ? link[:href] : "#{BASE_URL}#{link[:href]}"
      $stderr.puts "- #{title}"
      {
        title: title,
        url: href
      }
    end
  end

  # Downloads one article and stores the inner HTML of its <article> element
  # under OUTPUT_DIR/articles/<slug>. A file that already exists with content
  # is kept as is. Returns nil.
  def article(href)
    relative_path = article_path(href)
    unless relative_path
      $stderr.puts "  not an article URL, skipped: #{href}"
      return
    end

    path = "#{OUTPUT_DIR}/#{relative_path}"
    return if File.size?(path)

    body = Nokogiri::HTML(fetch(href)).at('article')
    unless body
      $stderr.puts "  no <article> element found at #{href}"
      return
    end

    FileUtils::mkdir_p File.dirname(path)
    File.open(path, 'w') { |f| f.puts(body.inner_html) }
    nil
  end

  private

  # Path of an article's saved body relative to OUTPUT_DIR ("articles/<slug>"),
  # or nil when the URL does not end in an articles/<slug> segment. A trailing
  # "#fragment" is ignored, so saving and reading agree on the file name.
  def article_path(url)
    slug = url.to_s[/articles\/([\w-]+)(#\w+|)$/, 1]
    slug && "articles/#{slug}"
  end

  # Returns the response body for url as fetched by curl. The URL is passed
  # as a separate argument, without a shell, so characters such as "&" or "#"
  # in scraped links cannot alter the command.
  def fetch(url)
    IO.popen(['curl', '-s', url], &:read)
  end
end


# Run the recipe only when this file is executed as a script, so that loading
# it from other code does not start a network scrape and an exec.
if __FILE__ == $0
  recipe = HerokuDocs.new
  recipe.get_source_files
  recipe.build_kindlerb_tree
end

0 comments on commit 7a268bb

Please sign in to comment.