diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ad76274
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+src/
+run.sh
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5991f2b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,6 @@
+# docs_on_kindle
+
+This project aims to put web documentation for popular software tools on the
+Kindle.
+
+
diff --git a/css/kindle.css b/css/kindle.css
new file mode 100644
index 0000000..5ddef25
--- /dev/null
+++ b/css/kindle.css
@@ -0,0 +1,17 @@
+p { text-indent: 0; }
+
+p, H1, H2, H3, H4, H5, H6, H7, H8, table, pre { margin-top: 1em;}
+
+/* doesn't work apparently: */
+dt {
+  display:block;
+  margin-top: 1em;
+}
+
+.pagebreak { page-break-before: always; }
+#toc H3 {
+  text-indent: 1em;
+}
+#toc .document {
+  text-indent: 2em;
+}
diff --git a/lib/docs_on_kindle.rb b/lib/docs_on_kindle.rb
new file mode 100644
index 0000000..e893d47
--- /dev/null
+++ b/lib/docs_on_kindle.rb
@@ -0,0 +1,111 @@
+=begin
+
+Require this file and include this module into each recipe.
+
+Your recipe class is responsible for getting all the source HTML necessary to
+build the ebook. It must also define a `document` method returning the
+metadata hash that `mobi!` writes to _document.yml.
+
+=end
+
+require 'date'
+require 'fileutils'
+require 'nokogiri'
+require 'shellwords'
+require 'yaml'
+
+module DocsOnKindle
+
+  # Absolute path of the shared Kindle stylesheet. Resolved relative to this
+  # file rather than the current directory, so it does not depend on where the
+  # recipe is started from.
+  STYLESHEET = File.expand_path("../../css/kindle.css", __FILE__)
+
+  # Inserts a <head> element holding a <title> and a stylesheet <link> right
+  # before the <body> of doc.
+  #
+  # doc   - Nokogiri document, modified in place.
+  # title - String used as the text of the <title> element.
+  def add_head_section(doc, title)
+    head = Nokogiri::XML::Node.new "head", doc
+    title_node = Nokogiri::XML::Node.new "title", doc
+    title_node.content = title
+    title_node.parent = head
+    css = Nokogiri::XML::Node.new "link", doc
+    css['rel'] = 'stylesheet'
+    css['type'] = 'text/css'
+    css['href'] = STYLESHEET
+    css.parent = head
+    doc.at("body").before head
+  end
+
+  # Prints a shell command, runs it, and returns its standard output.
+  def run cmd
+    puts " #{cmd}"
+    `#{cmd}`
+  end
+
+  # Downloads every image referenced by doc into images/, converts it to a
+  # grayscale GIF in grayscale_images/ (needs curl and ImageMagick's convert),
+  # and rewrites the <img> src to the absolute path of the converted copy.
+  # Files that already exist with non-zero size are not fetched or converted
+  # again.
+  def download_images! doc
+    doc.search('img').each {|img|
+      src = img[:src]
+      img_file = src.to_s[%r{[^/]+$}]
+      # An <img> without a usable src has nothing to download.
+      next unless img_file
+
+      FileUtils::mkdir_p 'images'
+      FileUtils::mkdir_p 'grayscale_images'
+      original_path = "images/#{img_file}"
+      unless File.size?(original_path)
+        # src and the file name come from remote HTML, so quote them for the shell.
+        run "curl -Ls #{Shellwords.escape(src)} > #{Shellwords.escape(original_path)}"
+      end
+      # Always end in .gif, even when the source name has no plain extension.
+      grayscale_image_path = "grayscale_images/#{File.basename(img_file, '.*')}-grayscale.gif"
+      unless File.size?(grayscale_image_path)
+        # The "[0]" suffix asks ImageMagick for the first frame only.
+        first_frame = Shellwords.escape("#{original_path}[0]")
+        run "convert #{first_frame} -type Grayscale -depth 8 -resize '400x300>' #{Shellwords.escape(grayscale_image_path)}"
+      end
+      img['src'] = File.join(Dir.pwd, grayscale_image_path)
+    }
+  end
+
+  # Applies markup tweaks that make the HTML render better on the Kindle.
+  def fixup_html! doc
+
+    # Sort of a hack to improve dt elements spacing
+    # Using a css rule margin-top doesn't work
+    doc.search('dt').each {|dt|
+      first_child = dt.children.first
+      # An empty <dt> has no child to put the <br> in front of.
+      next unless first_child
+      first_child.before(Nokogiri::XML::Node.new("br", doc))
+    }
+
+    # We want to remove nested 'p' tags in 'li' tags, because these introduce an undesirable
+    # blank line after the bullet. The expected CSS fix doesn't work.
+    doc.search('li p').each {|p|
+      # Unwrap the paragraph where it stands, so its content keeps its position
+      # relative to the other children of the list item.
+      contents = p.children
+      if contents.empty?
+        p.remove
+      else
+        p.replace contents
+      end
+    }
+
+  end
+
+  # Writes the recipe's `document` metadata to _document.yml in the current
+  # directory, then replaces the running process with kindlerb.
+  def mobi!
+    File.open("_document.yml", 'w'){|f| f.puts document.to_yaml}
+    exec 'kindlerb'
+  end
+end
diff --git a/recipes/heroku.rb b/recipes/heroku.rb
new file mode 100644
index 0000000..1e2d891
--- /dev/null
+++ b/recipes/heroku.rb
@@ -0,0 +1,142 @@
+#!/usr/bin/env ruby
+
+require 'docs_on_kindle'
+require 'shellwords'
+
+# Recipe that builds a Kindle book from the Heroku Dev Center add-on
+# documentation: get_source_files downloads the HTML, build_kindlerb_tree
+# lays it out for kindlerb and generates the .mobi file.
+class HerokuDocs
+  include ::DocsOnKindle
+
+  OUTPUT_DIR = "src/heroku"
+  BASE_URL = "http://devcenter.heroku.com"
+  FileUtils::mkdir_p OUTPUT_DIR
+
+  # Fetches the category index, records every section with its articles in
+  # sections.yml, and downloads the body of each article.
+  def get_source_files
+    start_url = "#{BASE_URL}/categories/add-on-documentation"
+    @start_doc = Nokogiri::HTML `curl -s #{start_url}`
+    sections = extract_sections
+    File.open("#{OUTPUT_DIR}/sections.yml", 'w') {|f| f.puts sections.to_yaml}
+    # build_kindlerb_tree reads the articles from disk, so fetch them here.
+    sections.each {|s| s[:articles].each {|a| article a[:url]}}
+  end
+
+  # Metadata written to _document.yml for kindlerb.
+  def document
+    today = Date.today.to_s
+    {
+      'doc_uuid' => "heroku-docs-#{today}",
+      'title' => "Heroku Documentation",
+      'publisher' => "Heroku",
+      'author' => "Heroku",
+      'subject' => 'Reference',
+      'date' => today,
+      'cover' => nil,
+      'masthead' => nil,
+      'mobi_outfile' => "heroku-guide.#{today}.mobi"
+    }
+  end
+
+  # Creates sections/NNN/NNN.html (plus _section.txt titles) under OUTPUT_DIR
+  # from sections.yml and the downloaded articles, then runs mobi!. Sections
+  # without articles are left out.
+  def build_kindlerb_tree
+    sections = YAML::load_file "#{OUTPUT_DIR}/sections.yml"
+    sections.reject! {|s| s[:articles].empty?}
+    Dir.chdir OUTPUT_DIR do
+      sections.each_with_index {|s, section_idx|
+        title = s[:title]
+        section_dir = "sections/%03d" % section_idx
+        FileUtils::mkdir_p section_dir
+        File.open("#{section_dir}/_section.txt", 'w') {|f| f.puts title}
+        # Interpolate instead of formatting, so a '%' in the title is harmless.
+        puts "#{section_dir} -> #{title}"
+        # save articles
+        s[:articles].each_with_index {|a, item_idx|
+          article_title = a[:title]
+          # Local copy of the article, relative to OUTPUT_DIR; a trailing
+          # "#fragment" in the URL is ignored.
+          path = a[:url][%r{(articles/[\w-]+)(?:#\w+)?$}, 1]
+          puts a[:url], path
+          unless path && File.file?(path)
+            $stderr.puts "  skipping #{a[:url]}: article was not downloaded"
+            next
+          end
+          item = Nokogiri::HTML(File.read path)
+
+          download_images! item
+          fixup_html! item
+
+          item_path = "%s/%03d.html" % [section_dir, item_idx]
+          add_head_section item, article_title
+          File.open(item_path, 'w'){|f| f.puts item.to_html}
+          puts " #{item_path} -> #{article_title}"
+        }
+      }
+      mobi!
+    end
+  end
+
+  # Returns one {title:, articles:} hash per entry of the quicknav <select> on
+  # the start page; each category page is fetched to list its articles.
+  def extract_sections
+    @start_doc.search('select[@id=quicknav] option').map {|o|
+      title = o.inner_text
+      $stderr.puts title
+      category_url = "#{BASE_URL}#{o[:value]}"
+      {
+        title: title,
+        articles: articles(`curl -s #{Shellwords.escape(category_url)}`)
+      }
+    }
+  end
+
+  # Returns a {title:, url:} hash for every link in the "ul.articles" list of
+  # a category page; relative links are made absolute.
+  #
+  # html - String with the HTML of the category page.
+  def articles html
+    category_page = Nokogiri::HTML html
+    category_page.search("ul.articles a").map {|link|
+      title = link.inner_text.strip
+      href = link[:href].to_s
+      href = "#{BASE_URL}#{href}" unless href.start_with?('http')
+      $stderr.puts "- #{title}"
+      { title: title, url: href }
+    }
+  end
+
+  # Downloads the page at href and stores the inner HTML of its <article>
+  # element in OUTPUT_DIR/articles/<last path segment>. An article that is
+  # already on disk is not fetched again.
+  #
+  # Returns the path of the stored file, or nil when nothing could be stored.
+  def article href
+    # The fragment is not part of the resource and must not become the file name.
+    page_url = href.sub(/#.*\z/, '')
+    filename = page_url[/[\w-]+\z/]
+    unless filename
+      $stderr.puts "  cannot derive a file name from #{href}, skipping"
+      return nil
+    end
+    path = "#{OUTPUT_DIR}/articles/#{filename}"
+    return path if File.size?(path)
+
+    a = Nokogiri::HTML `curl -s #{Shellwords.escape(page_url)}`
+    body = a.at('article')
+    unless body
+      $stderr.puts "  no <article> element in #{page_url}, skipping"
+      return nil
+    end
+    FileUtils::mkdir_p "#{OUTPUT_DIR}/articles"
+    File.open(path, 'w') {|f| f.puts(body.inner_html)}
+    path
+  end
+end
+
+
+HerokuDocs.new.get_source_files
+HerokuDocs.new.build_kindlerb_tree