Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 113 lines (89 sloc) 3.59 KB
#! /usr/bin/env python
# Convert HTML pages to PDF.
# Copyright 2012 by Akkana Peck -- share and enjoy under the GPL v2 or later.
# Thx to
# To convert all the output files into a single multi-page PDF, use this:
# gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=multipage.pdf -dBATCH file*.pdf
import sys, os
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from pyPdf import PdfFileWriter, PdfFileReader
# Parameters that should be specifiable on the commandline.
# Currently only the resolution is.
x_res = 1024
outdir = "pdf"

# Remember the program name now: stripping a flag below shifts sys.argv,
# after which sys.argv[0] is the flag rather than the script.
progname = sys.argv[0]

# First check for flags. -1366 means include the whole 1366-pixel width
# instead of only the 1024 pixels the projector sees.
# You can specify other X resolutions there too.
# Checking that *everything* after the dash is a digit means a bare "-"
# or something like "-1a" is treated as a page name instead of crashing.
if (len(sys.argv) > 1 and sys.argv[1].startswith('-')
        and sys.argv[1][1:].isdigit()):
    x_res = int(sys.argv[1][1:])
    print("Using X resolution of %d" % x_res)
    sys.argv = sys.argv[1:]

pagelist = sys.argv[1:]
pagelistindex = 0
pdflist = []

# Nothing to convert (no arguments at all, or only a resolution flag):
# show the usage message and stop rather than failing later on an
# empty page list.
if not pagelist:
    print("Usage: %s a.html b.html ..." % progname)
    sys.exit(1)
# Create the Qt application and the web view the pages are loaded into.
app = QApplication(sys.argv)
web = QWebView()
# QPrinter always prints too small, and I can't figure out why.
# Empirically, I can get dot-for-dot with this zoom factor:
# Printing to 1366 (with speaker notes), use zoom factor 1.24.
# Printing to 1024 to remove the speaker notes, use zoom factor 2.0.
# NOTE(review): no zoom-factor call on the web view is present here even
# though the comments above describe one -- confirm whether it was lost.
# You can show the window, but it's not necessary:
# NOTE(review): no call that shows the window follows this comment either.

# Printer set up at screen resolution with zero page margins, the margins
# being given in device pixels.
printer = QPrinter(mode = QPrinter.ScreenResolution)
printer.setPageMargins(0, 0, 0, 0, QPrinter.DevicePixel)
# This makes no difference either way:
# printer.setFullPage(False)
# This does random, bizarre things if you set it higher:
# printer.setResolution(100);
# This, sadly, no longer works right.
# It's not a no-op, but it looks like QPrinter takes the nearest
# known paper size, or something idiotic like that.
# Requested paper size: x_res wide by 768 high, in device pixels.
printer.setPaperSize(QSizeF(x_res, 768), QPrinter.DevicePixel)
# printer.setPageSize(QPageSize(QSizeF(x_res, 768), QPageSize.Millimeter, "XGA"))
def makeMultipage(outfile, inlist):
    '''Make a multi-page document from all the pages we've created.

    Takes the first page of each PDF named in inlist, in order, and
    writes them all to a single PDF at outfile.

    outfile: path of the combined PDF to write.
    inlist:  paths of the input PDF files.
    '''
    output = PdfFileWriter()

    # The page objects still refer to their input streams when the
    # writer serializes them, so keep every input file open until
    # output.write() is done and only then close them all.
    infiles = []
    try:
        for infile in inlist:
            fp = open(infile, "rb")
            infiles.append(fp)
            page = PdfFileReader(fp).getPage(0)
            # Attempt to crop: this does nothing.
            # page.cropBox.lowerLeft=(0, 768)
            # page.cropBox.upperRight=(1024, 0)
            output.addPage(page)

        # Finally, write "output" to outfile; the with-block guarantees
        # the output file is flushed and closed even on error.
        with open(outfile, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for fp in infiles:
            fp.close()

    print('Wrote all pages to %s' % outfile)
def print_next():
    '''Print the page that has just loaded, then start the load
    of the next page in the list. Called from loadFinished signal.

    The page is printed to outdir/fileNNN.pdf, where NNN is the page's
    index in pagelist, and that path is appended to pdflist. After the
    last page, all generated PDFs are combined into outdir/all.pdf and
    the program exits.
    '''
    global pagelist, pagelistindex, web, printer

    # Print the currently loaded page to its own numbered PDF file.
    outputfile = os.path.join(outdir, "file%03d.pdf" % pagelistindex)
    printer.setOutputFileName(outputfile)
    web.print_(printer)
    # Remember the file so it can go into the combined document later.
    pdflist.append(outputfile)
    print("Generated %s from %s" % (outputfile, pagelist[pagelistindex]))

    pagelistindex += 1
    if pagelistindex >= len(pagelist):
        # That was the last page: build the combined PDF and stop.
        makeMultipage(os.path.join(outdir, 'all.pdf'), pdflist)
        print("Exiting")
        app.quit()
        # That doesn't always work, so hedge our bets:
        sys.exit(0)

    # Load the next URL in the list; loadFinished will call us again.
    print("Loading %s" % pagelist[pagelistindex])
    web.load(QUrl(pagelist[pagelistindex]))
# Call print_next each time the web view emits loadFinished(bool).
QObject.connect(web, SIGNAL("loadFinished(bool)"), print_next)