Permalink
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 113 lines (89 sloc) 3.59 KB
#! /usr/bin/env python
# Convert HTML pages to PDF.
# Copyright 2012 by Akkana Peck -- share and enjoy under the GPL v2 or later.
#
# Thx to http://notes.alexdong.com/xhtml-to-pdf-using-pyqt4-webkit-and-headless
#
# To convert all the output files into a single multi-page PDF, use this:
# gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=multipage.pdf -dBATCH file*.pdf
import sys, os
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import *
from pyPdf import PdfFileWriter, PdfFileReader
# Parameters that should be specifiable on the commandline.
# Currently only the resolution is.
x_res = 1024
outdir = "pdf"


def parse_args(argv, default_x_res=1024):
    '''Split a command line into an X resolution and a list of pages.

    argv is the complete argument vector, program name first.
    A leading flag such as -1366 means include the whole 1366-pixel width
    instead of only the 1024 pixels the projector sees; other X
    resolutions can be given the same way.

    Returns a tuple (x_res, pages, flag_given), where flag_given tells
    whether a resolution flag was consumed from the front of the arguments.
    Raises ValueError if the flag is malformed or if no pages are named.
    '''
    args = list(argv[1:])
    resolution = default_x_res
    flag_given = False
    # Slices rather than indexes, so "" and a bare "-" can't raise IndexError.
    if args and args[0][:1] == '-' and args[0][1:2].isdigit():
        try:
            resolution = int(args[0][1:])
        except ValueError:
            raise ValueError("Bad X resolution flag: %s" % args[0])
        flag_given = True
        args = args[1:]
    if not args:
        raise ValueError("No HTML pages specified")
    return resolution, args, flag_given


try:
    x_res, pagelist, _flag_given = parse_args(sys.argv, x_res)
except ValueError as err:
    sys.stderr.write("%s\n" % err)
    print("Usage: %s a.html b.html ..." % sys.argv[0])
    sys.exit(1)

if _flag_given:
    print("Using X resolution of %d" % x_res)
    # Shift the flag into argv[0], as this script has always done, so that
    # everything after argv[0] is a page.
    sys.argv = sys.argv[1:]

# Index of the page currently being loaded, and the PDFs written so far.
pagelistindex = 0
pdflist = []

# The per-page PDFs and the merged PDF are written into outdir,
# so make sure it exists before any printing starts.
if not os.path.isdir(outdir):
    os.makedirs(outdir)
# Create the Qt application and the web view that renders each page.
# The view is never shown on screen; it is only used as a print source.
app = QApplication(sys.argv)
web = QWebView()
# QPrinter always prints too small, and I can't figure out why.
# Empirically, I can get dot-for-dot with this zoom factor:
# Printing to 1366 (with speaker notes), use zoom factor 1.24.
# Printing to 1024 to remove the speaker notes, use zoom factor 2.0.
# NOTE(review): the factor below is fixed at 2.0 and does not follow x_res,
# so going by the figures above a 1366 run would need it edited by hand
# -- confirm whether that is intended.
web.setZoomFactor(2.0)
# You can show the window, but it's not necessary:
#web.show()
# One printer object is shared by every page: PDF output, zero margins,
# with sizes given in device pixels.
printer = QPrinter(mode = QPrinter.ScreenResolution)
printer.setOutputFormat(QPrinter.PdfFormat)
printer.setPageMargins(0, 0, 0, 0, QPrinter.DevicePixel)
# This makes no difference either way:
# printer.setFullPage(False)
# This does random, bizarre things if you set it higher:
# printer.setResolution(100);
# Request a page of x_res by 768 device pixels.
# This, sadly, no longer works right.
# It's not a no-op, but it looks like QPrinter takes the nearest
# known paper size, or something idiotic like that.
printer.setPaperSize(QSizeF(x_res, 768), QPrinter.DevicePixel)
# printer.setPageSize(QPageSize(QSizeF(x_res, 768), QPageSize.Millimeter, "XGA"))
def makeMultipage(outfile, inlist) :
    '''Make a multi-page document from all the pages we've created.

    outfile is the path of the PDF to write.
    inlist is a list of paths of existing PDF files; only the first
    page of each one is copied, in list order.

    Every file opened here is closed again before returning, even if
    reading or writing fails part way through.
    '''
    output = PdfFileWriter()
    open_inputs = []
    try:
        for infile in inlist :
            # Keep each input open until the merged file has been written:
            # the page objects refer back to the stream they were read from.
            instream = open(infile, "rb")
            open_inputs.append(instream)
            page = PdfFileReader(instream).getPage(0)
            # Attempt to crop: this does nothing.
            # page.cropBox.lowerLeft=(0, 768)
            # page.cropBox.upperRight=(1024, 0)
            output.addPage(page)

        # finally, write "output" to the requested file
        with open(outfile, "wb") as outstream:
            output.write(outstream)
    finally:
        for instream in open_inputs:
            instream.close()

    print('Wrote all pages to %s' % outfile)
def print_next(ok=True) :
    '''Print the page that has just loaded, then start the load
    of the next page in the list. Called from loadFinished signal.

    ok is the success flag carried by loadFinished(bool). A failed load
    is reported on stderr but the page is still printed, so the numbering
    of the output files stays in step with the page list.

    After the last page has been printed, all the per-page PDFs are
    merged into all.pdf in outdir and the program exits.
    '''
    # Only pagelistindex is rebound here; the other module-level objects
    # (pagelist, pdflist, web, printer, outdir) are just read or mutated.
    global pagelistindex

    page = pagelist[pagelistindex]
    if not ok:
        sys.stderr.write("Warning: failed to load %s\n" % page)

    outputfile = os.path.join(outdir, "file%03d.pdf" % pagelistindex)
    printer.setOutputFileName(outputfile)
    pdflist.append(outputfile)
    web.print_(printer)
    print("Generated %s from %s" % (outputfile, page))

    pagelistindex += 1
    if pagelistindex >= len(pagelist) :
        makeMultipage(os.path.join(outdir, 'all.pdf'), pdflist)
        print("Exiting")
        QApplication.exit()
        # That doesn't always work, so hedge our bets:
        sys.exit(0)

    # Load the next URL in the list
    next_page = pagelist[pagelistindex]
    print("Loading %s" % next_page)
    web.load(QUrl(next_page))
# Hook up the page-finished handler before starting the first load, so
# that print_next is called for the first page as well as the later ones.
first_page = QUrl(pagelist[pagelistindex])
QObject.connect(web, SIGNAL("loadFinished(bool)"), print_next)
web.load(first_page)
# Run the Qt event loop and pass its return value on as the exit status.
exit_code = app.exec_()
sys.exit(exit_code)