# Render SVG to PDF

Some experiments on converting SVG (embedded in HTML, directly inside Jupyter cells) to PDF via https://github.com/deeplook/svglib and rendering the result with https://mozilla.github.io/pdf.js/. It turns out this could become a PDF widget in ipywidgets that shows an IFrame, which is not ideal, but quite usable for now.

In [None]:
import os
import io
import shutil
import zipfile
import tempfile
import http.server
import socketserver
from os.path import join, basename, dirname, splitext, exists, abspath
from multiprocessing import Process

import requests
from IPython.display import IFrame
from ipywidgets import HBox, VBox, HTML, Textarea, Layout, Button, Output
from svglib.svglib import svg2rlg

## Install pdf.js

In [None]:
# Download and unpack the pdf.js distribution once; skip the download when
# the target folder is already present.
if not exists('pdfjs-2.0.943-dist'):
    url = 'https://github.com/mozilla/pdf.js/releases/download/v2.0.943/pdfjs-2.0.943-dist.zip'
    response = requests.get(url, timeout=60)
    # Fail with a clear HTTP error instead of handing an error page to ZipFile.
    response.raise_for_status()
    z = response.content
    # Unpack straight from memory; the context manager closes the archive.
    with zipfile.ZipFile(io.BytesIO(z)) as zf:
        zf.extractall('pdfjs-2.0.943-dist')

## Smoke-test pdf.js in a simple IPython.display.IFrame

First, run `python3 -m http.server <port>` in the extracted pdf.js directory (you cannot run it inside Jupyter because it would block the kernel).

In [None]:
# test (disabled by default; needs a static file server on port 8000)
if 0:
    from IPython.display import display

    url = 'http://localhost:8000/web/viewer.html'  # ?file=myfile.pdf
    ifr = IFrame(src=url, width=1000, height=300)
    # A bare expression nested inside an `if` is not auto-displayed by
    # Jupyter, so render the frame explicitly.
    display(ifr)

## Utils

In [None]:
# Actually, we should not use this server, but let Jupyter itself serve
# the pdf.js files instead... if Jupyter doesn't block that... :(
# Maybe the classic notebook doesn't...

class WebServer:
    """
    A minimal webserver to serve static files running in its own process.

    Attributes:
        directory: Folder to serve; when None, the current working directory
            of the server process is served.
        port: TCP port to listen on.
        process: The ``multiprocessing.Process`` created by ``start()``, or
            None while the server has not been started.
    """
    def __init__(self, directory=None, port=8000):
        self.directory = directory
        self.port = port
        # Defined up front so terminate() is safe to call before start().
        self.process = None

    def start(self):
        """Launch ``run()`` in a child process and return immediately."""
        self.process = Process(target=self.run)
        self.process.start()

    def terminate(self):
        """Stop the server process; a no-op if it was never started."""
        if self.process is None:
            return
        self.process.terminate()
        # Reap the child so it does not linger as a zombie process.
        self.process.join(timeout=5)

    def run(self):
        """Serve ``directory`` on ``port`` forever (blocks the calling process)."""
        # Changing the directory is essential to keep the code below short.
        # It only affects the process executing run(); skipped when no
        # directory was given, because os.chdir(None) would raise.
        if self.directory is not None:
            os.chdir(self.directory)

        class ReusableTCPServer(socketserver.TCPServer):
            # Let a new server bind the port right after an earlier one on
            # the same port was terminated.
            allow_reuse_address = True

        Handler = http.server.SimpleHTTPRequestHandler
        with ReusableTCPServer(("", self.port), Handler) as httpd:
            print(f"serving {self.directory} on port", self.port)
            httpd.serve_forever()

In [None]:
class PDFFrame(IFrame):
    """
    An IFrame that shows a PDF file rendered by a local installation of pdf.js.

    See https://mozilla.github.io/pdf.js/
    """
    def __init__(self, *args, pdfjs_path=None, pdf_path=None, port=None, **kwargs):
        """
        Create an IFrame instance showing a PDF file rendered by pdf.js.

        This will copy the PDF file to be rendered into the static web folder
        first, under a unique new filename to prevent caching issues with pdf.js.

        The result is an IFrame object with a src attribute set to:

            http://localhost:{port}/web/viewer.html?file={name of the copy}

        Args:
            *args: Extra positional arguments passed to IFrame after the URL.
            pdfjs_path: Existing folder of the pdf.js distribution; the PDF
                is copied into its 'web' subfolder.
            pdf_path: Existing PDF file to show.
            port: Port of the webserver that serves ``pdfjs_path``.
            **kwargs: Extra keyword arguments passed to IFrame.

        Raises:
            AssertionError: If a path is missing or does not exist, or no
                port is given.

        Example:

        >>> ifr = PDFFrame(pdfjs_path='/Downloads/pdfjs-2.0.943-dist',
        ...                pdf_path='/my/file.pdf',
        ...                port=8000, width=400, height=300)  # doctest: +SKIP
        >>> ifr.src  # doctest: +SKIP
        'http://localhost:8000/web/viewer.html?file=file.k2j_x81c.pdf'

        The part between 'file.' and '.pdf' is random.
        """
        from urllib.parse import quote

        assert pdfjs_path and exists(pdfjs_path), f'pdf.js folder not found: {pdfjs_path!r}'
        assert pdf_path and exists(pdf_path), f'PDF file not found: {pdf_path!r}'
        assert port, 'a port number is required'

        server_url = f'http://localhost:{port}/web/viewer.html'  # FIXME: use simpler template

        # Copy the PDF file into pdf.js' 'web' folder for static files.
        prefix = splitext(basename(pdf_path))[0] + '.'
        web_dir = join(pdfjs_path, 'web')
        # Only a unique name is needed here: reserve it, close the handle
        # right away, then copy the PDF over the empty placeholder.
        with tempfile.NamedTemporaryFile(prefix=prefix, suffix='.pdf',
                                         dir=web_dir, delete=False) as placeholder:
            dst = placeholder.name
        shutil.copyfile(pdf_path, dst)

        # Quote the name so spaces or reserved characters survive the query.
        url = f'{server_url}?file={quote(basename(dst))}'
        # Pass the URL positionally; as a keyword it would collide with any
        # positional arguments (e.g. width, height) supplied by the caller.
        super().__init__(url, *args, **kwargs)

In [None]:
# test (disabled by default)
if 0:
    # Show a PDF located inside the pdf.js 'web' folder in a 500x500 frame.
    ifr = PDFFrame(
        width=500,
        height=500,
        port=8000,
        pdfjs_path='pdfjs-2.0.943-dist',
        pdf_path='pdfjs-2.0.943-dist/web/compressed.tracemonkey-pldi-09.pdf',
    )
    print(ifr.src)
    display(ifr)

In [None]:
class PDF(HTML):
    """
    An HTML ipywidget whose content is the HTML representation of a PDFFrame.
    """
    def __init__(self, *args, **kwargs):
        """
        Create the widget, optionally from a PDFFrame.

        If positional arguments are given, the first one must be a PDFFrame
        (or a subclass instance); it is replaced by its ``_repr_html_()``
        markup before everything is handed on to ``HTML``.

        Raises:
            AssertionError: If the first positional argument is not a PDFFrame.
        """
        if args:
            frame, *rest = args
            assert isinstance(frame, PDFFrame), 'first argument must be a PDFFrame'
            args = [frame._repr_html_(), *rest]
        super().__init__(*args, **kwargs)

In [None]:
def convert_svg(svg: str, path: str) -> bytes:
    """
    Convert some SVG source to PDF and write it to a file with given path.

    Args:
        svg: The SVG source code.
        path: Where to store the PDF. The drawing is saved in 'pdf' format
            under the stem of this path, so it is expected to end in '.pdf'
            (presumably the file is written as '<stem>.pdf' -- confirm).

    Returns:
        The content of the PDF file as bytes.

    Raises:
        ValueError: If no drawing could be created from the SVG source.
    """
    # Convert the argument; this used to read the global `svg_code` instead.
    drawing = svg2rlg(io.BytesIO(svg.encode('utf8')))
    if drawing is None:
        # NOTE(review): svg2rlg appears to return None for input it cannot
        # parse -- confirm against svglib.
        raise ValueError('could not create a drawing from the given SVG source')

    # Use an absolute location so that writing and reading back agree even
    # when a relative path was given.
    target = os.path.abspath(path)
    fn_root = os.path.splitext(os.path.basename(target))[0]
    out_dir = os.path.dirname(target)
    drawing.save(formats=['pdf'], fnRoot=fn_root, outDir=out_dir)

    with open(target, 'rb') as pdf_file:
        pdf_code = pdf_file.read()
    return pdf_code  # FIXME: maybe also return drawing's width/height

## Start webserver

In [None]:
# Port and folder for the webserver; the folder name matches the one the
# pdf.js archive is extracted into.
port = 8008
directory = os.getcwd() + '/pdfjs-2.0.943-dist'

In [None]:
# Serve the pdf.js folder from a separate process, then carry on.
w = WebServer(port=port, directory=directory)
w.start()

## Show SVG-PDF UI

In [None]:
def update(*args):
    """
    Refresh all panes of the SVG-PDF UI from the SVG textarea.

    Registered as the click handler of the 'Render' button; any arguments
    passed in are ignored. Reads the module-level widgets and settings and
    rebinds the module-level ``svg_code``.
    """
    global svg_code

    # Mirror the edited SVG source into the global and the SVG preview.
    svg_code = svg_ta.value
    svg_ht.value = svg_code

    # Convert the SVG source to a PDF file inside pdf.js' 'web' folder.
    pdf_name = 'svg2pdf.pdf'
    pdf_path = f'{os.getcwd()}/pdfjs-2.0.943-dist/web/{pdf_name}'
    pdf_code = convert_svg(svg_ta.value, pdf_path)

    # Show the PDF bytes (as their Python string form) in the PDF textarea.
    pdf_ta.value = str(pdf_code)

    # Build a new frame for the PDF and wrap its markup in an HTML widget.
    frame = PDFFrame(pdfjs_path=directory, pdf_path=pdf_path, port=port,
                     width=400, height=300)
    pdf_ht = HTML(frame._repr_html_(), description='PDF', layout=layout)

    # Swap the last widget of the bottom row for the new PDF view.
    bottom_row = ui.children[-1]
    bottom_row.children = (*bottom_row.children[:-1], pdf_ht)

In [None]:
# One shared size for all four panes.
layout = Layout(width='400px', height='300px')

# Initial SVG source offered for editing.
svg_code = '''\
<svg height="100" width="100">
  <circle cx="50" cy="50" r="40" stroke="black"
          stroke-width="3" fill="yellow" />
  Sorry, your browser does not support inline SVG.  
</svg>
'''

# SVG panes: editable source and its rendering.
svg_ta = Textarea(svg_code, description='SVG', layout=layout)
svg_ht = HTML('Not converted yet', description='SVG', layout=layout)

# PDF panes: read-only code and its rendering.
pdf_ta = Textarea('Not converted yet', description='PDF', layout=layout, disabled=True)
pdf_ht = HTML('Not converted yet', description='PDF', layout=layout)

# Button that triggers the conversion.
btn = Button(description='Render')
btn.on_click(update)

# Stack the rows: button, code panes, visual panes; then show the UI.
row0 = HBox([btn])
row1 = HBox([svg_ta, pdf_ta])
row2 = HBox([svg_ht, pdf_ht])
ui = VBox([row0, row1, row2], layout=Layout(width='100%', height='600px'))
ui

In [None]:
# w.process.terminate()

## Test pdf.js with custom webserver in IFrame subclass

In [None]:
# Disabled by default. Note that enabling it rebinds the module-level
# `directory` and `w`.
if 0:
    from IPython.display import display

    directory = 'pdfjs-2.0.943-dist/'
    w = WebServer(directory=directory, port=8003)
    w.start()

    pdf_path = '/my/file.pdf'
    # Use the port this server listens on, not the module-level `port`.
    pdffr = PDFFrame(pdfjs_path=directory, pdf_path=pdf_path, port=w.port,
                     width=400, height=600)
    h = HTML(pdffr._repr_html_())
    # A bare expression nested inside an `if` is not auto-displayed by
    # Jupyter, so render the box explicitly.
    display(HBox([h, h]))

    # NOTE(review): stopping the server right away may cut it off before the
    # browser has loaded the frames -- confirm, or terminate in a later cell.
    w.terminate()