-
Notifications
You must be signed in to change notification settings - Fork 3.1k
/
pdf.py
301 lines (232 loc) · 7.67 KB
/
pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
# License: MIT. See LICENSE
import contextlib
import io
import os
import re
import subprocess
import pdfkit
from bs4 import BeautifulSoup
from packaging.version import Version
from pypdf import PdfReader, PdfWriter
import frappe
from frappe import _
from frappe.utils import scrub_urls
from frappe.utils.jinja_globals import bundled_asset, is_rtl
# Substrings looked for in the message of an OSError raised while generating
# a PDF. A match is treated as a content-loading failure rather than a
# generic error.
PDF_CONTENT_ERRORS = [
    "ContentNotFoundError",
    "ContentOperationNotPermittedError",
    "UnknownContentError",
    "RemoteHostClosedError",
]
def pdf_header_html(soup, head, content, styles, html_id, css):
    """Render the header/footer template and return the resulting HTML.

    ``head``, ``content``, ``styles``, ``html_id`` and ``css`` are passed to
    ``templates/print_formats/pdf_header_footer.html`` unchanged, together
    with the current language and the layout direction. ``soup`` is accepted
    for signature compatibility but is not used here.
    """
    context = {
        "head": head,
        "content": content,
        "styles": styles,
        "html_id": html_id,
        "css": css,
        "lang": frappe.local.lang,
        "layout_direction": "rtl" if is_rtl() else "ltr",
    }
    return frappe.render_template("templates/print_formats/pdf_header_footer.html", context)
def pdf_body_html(template, args, **kwargs):
    """Render ``template`` with ``args`` and return the result.

    A ``len`` filter is made available to the template. Any exception raised
    while rendering is reported through ``frappe.throw`` as a
    ``frappe.PrintFormatError`` that names the template line the failure
    most likely came from. Extra keyword arguments are accepted and ignored.
    """
    try:
        rendered = template.render(args, filters={"len": len})
    except Exception as exc:
        # The line number is a best-effort guess taken from the traceback.
        message = _("Error in print format on line {0}: {1}").format(
            _guess_template_error_line_number(template), exc
        )
        frappe.throw(message, exc=frappe.PrintFormatError, title=_("Print Format Error"))
    else:
        return rendered
def _guess_template_error_line_number(template) -> int | None:
"""Guess line on which exception occurred from current traceback."""
with contextlib.suppress(Exception):
import sys
import traceback
_, _, tb = sys.exc_info()
for frame in reversed(traceback.extract_tb(tb)):
if template.filename in frame.filename:
return frame.lineno
def pdf_footer_html(soup, head, content, styles, html_id, css):
    """Render the footer HTML.

    The footer uses the same rendering as the header, so this simply
    forwards every argument to ``pdf_header_html``.
    """
    return pdf_header_html(
        soup=soup,
        head=head,
        content=content,
        styles=styles,
        html_id=html_id,
        css=css,
    )
def get_pdf(html, options=None, output: PdfWriter | None = None):
    """Render ``html`` to a PDF with wkhtmltopdf (through pdfkit).

    Args:
        html: Markup to render. It is passed through ``scrub_urls`` and
            ``prepare_options`` before being handed to pdfkit.
        options: Optional dict of wkhtmltopdf options. If it contains a
            ``"password"`` key, the PDF returned as bytes is encrypted with
            that value.
        output: Optional ``PdfWriter``. When given, the generated pages are
            appended to it and it is returned; no encryption is applied in
            that case.

    Returns:
        ``output`` when one was supplied, otherwise the PDF file contents
        produced by ``get_file_data_from_writer``.

    Raises:
        OSError: re-raised unchanged when its message matches none of
            ``PDF_CONTENT_ERRORS``.
        The exception raised by ``frappe.throw`` when the message does match
        one of ``PDF_CONTENT_ERRORS``.
    """
    html = scrub_urls(html)
    html, options = prepare_options(html, options)

    # JavaScript and local file access are always switched off for rendering.
    options.update({"disable-javascript": "", "disable-local-file-access": ""})
    if Version(get_wkhtmltopdf_version()) > Version("0.12.3"):
        options.update({"disable-smart-shrinking": ""})

    try:
        # No output filename is given, so no file is actually created.
        filedata = pdfkit.from_string(html, options=options, verbose=True)
        # Wrap the result in an in-memory binary stream for PdfReader.
        reader = PdfReader(io.BytesIO(filedata))
    except OSError as e:
        if not any(error in str(e) for error in PDF_CONTENT_ERRORS):
            raise
        # Reaching this point means no PdfReader was created, so there is
        # nothing that could be appended or returned: report the failure.
        # NOTE(review): this dumps the whole document and every option
        # (including a "password" entry, if present) to stdout; consider
        # routing it through a logger instead.
        print(html, options)
        frappe.throw(_("PDF generation failed because of broken image links"))
    finally:
        # Temporary header/footer/cookie-jar files are removed whether or not
        # generation succeeded.
        cleanup(options)

    if output:
        output.append_pages_from_reader(reader)
        return output

    writer = PdfWriter()
    writer.append_pages_from_reader(reader)
    if "password" in options:
        writer.encrypt(options["password"])

    return get_file_data_from_writer(writer)
def get_file_data_from_writer(writer_obj):
    """Return everything ``writer_obj`` writes, as a ``bytes`` object.

    ``writer_obj.write`` is called with an in-memory binary stream and the
    full contents of that stream are returned.
    """
    buffer = io.BytesIO()
    writer_obj.write(buffer)
    # getvalue() yields the whole buffer regardless of the current position.
    return buffer.getvalue()
def prepare_options(html, options):
    """Build the wkhtmltopdf option dict for ``html``.

    Args:
        html: Markup that will be rendered; it is also scanned for options
            by ``read_options_from_html``.
        options: Optional dict of caller-supplied options. It is copied, so
            the caller's dict is left untouched.

    Returns:
        A ``(html, options)`` tuple: the HTML as returned by
        ``read_options_from_html`` and a new dict holding the caller's
        options, the fixed defaults below, options read from the HTML,
        cookie options and the page size settings.
    """
    # Work on a copy: the entries added below (temporary file paths, cookie
    # jar, margins) are specific to this call and must not leak into a dict
    # the caller may reuse for another document.
    options = dict(options) if options else {}

    options.update(
        {
            "print-media-type": None,
            "background": None,
            "images": None,
            "quiet": None,
            # 'no-outline': None,
            "encoding": "UTF-8",
            # 'load-error-handling': 'ignore'
        }
    )

    # Side margins default to 15mm unless the caller supplied a value.
    for side in ("margin-right", "margin-left"):
        if not options.get(side):
            options[side] = "15mm"

    # Options found in the HTML take precedence over everything set so far.
    html, html_options = read_options_from_html(html)
    options.update(html_options or {})

    # cookies
    options.update(get_cookie_options())

    # page size: explicit option, then Print Settings, then A4
    pdf_page_size = (
        options.get("page-size") or frappe.db.get_single_value("Print Settings", "pdf_page_size") or "A4"
    )

    if pdf_page_size == "Custom":
        # A custom size is described by explicit height and width instead.
        options["page-height"] = options.get("page-height") or frappe.db.get_single_value(
            "Print Settings", "pdf_page_height"
        )
        options["page-width"] = options.get("page-width") or frappe.db.get_single_value(
            "Print Settings", "pdf_page_width"
        )
    else:
        options["page-size"] = pdf_page_size

    return html, options
def get_cookie_options():
    """Return the ``cookie-jar`` option carrying the current session id.

    When there is a session with a ``sid`` and ``frappe.local`` has a
    ``request``, a temporary cookie-jar file containing the session id
    (restricted to the host domain) is written and its path is returned
    under the ``"cookie-jar"`` key. Otherwise an empty dict is returned.
    The file is later removed by ``cleanup``.
    """
    options = {}
    if frappe.session and frappe.session.sid and hasattr(frappe.local, "request"):
        import tempfile

        # Remove port from request.host
        # https://werkzeug.palletsprojects.com/en/0.16.x/wrappers/#werkzeug.wrappers.BaseRequest.host
        domain = frappe.utils.get_host_name().split(":", 1)[0]

        # Use wkhtmltopdf's cookie-jar feature to set cookies and restrict
        # them to host domain. The jar holds a live session id, so it is
        # created with mkstemp: the file is readable and writable only by
        # the creating user and lives in the platform's temp directory
        # rather than a hard-coded /tmp.
        fd, cookiejar = tempfile.mkstemp(prefix="frappe-pdf-", suffix=".jar")
        with os.fdopen(fd, "w") as f:
            f.write(f"sid={frappe.session.sid}; Domain={domain};\n")

        options["cookie-jar"] = cookiejar

    return options
def read_options_from_html(html):
    """Extract PDF options from ``html``.

    The markup is parsed so the header/footer can be pulled out by
    ``prepare_header_footer`` and PDF-only visibility classes applied by
    ``toggle_visible_pdf``. Layout attributes are then searched for in the
    raw ``html`` text: for each attribute, the last ``.print-format`` rule
    that sets it to a value ending in ``mm;`` wins.

    Returns:
        A ``(html, options)`` tuple: the serialized parsed document and the
        collected options.
    """
    soup = BeautifulSoup(html, "html5lib")

    options = {}
    options.update(prepare_header_footer(soup))
    toggle_visible_pdf(soup)

    # use regex instead of soup-parser
    layout_attrs = (
        "margin-top",
        "margin-bottom",
        "margin-left",
        "margin-right",
        "page-size",
        "header-spacing",
        "orientation",
        "page-width",
        "page-height",
    )
    for attr in layout_attrs:
        # A failure while reading one attribute must not affect the others.
        with contextlib.suppress(Exception):
            found = re.findall(
                r"(\.print-format)([\S|\s][^}]*?)(" + str(attr) + r":)(.+)(mm;)", html
            )
            if found:
                # Fourth group of the last match is the value without "mm;".
                options[attr] = str(found[-1][3]).strip()

    return str(soup), options
def prepare_header_footer(soup: BeautifulSoup):
    """Extract header and footer from ``soup`` and write them to temp files.

    For each of ``header-html`` and ``footer-html``: if an element with that
    id exists, every element with the id is removed from ``soup``, the first
    one is rendered through the last registered ``pdf_header_html`` /
    ``pdf_footer_html`` hook and the result is written to a temporary file
    whose path is stored under the id. If no such element exists, the
    matching top/bottom margin is set to ``15mm`` instead.

    Returns:
        The dict of options built as described above.
    """
    # Hook that renders each section, and the margin used when it is absent.
    render_hooks = {"header-html": "pdf_header_html", "footer-html": "pdf_footer_html"}
    fallback_margins = {"header-html": "margin-top", "footer-html": "margin-bottom"}

    options = {}

    head = soup.find("head").contents
    styles = soup.find_all("style")

    print_css = bundled_asset("print.bundle.css").lstrip("/")
    css = frappe.read_file(os.path.join(frappe.local.sites_path, print_css))

    # extract header and footer
    for html_id, hook_name in render_hooks.items():
        content = soup.find(id=html_id)
        if not content:
            options[fallback_margins[html_id]] = "15mm"
            continue

        content = content.extract()
        # `header/footer-html` are extracted, rendered as html
        # and passed in wkhtmltopdf options (as '--header/footer-html')
        # Remove instances of them from main content for render_template
        for duplicate in soup.find_all(id=html_id):
            duplicate.extract()

        toggle_visible_pdf(content)

        renderers = frappe.get_hooks(hook_name)
        render = frappe.get_attr(renderers[-1])
        html = render(
            soup=soup,
            head=head,
            content=content,
            styles=styles,
            html_id=html_id,
            css=css,
        )

        # create temp file
        fname = os.path.join("/tmp", f"frappe-pdf-{frappe.generate_hash()}.html")
        with open(fname, "wb") as f:
            f.write(html.encode("utf-8"))

        # {"header-html": "/tmp/frappe-pdf-random.html"}
        options[html_id] = fname

    return options
def cleanup(options):
    """Delete the files referenced by the temporary-file options.

    The ``header-html``, ``footer-html`` and ``cookie-jar`` entries of
    ``options`` are inspected; each one that is set and points to an
    existing path is removed. Other entries are ignored.
    """
    for key in ("header-html", "footer-html", "cookie-jar"):
        path = options.get(key)
        if not path:
            continue
        if os.path.exists(path):
            os.remove(path)
def toggle_visible_pdf(soup):
    """Apply the PDF-only visibility classes to ``soup`` in place.

    Elements with the ``visible-pdf`` class lose that class (which unhides
    them); elements with the ``hidden-pdf`` class are removed from the
    document.
    """
    for shown in soup.find_all(attrs={"class": "visible-pdf"}):
        # remove visible-pdf class to unhide
        css_classes = shown.attrs["class"]
        css_classes.remove("visible-pdf")

    for hidden in soup.find_all(attrs={"class": "hidden-pdf"}):
        # remove tag from html
        hidden.extract()
def get_wkhtmltopdf_version():
    """Return the wkhtmltopdf version string, or ``"0"`` if it is unknown.

    The value is read from the ``wkhtmltopdf_version`` cache entry. When the
    cache has nothing, ``wkhtmltopdf --version`` is run, the second
    whitespace-separated token of its output is taken as the version and
    stored in the cache. Any failure along the way is ignored and results
    in ``"0"``.
    """
    wkhtmltopdf_version = frappe.cache.hget("wkhtmltopdf_version", None)

    if not wkhtmltopdf_version:
        try:
            res = subprocess.check_output(["wkhtmltopdf", "--version"])
            # Split on any whitespace so that a version printed as the last
            # token does not keep its trailing newline, and repeated spaces
            # do not yield an empty token.
            wkhtmltopdf_version = res.decode("utf-8").split()[1]
            frappe.cache.hset("wkhtmltopdf_version", None, wkhtmltopdf_version)
        except Exception:
            # Best effort: a missing binary, unexpected output or a cache
            # failure must not break PDF generation; fall back below.
            pass

    return wkhtmltopdf_version or "0"