In [38]:
from xhtml2pdf import pisa
from pdf2image import convert_from_bytes
import io
import base64
from PIL import Image  # For resizing

In [39]:
def convert_html_to_pdf_bytes(html_string):
    """Render an HTML document to PDF entirely in memory.

    Args:
        html_string: HTML markup handed to ``pisa.CreatePDF`` as its source.

    Returns:
        The generated PDF as ``bytes``, or ``None`` when the status object
        returned by ``pisa.CreatePDF`` reports an error (truthy ``err``).
    """
    buffer = io.BytesIO()  # in-memory destination; nothing is written to disk
    status = pisa.CreatePDF(html_string, dest=buffer)

    # A truthy ``err`` means the conversion failed; signal that with None.
    return None if status.err else buffer.getvalue()

In [69]:
def get_compressed_thumbnail_base64(pdf_bytes, thumbnail_size=(200, 500), quality=60):
    """Return a base64-encoded JPEG thumbnail of the first page of a PDF.

    Args:
        pdf_bytes: Raw PDF data, e.g. the result of
            ``convert_html_to_pdf_bytes``. ``None`` or empty input is accepted
            and yields ``None``.
        thumbnail_size: ``(max_width, max_height)`` bounding box passed to the
            page image's ``thumbnail`` method.
        quality: JPEG quality setting passed to the image's ``save`` method.

    Returns:
        The JPEG data encoded as a base64 ``str``, or ``None`` when there is no
        PDF data or ``convert_from_bytes`` produced no page image.

    Raises:
        Whatever ``convert_from_bytes`` or the image's ``save`` raises for
        non-empty but invalid input; those errors are not swallowed here.
    """
    # convert_html_to_pdf_bytes reports failure as None; mirror that contract
    # instead of handing None/empty data to the PDF renderer, which would raise.
    if not pdf_bytes:
        return None

    # Only the first page is rendered; the rest of the document is not needed.
    pages = convert_from_bytes(pdf_bytes, first_page=1, last_page=1)
    if not pages:
        return None

    first_page = pages[0]
    # thumbnail() shrinks the image in place to fit within thumbnail_size.
    first_page.thumbnail(thumbnail_size)

    jpeg_buffer = io.BytesIO()
    first_page.save(jpeg_buffer, format="JPEG", quality=quality, optimize=True)
    return base64.b64encode(jpeg_buffer.getvalue()).decode("utf-8")

In [70]:
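# Sample HTML document (the Umo Editor introduction page) used as input for the PDF and thumbnail conversion below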
html = '''
<html>
  <head>
      <title>PDF Example</title>
  </head>

  <body>
      <img style="margin-top: 30px;margin-bottom: 30px;" vnode="true" type="image" id="oztu348e" src="https://editor.umodoc.com/images/logo.svg" width="381" height="71" left="0" top="0" draggable="false" rotatable="false" equalproportion="true" flipx="false" flipy="false" uploaded="false" error="true" previewtype="image"><h1 style="text-align: start" id="td3bxo" data-toc-id="td3bxo"><b>Introduction dee</b></h1><p style="text-align: start; line-height: 1.75rem">Umo Editor is open-source document editor based on Vue3 and Tiptap. Umo Editor provides comprehensive document editing capabilities and AI creation features, supports pagination, supports Markdown syntax, offers basic rich text editing functions, allows for the insertion of various node types in multiple formats, provides a variety of practical tools, and supports setting page styles. It also supports exporting in various formats, printing and print preview, block-level document editing, adding custom extensions, multi-language settings, and a dark theme.</p><img vnode="true" type="image" src="https://editor.umodoc.com/images/umo-editor@2x.png" width="553" height="421" left="0" top="0" draggable="false" rotatable="false" equalproportion="true" flipx="false" flipy="false" uploaded="false" error="true" previewtype="image"><p style="text-align: start; line-height: 1.75rem">As a standalone Vue3 plugin, Umo Editor can be easily integrated into any Vue3 project with zero configuration. 
For non-Vue3 projects, you can embed Umo Editor using an Iframe.</p><p style="text-align: start; line-height: 1.75rem"><a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://editor.umodoc.com/en/docs">Documentation</a> | <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://editor.umodoc.com/cn/docs">中文文档</a> | <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://demo.umodoc.com/editor?lang=en-US">Live Demo</a> | <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://github.com/umodoc/editor">GitHub</a> | <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://www.npmjs.com/package/@umoteam/editor">NPM</a></p><h2 style="text-align: start; line-height: 2" id="grljqh" data-toc-id="grljqh"><b>Online Experience</b></h2><p style="text-align: start; line-height: 1.75rem">Visit <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://demo.umodoc.com/editor?pane=no&amp;lang=en">Playground</a> for a fast experience.</p><h2 style="text-align: start; line-height: 2" id="ozr47n" data-toc-id="ozr47n"><b>Documentation</b></h2><p style="text-align: start; line-height: 1.75rem">Please visit <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://editor.umodoc.com/en/docs">Documentation</a>.</p><h2 style="text-align: start; line-height: 2" id="m4sld9" data-toc-id="m4sld9"><b>Design Philosophy</b></h2><p style="text-align: start; line-height: 1.75rem">The birth of 
Umo Editor aims to address the complexity of document editing in web applications, providing open-source and free powerful editing capabilities and pagination modes similar to Microsoft Word for web projects, while maintaining the convenience of web applications. Whether it's government and enterprise information management systems, academic research writing, team document collaboration, knowledge base management, or personal note organization, Umo Editor can be your capable assistant.</p><h2 style="text-align: start; line-height: 2" id="m6qra8" data-toc-id="m6qra8"><b>Open Source Advantages</b></h2><ul style="list-style-type: disc"><li><p style="line-height: 1.75rem"><b>Free to Use</b>: As an open-source project, Umo Editor is freely available to all developers under the <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://github.com/umo-editor/umo-editor/blob/main/LICENSE">MIT License</a>, with no copyright concerns.</p></li><li><p style="line-height: 1.75rem"><b>Continuous Updates</b>: Umo Editor will continue to iterate, constantly optimizing features and enhancing user experience.</p></li><li><p style="line-height: 1.75rem"><b>Customizable Development</b>: Open source means greater flexibility. 
Developers can customize the development according to project needs to create a unique document editor.</p></li></ul><h2 style="text-align: start; line-height: 2" id="x68on1" data-toc-id="x68on1"><b>Core Features</b></h2><ul style="list-style-type: disc"><li><p>Zero-config out-of-the-box usability</p></li><li><p>Pagination mode similar to that in Microsoft Word</p></li><li><p>Lightweight</p></li><li><p>WYSIWYG (What You See Is What You Get) throughout the process</p></li><li><p>Rich-text editing capabilities</p></li><li><p>Markdown syntax support</p></li><li><p>Integrated practical tools</p></li><li><p>Presentation mode</p></li><li><p>Document export and sharing</p></li><li><p>Page settings</p></li><li><p>AI Assistant</p></li><li><p>Support for printing and print preview</p></li><li><p>Support for custom plugins</p></li><li><p>Shortcut key support</p></li><li><p>Theme customization</p></li><li><p>Multi-language settings</p></li><li><p>Dark mode support</p></li></ul><p style="text-align: start; line-height: 1.75rem">For more detailed introductions, see <a target="_blank" rel="noopener noreferrer nofollow" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="http://localhost:9002/en/docs/features">Core Features</a>.</p><h2 style="text-align: start; line-height: 2" id="ru4iz5" data-toc-id="ru4iz5"><b>Browser Support</b></h2><table style="min-width: 75px"><colgroup><col style="min-width: 25px"><col style="min-width: 25px"><col style="min-width: 25px"></colgroup><tbody><tr><th colspan="1" rowspan="1"><p><b>Browser</b></p></th><th colspan="1" rowspan="1"><p><b>Version</b></p></th><th colspan="1" rowspan="1"><p><b>Support</b></p></th></tr><tr><td colspan="1" rowspan="1"><p>Google Chrome</p></td><td colspan="1" rowspan="1"><p>Latest</p></td><td colspan="1" rowspan="1"><p>✅</p></td></tr><tr><td colspan="1" rowspan="1"><p>Firefox</p></td><td colspan="1" rowspan="1"><p>Latest</p></td><td colspan="1" 
rowspan="1"><p>✅</p></td></tr><tr><td colspan="1" rowspan="1"><p>Safari</p></td><td colspan="1" rowspan="1"><p>Latest</p></td><td colspan="1" rowspan="1"><p>✅</p></td></tr><tr><td colspan="1" rowspan="1"><p>Microsoft Edge</p></td><td colspan="1" rowspan="1"><p>Latest</p></td><td colspan="1" rowspan="1"><p>✅</p></td></tr><tr><td colspan="1" rowspan="1"><p>Internet Explorer (IE)</p></td><td colspan="1" rowspan="1"><p>All</p></td><td colspan="1" rowspan="1"><p>❌</p></td></tr></tbody></table><h2 style="text-align: start; line-height: 2" id="mtrzq3" data-toc-id="mtrzq3"><b>Node.js Version Support</b></h2><p style="text-align: start; line-height: 1.75rem">Node.js 18.0.0 or above.</p><h2 style="text-align: start; line-height: 2" id="hdab7y" data-toc-id="hdab7y"><b>Join the Community</b></h2><p style="text-align: start; line-height: 1.75rem">We encourage users to join the Umo Editor open-source community and participate in the development and improvement of the product. Whether submitting bug reports, feature requests, or code contributions, all are valuable parts of our community.</p><p style="text-align: start; line-height: 1.75rem">You can submit issues or suggestions via <a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/umodoc/editor/discussions">https://github.com/umodoc/editor/discussions</a>.</p><p style="text-align: start; line-height: 1.75rem">Or submit bug reports via <a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/umodoc/editor/issues">https://github.com/umodoc/editor/issues</a>.</p><h2 style="text-align: start; line-height: 2" id="o5shhk" data-toc-id="o5shhk"><b>Contribute Code</b></h2><p style="line-height: 1.75rem">The development of Umo Editor could not have been possible without the support of the community. Below is the list of contributors who have contributed code to Umo Editor. 
We extend our thanks to them:</p><ul style="list-style-type: disc"><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/umodoc">Umo Team</a>: 👨‍💻 Core developers</p></li><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/Cassielxd">Cassielxd</a>: 💪🏻 Implemented pagination and many important features for Umo Editor</p></li><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/Goldziher">Na'aman Hirschfeld</a>:💪🏻 Enhanced TypeScript support, added tests, and provided a stronger foundation for Umo Editor's development</p></li><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/SerRashin">SerRashin</a>: 🛠️ Added Russian language support for Umo Editor</p></li><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/ChenErik">ChenErik</a>: 🛠️ Contributed code to Umo Editor</p></li><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/china-wangxu">china-wangxu</a>: 🛠️ Contributed code to Umo Editor</p></li><li><p><a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/Cassielxd">xuzhenjun130</a>: 🛠️ Contributed code to Umo Editor</p></li></ul><p style="line-height: 1.75rem">We welcome all forms of contributions, including but not limited to submitting bug reports, feature requests, and code contributions.</p><h2 style="text-align: start; line-height: 2" id="nvu0c6" data-toc-id="nvu0c6"><b>Contact Us</b></h2><p style="text-align: start; line-height: 1.75rem">If you have any questions or suggestions, please contact us through the following methods. 
Before that, it is recommended to read this document carefully to understand how to use Umo Editor.</p><ul style="list-style-type: disc"><li><p>Feedback: <a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/umodoc/editor/issues">https://github.com/umodoc/editor/issues</a></p></li><li><p>Community: <a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/umodoc/editor/discussions">https://github.com/umodoc/editor/discussions</a></p></li><li><p>Email: <a target="_blank" rel="noopener noreferrer nofollow" href="mailto:contact@umoteam.com">contact@umoteam.com</a></p></li></ul><h2 style="text-align: start; line-height: 2" id="kyks3s" data-toc-id="kyks3s"><b>Support Us</b></h2><p style="text-align: start; line-height: 1.75rem">If you find Umo Editor useful, please consider supporting us in the following ways:</p><ul style="list-style-type: disc"><li><p>⭐ Star the <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://github.com/umodoc/editor">Umo Editor Repository</a> to show your support for the project.</p></li><li><p>🔗 If you use Umo Editor in your project, please add a link to <a target="_blank" rel="noopener noreferrer nofollow" href="https://github.com/umodoc/editor">https://github.com/umodoc/editor</a>.</p></li></ul><h2 style="text-align: start; line-height: 2" id="h2xfpo" data-toc-id="h2xfpo"><b>Open Source License</b></h2><p style="text-align: start; line-height: 1.75rem">Umo Editor is licensed under the <a target="_blank" rel="noreferrer" class="_text-primary-600 _underline _decoration-from-font [text-underline-position:from-font]" href="https://github.com/umodoc/editor/blob/main/LICENSE">MIT License</a>, allowing you to use, modify, and distribute the software freely.</p><p style="text-align: start; line-height: 1.75rem">This document is published under the <a target="_blank" rel="noreferrer" class="_text-primary-600 
_underline _decoration-from-font [text-underline-position:from-font]" href="https://creativecommons.org/licenses/by-nc-sa/4.0">CC BY-NC-SA 4.0 DEED License</a>.</p>
  </body>
</html>
'''

In [71]:
# Run the pipeline: HTML -> PDF bytes -> compressed first-page thumbnail (base64).
pdf_bytes = convert_html_to_pdf_bytes(html)

# Always bind the result name, even when PDF generation fails, so later cells
# never hit a NameError or silently reuse a stale value from an earlier run.
compressed_thumbnail_base64 = (
    get_compressed_thumbnail_base64(pdf_bytes) if pdf_bytes else None
)

if not pdf_bytes:
    print("Failed to generate PDF.")
elif not compressed_thumbnail_base64:
    print("Failed to generate compressed thumbnail preview.")
else:
    # Preview only the first 50 characters; the full string is very long.
    print(f"Base64 Thumbnail (Compressed): {compressed_thumbnail_base64[:50]}...")



Base64 Thumbnail (Compressed): /9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAA0JCgsKCA0LCgsODg...


In [72]:
# Bare expression: echoes the complete base64 string as this cell's output
# (the previous cell prints only its first 50 characters).
compressed_thumbnail_base64

'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAA0JCgsKCA0LCgsODg0PEyAVExISEyccHhcgLikxMC4pLSwzOko+MzZGNywtQFdBRkxOUlNSMj5aYVpQYEpRUk//2wBDAQ4ODhMREyYVFSZPNS01T09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT0//wAARCAEbAMgDASIAAhEBAxEB/8QAGwABAAIDAQEAAAAAAAAAAAAAAAIEAwUGAQf/xABGEAABAwIEAQcIBgkEAQUAAAABAAIRAwQFEiExEyJBUWFxsdEGFBU1VHORoSMkMkKBkzM0UlNyksHh8BYlQ2KiY7LS4vH/xAAZAQEAAwEBAAAAAAAAAAAAAAAAAQIDBAX/xAAtEQEAAQMBBwMDBQEBAAAAAAAAAQIDEVEEEhMhMjNSFDFxQYGRBSJCYeEj8P/aAAwDAQACEQMRAD8A+nIioYziXoyzFYUxUc54Y1pMK1FM11RTHuiZiIzK+i5Cv5SYiWFzW0KQA5mOd8zotqMZGH4RaV8RL6lau3NyGjt/qFvVslymI/tnF2mW6RajCMcGK3VSlStnU2MbmLnOnn0ELbrGu3Vbndq92lNUVRmBFiZc29Spw2V6bn/sh4J+CyPe2mwve4NaNyTACriU5eoo06lOqzPSe17elpkKSgEWNlei+oabK1Nzxu0OBI/BeOurdji19ek1w3BeAQp3Z0RmGVFh88tfaaP5gWbfZJiY905ERFAIiICIiAiIgIiICIiAuX8rKhq3tratI5DTUMuA6huR0LqFosU8nW4lfPuX3bmEgNDQwGIHaunZa6KLm9XOGd2JmnEOcNk6oMpq2lME6l1dm34LL5T3dGve0aVtUa+jQpBoLTIn/IWz/wBHUvbn/ljxV/DfJ61sw/iOFxmP3mDRd07VZiYrznH0w54tVzGMK3kZb5MPq3BGtV8DsH95W8vK4trOtXP/ABs