# Epistolary

This tool is designed to "print" emails to a PDF file (one thread per file), with a blank (ruled) page after each email. 
You can write a reply to the email on the blank page, and Epistolary will convert your handwriting to text and send it as a reply to the email.

It was originally designed to be used with the [reMarkable](https://remarkable.com/) tablet, which is a great device for reading and annotating PDFs, but it should also work with standalone PDFs, other tablet devices, or scanned documents.

## Architecture

The tool comprises three main components:

* `MailboxManager`: A class that manages the mailbox, and provides methods to get the next email to be printed, and to send a reply to an email.
* `DocumentManager`: A class that manages the PDF document library.
* `EpistolaryOrchestrator`: A class that manages the interaction between the `MailboxManager` and the `DocumentManager`, and provides OCR and main entry point functionality.



In [1]:
# from epistolary import EpistolaryOrchestrator
from epistolary.mailbox_manager import SMTPIMAPMailboxManager
from epistolary.document_manager import FilesystemDocumentManager
from epistolary.text_extractor.tesseract_text_extractor import TesseractTextExtractor

In [2]:
from epistolary.document_manager import DocumentManager
from epistolary.mailbox_manager import MailboxManager
from epistolary.text_extractor import TextExtractor
import fitz
import io
from fitz import Document
from epistolary.types import DocumentID, EmailID


class EpistolaryOrchestrator:
    """Coordinate a mailbox manager and a document manager.

    Emails fetched from the mailbox are rendered to PDF, given a trailing
    ruled page, and stored in the document manager under the email's ID.
    Documents can also be turned back into emails and sent.
    """

    def __init__(
        self,
        mailbox_manager: MailboxManager,
        document_manager: DocumentManager,
    ):
        """Initialize the orchestrator.

        Arguments:
            mailbox_manager: The mailbox manager to use.
            document_manager: The document manager to use.

        """
        self.mailbox_manager = mailbox_manager
        self.document_manager = document_manager

    def refresh_document_mailbox(self):
        """Synchronize the document library with the mailbox.

        Fetches up to 10 emails, deletes every stored document whose ID is
        not among them, and uploads every fetched email that does not yet
        have a document (document IDs are expected to equal email IDs).
        """
        new_emails = self.mailbox_manager.get_emails(limit=10)

        # Snapshot the listing before deleting so that removals cannot
        # disturb the iteration if the manager hands back a live view.
        for document_id in list(self.document_manager.list_documents()):
            if document_id not in new_emails:
                self.document_manager.delete_document(document_id)

        # Only the email IDs (the mapping's keys) are needed here.
        for eid in new_emails:
            # Skip emails that already have a document
            # (the document ID should be the same as the email ID).
            if self.document_manager.has_document(DocumentID(eid)):
                continue
            self.upload_email_by_id(eid)

    @staticmethod
    def _render_email_html(sender, subject, date, html_body, text_body) -> str:
        """Build the HTML markup that is laid out into the PDF.

        The header fields and the plain-text body are HTML-escaped so that
        characters such as the angle brackets in ``Name <addr@host>`` are
        rendered literally instead of being parsed as markup. An HTML body,
        when present, is inserted unchanged and takes precedence over the
        plain-text body.

        Arguments:
            sender: The sender of the email.
            subject: The subject line.
            date: The date of the email (converted with ``str``).
            html_body: The HTML body, or a falsy value if there is none.
            text_body: The plain-text body; may be ``None``.

        Returns:
            The HTML source for the email.

        """
        from html import escape

        def _esc(value) -> str:
            # quote=False: only &, < and > need escaping in element content.
            return escape(str(value), quote=False)

        if html_body:
            body = html_body
        else:
            # A missing plain-text body becomes an empty body rather than
            # raising AttributeError on None.
            body = _esc(text_body or "").replace("\n", "<br />")

        return f"""
        <p><b>{_esc(sender)}</b></p>
        <p><b>{_esc(subject)}</b></p>
        <p><b>{_esc(date)}</b></p>
        <br />
        {body}
        """

    def _email_to_document(self, email_id: EmailID) -> io.BytesIO:
        """Create a document by reflowing the text of an email.

        Arguments:
            email_id: The ID of the email to render to PDF.

        Returns:
            An in-memory PDF, positioned at offset 0.

        """
        email = self.mailbox_manager.get_email(email_id)
        markup = self._render_email_html(
            email.from_,
            email.subject,
            email.date,
            email.html_body,
            email.text_body,
        )

        # Lay the HTML out on US-letter pages using the library "fitz":
        pagebox = fitz.paper_rect("letter")
        story = fitz.Story(markup)
        page_with_margins = pagebox + (36, 36, -36, -36)  # 0.5in margins

        # Create in-memory PDF, adding pages until the story reports that
        # no content is left to place:
        pdfbytes = io.BytesIO()
        writer = fitz.DocumentWriter(pdfbytes)
        more = True
        while more:
            device = writer.begin_page(pagebox)
            more, _ = story.place(page_with_margins)
            story.draw(device)
            writer.end_page()
        writer.close()

        pdfbytes.seek(0)
        return pdfbytes

    def upload_email_by_id(self, email_id: EmailID) -> DocumentID:
        """Upload an email to the document manager.

        Arguments:
            email_id: The ID of the email to upload.

        Returns:
            The ID of the document.

        """
        # Create a document from the subject and text and then append a page
        # for the user to write on
        document_bytes = self._email_to_document(email_id)
        document = fitz.Document(stream=document_bytes.read(), filetype="pdf")

        document = self.document_manager.append_ruled_page_to_document(document)
        # Put the document into the document manager, keyed by the email ID:
        document_id = self.document_manager.put_document(document, email_id)
        return document_id

    def send_document_by_id(self, document_id: DocumentID, to: str) -> bool:
        """Send a document to an email address.

        Arguments:
            document_id: The ID of the document to send.
            to: The recipient address.

        Returns:
            Whatever the mailbox manager's ``send_message`` returns.

        """
        # Get the document:
        document = self.document_manager.get_document(document_id)
        # Create an email from the document:
        subject, text = self.document_manager.create_email_from_document(document)
        # Send the email:
        return self.mailbox_manager.send_message(to, subject, text)

In [16]:
# import getpass
# pw = getpass.getpass()

In [18]:
# Connect to the mailbox. `pw` must already be defined in the session (the
# getpass cell that would set it is commented out above).
# NOTE(review): the arguments look like IMAP host/port, account address,
# password, SMTP host/port — confirm against SMTPIMAPMailboxManager.
SM = SMTPIMAPMailboxManager(
    "imap.zoho.com", 993, "jordan@matelsky.com", pw, "smtp.zoho.com", 587
)

# Send a test message. The first argument (the recipient, going by how
# send_message is called with `to` elsewhere in this notebook) is blank here.
SM.send_message(
    "",
    "Re: This is a test!",
    "Using the Epistolary library AGAIN in the same thread."
)

True

In [20]:
import pathlib

# Create a document manager rooted at the local ./demo-fsdm directory.
# NOTE(review): the commented-out mkdir suggests the directory has to exist
# beforehand — confirm against FilesystemDocumentManager.
# !mkdir demo-fsdm
fsm = FilesystemDocumentManager(pathlib.Path("./demo-fsdm"))
# Show the documents currently known to the manager (the cell's output).
fsm.list_documents()
# Scratch code from earlier experiments, kept for reference:
# doc = fsm.get_document("test-msg")
# doc2 = fsm.append_ruled_page_to_document(doc)
# fsm.put_document(doc2, "test-msg")

# TesseractTextExtractor().extract_text_from_page(list(doc.pages())[-1])

[]

In [21]:
# Wire the mailbox manager (SM) and the filesystem document manager (fsm)
# together in an orchestrator.
EO = EpistolaryOrchestrator(SM, fsm)

In [22]:
# Sync the document library with the mailbox: documents whose IDs are not
# among the fetched emails are deleted, and fetched emails without a document
# are rendered to PDF and uploaded. The recorded output below shows this call
# raising a ValueError in the captured run.
EO.refresh_document_mailbox()

ValueError: too many values to unpack (expected 2)

In [27]:
import fitz

