diff --git a/.flake8 b/.flake8 index a71e268..69018e6 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ [flake8] application-import-names = promail,tests -ignore = E203,W503,E402,S107,S101 +ignore = E203,W503,E402,S107,S101,S403,S303,S301 max-line-length = 88 select = B,B9,BLK,C,D,DAR,E,F,I,S,W docstring-convention = google diff --git a/pyproject.toml b/pyproject.toml index 14830df..a4b9529 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,15 @@ [tool.poetry] name = "promail" -version = "0.4.3" +version = "0.5.0" authors = ["Antoine Wood "] -description = "The Python Email Automation Framework" +description = "Promail: The Python Email Automation Framework" license = "GNU" readme = "README.md" homepage = "https://github.com/trafire/promail" repository = "https://github.com/trafire/promail" keywords = ["promail", "email", "automation"] include = ["src/promail/.credentials/gmail_credentials.json"] +documentation = "https://promail.readthedocs.io" [tool.poetry.dependencies] python = "^3.8" diff --git a/src/promail/clients/email_manager.py b/src/promail/clients/email_manager.py index 959fbaf..8b84c46 100644 --- a/src/promail/clients/email_manager.py +++ b/src/promail/clients/email_manager.py @@ -3,17 +3,27 @@ import mimetypes import os from email.message import EmailMessage -from typing import List, Optional +from typing import Callable, List, Optional from promail.core.embedded_attachments import EmbeddedAttachments +from promail.core.messages.messages import Message +from promail.filters.email_filter import EmailFilter -class OutBoundManager(abc.ABC): +class EmailManager: + """Super class inherited by OutBoundInbound manager.""" + + def __init__(self, account): + """Initializes email manager.""" + self._account = account + + +class OutBoundManager(abc.ABC, EmailManager): """Outbound Mail class template.""" def __init__(self, account): """Initializes OutBoundManager.""" - self._account = account + super(OutBoundManager, self).__init__(account) def send_email( self, @@ -69,7 +79,7 @@ def create_message( plaintext: str = "", embedded_attachments: Optional[List[EmbeddedAttachments]] = None, attachements: Optional[list] = None, - ): + ) -> EmailMessage: """Create Email Message.""" if attachements is None: attachements = [] @@ -97,12 +107,73 @@ def create_message( class InBoundManager(abc.ABC): - """Outbound Mail class template.""" + """InBound Mail class template.""" - def retrieve_last_items(self: object, max_items: int) -> list: + def __init__(self, account): + """Initializes Inbound Email manager.""" + self._registered_functions: dict = {} + self._last_email = None + self._filter_class = None + super(InBoundManager, self).__init__(account) + + @property + def registered_functions(self) -> dict: + """Get Dictionary of (key) filters and (value) registered functions.""" + return self._registered_functions + + def retrieve_last_items(self: object, max_items: int = 100) -> List[Message]: """Get a list of last n items received in inbox. Args: max_items: The Maximum number of items to return + + Raises: + NotImplementedError: If not implemented by subclass. """ - pass + raise NotImplementedError(__name__ + " not Implemented") + + def _process_filter_messages( + self, + email_filter: EmailFilter, + page_size: int = 100, + page_token: Optional[str] = None, + ): + """Queries Email Server for new messages. + + That match filter requisites passes each matched message + to their registered functions. + + Args: + email_filter: Email Filter Object, must be a key + of self._registered_functions. + page_size: Number of emails to pull per query, + max number platform dependent (GMAIL: 500). + page_token: Pagenation Token + (may not be used on all platforms). + + Raises: + NotImplementedError: If not implemented by subclass. + """ + raise NotImplementedError + + def process(self, page_size: int = 100) -> None: + """Process all filters.""" + for email_filter in self._registered_functions: + self._process_filter_messages( + email_filter, page_size=page_size, page_token=None + ) + + def register(self, **filter_args): + """Registers a listener function.""" + + def decorator(func: Callable): + def wrapper(email: EmailMessage): + func(email) + + f = self._filter_class(**filter_args) + + if f not in self._registered_functions: + self._registered_functions[f] = set() + self._registered_functions[f].add(wrapper) + + return decorator diff --git a/src/promail/clients/gmail.py b/src/promail/clients/gmail.py index ee60a30..1bf6648 100644 --- a/src/promail/clients/gmail.py +++ b/src/promail/clients/gmail.py @@ -11,8 +11,13 @@ from googleapiclient.discovery import build # type: ignore from googleapiclient.errors import HttpError # type: ignore -from promail.clients.email_manager import InBoundManager, OutBoundManager +from promail.clients.email_manager import ( + InBoundManager, + OutBoundManager, +) from promail.core.embedded_attachments import EmbeddedAttachments +from promail.core.messages.messages import Message +from promail.filters.gmail_filter import GmailFilter class GmailClient(OutBoundManager, InBoundManager): @@ -51,6 +56,7 @@ def __init__( """ super(GmailClient, self).__init__(account) + self._filter_class = GmailFilter sanitized_account: str = "".join(x for x in account if x.isalnum()) self._token_path: str = token_path or os.path.join( os.getcwd(), @@ -64,7 +70,7 @@ def __init__( ".credentials", "gmail_credentials.json", ) - self.login() + self.service = self.login() self._clear_token = clear_token def login(self): @@ -137,7 +143,75 @@ def send_email( except HttpError as error: print("An error occurred: %s" % error) + # inbound + + def _process_filter_messages(self, email_filter, page_size=100, page_token=None): + results = ( + self.service.users() + .messages() + .list( + userId="me", + maxResults=page_size, + q=email_filter.get_filter_string(), + pageToken=page_token, + ) + .execute() + ) + + messages = email_filter.filter_results(results["messages"]) + + for message in messages: + current_message = ( + self.service.users() + .messages() + .get(userId="me", id=message["id"], format="raw", metadataHeaders=None) + .execute() + ) + email_message = Message(current_message) + for func in self._registered_functions[email_filter]: + func(email_message) + email_filter.add_processed(message["id"]) + + next_page = results.get("nextPageToken") + if next_page: + self.process_filter_items(email_filter, page_size=100, page_token=next_page) + + # gmail specific functionality + def mailboxes(self): + """Labels that we can filter by.""" + return [ + label["id"] + for label in self.service.users() + .labels() + .list(userId="me") + .execute()["labels"] + ] + def __exit__(self, exc_type, exc_val, exc_tb): """If clear token flag has been set will delete token on exit.""" if self._clear_token: os.remove(self._token_path) + + +# +# client = GmailClient("antoinewood@gmail.com") +# +# +# @client.register( +# name="search", +# sender=("antoine",), +# newer_than="4d", +# version="10", +# run_completed=True, +# ) +# def print_subjects1(email): +# if email.cc: +# print(dir(email.cc)) +# print("2", type(email.cc), email.attachments) +# +# +# client.process(100) +# # +# # @client.register(name="search", sender=("antoine",), newer_than="100d") +# # def print_subjects2(email): +# # print("1", email.subject) diff --git a/src/promail/core/attachements/__init__.py b/src/promail/core/attachements/__init__.py new file mode 100644 index 0000000..c942239 --- /dev/null +++ b/src/promail/core/attachements/__init__.py @@ -0,0 +1 @@ +"""Module for email Attachments.""" diff --git a/src/promail/core/attachements/attachments.py b/src/promail/core/attachements/attachments.py new file mode 100644 index 0000000..3e8ec36 --- /dev/null +++ b/src/promail/core/attachements/attachments.py @@ -0,0 +1,39 @@ +"""Email Attachments.""" +import email +from email.message import EmailMessage +from io import BytesIO + + +class Attachments: + """Email Attachment Reader.""" + + manager = email.contentmanager.raw_data_manager + + def __init__(self, email_attachment: EmailMessage): + """Initializes Email Attachment.""" + self.email_attachment = email_attachment + + @property + def filename(self): + """Get filename.""" + return self.email_attachment.get_filename() + + def save_file(self, path): + """Saves file to path provided.""" + with open(path, "wb") as file: + file.write(self.email_attachment.get_content(content_manager=self.manager)) + + def get_data(self) -> BytesIO: + """Get file data as an inmemory as a BytesIO file-like object.""" + obj = BytesIO() + obj.write(self.email_attachment.get_content(content_manager=self.manager)) + obj.seek(0) + return obj + + def __str__(self): + """Get string representation.""" + return self.filename + + def __repr__(self): + """Get repr representation.""" + return f"Attachments({self.__str__()})" diff --git a/src/promail/core/messages/__init__.py b/src/promail/core/messages/__init__.py new file mode 100644 index 0000000..6d155ec --- /dev/null +++ b/src/promail/core/messages/__init__.py @@ -0,0 +1 @@ +"""Module for reading received emails.""" diff --git a/src/promail/core/messages/messages.py b/src/promail/core/messages/messages.py new file mode 100644 index 0000000..2e29e63 --- /dev/null +++ b/src/promail/core/messages/messages.py @@ -0,0 +1,84 @@ +"""Email Message Reader.""" +import base64 +import email +from email.message import EmailMessage +from email.policy import default + +from promail.core.attachements.attachments import Attachments + + +class Message: + """Email Message reader.""" + + def __init__(self, msg: dict) -> None: + """Initalises Message object. + + Args: + msg: email message data containing id + """ + self.msg = email.message_from_bytes( + base64.urlsafe_b64decode(msg["raw"]), _class=EmailMessage, policy=default + ) + self._attachments = None + + @property + def html_body(self) -> str: + """Get HTML Body of email.""" + return self.msg.get_body(preferencelist=["html"]) # type: ignore + + @property + def plain_text(self): + """Get Plain text body of email.""" + return self.msg.get_body(preferencelist=["plain"]) # type: ignore + + @property + def sender(self) -> str: + """Get sender of email.""" + return self.msg.get("from") + + @property + def cc(self) -> str: + """Get emails cc'd.""" + return self.msg.get("cc") + + @property + def bcc(self) -> str: + """Get emails ccc'd.""" + return self.msg.get("bcc") + + @property + def message_id(self) -> str: + """Get Message ID of email.""" + return self.msg.get("message-id") + + @property + def to(self) -> str: + """Get to field of email.""" + return self.msg.get("to") + + @property + def subject(self) -> str: + """Get Subject of email.""" + return self.msg.get("subject") + + @property + def date(self): + """Get Date of Email.""" + return self.msg.get("date") + + @property + def attachments(self): + """Get Email Attachments.""" + if self._attachments is None: + self._attachments = {} + for email_message_attachment in self.msg.iter_attachments(): + print(type(email_message_attachment)) + if email_message_attachment.is_attachment(): + self._attachments[ + email_message_attachment.get_filename() + ] = Attachments(email_message_attachment) + return self._attachments + + def __str__(self) -> str: + """String representation.""" + return self.subject diff --git a/src/promail/filters/__init__.py b/src/promail/filters/__init__.py new file mode 100644 index 0000000..19d71d4 --- /dev/null +++ b/src/promail/filters/__init__.py @@ -0,0 +1 @@ +"""Email Filters.""" diff --git a/src/promail/filters/email_filter.py b/src/promail/filters/email_filter.py new file mode 100644 index 0000000..12289e3 --- /dev/null +++ b/src/promail/filters/email_filter.py @@ -0,0 +1,284 @@ +"""Email Filter.""" + +import abc +import hashlib +import os.path +import pickle +from datetime import datetime +from typing import Optional + + +class EmailFilter(abc.ABC): + """Email Filter Generates a query string used to query the email backend. + + Email filter is used by the email client to store + which emails have been run with which filters. + The Filter uses `name` and `version` to uniquely identify itself. + Queries based on: https://seosly.com/gmail-search-operators/ + """ + + def __init__( + self, + name: str, + run_completed: bool = False, + around: Optional[dict] = None, + attachment: Optional[bool] = None, + bcc: Optional[tuple] = None, + category: Optional[str] = None, + cc: Optional[tuple] = None, + filename: Optional[tuple] = None, + folder: Optional[tuple] = None, + important: Optional[bool] = None, + label: Optional[tuple] = None, + keyword: Optional[tuple] = None, + newer_than: Optional[str] = None, + not_sender: Optional[tuple] = None, + older_than: Optional[str] = None, + phrase: Optional[tuple] = None, + sender: Optional[tuple] = None, + size: Optional[int] = None, + size_larger: Optional[str] = None, + size_smaller: Optional[str] = None, + sent_after: Optional[datetime] = None, + sent_before: Optional[datetime] = None, + starred: Optional[bool] = None, + to: Optional[tuple] = None, + read: Optional[bool] = None, + version: Optional[str] = None, + ): + """Initializes email filter. + + Args: + name: User defined name of filter. + The name along with version is used to identify + the filter internally to check which messages + have been already run with a particular filter. + run_completed: If True it will include messages + that have already been processed. + around: Will check body text for first term, + within apart words of second term + requires dictionary in form of + {"first_term": "Term", "apart": 20, "second_term": "Other"} + attachment: True will filter only emails with attachments, + False without attachments, None will show all. + bcc: Will look for emails that include terms in bcc, + expects tuple of strings if more than one term + provided will search based on "OR" + category: filter on category + cc: filter on cc + filename: filter on filename + folder: filter on folder + important: filter on if important + label: filter on label + keyword: filter on keyword + newer_than: filter on newer than + not_sender: filter our senders + older_than: filter older than + phrase: filter by including phrase + sender: filter on sender + size: Minimum size of email in bytes + size_larger: filter on size + size_smaller: filter size + sent_after: filter on date sent + sent_before: filter on date sent + starred: filter if starred + to: filter on to fieled + read: filter on if read + version: Version will control whether + """ + self.name = name + self._newer_than = newer_than + self._not_sender = not_sender + self._older_than = older_than + self._phrase = phrase + self._size = size + self._size_larger = size_larger + self._size_smaller = size_smaller + self._sent_after = sent_after + self._sent_before = sent_before + self._starred = starred + self._keyword = keyword + self._important = important + self._filename = filename + self._category = category + self._bcc = bcc + self._around = around + self._attachment = attachment + self._label = label + self._folder = folder + self._cc = cc + self._to = to + self._read = read + self._sender = sender + self._version = version + + self.save_folder = "processed_emails" + + self._validate() + if run_completed: + self.processed = set() + else: + self.processed = self.load_processed_ids() + + def _validate(self): + """Validates inputs.""" + pass + + def filter_results(self, messages): + """Removes messages in self.processed.""" + raise NotImplementedError + + @property + def processed_filename(self): + """Relative path to pickle file.""" + return f"{self.save_folder}/{hash(self)}.bin" + + def __hash__(self) -> int: + """Hash based on name, version.""" + fields = ( + self.name, + self._version, + ) + return int(hashlib.md5(str(fields).encode()).hexdigest(), 16) + + def load_processed_ids(self): + """Loads A Set of ids that have been processed with this filter.""" + try: + with open(self.processed_filename, "rb") as file: + return pickle.load(file) + except FileNotFoundError: + return set() + + def add_processed(self, email_id: str) -> None: + """Add Message to list of processed Messages.""" + print(email_id) + self.processed.add(email_id) + if not os.path.exists(self.save_folder): + os.makedirs(self.save_folder) + with open(self.processed_filename, "wb") as file: + pickle.dump(self.processed, file) + + def get_filter_string(self) -> str: + """Creates string to query email based on parameters. + + Returns: Query String. + + Raises: + NotImplementedError: method not implemented. + """ + raise NotImplementedError(__name__ + " not implemented") + + @property + def sender(self): + """Search query based on sender.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def read(self): + """Search query based on read.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def newer_than(self): + """Search query based on newer_than.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def not_sender(self): + """Search query based on not_sender.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def older_than(self): + """Search query based on older_than.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def phrase(self): + """Search query based on phrase.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def size(self): + """Search query based on size.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def size_larger(self): + """Search query based on size_larger.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def size_smaller(self): + """Search query based on size_smaller.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def sent_after(self): + """Search query based on sent_after.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def sent_before(self): + """Search query based on sent_before.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def starred(self): + """Search query based on starred.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def keyword(self): + """Search query based on keyword.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def important(self): + """Search query based on important.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def filename(self): + """Search query based on filename.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def category(self): + """Search query based on category.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def bcc(self): + """Search query based on bcc.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def around(self): + """Search query based on around.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def attachment(self): + """Search query based on attachment.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def label(self): + """Search query based on label.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def folder(self): + """Search query based on folder.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def cc(self): + """Search query based on cc.""" + raise NotImplementedError(__name__ + " not implemented") + + @property + def to(self): + """Search query based on to.""" + raise NotImplementedError(__name__ + " not implemented") diff --git a/src/promail/filters/gmail_filter.py b/src/promail/filters/gmail_filter.py new file mode 100644 index 0000000..41c7c47 --- /dev/null +++ b/src/promail/filters/gmail_filter.py @@ -0,0 +1,271 @@ +"""Email Filter implementation for Gmail.""" +import re +from typing import Optional + +from promail.filters.email_filter import EmailFilter + + +class GmailFilter(EmailFilter): + """Email Filter Generates a query string used to query the email backend. + + Email filter is used by the email client to store + which emails have been run with which filters. + The Filter uses `name` and `version` to uniquely identify itself. + Queries based on: https://seosly.com/gmail-search-operators/ + """ + + TIME_FRAMES = { + "d": "Day", + "m": "Month", + "y": "Year", + } + + def _validate(self) -> None: + fields = "newer_than", "older_than" + for field in fields: + value = getattr(self, f"_{field}") + if value is None: + continue + value = value.strip().lower() + if len(value) < 2: + raise IndexError( + f"{value} is not valid for{field} is expected to be either None " + f"or a string representing the number of a time period. " + f"example: 6d for 6 days. valid options are: {self.TIME_FRAMES}" + ) + + elif value[-1] not in self.TIME_FRAMES.keys(): + raise ValueError( + f"{value} is not valid for{field}," + f"string must end in one of the following values: " + f"{list(self.TIME_FRAMES.keys())} " + ) + elif not value[:-1].isdigit(): + raise ValueError( + f"{value} is not valid for{field}, " + f"expecting value to begin with a number" + ) + + @staticmethod + def _join_tuple(term: str, data: Optional[tuple], seperator: str): + if data is None: + return "" + if seperator == "OR": + return " OR ".join([f"{term}:{d}" for d in data]) + elif seperator == " ": + return f"{term}:(" + seperator.join([f"{d}" for d in data]) + ")" + + @staticmethod + def _get_boalean(term: str, value: tuple, data: Optional[bool]): + if data is None: + return "" + + elif data: + return f"{term}:{value[1]}" + + else: + return f"{term}:{value[2]}" + + @staticmethod + def _get_string(term: str, data: Optional[str]): + if data is None: + return "" + return f"{term}:{data}" + + @staticmethod + def _get_date(term, date): + if date is None: + return "" + return f"{term}:{date.strftime('%G/%m/%d')}" + + @staticmethod + def _get_toggle(term, data): + """Format toggle.""" + if data: + return f"{term}:{data}" + return "" + + @property + def sender(self): + """Search query based on sender.""" + return self._join_tuple("from", self._sender, seperator="OR") + + @property + def read(self): + """Search query based on read.""" + return self._get_boalean("read", ["read", "unread"], self._read) + + @property + def newer_than(self): + """Search query based on newer_than.""" + return self._get_string("newer_than", self._newer_than) + + @property + def not_sender(self): + """Search query based on not_sender.""" + return self._join_tuple("NOT from", self._not_sender, seperator="OR") + + @property + def older_than(self): + """Search query based on older_than.""" + return self._get_string("older_than", self._older_than) + + @property + def phrase(self): + """Search query based on phrase.""" + if self._phrase is None: + return "" + return self._join_tuple( + "", tuple([f'"{phrase}"' for phrase in self._phrase]), seperator="AND" + ) + + @property + def size(self): + """Search query based on size.""" + return "" if self._size is None else f"size:{self._size}" + + @property + def size_larger(self): + """Search query based on size_larger.""" + return self._get_string("larger", self._size_larger) + + @property + def size_smaller(self): + """Search query based on size_smaller.""" + return self._get_string("larger", self._size_smaller) + + @property + def sent_after(self): + """Search query based on sent_after.""" + return self._get_date("after", self._sent_after) + + @property + def sent_before(self): + """Search query based on sent_before.""" + return self._get_date("before", self._sent_after) + + @property + def starred(self): + """Search query based on starred.""" + data = True if self._starred else None + return self._get_toggle("starred", data) + + @property + def keyword(self): + """Search query based on keyword.""" + if self._keyword is None: + return "" + else: + return "(" + " ".join(self.keyword) + ")" + + @property + def important(self): + """Search query based on important.""" + if self._important is None: + return "" + return self._get_toggle("is", "important") + + @property + def filename(self): + """Search query based on filename.""" + return self._join_tuple("filename", self._filename, "OR") + + @property + def category(self): + """Search query based on category.""" + return self._get_string("category", self._category) + + @property + def bcc(self): + """Search query based on bcc.""" + return self._join_tuple("bcc", self._bcc, "AND") + + @property + def around(self): + """Search query based on around.""" + if self._around is None: + return "" + return ( + f"{self._around['first_term']}" + "AROUND {self._around['apart']} " + "{self._around['second_term']}" + ) + + @property + def attachment(self): + """Search query based on attachment.""" + return self._get_toggle("has", "attachment" if self._attachment else None) + + @property + def label(self): + """Search query based on label.""" + return self._get_string("label", self._label) + + @property + def folder(self): + """Search query based on folder.""" + return self._get_string("folder", self._folder) + + @property + def cc(self): + """Search query based on cc.""" + return self._join_tuple("cc", self._cc, "AND") + + @property + def to(self): + """Search query based on to.""" + return self._join_tuple("to", self._to, "AND") + + def get_filter_string(self) -> str: + """Creates string to query email based on parameters.""" + return re.sub( + " +", + " ", + " ".join( + ( + self.around, + self.attachment, + self.bcc, + self.category, + self.cc, + self.filename, + self.folder, + self.important, + self.keyword, + self.label, + self.newer_than, + self.not_sender, + self.older_than, + self.phrase, + self.read, + self.sender, + self.sent_after, + self.sent_before, + self.size, + self.size_larger, + self.size_smaller, + self.starred, + self.to, + ) + ).strip(), + ) + + def filter_results(self, messages): + """Removes messages in self.processed.""" + return filter(lambda msg: msg["id"] not in self.processed, messages) + + +# a = GmailFilter( +# load_processed=True, +# sender=("Antoinewood@gmail.com", "sue"), +# older_than="99d", +# sent_after=datetime.now(), +# ) +# +# print(a.get_filter_string()) +# # l = list(set(a.__dict__.keys())) +# # l.sort() +# # for value in l: +# # print( +# # f"self.{value.strip('_')}", end = ", " +# # )