forked from willwade/emailJobParser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
EMailDownloader.py
47 lines (39 loc) · 1.85 KB
/
EMailDownloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#Fri Jul 23 12:51:57 IST 2010
# pygmail.py - A Python Library For Gmail
# http://segfault.in/2010/07/playing-with-python-and-gmail-part-1/
import pygmail
import EMail
import datetime
import dateutil.parser
import NHS_JOBS_CONSTANTS as N
def getEmails(since=None, accountName=None, accountPasswd=None, emailFolder=N.FOLDER, sender=N.SENDER):
    """Yield an EMail.EMail for each message from `sender` received after `since`.

    This is a generator: the login and the mailbox query only run once the
    caller starts iterating.

    Parameters:
        since         -- datetime or None. When given, it is passed to
                         get_mails_from() and any message whose received date
                         is not strictly later than it is skipped. A value
                         without tzinfo is treated as UTC.
        accountName   -- account to log in with; defaults to N.ACCOUNT_NAME.
        accountPasswd -- password for the account; defaults to N.ACCOUNT_PASSWD.
        emailFolder   -- folder handed to get_mails_from().
        sender        -- sender handed to get_mails_from(); also stored on
                         every yielded EMail.

    Yields:
        EMail.EMail built from the sender, the account name (as receiver),
        the subject, the body after cleanupEmailText(), and the unmodified
        date response string.

    Raises:
        IndexError if a date response does not contain the text 'Date:'.
    """
    if accountName is None:
        accountName = N.ACCOUNT_NAME
    if accountPasswd is None:
        accountPasswd = N.ACCOUNT_PASSWD

    g = pygmail.pygmail()
    g.login(accountName, accountPasswd)
    # The query receives `since` exactly as the caller supplied it; the
    # timezone normalisation below only affects the local comparison.
    ids = g.get_mails_from(sender, emailFolder, since=since)

    # Aware and naive datetimes cannot be compared with each other, so a
    # naive `since` is re-parsed with an explicit UTC offset.
    if since is not None and not since.tzinfo:
        since = dateutil.parser.parse(str(since) + ' +0000')

    for mailId in ids:
        _, response = g.get_date_received_from_id(mailId)
        dateReceivedStr = response[0][1]
        # Keep only the text that follows the 'Date:' label.
        dateText = dateReceivedStr.split('Date:')[1].strip()
        dateReceived = dateutil.parser.parse(dateText)
        if not dateReceived.tzinfo:
            # No offset in the header: assume UTC, same as for `since`.
            dateReceived = dateutil.parser.parse(dateText + ' +0000')
        if since is not None and dateReceived <= since:
            continue  #We have already downloaded this email!

        _, response = g.get_subject_from_id(mailId)
        subject = response[0][1]
        _, response = g.get_body_from_id(mailId)
        body = response[0][1]
        yield EMail.EMail(sender=sender, receiver=accountName, subject=subject,
                          body=cleanupEmailText(body), dateReceivedStr=dateReceivedStr)
def cleanupEmailText(txt):
    "Tidies the raw text of an email: unifies line endings and strips '=' artefacts"
    # Applied strictly in this order: line endings must be unified before
    # the '=' continuation can be matched, and joining continued lines can
    # itself bring a '=20' together.
    substitutions = (
        ('\r\n', '\n'),  # carriage return + newline pairs become a plain newline
        ('\r', '\n'),    # any carriage return still left becomes a newline
        ("=\n", ""),     # join lines ending with the continuation character '='
        ('=20', ''),     # drop the blank space symbol =20 entirely
    )
    for old, new in substitutions:
        txt = txt.replace(old, new)
    return txt