/
ExportTransactionsFromGmailToCsv.py
159 lines (147 loc) · 5.4 KB
/
ExportTransactionsFromGmailToCsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# This script parses emails in Gmail to find financial transactions
# and exports them to a .csv-file.
#
# Instructions
# - easy_install beautifulsoup4
# - Add Gmail username, password and a financial account name
# - (modify parsers to suit your needs)
# - Run the script!
#
# Author: Runar Ovesen Hjerpbakk - http://hjerpbakk.com
import imaplib
import email
import csv
import os
import locale
from bs4 import BeautifulSoup
user = ''
password = ''
account = ''
csv_path = os.path.expanduser('transactions.csv')
csv_text_encoding = 'iso-8859-1'
category_column = 3
amount_column = 4
no = 'no_NO'
en = 'en_US'
def archive_email(imap, uid):
imap.uid('STORE', uid, '+FLAGS', r'(\Seen)')
imap.uid('COPY', uid, '[Gmail]/All Mail')
imap.uid('STORE', uid, '+FLAGS', '\\Deleted')
imap.expunge()
def create_negative_amount(amount):
# Remove trailing 'kr' and convert to float
global current_locale
if ('.' in amount):
locale.setlocale(locale.LC_ALL, en)
else:
locale.setlocale(locale.LC_ALL, no)
value = locale.atof(amount[:-2])
return '%.2f' % -value
def get_itunes_values(column, text):
if column == category_column:
if text == 'App' or text == 'iOS App' or text == 'In App Purchase':
return 'iTunes > Apps'
if text == 'Song' or text == 'Playlist':
return 'iTunes > Music'
if text == 'Book':
return 'iTunes > Books'
if text == 'Subscription Renewal' or text == 'Tone' or text == 'Ringtone' or text == 'Init. Subscription':
return 'iTunes > Other'
if text == 'Film (HD)' or text == 'Film' or text == 'Video':
return 'iTunes > Movies'
if column == amount_column:
return create_negative_amount(text)
return text
def parse_itunes_transactions(mail_body):
soup = BeautifulSoup(mail_body)
receipt = soup.findChildren('table')[2]
date = list(receipt.findChildren('td')[2].stripped_strings)[3]
items = []
number_of_categories = 5
i = 0
transaction_table = soup.findChildren('table')[4]
transactions = transaction_table.findChildren(['tr'])
for transaction in transactions:
cells = transaction.findChildren('td')
row_values = [account]
for cell in cells:
values = list(cell.stripped_strings)
if len(values) > 0:
text = values[0].encode(csv_text_encoding)
if (i > 0):
row_values.append(get_itunes_values(len(row_values), text))
else:
row_values.append(text)
if len(row_values) == number_of_categories:
if i > 0:
row_values.append(date)
items.append(row_values)
i += 1
return items
def get_decoded_email_body(message_body):
msg = email.message_from_string(message_body)
text = ""
if msg.is_multipart():
html = None
for part in msg.get_payload():
if part.get_content_charset() is None:
# We cannot know the character set, so return decoded "something"
text = part.get_payload(decode=True)
continue
charset = part.get_content_charset()
if part.get_content_type() == 'text/plain':
text = unicode(part.get_payload(decode=True), str(charset), "ignore").encode('utf8', 'replace')
if part.get_content_type() == 'text/html':
html = unicode(part.get_payload(decode=True), str(charset), "ignore").encode('utf8', 'replace')
if html is not None:
return html.strip()
else:
return text.strip()
else:
text = unicode(msg.get_payload(decode=True), msg.get_content_charset(), 'ignore').encode('utf8', 'replace')
return text.strip()
def get_transactions_and_archive(search_filter, parse_transactions):
transactions = []
imap = imaplib.IMAP4_SSL("imap.gmail.com", 993)
imap.login(user, password)
imap.select('Inbox', readonly = False)
typ, data = imap.uid('search', None, search_filter)
try:
uids = data[0].split()
for uid in uids:
typ, msg_data = imap.uid('fetch', uid, '(RFC822)')
for response_part in msg_data:
if isinstance(response_part, tuple):
body = get_decoded_email_body(response_part[1])
transactions.extend(parse_itunes_transactions(body))
for uid in uids:
archive_email(imap, uid)
finally:
try:
imap.close()
except:
pass
imap.logout()
return transactions
def get_csv(csv_file):
return csv.writer(csv_file, quotechar='"', quoting=csv.QUOTE_ALL)
def create_csv_if_needed():
if (os.path.exists(csv_path)):
return
csv_file = open(csv_path, 'wb')
csv_writer = get_csv(csv_file)
csv_writer.writerow(['Account','Description', 'Payee', 'Category', 'Amount', 'Date'])
csv_file.close()
def write_transactions_to_csv(transactions):
csv_file = open(csv_path, 'ab')
csv_writer = get_csv(csv_file)
for transaction in transactions:
csv_writer.writerow(transaction)
csv_file.close()
if __name__ == "__main__":
create_csv_if_needed()
transactions = get_transactions_and_archive('(FROM do_not_reply@itunes.com) (SUBJECT Receipt)', parse_itunes_transactions)
write_transactions_to_csv(transactions)