In [32]:
import imaplib
import pyzmail
import requests
import email
import email_utils
import re
import openai
import config
import numpy as np

from datetime import datetime

In [2]:
# Read secrets from the local `config` module: register the OpenAI API key on
# the `openai` client and keep the mailbox login pair for the login step.
openai.api_key = config.open_ai_api_key
email_username, email_password = config.username, config.password

In [61]:
# Log in to the mailbox and select the inbox.
mailbox = email_utils.Outlook()
mailbox.login(email_username, email_password)
mailbox.inbox()

# Fetch the message ids to scan.
# NOTE(review): despite the variable name, this calls allIds(), not an
# unread-only query -- confirm which one is intended.
unread_ids = mailbox.allIds()
# NOTE(review): matching_ids is computed but not consumed by the loop below,
# which walks every id in unread_ids and relies on the subject-line patterns
# to pick out transaction emails. Kept so existing notebook state is unchanged.
matching_ids = mailbox.getIdswithWord(unread_ids, 'Chase Sapphire Reserve')

# Subject-line patterns. The amount pattern tolerates thousands separators
# ("$1,234.56"); the vendor is everything after the word "with".
amount_pattern = r'\$(\d[\d,]*\.\d{2})'
vendor_pattern = r"with\s(.+)"

# Layout of the message date once any trailing "(ZONE)" comment is removed,
# and the layout stored on each transaction row.
input_format = "%a, %d %b %Y %H:%M:%S %z"
output_format = "%Y-%m-%d-%H%M%S"

# Each row is [date, category, amount, vendor, account email].
transactions = []

# Placeholder category; replaced by the classification step later on.
category = "na"

# Parse every message whose subject carries both an amount and a vendor.
for msg_id in unread_ids:  # `msg_id`, not `id`, so the builtin is not shadowed
    mailbox.getEmail(msg_id)
    subject_line = mailbox.mailsubject()

    amount_match = re.search(amount_pattern, subject_line)
    vendor_match = re.search(vendor_pattern, subject_line)
    if not (amount_match and vendor_match):
        continue

    amount = float(amount_match.group(1).replace(",", ""))
    vendor = vendor_match.group(1)

    # Strip a parenthesised zone comment such as "(EDT)" or "(EST)" so the
    # numeric UTC offset is the last token handed to strptime.
    msg_date = re.sub(r"\s*\([^)]*\)", "", mailbox.maildate()).strip()

    try:
        dt = datetime.strptime(msg_date, input_format)
    except ValueError:
        # Skip the row entirely; appending here would either raise NameError
        # or silently reuse the previous message's date.
        print("Issue with data-time conversion. Ignoring Entry.")
        continue

    frmt_date = dt.strftime(output_format)
    transactions.append([frmt_date, category, amount, vendor, email_username])


for transaction in transactions:
    print(transaction)


 > Signed in as budget2396@outlook.com [b'LOGIN completed.']
['2023-07-09-222844', 'na', 15.97, 'SQ *ZEDS ICE CREAM', 'budget2396@outlook.com']
['2023-07-09-222904', 'na', 36.82, 'SQ *TFB OPERATIONS L', 'budget2396@outlook.com']
['2023-07-09-235735', 'na', 40.0, 'HCTRA EZ TAG Rebill', 'budget2396@outlook.com']
['2023-07-09-235747', 'na', 21.65, 'SQ *BOULDIN ACRES', 'budget2396@outlook.com']
['2023-07-09-235800', 'na', 9.8, "P. TERRY'S STAND #13", 'budget2396@outlook.com']
['2023-07-10-214714', 'na', 16.54, 'TMOBILE*PREPAID WEB', 'budget2396@outlook.com']


In [58]:
# Closed set of spending categories handed to the classifier prompt.
# Order is kept as-is because the list is interpolated verbatim into the prompt.
vendor_categories = [
    'Automotive',
    'Bills & Utilities',
    'Education',
    'Entertainment',
    'Food & Drink',
    'Gas',
    'Groceries',
    'Health & Wellness',
    'Miscellaneous',
    'Travel',
    'Tolls',
    'Retail Shopping',
]

In [59]:
# Use GPT-3.5 to determine the type of vendor

def EstimateUnknownCategory(vendor):
    """Ask the OpenAI completion endpoint which category a vendor belongs to.

    Builds a prompt from ``vendor`` and the module-level ``vendor_categories``
    list, sends it through ``openai.Completion.create`` and returns the first
    choice's text.

    Args:
        vendor: Vendor text to classify; it is interpolated into the prompt.

    Returns:
        str: The matching entry of ``vendor_categories`` when the reply equals
        one of them after trimming surrounding quotes, brackets, whitespace and
        trailing periods (case-insensitively); otherwise the stripped reply
        text unchanged.
    """
    gpt_prompt = f"What type of vendor is '{vendor}'? Classify the vendor into one of the following categories: '{vendor_categories}'. Only return the specific category."
    response = openai.Completion.create(
        engine='text-davinci-003',
        prompt=gpt_prompt,
        max_tokens=100,
        n=1,
        stop=None,
        temperature=0.3
    )
    answer = response.choices[0].text.strip()

    # The reply is free text, so it may carry quoting or punctuation around
    # the category name. Map it back onto the canonical spelling when possible
    # so downstream rows only contain known categories.
    normalized = answer.strip(" \t\r\n'\"`.[]").casefold()
    for known_category in vendor_categories:
        if normalized == known_category.casefold():
            return known_category

    # No known category matched: return the stripped reply unchanged.
    return answer

In [60]:
# Positions inside each transaction row: [date, category, amount, vendor, ...].
# The vendor is addressed by its fixed index rather than [-1], because the rows
# are built with the account email appended after the vendor.
CATEGORY_COL = 1
VENDOR_COL = 3

for transaction in transactions:
    # Only rows still carrying the "na" placeholder need a model lookup, so
    # re-running this cell does not re-query rows already categorised.
    if transaction[CATEGORY_COL] == "na":
        transaction[CATEGORY_COL] = EstimateUnknownCategory(transaction[VENDOR_COL])

    print(transaction)


['2023-07-09-222844', 'Food & Drink', 15.97, 'SQ *ZEDS ICE CREAM']
['2023-07-09-222904', 'Miscellaneous', 36.82, 'SQ *TFB OPERATIONS L']
['2023-07-09-235735', 'Tolls', 40.0, 'HCTRA EZ TAG Rebill']
['2023-07-09-235747', 'Retail Shopping', 21.65, 'SQ *BOULDIN ACRES']
['2023-07-09-235800', 'Food & Drink', 9.8, "P. TERRY'S STAND #13"]
['2023-07-10-214714', 'Bills & Utilities', 16.54, 'TMOBILE*PREPAID WEB']
