In [6]:
import pandas as pd
import os
import re
import warnings
import pandas as pd
import yaml
from pandas.core.common import SettingWithCopyWarning

import re
import os
from os import path
import shutil
from app.global_constants import *

import time
# Silence pandas' SettingWithCopyWarning for every cell that runs after this one.
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

from IPython.core.display import display, HTML
# Inject a CSS rule so the notebook container spans 80% of the page width.
display(HTML("<style>.container { width:80% !important; }</style>"))

# Let pandas display up to 2000 rows before it truncates a frame.
pd.set_option("display.max_rows", 2000)

In [25]:
def load_mint_trans():
    """Load the single Mint export found in PATH_TO_MINT_FOLDER.

    Column names are normalised with to_snake, runs of whitespace inside
    ``original_description`` are collapsed to one space, and rows that share
    date, description, transaction type and account are merged into a single
    row whose ``amount`` is their sum rounded to 2 decimals.

    Returns:
        A DataFrame restricted to the RAW_TRANSACT_SCHEMA columns, or None
        (after printing a warning) when the folder does not contain exactly
        one file.
    """
    # check that there is only one file in mint folder
    mint_files = os.listdir(PATH_TO_MINT_FOLDER)

    if len(mint_files) != 1:
        # Warn BEFORE returning: the message used to sit after the return
        # statement and was therefore never printed.
        print("Mint folder needs attention!")
        return None

    df = pd.read_csv(PATH_TO_MINT_FOLDER + mint_files[0])
    df.columns = [to_snake(col) for col in df.columns]

    # remove repeated white space characters
    df['original_description'] = df['original_description'].apply(lambda txt: ' '.join(txt.split()))

    # group by all values to aggregate duplicate transactions
    group_df = df.groupby(["date", "original_description", "transaction_type", "account_name"], as_index = False).amount.sum()

    group_df["amount"] = group_df["amount"].round(2)

    return group_df[RAW_TRANSACT_SCHEMA]


def load_raw_trans():
    """Stack the Mint and Amazon raw transactions and parse their dates.

    Returns:
        The concatenation of load_mint_trans() followed by load_amzn_trans(),
        with the ``date`` column converted value by value via pd.to_datetime.
        Each source keeps its own row labels, so labels may repeat.
    """
    # gather the individual raw sources, Mint first and Amazon second
    sources = [load_mint_trans(), load_amzn_trans()]

    # union together
    combined = pd.concat(sources)

    # parse every date value on its own rather than the column as a whole
    combined["date"] = combined["date"].apply(pd.to_datetime)

    return combined


def load_amzn_trans():
    """Load the single Amazon order report found in PATH_TO_AMAZON_FOLDER.

    Column names are normalised with to_snake and the report is reshaped to
    the raw-transaction layout:

    * ``original_description`` is "AMZN: <title> <category>", or
      "AMZN: unknown item / return" when title or category is missing;
    * ``account_name`` is always "AMAZON";
    * ``amount`` is ``item_total`` parsed as a float;
    * ``transaction_type`` is always "debit";
    * ``order_date`` is renamed to ``date``.

    Returns:
        A DataFrame restricted to the RAW_TRANSACT_SCHEMA columns, or None
        (after printing a warning) when the folder does not contain exactly
        one file.
    """
    # check that there is only one file in amazon folder
    amzn_files = os.listdir(PATH_TO_AMAZON_FOLDER)

    if len(amzn_files) != 1:
        # Warn BEFORE returning: the message used to sit after the return
        # statement and was therefore never printed.
        print("Amazon folder needs attention!")
        return None

    df = pd.read_csv(PATH_TO_AMAZON_FOLDER + amzn_files[0])
    df.columns = [to_snake(col) for col in df.columns]
    df["original_description"] = ("AMZN: " + df["title"] + " " + df["category"]).fillna("AMZN: unknown item / return")
    df["account_name"] = "AMAZON"
    # Strip the currency sign and any thousands separators: float() rejects
    # strings such as "1,299.00".
    df["amount"] = df["item_total"].apply(lambda x: float(x.replace("$", "").replace(",", "")))
    df["transaction_type"] = "debit"
    df = df.rename(columns={"order_date": "date"})

    return df[RAW_TRANSACT_SCHEMA]


def load_yaml(path):
    """Parse the YAML file at ``path`` with yaml.safe_load.

    Returns:
        The parsed document. When the content is not valid YAML the error is
        printed and None is returned instead of raising.
    """
    with open(path, "r") as stream:
        try:
            parsed = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # best effort: report the parse problem and hand back None
            print(exc)
            return None
        else:
            return parsed


def to_snake(txt):
    """Lower-case ``txt`` and turn every single space character into "_".

    Only the space character is replaced; other whitespace and punctuation
    are left as they are, and consecutive spaces yield consecutive
    underscores.
    """
    return txt.lower().replace(" ", "_")


def get_transact_keys(df):
    """Return the distinct combinations of the TRANSACT_KEY_COLS columns of ``df``."""
    key_columns = df[TRANSACT_KEY_COLS]
    return key_columns.drop_duplicates()

In [27]:
# Load the combined raw transactions. Despite its name, mint_df holds the
# Mint rows AND the Amazon rows, because load_raw_trans concatenates both.
mint_df = load_raw_trans()
# Show the transactions newest-first. The sorted copy is only displayed;
# mint_df itself keeps its original row order.
mint_df.sort_values("date", ascending = False)

Unnamed: 0,date,original_description,transaction_type,amount,account_name
1275,2021-07-05,AUTH : TST* NORTHSIDE SOCIAL COF,debit,6.78,Fidelity Rewards Visa Signature
1273,2021-07-04,AUTH : SHEETZ 0144,debit,5.30,Fidelity Rewards Visa Signature
1272,2021-07-04,AUTH : HARRIS TEETER #0083,debit,65.35,Fidelity Rewards Visa Signature
1271,2021-07-03,AUTH : T.J. MAXX #1459,debit,5.29,Fidelity Rewards Visa Signature
1270,2021-07-03,AUTH : SQ *BREAD & WATER COMP,debit,4.95,Fidelity Rewards Visa Signature
...,...,...,...,...,...
918,2018-05-01,REMOTE ONLINE DEPOSIT #1,credit,200.00,CHASE COLLEGE
303,2018-04-22,AMZN: Legere Tsss 2.5 Tenor Saxophone Signatur...,debit,71.60,AMAZON
300,2018-01-21,AMZN: The God Delusion ABIS_BOOK,debit,8.00,AMAZON
301,2018-01-21,AMZN: God Is Not Great: How Religion Poisons E...,debit,9.37,AMAZON


In [31]:
# Render every parsed date as MM/DD/YYYY text (display only; nothing is assigned).
mint_df["date"].dt.strftime('%m/%d/%Y')

0      01/02/2020
1      01/02/2020
2      01/02/2020
3      01/02/2020
4      01/03/2020
          ...    
387    11/24/2018
388    12/03/2018
389    12/03/2018
390    12/03/2018
391    12/03/2018
Name: date, Length: 2074, dtype: object

In [10]:
import re
import os
from os import path
import shutil
from app.global_constants import *


def append_new_transactions(transact_file_path, new_file_path):
    """Merge the rows of a newly downloaded CSV into the transactions CSV.

    Exact duplicate rows are dropped from the new file, from the existing
    file and from their concatenation. The result overwrites
    ``transact_file_path``. When that file does not exist yet, the
    de-duplicated new file is saved there as-is. Progress counts are printed
    along the way.

    Args:
        transact_file_path: path of the CSV that accumulates all records.
        new_file_path: path of the CSV holding the records to add.

    Returns:
        None.
    """
    print("")
    print(f"updating: {transact_file_path}")
    print(f"using: {new_file_path}")

    incoming = pd.read_csv(new_file_path).drop_duplicates()
    print(f"NEW: {len(incoming)}")

    # no transaction file yet: the incoming records become the file
    if not path.exists(transact_file_path):
        incoming.to_csv(transact_file_path, index=False)
        print(f"no existing records. new records {len(incoming)}")
        return

    existing = pd.read_csv(transact_file_path).drop_duplicates()
    print(f"CURR: {len(existing)}")

    merged = pd.concat([existing, incoming]).drop_duplicates()
    print(f"CONCAT: {len(merged)}")

    print(f"new records {len(merged) - len(existing)}: total records {len(merged)}")

    # overwrite the transaction file with the merged records
    merged.to_csv(transact_file_path, index=False)

    # read the file back to confirm what actually landed on disk
    print(f"CONFIRM_TRANS_LEN: {len(pd.read_csv(transact_file_path))}")
        
        


def update_raw_transactions():
    """Fold the files found in DOWNLOADS_FOLDER into the master transaction CSVs.

    The Mint file chosen by get_last_mint is appended to the Mint master
    file. Every Amazon report found by get_all_amzn is then appended to the
    Amazon master file, with a 10 second pause before each one.
    """
    download_names = os.listdir(DOWNLOADS_FOLDER)

    print("\nupdating mint records".upper())

    # only the most recent mint download is merged
    latest_mint = get_last_mint(download_names)
    mint_source = DOWNLOADS_FOLDER + "/" + latest_mint
    append_new_transactions(PATH_TO_MINT_FOLDER + MASTER_TRANSACT_FILE_NAMES, mint_source)

    print("\nupdating amazon records".upper())

    # every amazon order report is merged, one after another
    for report_name in get_all_amzn(download_names):
        time.sleep(10)
        report_source = DOWNLOADS_FOLDER + "/" + report_name
        append_new_transactions(PATH_TO_AMAZON_FOLDER + MASTER_TRANSACT_FILE_NAMES, report_source)
    
    
def get_all_amzn(downloads):
    """identify all amzn order histories

    Args:
        downloads: iterable of file names (names only, not full paths).

    Returns:
        The names, in input order, that look like
        "DD-Mon-YYYY_to_DD-Mon-YYYY.csv", optionally carrying a " (N)" copy
        suffix right before the extension.
    """
    amzn_regex = r"\d{2}-\w{3}-\d{4}_to_\d{2}-\w{3}-\d{4}(\s\((\d+)\))?\.csv"

    # fullmatch, not match: the WHOLE name has to fit the pattern. With a
    # start-anchored match, a name such as "....csv.crdownload" was accepted
    # and returned truncated to "....csv", which is not the listed file.
    return [d for d in downloads if re.fullmatch(amzn_regex, d)]
    

def get_last_mint(downloads):
    """identify filename of most recent mint download

    Mint exports are named "transactions.csv", with repeat downloads named
    "transactions (N).csv". The copy with the highest N is treated as the
    most recent one.

    Args:
        downloads: iterable of file names (names only, not full paths).

    Returns:
        "transactions (N).csv" for the highest N present, or
        "transactions.csv" when no numbered copy is present. The fallback is
        returned even if "transactions.csv" itself is not in ``downloads``.
    """
    mint_regex = r"transactions(\s\((\d+)\))?\.csv"

    # collect the copy numbers of all numbered mint downloads
    copy_numbers = []
    for d in downloads:
        # fullmatch, not match: partial downloads or backups such as
        # "transactions (7).csv.crdownload" must not be counted.
        match = re.fullmatch(mint_regex, d)
        if match and match.group(2):
            copy_numbers.append(int(match.group(2)))

    if not copy_numbers:
        return "transactions.csv"

    return f"transactions ({max(copy_numbers)}).csv"

In [35]:
# Merge the newest Mint export and every Amazon report found in
# DOWNLOADS_FOLDER into the master transaction CSVs (progress is printed).
update_raw_transactions()


UPDATING MINT RECORDS

updating: ./data/raw/mint/transactions.csv
using: /Users/caleb.crouse/Downloads/transactions.csv
NEW: 1819
CURR: 1819
CONCAT: 1819
new records 0: total records 1819
CONFIRM_TRANS_LEN: 1819

UPDATING AMAZON RECORDS

updating: ./data/raw/amazon/transactions.csv
using: /Users/caleb.crouse/Downloads/01-Jan-2019_to_05-Jul-2021 (1).csv
NEW: 300
CURR: 300
CONCAT: 300
new records 0: total records 300
CONFIRM_TRANS_LEN: 300

updating: ./data/raw/amazon/transactions.csv
using: /Users/caleb.crouse/Downloads/01-Jan-2019_to_05-Jul-2021.csv
NEW: 300
CURR: 300
CONCAT: 300
new records 0: total records 300
CONFIRM_TRANS_LEN: 300

updating: ./data/raw/amazon/transactions.csv
using: /Users/caleb.crouse/Downloads/05-Jun-2021_to_05-Jul-2021.csv
NEW: 21
CURR: 300
CONCAT: 321
new records 21: total records 321
CONFIRM_TRANS_LEN: 321

updating: ./data/raw/amazon/transactions.csv
using: /Users/caleb.crouse/Downloads/01-Jan-2019_to_05-Jul-2021 (2).csv
NEW: 300
CURR: 300
CONCAT: 300
new re

In [23]:
# Read the Amazon master transactions file back in for inspection.
curr = pd.read_csv(PATH_TO_AMAZON_FOLDER + MASTER_TRANSACT_FILE_NAMES)

In [24]:
# Read one downloaded Amazon report directly.
# NOTE(review): this absolute path is machine-specific, so the cell will not run elsewhere.
next_df = pd.read_csv("/Users/caleb.crouse/Downloads/01-Jan-2019_to_05-Jul-2021.csv")

In [25]:
# Display the downloaded Amazon report to eyeball its columns and values.
next_df

Unnamed: 0,Order Date,Order ID,Title,Category,ASIN/ISBN,UNSPSC Code,Website,Release Date,Condition,Seller,...,Carrier Name & Tracking Number,Item Subtotal,Item Subtotal Tax,Item Total,Tax Exemption Applied,Tax Exemption Type,Exemption Opt-Out,Buyer Name,Currency,Group Name
0,01/23/19,114-2020637-4600209,Bissell Cleanview Upright Bagless Vacuum Clean...,VACUUM_CLEANER,B06XKLHSWJ,47121602.0,Amazon.com,,new,Amazon.com,...,USPS(9361289678092453391724),$74.99,$4.50,$79.49,,,,Caleb,USD,
1,01/23/19,114-2020637-4600209,"S&B Golden Curry Sauce Mix, Medium Hot, 8.4-Ou...",SAUCE,B00VEJCJSC,50170000.0,Amazon.com,,new,First SuperMarket,...,USPS(9361289678092453276816),$23.99,$0.00,$23.99,,,,Caleb,USD,
2,01/31/19,112-0377545-9794603,Omron 5 Series Upper Arm Blood Pressure Monito...,BLOOD_PRESSURE_MONITOR,B00KPQB2NS,42181602.0,Amazon.com,,new,Amazon.com,...,USPS(9361289678092458472107),$39.99,$0.00,$39.99,,,,Caleb,USD,
3,02/04/19,113-8041433-9829843,Dragon Age: Inquisition - Game of the Year Edi...,DOWNLOADABLE_VIDEO_GAME,B0167AK9Y8,60141104.0,Amazon.com,2015-10-06T00:00:01,new,Amazon.com Services LLC,...,,$39.99,$0.00,$39.99,,,,Caleb,USD,
4,02/05/19,113-4002262-0089004,Stainless Steel Mixing Bowls with Lids and Non...,DISHWARE_BOWL,B00Z7ZLQCY,52150000.0,Amazon.com,,new,Fitzroy and Fox,...,UPS(1Z065R6E0303260207),$26.99,$0.00,$26.99,,,,Caleb,USD,
5,02/05/19,113-4002262-0089004,Wilton Recipe Right Non-Stick 6 Cup Jumbo Muff...,BAKING_PAN,B07328J6QK,52150000.0,Amazon.com,,new,Amazon.com,...,UPS(1Z065R6E0303260207),$13.99,$0.84,$14.83,,,,Caleb,USD,
6,02/10/19,113-7761104-4592251,New York Biology Dead Sea Mud Mask for Face an...,SKIN_TREATMENT_MASK,B01NCM25K7,53131600.0,Amazon.com,,new,Cnba Inc.,...,AMZN_US(TBA795355943000),$14.95,$0.00,$14.95,,,,Caleb,USD,
7,02/10/19,113-7761104-4592251,"InstaNatural Rose Water Facial Toner for Face,...",ASTRINGENT_SUBSTANCE,B00IMHN0B4,53131600.0,Amazon.com,,new,Instanatural LLC,...,AMZN_US(TBA795355943000),$13.97,$0.84,$14.81,,,,Caleb,USD,
8,02/10/19,113-7761104-4592251,CeraVe Hydrating Facial Cleanser | Moisturizin...,SKIN_CLEANING_AGENT,B01MSSDEPK,53131613.0,Amazon.com,,new,Amazon.com,...,AMZN_US(TBA795355943000),$11.99,$0.72,$12.71,,,,Caleb,USD,
9,02/10/19,113-7761104-4592251,Neutrogena Hydro Boost Hyaluronic Acid Hydrati...,SKIN_MOISTURIZER,B00NR1YQK4,53131613.0,Amazon.com,,new,Amazon.com,...,AMZN_US(TBA795355943000),$14.99,$0.90,$15.89,,,,Caleb,USD,


In [5]:
# Show the rows of curr that exactly repeat an earlier row; the header-only
# result below means none were found.
# NOTE(review): this cell's execution count (5) is lower than that of the
# cell defining curr (23), so it may have run against a different curr.
curr.loc[curr.duplicated()]

Unnamed: 0,Order Date,Order ID,Title,Category,ASIN/ISBN,UNSPSC Code,Website,Release Date,Condition,Seller,...,Carrier Name & Tracking Number,Item Subtotal,Item Subtotal Tax,Item Total,Tax Exemption Applied,Tax Exemption Type,Exemption Opt-Out,Buyer Name,Currency,Group Name
