In [1]:
import git
import os
import pandas as pd

### Settings

In [2]:
def get_git_root(path):
    """Return the top-level directory of the git repository containing `path`.

    The repository is located by searching `path` and its parent directories;
    the root is whatever `git rev-parse --show-toplevel` reports for it.
    """
    repo = git.Repo(path, search_parent_directories=True)
    return repo.git.rev_parse("--show-toplevel")

# Repo-relative data directories; each one is appended to the git root path.
# The stage sub-directories are derived from the shared root so the prefix is
# spelled only once.
data_root_dir_append = "data"
data_raw_dir_append = data_root_dir_append + "/raw"
data_interim_dir_append = data_root_dir_append + "/interim"
data_processed_dir_append = data_root_dir_append + "/processed"

In [3]:
# Resolve the raw-data directory relative to the repository root.
git_root_path = get_git_root(os.getcwd())
raw_data_path = os.path.join(git_root_path, data_raw_dir_append)

# Names of the regular files directly inside the raw-data directory
# (sub-directories are skipped), in the order os.listdir reports them.
txn_files = list(
    filter(
        lambda name: os.path.isfile(os.path.join(raw_data_path, name)),
        os.listdir(raw_data_path),
    )
)

### Column names and per-bank CSV layouts (TODO: move into a config file)

In [4]:
# Names of the normalized transaction columns shared by every bank.
txn_date_col_name = "txn_date"
txn_description_col_name = "txn_description"
txn_amount_col_name = "txn_amount"
bank_col_name = "bank"
id_col_name = "unique_id"

# Format used when a transaction date is rendered as text.
txn_strftime_format = "%Y-%m-%d"

# Per-bank layouts: "columns" lists, position by position, the label assigned
# to each column of that bank's export (None leaves a position unnamed).
usaa_config = dict(
    columns=[
        "status",
        None,
        txn_date_col_name,
        None,
        txn_description_col_name,
        "categorization",
        txn_amount_col_name,
    ],
    filter_positives=True,
)

citizens_config = dict(
    columns=[
        "Transaction Type",
        txn_date_col_name,
        "Account Type",
        txn_description_col_name,
        txn_amount_col_name,
        "Reference No.",
        "Credits",
        "Debits",
    ],
    filter_positives=True,
)

In [5]:
class TxnFile():
    """One raw bank export file parsed into a normalized transaction DataFrame.

    The bank is inferred from the file name, the matching column layout
    (`usaa_config` / `citizens_config`) is applied, and the result is exposed
    as `self.txn_df` with the date, amount and description columns plus
    `bank`, `acct_type` and a derived id column.
    """

    # Bank identifiers; the detected one is stored in `self.bank`, written to
    # the bank column and used as the prefix of each transaction id.
    citizens_bank_key = 'citizens'
    usaa_bank_key = 'usaa'
    fidelity_bank_key = 'fidelity'
    discover_key = 'discover'

    # Account types accepted from the prompt or the `account_type` argument.
    acceptable_account_types = ['credit', 'checking', 'investment']

    def __init__(self, path, file, account_type=None):
        """Parse `file` located in directory `path`.

        Args:
            path: Directory containing the export file.
            file: File name; it must contain a marker recognised by `get_bank`.
            account_type: Optional account type. When omitted the user is
                prompted interactively (the original behaviour); passing it
                lets the notebook run unattended.

        Raises:
            ValueError: If the bank cannot be inferred from the file name, no
                column layout exists for the bank, or `account_type` is not
                one of `acceptable_account_types`.
        """
        self.root_dir = path
        self.file = file
        self.full_path = os.path.join(path, file)

        self.bank = self.get_bank()
        self.config = self._get_config()

        if account_type is None:
            self.account_type = self.get_account_type()
        elif account_type in self.acceptable_account_types:
            self.account_type = account_type
        else:
            raise ValueError(
                "account_type {!r} is not one of {}".format(
                    account_type, self.acceptable_account_types
                )
            )

        self.txn_df = self.get_txn_df()

    def _get_config(self):
        """Return the column layout for `self.bank`, failing loudly if none exists."""
        configs = {
            self.citizens_bank_key: citizens_config,
            self.usaa_bank_key: usaa_config,
        }
        if self.bank not in configs:
            raise ValueError("No column configuration for bank {!r}".format(self.bank))
        return configs[self.bank]

    def get_bank(self):
        """Infer the bank from marker substrings in the file name.

        Raises:
            ValueError: If the file name matches no known marker (previously
                this surfaced as an UnboundLocalError).
        """
        if 'bk_download' in self.file:
            return self.usaa_bank_key
        if 'EXPORT' in self.file:
            return self.citizens_bank_key

        raise ValueError(
            "Cannot determine bank for file {!r}: expected 'bk_download' or "
            "'EXPORT' in the file name".format(self.file)
        )

    def get_account_type(self):
        """Prompt until the user enters one of `acceptable_account_types`."""
        acceptable_inputs = self.acceptable_account_types
        prompt = "What kind of account is this file? {}\nMust be one of {}".format(self.file, acceptable_inputs)

        while True:
            acct_type = input(prompt)

            if acct_type in acceptable_inputs:
                return acct_type

            print("ERROR! Input not one of {}\n".format(acceptable_inputs))

    def get_txn_df(self):
        """Read the export file and return the normalized transaction frame.

        Returns:
            DataFrame with the date (datetime), amount (float) and description
            columns, followed by the bank, "acct_type" and id columns.
        """
        # Only the Citizens layout is read with a header row; other files are
        # read as header-less and labelled purely by position.
        header = 0 if self.bank in [self.citizens_bank_key] else None
        raw_txn_df = pd.read_csv(self.full_path, header=header)
        raw_txn_df.columns = self.config['columns']
        # NOTE: the config's "filter_positives" flag is not applied here; the
        # filter it referred to was commented out in the original code.

        txn_df = raw_txn_df[[txn_date_col_name, txn_amount_col_name, txn_description_col_name]].copy()

        # Amounts may carry a literal "--" prefix; strip it (as plain text,
        # not a regex) before converting to float.
        txn_df[txn_amount_col_name] = (
            txn_df[txn_amount_col_name]
            .astype(str)
            .str.replace("--", "", regex=False)
            .astype(float)
        )
        txn_df[txn_date_col_name] = pd.to_datetime(txn_df[txn_date_col_name])

        # Written through the shared constants so Txn, which reads these
        # columns via bank_col_name / id_col_name, stays in sync.
        txn_df[bank_col_name] = self.bank
        txn_df["acct_type"] = self.account_type

        # Id is bank_accounttype_YYYYMMDD_|amount|. NOTE: two transactions with
        # the same date and absolute amount receive the same id.
        txn_df[id_col_name] = [
            "{}_{}_{}_{}".format(
                self.bank,
                self.account_type,
                txn_date.strftime("%Y%m%d"),
                abs(txn_amount)
            )
            for txn_date, txn_amount in zip(txn_df[txn_date_col_name], txn_df[txn_amount_col_name])
        ]

        return txn_df
    
        

In [6]:
# Parse every raw file, then combine the frames in a single concat.
# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated
# frame on every iteration.
txn_frames = []

for txn_file_path in txn_files:

    txn_file = TxnFile(raw_data_path, txn_file_path)
    txn_frames.append(txn_file.txn_df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# raw-data directory holds no files.
full_txn_df = pd.concat(txn_frames) if txn_frames else pd.DataFrame()

What kind of account is this file? bk_download.csv
Must be one of ['credit', 'checking', 'investment']checking
What kind of account is this file? EXPORT.CSV
Must be one of ['credit', 'checking', 'investment']checking


In [7]:
# Preview only the first rows instead of rendering the whole frame.
full_txn_df.head(10)

Unnamed: 0,txn_date,txn_amount,txn_description,bank,acct_type,unique_id
0,2019-12-17,-21.73,AMZN Mktp US*XA2802ID3 AMZN.COM BILLWA,usaa,checking,usaa_checking_20191217_21.73
1,2019-12-17,-8.00,TST* STIR COOKING SCHOOLTDENVER CO,usaa,checking,usaa_checking_20191217_8.0
2,2019-12-17,-4.20,SQ *ALLEGRO COFFEE DENVER CO,usaa,checking,usaa_checking_20191217_4.2
3,2019-12-16,-686.92,USAA CREDIT CARD PAYMENT,usaa,checking,usaa_checking_20191216_686.92
4,2019-12-16,-86.67,WAL-MART #0986840 SUMMIT FRISCO CO,usaa,checking,usaa_checking_20191216_86.67
5,2019-12-16,-81.33,SAFEWAY #0322 GOLDEN CO,usaa,checking,usaa_checking_20191216_81.33
6,2019-12-16,-56.70,EXXONMOBIL 48225817 FRISCO CO,usaa,checking,usaa_checking_20191216_56.7
7,2019-12-16,-53.20,KiwiCo Inc. 800-7144828 CA,usaa,checking,usaa_checking_20191216_53.2
8,2019-12-16,-23.65,WILDWOOD VAIL CO,usaa,checking,usaa_checking_20191216_23.65
9,2019-12-16,-14.20,BRECKENRIDGE PASSPORT 414-4316555 CO,usaa,checking,usaa_checking_20191216_14.2


In [40]:
# txn_date_col_name = "txn_date"
# txn_description_col_name = "txn_description"
# txn_amount_col_name = "txn_amount"

class Txn():
    
    # date, amt, description, bank, acct
    def __init__(self, txn):
        """Capture the fields of one transaction and load the lookup tables.

        `txn` is indexed with the module-level column-name constants; its date
        entry must provide `strftime`.
        # presumably a row of a TxnFile.txn_df frame; verify against caller
        """
        # Lookup tables come from the static builders and do not depend on `txn`.
        self.txn_categorization_config = self.get_txn_categorization_config()
        self.txn_config_df = self.get_txn_config()

        # Keep the original record alongside the extracted fields.
        self.txn = txn
        self.id = txn[id_col_name]
        self.bank = txn[bank_col_name]
        self.txn_amount = txn[txn_amount_col_name]
        self.txn_description = txn[txn_description_col_name]

        # The date is stored as text in txn_strftime_format.
        raw_txn_date = txn[txn_date_col_name]
        self.txn_date = raw_txn_date.strftime(txn_strftime_format)
        
    @staticmethod
    def get_txn_config():
        
        txn_config = [
            #{"recipient" : "AMZN", "type" : "discretionary", "category" : None, "sub_category" : None, "dynamic_logic": None},
            {"recipient" : "7-ELEVEN", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "CONOCO", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "CONOCO", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            }, 
            {"recipient" : "7-ELEVEN", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "SAFEWAY", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },  
            {"recipient" : "EXXONMOBIL", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "EXXONMOBIL", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "JUMPBIKESHAR", 
             "type" : "discretionary", 
             "category" : "Auto and Transport", 
             "sub_category" : "Ride Share", 
             "dynamic_logic": None
            },
            {"recipient" : "BLAKE STREET TAVERN", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "BONOBOS", 
             "type" : "discretionary", 
             "category" : "Shopping", 
             "sub_category" : "Clothes", 
             "dynamic_logic": None
            },
            {"recipient" : "CASH REWARDS REDEMPTION", 
             "type" : "discretionary", 
             "category" : "Uncategorized", 
             "sub_category" : "Credit Card Redemption", 
             "dynamic_logic": None
            },
            {"recipient" : "CECILIA'S", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "CHEESE RANCH", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "CHIPOTLE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "CIRCLE K", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "CIRCLE K", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "CITIZEN RAIL", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Group", 
             "dynamic_logic": None
            },
            {"recipient" : "CITY LIQUORS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Alcohol", 
             "dynamic_logic": None
            },
            {"recipient" : "CITY-MARKET", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "CLARK'S MARKET", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "COFFEE DEPOT", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Coffee", 
             "dynamic_logic": None
            },
            {"recipient" : "COLORADO MOUNTAIN SCHO", 
             "type" : "discretionary", 
             "category" : "Experiences",
             "sub_category" : "Hobbies",
             "dynamic_logic" : None
            },
            {"recipient" : "COLORADO SAKE CO", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "CONOCO - JENNY'S MARKET", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "CONOCO - JENNY'S MARKET", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "CORNER STORE", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "CORNER STORE", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "CRIMSON ROOM", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "CROOKED STAVE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "CSM - FOOD COURT", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "CSM - STARBUCKS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Coffee", 
             "dynamic_logic": None
            },
            {"recipient" : "CSM - WOW", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "CubeSmart", 
             "type" : "fixed", 
             "category" : "Vanlife", 
             "sub_category" : "Storage", 
             "dynamic_logic": None
            },
            {"recipient" : "CVS/PHARMACY", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "DAGAR", 
             "type" : "discretionary", 
             "category" : "Experiences", 
             "sub_category" : "Group", 
             "dynamic_logic": None
            },
            {"recipient" : "DAILY HARVEST", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "DAZBOG COFFEE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Coffee", 
             "dynamic_logic": None
            },
            {"recipient" : "DEATH &amp; CO", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "DENVER TEDS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "DIA PARKING", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Parking", 
             "dynamic_logic": None
            },
            {"recipient" : "DICK'S CLOTHING", 
             "type" : "discretionary", 
             "category" : "Shopping", 
             "sub_category" : "Clothes", 
             "dynamic_logic": None
            },
            {"recipient" : "DISCOVER", 
             "type" : "variable", 
             "category" : "Bills and Utilities", 
             "sub_category" : "Credit Card Payment", 
             "dynamic_logic": None
            },
            {"recipient" : "CLUB TAVERN", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "DOORDASH", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "DOS GRINGOS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Group", 
             "dynamic_logic": None
            },
            {"recipient" : "DOWNING SUPERMARKET", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "DTCOM", 
             "type" : "discretionary", 
             "category" : "Experiences", 
             "sub_category" : "Flights", 
             "dynamic_logic": None
            },
            {"recipient" : "NFLSUNDAYTICKET", 
             "type" : "discretionary", 
             "category" : "Entertainment", 
             "sub_category" : "Sports", 
             "dynamic_logic": None
            },
            {"recipient" : "DUNKIN", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Coffee", 
             "dynamic_logic": None
            },
            {"recipient" : "E 470 EXPRESS TOLLS", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Tolls", 
             "dynamic_logic": None
            },
            {"recipient" : "ELEVATIONCYCLES", 
             "type" : "discretionary", 
             "category" : "Hobbies", 
             "sub_category" : "MTB", 
             "dynamic_logic": None
            },
            {"recipient" : "FED'S AUTOMOTIVE", 
             "type" : "intermittent", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Maintenance", 
             "dynamic_logic": None
            },
            {"recipient" : "FEDS AUTOMOTIVE", 
             "type" : "intermittent", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Maintenance", 
             "dynamic_logic": None
            },
            {"recipient" : "FID BKG SVC", 
             "type" : "variable", 
             "category" : "Investments", 
             "sub_category" : "Investment - Withdrawal", 
             "dynamic_logic": "{} < 0".format(txn_amount_col_name)
            },
            {"recipient" : "FID BKG SVC", 
             "type" : "variable", 
             "category" : "Investments", 
             "sub_category" : "Investment - Deposit", 
             "dynamic_logic": "{} > 0".format(txn_amount_col_name)
            },
            {"recipient" : "FINNS MANOR", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "FIRESTONE", 
             "type" : "intermittent", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Maintenance", 
             "dynamic_logic": None
            },
            {"recipient" : "FRONT RANGE FLEET", 
             "type" : "intermittent", 
             "category" : "Vanlife", 
             "sub_category" : "Maintenance", 
             "dynamic_logic": None
            },
            {"recipient" : "FUJI DENVER", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Group", 
             "dynamic_logic": None
            },
            {"recipient" : "GEORGETOWN LIQUORS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Alcohol", 
             "dynamic_logic": None
            },
            {"recipient" : "GOLDEN SINCLAIR", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "GOLDEN SINCLAIR", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            {"recipient" : "GONZO S COFFEE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Coffee", 
             "dynamic_logic": None
            },
            {"recipient" : "GOOD 2 GO", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "GOOGLE *", 
             "type" : "discretionary", 
             "category" : "Shopping", 
             "sub_category" : "Technology", 
             "dynamic_logic": None
            },
            {"recipient" : "GREASE MONKEY", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Maintenance", 
             "dynamic_logic": None
            },
            {"recipient" : "GRUBHUB", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "HAIRCUTS", 
             "type" : "variable", 
             "category" : "Personal Care", 
             "sub_category" : "Haircut", 
             "dynamic_logic": None
            },
            {"recipient" : "HELLOFRESH", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "HULU", 
             "type" : "discretionary", 
             "category" : "Entertainment", 
             "sub_category" : "TV", 
             "dynamic_logic": None
            },
            {"recipient" : "INTEREST CHARGE", 
             "type" : "intermittent", 
             "category" : "Fees and Charge", 
             "sub_category" : "Bank Service Fee", 
             "dynamic_logic": None
            },
            {"recipient" : "INTEREST PAID", 
             "type" : "intermittent", 
             "category" : "Fees and Charge", 
             "sub_category" : "Bank Service Fee", 
             "dynamic_logic": None
            },
            {"recipient" : "INTERNET PAYMENT", 
             "type" : "intermittent", 
             "category" : "Fees and Charge", 
             "sub_category" : "Bank Service Fee", 
             "dynamic_logic": None
            },
            {"recipient" : "INTUIT", 
             "type" : "intermittent", 
             "category" : "Taxes", 
             "sub_category" : "Taxes - Federal", 
             "dynamic_logic": None
            },
            {"recipient" : "JENNY'S MARKET", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "JENNY'S MARKET", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "LIQUOR STORE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Alcohol", 
             "dynamic_logic": None
            },
            {"recipient" : "KAGAN COUNSELING", 
             "type" : "variable", 
             "category" : "Health and Fitness", 
             "sub_category" : "Therapist", 
             "dynamic_logic": None
            },
            {"recipient" : "KINDNESS COLLECTIVE", 
             "type" : "variable", 
             "category" : "Health and Fitnees", 
             "sub_category" : "Yoga", 
             "dynamic_logic": None
            },
            {"recipient" : "KINDNESS YOGA", 
             "type" : "variable", 
             "category" : "Health and Fitnees", 
             "sub_category" : "Yoga", 
             "dynamic_logic": None
            },
            {"recipient" : "KING SOOP", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "KIWICO", 
             "type" : "intermittent", 
             "category" : "Gifts and Donations", 
             "sub_category" : "Holiday/Birthday", 
             "dynamic_logic": None
            },
            {"recipient" : "KUM &amp; GO", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "KUM &amp; GO", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name)
            },
            {"recipient" : "LATE FEES", 
             "type" : "intermittent", 
             "category" : "Fees and Charges", 
             "sub_category" : "Late Fees", 
             "dynamic_logic": None
            },
            {"recipient" : "LIQUORS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Alcohol", 
             "dynamic_logic": None
            },
            {"recipient" : "LEGACY GRILL", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Group", 
             "dynamic_logic": None
            },
            {"recipient" : "LEGACY RIDGE GOLF", 
             "type" : "discretionary", 
             "category" : "Experiences", 
             "sub_category" : "Hobbies", 
             "dynamic_logic": None
            },
            {"recipient" : "LIBRARY", 
             "type" : "intermittent", 
             "category" : "Fees and Charges", 
             "sub_category" : "Late Fees", 
             "dynamic_logic": None
            },
            {"recipient" : "LOAF N JUG", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Auto - Gas", 
             "dynamic_logic": "{} >= 10".format(txn_amount_col_name)
            },
            {"recipient" : "LOAF N JUG", 
             "type" : "variable", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": "{} < 10".format(txn_amount_col_name) 
            },
            {"recipient" : "LOS CHINGONES", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "LUSTRE PEARL", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "MARTA", 
             "type" : "intermittent", 
             "category" : "Auto and Transport", 
             "sub_category" : "Public Transport", 
             "dynamic_logic": None
            },
            {"recipient" : "MARYANNE PASTRY SHOPPE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Groceries", 
             "dynamic_logic": None
            },
            {"recipient" : "MATCHBOX", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "MB STADIUM", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "MCDONALD'S", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "MI CASA-BRECKENRIDGE", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Group", 
             "dynamic_logic": None
            },
            {"recipient" : "MINES PARKING", 
             "type" : "variable", 
             "category" : "Auto and Transport", 
             "sub_category" : "Parking", 
             "dynamic_logic": None
            },
            {"recipient" : "MISCONDUCT TAVERN", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Bars", 
             "dynamic_logic": None
            },
            {"recipient" : "MODMARKET", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Restaurant - Solo", 
             "dynamic_logic": None
            },
            {"recipient" : "MOLLYS SPIRITS", 
             "type" : "discretionary", 
             "category" : "Food and Dining", 
             "sub_category" : "Alcohol", 
             "dynamic_logic": None
            },
            {"recipient" : "MOOSEJAW", 
             "type" : "discretionary", 
             "category" : "Shopping", 
             "sub_category" : "Clothes", 
             "dynamic_logic": None
            },
            {"recipient" : "MOUNTAIN STRONG DENVER", 
             "type" : "fixed", 
             "category" : "Health and Fitness", 
             "sub_category" : "Gym", 
             "dynamic_logic": None
            },
            {"recipient" : "NATURAL GROCERS", 
             "type" : "variable", 
             "category" : "", 
             "sub_category" : "", 
             "dynamic_logic": None},
#            {"recipient" : "", "type" : "", "category" : "", "sub_category" : "", "dynamic_logic": None},
        ]
        
        txn_config_df = pd.DataFrame(txn_config)
        
        return txn_config_df
    
    @staticmethod
    def get_txn_categorization_config():
        
        return {
            "type" : ["discretionary", "fixed", "intermittent", "variable"],
            "mapping" : {
                "Auto and Transport" : [
                    "Ride Share", "Auto - Gas", "Auto - Insurance", "Auto - Loan Payment",
                    "Auto - Maintenance", "Auto - Miscellaneous", "Auto - Parking",  "Auto - Parts",
                    "Auto - Tolls", "Parking", "Public Transport", "Vanlife - Miscellaneous",
                    "Vanlife - Storage"], 
                "Bills and Utilities" : [
                    "Credit Card Payment", "Energy", "Phone", "Rent", "TV", "Utilities (Energy, TV, Wifi)", "Wifi"
                ],
                "Education" : [
                    "Education - Loan Payment",
                    "Education - Miscellaneous",
                    "Education - Tuition"
                ],
                "Experiences" : [
                    "Flight", "Group", "Hobbies", "Lodging"
                ],
                "Entertainment" : [
                    "Audible", "Amazon Prime", "Other", "Sports", "Spotify", "TV"
                ],
                "Fees and Charges" : [
                    "Bank Service Fee", "Late Fee"
                ],
                "Food and Dining" : [
                    "Alcohol", "Bars", "Coffee", "Groceries", "Restaurant - Solo", 
                    "Restaurant - Group"
                ],
                "Gifts and Donations" : [
                    "Charity", "Holiday/Birthday", "Political"
                ],
                "Health and Fitness" : [
                    "Gym", "Medical Care", "Therapist", "Yoga"
                ], 
                "Hobbies" : [
                    "MTB", "Other"
                    "Snowboarding - Gear", "Snowboarding - Miscellaneous", "Snowboarding - Pass"
                ],
                "Investments" : [
                    "Investment - Withdrawal", "Investment - Deposit"
                ],
                "Personal Care" : [
                    "Drycleaning", "Haircut", "Laundry"
                ],
                "Shopping" : [
                    "Clothes", "Technology"
                ], 
                "Taxes" : [
                    "Taxes - Federal", "Taxes - State", "Taxes - Local"
                ],
                "Uncategorized" : [
                    "ATM - Withdrawal", "Credit Card Redemption" , "Other"
                ],
                "Vanlife" : [
                    "Maintenance", "Storage"
                ]
            }
        }
    
    def categorize_txn(self):
        
        txn = self.txn
        txn_description = txn['txn_description']
        txn_amount = - 1 * txn['txn_amount']
        txn_mapping = self.txn_categorization_config['mapping']
        
        txn_config_subset = self.txn_config_df.copy()
        txn_config_subset["recipient_match"] = txn_config_subset.apply(
            lambda x: True if x["recipient"] in txn_description else False, axis = 1
        )
        txn_config_subset = txn_config_subset[txn_config_subset["recipient_match"] == True]
        
        if len(txn_config_subset) > 1:
            txn_config_subset["dynamic_logic_match"] = txn_config_subset.apply(
                lambda x: eval(x["dynamic_logic"]) if x["dynamic_logic"] is not None else True, axis = 1
            )
            txn_config_subset = txn_config_subset[txn_config_subset["dynamic_logic_match"] == True]
        
        assert len(txn_config_subset) <= 1, "multiple categorizations found in config for txn:\n{}\n{}".format(self.txn, txn_config_subset)
        
        if len(txn_config_subset) == 1:
            
            txn_category = txn_config_subset.iloc[0]['category']
            txn_sub_category = txn_config_subset.iloc[0]['sub_category']
            
        elif len(txn_config_subset) == 0:
            
            txn_category, txn_sub_category = self.get_categorization_prompt()
            
#             cat_prompt = "----------${} / {} / {}\nWhat category is this txn? Must be one of:\n{}".format(
#                 self.txn_amount,
#                 self.txn_date, 
#                 self.txn_description,
#                 [cat for cat in txn_mapping.keys()]
#             )

            
#             while True:
#                 txn_category = input(cat_prompt)
            
#                 if txn_category not in txn_mapping.keys():
#                     print("ERROR! Input not one of {}\n".format(txn_mapping.keys()))
#                     continue
#                 else:
#                     sub_cats = txn_mapping[txn_category]
#                     break
                    
#             while True:
            
#                 subcat_prompt = "What sub-category is the above txn? Must be one of:\n{}".format(
#                     sub_cats
#                 )
                
#                 txn_sub_category = input(subcat_prompt)
#                 if txn_sub_category not in sub_cats:
#                     print("ERROR! Input not one of {}\n".format(sub_cats))
#                     continue
#                 else:
#                     break
                
        return txn_category, txn_sub_category
    
    def get_categorization_prompt(self):
        
        txn_mapping = self.txn_categorization_config['mapping']
        
        cat_prompt = "----------${} / {} / {}\nWhat category is this txn? Must be one of:\n{}".format(
            self.txn_amount,
            self.txn_date, 
            self.txn_description,
            [cat for cat in txn_mapping.keys()]
        )
            
        while True:
            txn_category = input(cat_prompt)

            if txn_category not in txn_mapping.keys():
                print("ERROR! Input not one of {}\n".format(txn_mapping.keys()))
                continue
            else:
                sub_cats = txn_mapping[txn_category]
                break

        while True:

            sub_cats.append('Redo')
            subcat_prompt = "What sub-category is the above txn? Must be one of:\n{}".format(
                sub_cats
            )

            txn_sub_category = input(subcat_prompt)
            
            if txn_sub_category not in sub_cats:
                print("ERROR! Input not one of {}\n".format(sub_cats))
                continue
            else:
                if txn_sub_category == "Redo":
                    self.get_categorization_prompt()
                break
                
        return txn_category, txn_sub_category

In [41]:
# Categorize the first six transactions and collect one record per transaction.
# Records are gathered in a list and turned into a DataFrame once at the end:
# DataFrame.append copied the whole frame on every iteration and no longer
# exists in pandas >= 2.0.
categorized_txn_records = []
for idx, txn in full_txn_df[0:6].iterrows():
    indiv_txn = Txn(txn)
    indiv_txn_cat, indiv_txn_subcat = indiv_txn.categorize_txn()

    categorized_txn_records.append({
        id_col_name : indiv_txn.id,
        txn_date_col_name : indiv_txn.txn_date,
        txn_amount_col_name : indiv_txn.txn_amount,
        txn_description_col_name : indiv_txn.txn_description,
        "category" : indiv_txn_cat,
        "sub_category" : indiv_txn_subcat
    })

categorized_txn_df = pd.DataFrame(categorized_txn_records)
    

----------$-21.73 / 2019-12-17 / AMZN Mktp US*XA2802ID3   AMZN.COM BILLWA
What category is this txn? Must be one of:
['Auto and Transport', 'Bills and Utilities', 'Education', 'Experiences', 'Entertainment', 'Fees and Charges', 'Food and Dining', 'Gifts and Donations', 'Health and Fitness', 'Hobbies', 'Investments', 'Personal Care', 'Shopping', 'Taxes', 'Uncategorized']Education
What sub-category is the above txn? Must be one of:
['Education - Loan Payment', 'Education - Miscellaneous', 'Education - Tuition', 'Redo']Redo
----------$-21.73 / 2019-12-17 / AMZN Mktp US*XA2802ID3   AMZN.COM BILLWA
What category is this txn? Must be one of:
['Auto and Transport', 'Bills and Utilities', 'Education', 'Experiences', 'Entertainment', 'Fees and Charges', 'Food and Dining', 'Gifts and Donations', 'Health and Fitness', 'Hobbies', 'Investments', 'Personal Care', 'Shopping', 'Taxes', 'Uncategorized']Auto and Transport
What sub-category is the above txn? Must be one of:
['Ride Share', 'Auto - Gas', '

In [42]:
# Display the categorized transactions assembled by the loop in the previous cell.
categorized_txn_df

Unnamed: 0,category,sub_category,txn_amount,txn_date,txn_description,unique_id
0,Education,Redo,-21.73,2019-12-17,AMZN Mktp US*XA2802ID3 AMZN.COM BILLWA,usaa_checking_20191217_21.73
1,Food and Dining,Redo,-8.0,2019-12-17,TST* STIR COOKING SCHOOLTDENVER CO,usaa_checking_20191217_8.0
2,Food and Dining,Coffee,-4.2,2019-12-17,SQ *ALLEGRO COFFEE DENVER CO,usaa_checking_20191217_4.2
3,Bills and Utilities,Credit Card Payment,-686.92,2019-12-16,USAA CREDIT CARD PAYMENT,usaa_checking_20191216_686.92
4,Auto and Transport,Auto - Parts,-86.67,2019-12-16,WAL-MART #0986840 SUMMIT FRISCO CO,usaa_checking_20191216_86.67
5,Food and Dining,Groceries,-81.33,2019-12-16,SAFEWAY #0322 GOLDEN CO,usaa_checking_20191216_81.33


In [27]:
# NOTE(review): scratch cell. It reads `txn_config_subset` from the kernel's global
# namespace; in this part of the notebook that name is only assigned inside
# Txn.categorize_txn, so this presumably only worked with leftover debugging state
# and would fail after Restart & Run All -- confirm and delete.
# Takes the category / sub-category of the first row of the matched config subset.
txn_category = txn_config_subset.iloc[0]['category']
txn_sub_category = txn_config_subset.iloc[0]['sub_category']

'Auto - Gas'

In [38]:
# Scratch check: list.append modifies the list in place (it adds 4 to `a`).
# NOTE(review): presumably leftover experimentation unrelated to the analysis;
# consider deleting.
a = [1, 2, 3]

a.append(4)

In [39]:
# Show the list after the in-place append from the previous cell.
a

[1, 2, 3, 4]