In [1]:
import git
import os
import pandas as pd

### Settings

In [2]:
def get_git_root(path):
    """Return the top-level directory of the git repository containing ``path``.

    The repository is found by looking at ``path`` and then walking up through
    its parent directories. The returned value is whatever
    ``git rev-parse --show-toplevel`` reports for that repository.
    """
    repo = git.Repo(path, search_parent_directories=True)
    return repo.git.rev_parse("--show-toplevel")

# Repository-relative data directories. Each value is a suffix intended to be
# joined onto the git root path; the sub-directories are derived from the root.
data_root_dir_append = "data"
data_raw_dir_append = data_root_dir_append + "/raw"
data_interim_dir_append = data_root_dir_append + "/interim"
data_processed_dir_append = data_root_dir_append + "/processed"

In [3]:
# Resolve the repository root from the current working directory so the data
# paths below do not depend on which sub-directory the notebook is run from.
git_root_path = get_git_root(os.getcwd())

raw_data_path = os.path.join(git_root_path, data_raw_dir_append)

# Keep regular files only (sub-directories are skipped). os.listdir returns
# entries in arbitrary order, so sort them to make txn_files[0] reproducible
# across machines and kernel restarts.
txn_files = sorted(
    name
    for name in os.listdir(raw_data_path)
    if os.path.isfile(os.path.join(raw_data_path, name))
)

### Per-bank column configuration (TODO: move into a config file)

In [7]:
# Canonical names for the three columns that parsed transaction tables keep.
txn_date_col_name, txn_description_col_name, txn_amount_col_name = (
    "txn_date",
    "txn_description",
    "txn_amount",
)

# Each bank config has two entries:
#   "columns"                  - the label assigned to each CSV column, in
#                                file order; None leaves a column unnamed.
#   "txn_amount_positive_bool" - not read by any code visible in this notebook;
#                                presumably the sign convention of the amount
#                                column -- TODO confirm.
usaa_config = dict(
    columns=[
        "status",
        None,
        txn_date_col_name,
        None,
        txn_description_col_name,
        "categorization",
        txn_amount_col_name,
    ],
    txn_amount_positive_bool=False,
)

citizens_config = dict(
    columns=[
        "Transaction Type",
        txn_date_col_name,
        "Account Type",
        txn_description_col_name,
        txn_amount_col_name,
        "Reference No.",
        "Credits",
        "Debits",
    ],
    txn_amount_positive_bool=False,
)

In [10]:
class TxnFile():
    """One raw bank transaction export, loaded into a normalized DataFrame.

    The bank is inferred from the file name, the account type is either passed
    to the constructor or asked for interactively, and the CSV is read using
    the column layout from the matching module-level config (``usaa_config``
    or ``citizens_config``).

    Attributes:
        root_dir: Directory that contains the file.
        file: File name inside ``root_dir``.
        full_path: ``root_dir`` joined with ``file``.
        bank: Bank key inferred from the file name.
        config: Column configuration dict selected for ``bank``.
        account_type: One of ``ACCOUNT_TYPES``.
        txn_df: DataFrame holding the date, amount and description columns.
    """

    # Bank identifiers. Only the Citizens and USAA keys are produced by
    # get_bank() and have a config; the other two are declared but unused here.
    citizens_bank_key = 'citizens'
    usaa_bank_key = 'usaa'
    fidelity_bank_key = 'fidelity'
    discover_key = 'discover'

    # Kept as a list so the prompt/error text renders exactly as before.
    ACCOUNT_TYPES = ['credit', 'checking', 'investment']

    def __init__(self, path, file, account_type=None):
        """Identify the bank, resolve the account type and load the file.

        Args:
            path: Directory containing the transaction file.
            file: Name of the transaction file.
            account_type: Optional account type. When omitted the user is
                prompted via ``input()``; pass it explicitly to keep a
                notebook run non-interactive and reproducible.

        Raises:
            ValueError: If the bank cannot be inferred from ``file``, no
                config exists for the bank, or ``account_type`` is invalid.
        """
        self.root_dir = path
        self.file = file
        self.full_path = os.path.join(path, file)

        self.bank = self.get_bank()
        self.config = self._get_config()

        if account_type is None:
            self.account_type = self.get_account_type()
        else:
            self.account_type = self._validate_account_type(account_type)
        self.txn_df = self.get_txn_df()

    def get_bank(self):
        """Infer the bank key from marker substrings in the file name.

        Returns:
            The USAA key when the name contains ``'bk_download'``, otherwise
            the Citizens key when it contains ``'EXPORT'``.

        Raises:
            ValueError: If the file name contains neither marker.
        """
        if 'bk_download' in self.file:
            return self.usaa_bank_key
        if 'EXPORT' in self.file:
            return self.citizens_bank_key
        # Fail with a clear message instead of an UnboundLocalError.
        raise ValueError(
            "Cannot determine bank from file name: {!r}".format(self.file)
        )

    def _get_config(self):
        """Return the module-level column config for ``self.bank``."""
        if self.bank == self.citizens_bank_key:
            return citizens_config
        if self.bank == self.usaa_bank_key:
            return usaa_config
        # Without this, a missing config only surfaced later as AttributeError.
        raise ValueError(
            "No column configuration defined for bank {!r} (file {!r})".format(
                self.bank, self.file
            )
        )

    def _validate_account_type(self, account_type):
        """Return ``account_type`` unchanged, or raise ValueError if invalid."""
        if account_type not in self.ACCOUNT_TYPES:
            raise ValueError(
                "account_type must be one of {}, got {!r}".format(
                    self.ACCOUNT_TYPES, account_type
                )
            )
        return account_type

    def get_account_type(self):
        """Prompt until the user enters one of ``ACCOUNT_TYPES``.

        Leading/trailing whitespace and letter case in the answer are ignored.

        Returns:
            The accepted account type string.
        """
        prompt = "What kind of account is this file? {}\nMust be one of {}".format(
            self.file, self.ACCOUNT_TYPES
        )

        while True:
            acct_type = input(prompt).strip().lower()
            if acct_type in self.ACCOUNT_TYPES:
                return acct_type
            print("ERROR! Input not one of {}\n".format(self.ACCOUNT_TYPES))

    def get_txn_df(self):
        """Read the CSV and return only the date, amount and description columns.

        Returns:
            A new DataFrame with the columns named by ``txn_date_col_name``,
            ``txn_amount_col_name`` and ``txn_description_col_name``.
        """
        # Citizens files are read with their first row as a header (which is
        # then overwritten below); every other bank is read with no header row.
        header = 0 if self.bank == self.citizens_bank_key else None
        raw_txn_df = pd.read_csv(self.full_path, header=header)

        # Positional rename: the config lists one label per CSV column.
        raw_txn_df.columns = self.config['columns']

        wanted = [txn_date_col_name, txn_amount_col_name, txn_description_col_name]
        # Explicit copy so later edits to the result never alias the raw frame.
        return raw_txn_df[wanted].copy()
    
        

In [13]:
# Load the first raw transaction file. Constructing TxnFile prompts for the
# account type, so this cell is interactive.
if not txn_files:
    raise FileNotFoundError(
        "No transaction files found in {}".format(raw_data_path)
    )
txn_file = TxnFile(raw_data_path, txn_files[0])

What kind of account is this file? bk_download.csv
Must be one of ['credit', 'checking', 'investment']checking


In [14]:
# Preview only the first rows so the rendered output stays bounded.
txn_file.txn_df.head(10)

Unnamed: 0,txn_date,txn_amount,txn_description
0,12/17/2019,-21.73,AMZN Mktp US*XA2802ID3 AMZN.COM BILLWA
1,12/17/2019,-8,TST* STIR COOKING SCHOOLTDENVER CO
2,12/17/2019,-4.2,SQ *ALLEGRO COFFEE DENVER CO
3,12/16/2019,-686.92,USAA CREDIT CARD PAYMENT
4,12/16/2019,-86.67,WAL-MART #0986840 SUMMIT FRISCO CO
5,12/16/2019,-81.33,SAFEWAY #0322 GOLDEN CO
6,12/16/2019,-56.7,EXXONMOBIL 48225817 FRISCO CO
7,12/16/2019,-53.2,KiwiCo Inc. 800-7144828 CA
8,12/16/2019,-23.65,WILDWOOD VAIL CO
9,12/16/2019,-14.2,BRECKENRIDGE PASSPORT 414-4316555 CO
