## [Pandas Reference](https://pandas.pydata.org/pandas-docs/stable/index.html)
1. [Find the unique values in a column and then sort them](https://stackoverflow.com/questions/32072076/find-the-unique-values-in-a-column-and-then-sort-them)
2. [How to select rows from a DataFrame based on column values?](https://stackoverflow.com/questions/17071871/how-to-select-rows-from-a-dataframe-based-on-column-values)
3. [Selecting multiple columns in a pandas dataframe](https://stackoverflow.com/questions/11285613/selecting-multiple-columns-in-a-pandas-dataframe)

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Report the versions of the libraries this notebook was run with.
for libraryName, library in (('pandas', pd), ('numpy', np), ('matplotlib', mpl)):
    print('{} version: {}'.format(libraryName, library.__version__))

In [None]:
def exportSheet(data, folder, name):
    """Write ``data`` to ``<folder>/<name>.CSV``, creating ``folder`` if needed.

    The index of ``data`` is never written, so any value that must appear in
    the file has to be a regular column.

    Args:
        data: Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
        folder: Destination directory; missing parent directories are created.
        name: File name without extension; ``.CSV`` is appended.
    """
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail if
    # the directory appeared between the check and the creation.
    os.makedirs(folder, exist_ok=True)
    data.to_csv('{}/{}.CSV'.format(folder, name), index=False)

def _sortedUniqueValues(csv, column):
    """Return the distinct, non-missing values of ``column`` in ``csv``, sorted ascending.

    Args:
        csv: Path (or buffer) accepted by ``pandas.read_csv``.
        column: Name of the column to inspect.

    Returns:
        numpy.ndarray holding each value once, in ascending order.

    Raises:
        KeyError: If ``column`` is not present in the file.
    """
    values = pd.read_csv(csv)[column]
    # Missing cells are dropped before sorting: a NaN next to strings cannot be
    # ordered and would raise TypeError.
    return values.dropna().drop_duplicates().sort_values().to_numpy()


def getAccounts(csv):
    """Return the sorted distinct values of the 'Account' column of ``csv``."""
    return _sortedUniqueValues(csv, 'Account')


def getCategories(csv):
    """Return the sorted distinct values of the 'Category' column of ``csv``."""
    return _sortedUniqueValues(csv, 'Category')


def getParentCategories(csv):
    """Return the sorted distinct values of the 'Parent Category' column of ``csv``."""
    return _sortedUniqueValues(csv, 'Parent Category')

def summarizeExpenses(accountexpenses, categorytranslation, outputdir):
    """Export per-category totals, once with original and once with translated labels.

    Two files are written into ``outputdir`` through ``exportSheet``:
    ``summary.CSV`` with columns ``Category`` and ``Amount``, and
    ``summaryTrans.CSV`` with columns ``CategoryZh`` and ``Amount``.

    Args:
        accountexpenses: DataFrame with at least 'Category' and 'Amount' columns.
        categorytranslation: DataFrame with 'Category' and 'CategoryZh' columns.
        outputdir: Directory that receives both files.
    """
    # as_index=False keeps 'Category' as a real column. exportSheet does not
    # write the index, so grouping into the index left summary.CSV with the
    # amounts only and no category labels.
    accountsummary = accountexpenses.groupby('Category', as_index=False)['Amount'].sum()
    exportSheet(accountsummary, outputdir, 'summary')

    summarytrans = pd.merge(accountsummary, categorytranslation, on='Category', how='outer')
    # The outer join also pulls in translations for categories without any
    # spending; those rows have no amount and are left out of the export.
    summarytrans = summarytrans[summarytrans['Amount'].notnull()]
    exportSheet(summarytrans[['CategoryZh', 'Amount']], outputdir, 'summaryTrans')

def parseExpenses(csv, accounts, categorytranslation, folder, subfolder):
    """Export a summary plus one detail sheet per category for the given accounts.

    The transactions in ``csv`` are restricted to rows whose 'Account' is in
    ``accounts``. ``summarizeExpenses`` writes the totals, then every category
    gets its own ``<category>.CSV`` in ``<folder>/<subfolder>`` containing its
    rows ordered by 'Date' followed by a 'Total' row with the summed 'Amount'.

    Args:
        csv: Path of the transactions CSV.
        accounts: Account names to keep.
        categorytranslation: DataFrame passed through to ``summarizeExpenses``.
        folder: Root output directory.
        subfolder: Directory below ``folder`` that receives the files.
    """
    detailcolumns = ['Date', 'Description', 'Original Description',
                     'Amount', 'Type', 'Account', 'Memo', 'Pending']

    expenses = pd.read_csv(csv)
    outputdir = '{}/{}'.format(folder, subfolder)
    accountexpenses = expenses[expenses['Account'].isin(accounts)]
    summarizeExpenses(accountexpenses, categorytranslation, outputdir)

    # groupby visits each category once (in order of first appearance) and
    # skips rows without a category, which previously yielded an empty
    # 'nan.CSV' sheet.
    for category, rows in accountexpenses.groupby('Category', sort=False):
        # NOTE(review): 'Date' is sorted as read from the CSV; if it is a plain
        # string column the order is lexicographic, not chronological - confirm.
        # .copy() makes the sheet independent of `expenses` so appending the
        # 'Total' row does not write into a slice of another frame.
        dataset = rows.sort_values('Date')[detailcolumns].copy()
        dataset.loc['Total'] = pd.Series(dataset['Amount'].sum(), index=['Amount'])
        exportSheet(dataset, outputdir, category)

def pickExpensesByAccounts(csv, accounts, categorytranslation, folder, subfolder, accountCategory):
    """Summarize the transactions of ``accounts`` and export them as one sheet.

    Rows of ``csv`` whose 'Account' is listed in ``accounts`` are handed to
    ``summarizeExpenses`` and then written, unsplit, to
    ``<folder>/<subfolder>/<accountCategory>.CSV``.
    """
    sheetColumns = ['Date', 'Description', 'Original Description',
                    'Amount', 'Type', 'Parent Category', 'Category',
                    'Account', 'Memo', 'Pending']

    allRows = pd.read_csv(csv)
    targetDir = '{}/{}'.format(folder, subfolder)

    wanted = allRows['Account'].isin(accounts)
    selectedRows = allRows[wanted]

    summarizeExpenses(selectedRows, categorytranslation, targetDir)
    exportSheet(selectedRows[sheetColumns], targetDir, accountCategory)

In [None]:
# List the account names of both exports; the results were previously
# computed and thrown away because they were not the cell's last expression.
print(getAccounts('Expenses.csv'))
print(getAccounts('../work/transactions_20191122_past90days.csv'))

# parseExpenses takes the category translation table as its third argument.
# The calls below used to omit it and failed with a TypeError.
# NOTE(review): the table is loaded from the file that the translation cell
# further down writes, so that cell must have been run at least once - confirm
# this is the intended table for the 20191114 export.
translationTable = pd.read_csv('../work/categories_chinese_translation.csv')

parseExpenses('Expenses.csv', ['0289 * Business Member Share Savings',
                               '0388 * Business Basic Checking',
                               '5734 * Member Share Savings',
                               '7238 * ',
                               'Adv Plus Banking - 8129',
                               'Annie - 9823',
                               'Bryce - 9470',
                               'Nicole - 9471'], translationTable, '20191114', 'bankaccounts')
parseExpenses('Expenses.csv', ['Hilton Honors Card',
                               'chunyenwang-8046'], translationTable, '20191114', 'creditcards')

In [None]:
# Exploratory pass over Expenses.csv: show the available columns, accounts and
# categories, then export the credit-card totals and one sheet per category.
expenses = pd.read_csv('Expenses.csv')

cols = expenses.columns.tolist()
print(cols)
uniqueAccounts = expenses['Account'].unique()
uniqueAccounts.sort()
print(uniqueAccounts)
uniqueCategories = expenses['Category'].unique()
uniqueCategories.sort()
print(uniqueCategories)

creditcards = expenses[expenses['Account'].isin(['Hilton Honors Card', 'chunyenwang-8046'])]
creditcardsummary = creditcards.groupby('Category').agg({'Amount': 'sum'})
# Copy the group labels out of the index so they survive exportSheet, which
# does not write the index. This line used to read `summary.index`, a name
# that is never defined, and raised NameError.
creditcardsummary['Category'] = creditcardsummary.index
print(creditcardsummary)
exportSheet(creditcardsummary[['Category', 'Amount']], '20191114/creditcards', 'summary')

for category in creditcards['Category'].unique():
    datasetraw = creditcards[creditcards['Category'] == category]
    # .copy() detaches the sheet from `expenses` before the 'Total' row is
    # appended, instead of assigning into a slice of a slice.
    dataset = datasetraw[['Date', 'Description', 'Original Description', 'Amount', 'Type', 'Account', 'Memo', 'Pending']].copy()
    dataset.loc['Total'] = pd.Series(dataset['Amount'].sum(), index=['Amount'])
    exportSheet(dataset, '20191114/creditcards', category)


# Disabled draft: selects the credit-card rows whose Category is
# 'Uncategorized', appends a 'Total' row and exports them. It is wrapped in a
# bare string literal, so none of the statements inside are executed.
'''
Uncategorized = creditcards[creditcards['Category'] == 'Uncategorized']
Uncategorized.loc['Total'] = pd.Series([Uncategorized['Amount'].sum(), 'Uncategorized'],
                                       index = ['Amount', 'Category'])
exportSheet(Uncategorized, '20191114', 'Uncategorized')
'''

In [None]:
# Peek at the first five rows, restricted to the transaction columns of interest.
expenses.loc[:, ['Date', 'Description', 'Original Description',
                 'Amount', 'Type', 'Parent Category', 'Category',
                 'Account', 'Memo', 'Pending']].head(5)

In [None]:
# Build the category -> Chinese label table and save it for the cells below.
print(getAccounts('../work/transactions_20191122_past90days.csv'))
categoryEN = getCategories('../work/transactions_20191122_past90days.csv')
categoryZh = ['ATM費用', '酒類和酒吧', '汽車與運輸', '汽車保險', '書籍和用品', '商業服務',
              '慈善', '支票', '咖啡店', '信用卡付款', '分紅和上限', '醫生', '教育',
              '電子和軟件', '娛樂', '快餐', '費用', '財務費用', '財務顧問', '餐飲',
              '汽油', '雜貨店', '健康與健身', '房屋', '房屋裝修', '收入', '利息收入',
              '投資', '兒童活動', '手機', '電影和DVD', '音樂', '音樂課', '辦公用品',
              '停車', '個人護理', '出租車', '服務和零件', '運輸', '購物', '稅收', '轉帳', '水電費']
# The labels are matched to the sorted categories purely by position, and zip
# stops at the shorter input. Fail loudly on a length mismatch instead of
# writing a truncated or shifted translation table.
if len(categoryEN) != len(categoryZh):
    raise ValueError('Expected {} category translations, got {}'.format(
        len(categoryEN), len(categoryZh)))
categoryTrans = list(zip(categoryEN.tolist(), categoryZh))
catDf = pd.DataFrame(categoryTrans, columns=['Category', 'CategoryZh'])
catDf.to_csv('../work/categories_chinese_translation.csv', index=False)

In [None]:
# Per-category breakdown of the 90-day export: one run for the bank accounts
# and one for the credit cards, each written to its own sub-folder.
bankAccountNames = [
    '0289 * Business Member Share Savings',
    '0388 * Business Basic Checking',
    '5734 * Member Share Savings',
    '7238 * ',
    'Adv Plus Banking - 8129',
    'Annie - 9823',
    'Bryce - 9470',
    'Nicole - 9471',
]
creditCardNames = ['Hilton Honors Card', 'chunyenwang-8046']

parseExpenses('../work/transactions_20191122_past90days.csv', bankAccountNames,
              catDf, '../work/transactions_20191122_past90days', 'bankaccounts')
parseExpenses('../work/transactions_20191122_past90days.csv', creditCardNames,
              catDf, '../work/transactions_20191122_past90days', 'creditcards')

In [None]:
# Reload the saved translation table, then export one combined sheet (plus
# summaries) per account group; the group name doubles as sub-folder and
# sheet name.
catDf = pd.read_csv('../work/categories_chinese_translation.csv')
for groupName, groupAccounts in (
        ('bankaccounts', ['0289 * Business Member Share Savings',
                          '0388 * Business Basic Checking',
                          '5734 * Member Share Savings',
                          '7238 * ',
                          'Adv Plus Banking - 8129',
                          'Annie - 9823',
                          'Bryce - 9470',
                          'Nicole - 9471']),
        ('creditcards', ['Hilton Honors Card',
                         'chunyenwang-8046'])):
    pickExpensesByAccounts('../work/transactions_20191122_past90days.csv',
                           groupAccounts,
                           catDf, '../work/transactions_20191122_past90days',
                           groupName, groupName)