In [1]:
import pandas as pd

# Root folder of the 2019 tax documents; used below as the prefix for input
# files that are read and for the transformed CSVs that are written.
# NOTE(review): absolute, machine-specific path — the notebook only runs
# unchanged on a machine where this folder exists.
BASE = '/Users/dennis/Documents/Google Drive/Tax/2019'

## ADP 2019 pay statements (downloaded from the web)

In [2]:
# Load the tab-separated ADP pay statement export, keeping only the columns
# needed for the summary.
pay_raw = pd.read_csv(
    f'{BASE}/pay-stmts.tsv',
    sep='\t',
    usecols=['PAY DATE', 'CHECK NUMBER', 'GROSS PAY', 'NET PAY 1'],
    parse_dates=['PAY DATE'],
)

# Build the tidy frame in one step. The pay amounts arrive as text with
# thousands separators, so the commas are stripped before numeric conversion.
pay_stmts = pd.DataFrame({
    'Date': pay_raw['PAY DATE'],
    'Check Number': pay_raw['CHECK NUMBER'],
    'Gross': pd.to_numeric(pay_raw['GROSS PAY'].str.replace(',', '')),
    'Net': pd.to_numeric(pay_raw['NET PAY 1'].str.replace(',', '')),
})

# Drop statements dated 2020-01-01 or later.
is_before_2020 = pay_stmts['Date'] < '2020-01-01'
pay_stmts = pay_stmts[is_before_2020]

pay_stmts.to_csv('pay_statements_transformed.csv', index=False)
pay_stmts

Unnamed: 0,Date,Check Number,Gross,Net
7,2019-12-20,510050,6080.0,4434.64
8,2019-10-11,410052,2016.0,1474.82
9,2019-09-27,390053,5760.0,3773.53
10,2019-09-13,370047,5184.0,3425.5
11,2019-08-30,350050,5760.0,3778.08
12,2019-08-16,330052,5760.0,3773.54
13,2019-08-02,310053,5760.0,3773.53
14,2019-07-19,290053,5040.0,3338.49
15,2019-07-05,270052,4050.0,2740.36
16,2019-06-21,250053,2268.0,1815.02


## Bank/Credit card transactions

### AMEX 2019 transactions

In [49]:
# Load the AMEX activity export, then show its schema and the date range covered.
amex_path = f'{BASE}/Amex-Skymiles/activity.csv'
amex2019 = pd.read_csv(amex_path, parse_dates=['Date'])
amex2019.info()
amex_dates = amex2019['Date']
print(amex_dates.min(), amex_dates.max())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 8 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   Date                          392 non-null    datetime64[ns]
 1   Description                   392 non-null    object        
 2   Amount                        392 non-null    float64       
 3   Extended Details              379 non-null    object        
 4   Appears On Your Statement As  392 non-null    object        
 5   Address                       379 non-null    object        
 6   Reference                     392 non-null    object        
 7   Category                      380 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(6)
memory usage: 24.6+ KB
2019-01-01 00:00:00 2019-12-23 00:00:00


In [50]:
# Normalise the raw AMEX export into the shared
# Date / Amount / Description / Category / Source layout.
#
# The result is derived from `amex2019` WITHOUT modifying it, so this cell can
# be re-run safely: flipping the sign in place would negate the amounts again
# on every execution.
amex_normalised = (
    amex2019
    .assign(
        Source='AMEX DELTA',
        # Negate the exported amounts (the output has purchases negative and
        # the autopay payment positive, like the other sources).
        Amount=lambda df: df['Amount'] * -1,
        # Assign the filled column back instead of calling
        # `df['Category'].fillna(..., inplace=True)`: that chained in-place
        # form does not update the frame under pandas Copy-on-Write.
        Category=lambda df: df['Category'].fillna(''),
    )
    .sort_values(by='Date', ascending=True)
)
amex_cleaned = amex_normalised[['Date', 'Amount', 'Description', 'Category', 'Source']]

# Written twice: once to the working directory and once next to the source data.
amex_cleaned.to_csv('amex_transformed.csv', index=False)
amex_cleaned.to_csv(f'{BASE}/amex_transformed.csv', index=False)

# info() prints its report itself and returns None, so it is not wrapped in
# print() (which only added a stray "None" line to the output).
amex_cleaned.info()
amex_cleaned

<class 'pandas.core.frame.DataFrame'>
Int64Index: 392 entries, 391 to 0
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         392 non-null    datetime64[ns]
 1   Amount       392 non-null    float64       
 2   Description  392 non-null    object        
 3   Category     392 non-null    object        
 4   Source       392 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 18.4+ KB
None


Unnamed: 0,Date,Amount,Description,Category,Source
391,2019-01-01,1866.03,AUTOPAY PAYMENT - THANK YOU,,AMEX DELTA
390,2019-01-02,-94.48,Amazon Web Services AWS.Amazon.com WA,Merchandise & Supplies-Internet Purchase,AMEX DELTA
389,2019-01-05,-16.50,GOOGLE *HBO DIG SV I855-836-3987 CA,Business Services-Internet Services,AMEX DELTA
388,2019-01-06,-49.02,JACK'S BBQ,Restaurant-Restaurant,AMEX DELTA
387,2019-01-06,-1.99,GOOGLE *GOOGLE STORA855-836-3987 CA,Business Services-Internet Services,AMEX DELTA
...,...,...,...,...,...
4,2019-12-20,-47.48,LYFT,Transportation-Taxis & Coach,AMEX DELTA
3,2019-12-20,-61.31,LYFT,Transportation-Taxis & Coach,AMEX DELTA
2,2019-12-21,-988.90,GOOGLE *GOOGLE FI G.CO/HELPPAY# CA,Other-Miscellaneous,AMEX DELTA
1,2019-12-22,-114.95,U-HAUL MOVING & STORAGE OF BURIEN,Other-Miscellaneous,AMEX DELTA


### Bank of America - Alaska Air Visa - 2019 Transactions

In [51]:
# Monthly Bank of America statement exports, read from the working directory.
alaska_files = ('BofA_10_2019.csv', 'BofA_11_2019.csv', 'BofA_12_2019.csv')

for i, csv in enumerate(alaska_files):
    alaska_raw = pd.read_csv(csv, parse_dates=['Posted Date'])
    alaska_raw['Source'] = 'ALASKA VISA'

    # The first file seeds the accumulator; every later file is stacked
    # in front of what has been collected so far.
    alaska2019 = (
        alaska_raw.copy() if i == 0
        else pd.concat([alaska_raw, alaska2019])
    )

    # Running date range after each file has been added.
    posted = alaska2019['Posted Date']
    print(posted.min(), posted.max())

alaska2019.info()

2019-10-24 00:00:00 2019-11-19 00:00:00
2019-10-24 00:00:00 2019-12-20 00:00:00
2019-10-24 00:00:00 2020-01-20 00:00:00
<class 'pandas.core.frame.DataFrame'>
Int64Index: 155 entries, 0 to 37
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Posted Date       155 non-null    datetime64[ns]
 1   Reference Number  155 non-null    object        
 2   Payee             155 non-null    object        
 3   Address           148 non-null    object        
 4   Amount            155 non-null    float64       
 5   Source            155 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(4)
memory usage: 8.5+ KB


In [52]:
# Normalise the stacked Alaska Visa statements into the shared
# Date / Amount / Description / Category / Source layout.
alaska2019 = (
    alaska2019
    .assign(
        # Assign the filled column back instead of calling
        # `df['Address'].fillna(..., inplace=True)`: that chained in-place
        # form does not update the frame under pandas Copy-on-Write.
        Address=lambda df: df['Address'].fillna(''),
        # This export has no category column; add an empty one.
        Category='',
    )
    # Rename by name rather than by position, so a change in the export's
    # column order cannot silently mislabel columns.
    .rename(columns={
        'Posted Date': 'Date',
        'Reference Number': 'Reference',
        'Payee': 'Description',
    })
)

# The statement files run into January 2020; keep 2019 postings only.
alaska2019 = alaska2019[alaska2019['Date'].dt.year == 2019]
alaska2019 = alaska2019.sort_values(by='Date', ascending=True)

alaska_cleaned = alaska2019[['Date', 'Amount', 'Description', 'Category', 'Source']]
alaska_cleaned.info()

# Written twice: once to the working directory and once next to the source data.
alaska_cleaned.to_csv('alaska_transformed.csv', index=False)
alaska_cleaned.to_csv(f'{BASE}/alaska_transformed.csv', index=False)
alaska_cleaned

<class 'pandas.core.frame.DataFrame'>
Int64Index: 120 entries, 37 to 35
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         120 non-null    datetime64[ns]
 1   Amount       120 non-null    float64       
 2   Description  120 non-null    object        
 3   Category     120 non-null    object        
 4   Source       120 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 5.6+ KB


Unnamed: 0,Date,Amount,Description,Category,Source
37,2019-10-24,-25.75,LATONA PUB SEATTLE WA,,ALASKA VISA
36,2019-10-24,-252.00,MADRONA FAMILY DENTAL SEATTLE WA,,ALASKA VISA
35,2019-10-24,-11.12,CUPCAKE ROYALE - MADRONA SEATTLE WA,,ALASKA VISA
32,2019-10-25,-44.40,TST* BENNETT S PURE FOOD MERCER ISLANDWA,,ALASKA VISA
34,2019-10-25,-19.38,76 - UNITED PACIFIC 5483 FEDERAL WAY WA,,ALASKA VISA
...,...,...,...,...,...
39,2019-12-30,-47.92,TST* TAVOLATA SEATTLE WA,,ALASKA VISA
38,2019-12-30,-46.64,TST* ELYSIAN BREWING - FISEATTLE WA,,ALASKA VISA
37,2019-12-30,-7.16,FLATSTICK PUB LLC SEATTLE WA,,ALASKA VISA
36,2019-12-30,-31.86,QFC #5839 MERCER ISLANDWA,,ALASKA VISA


### Capital One Visa 2019 transactions

In [67]:
# Load the Capital One export (read from the working directory), then show
# its schema and the range of transaction dates.
capone_date_cols = ['Transaction Date', 'Posted Date']
capone_raw = pd.read_csv('CapOne2019.csv', parse_dates=capone_date_cols)
capone_raw.info()
capone_txn_dates = capone_raw['Transaction Date']
print(capone_txn_dates.min(), capone_txn_dates.max())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Transaction Date  149 non-null    datetime64[ns]
 1   Posted Date       149 non-null    datetime64[ns]
 2   Card No.          149 non-null    int64         
 3   Description       149 non-null    object        
 4   Category          149 non-null    object        
 5   Debit             137 non-null    float64       
 6   Credit            12 non-null     float64       
dtypes: datetime64[ns](2), float64(2), int64(1), object(2)
memory usage: 8.3+ KB
2019-01-08 00:00:00 2019-12-28 00:00:00


In [68]:
# Normalise the Capital One export into the shared
# Date / Amount / Description / Category / Source layout.
#
# The result is derived from `capone_raw` WITHOUT modifying it, so this cell
# can be re-run safely: negating `Debit` in place would flip its sign again on
# every execution and corrupt `Amount`.
capone_normalised = (
    capone_raw
    # Rename by name rather than by position.
    .rename(columns={'Transaction Date': 'Date'})
    .assign(
        Source='CAPITAL ONE VISA',
        # Each row has either a Debit or a Credit (the other is NaN).
        # Fill the missing side with 0 and combine them into one signed
        # amount: debits negative, credits positive.
        # The fillna results are used directly instead of
        # `df[col].fillna(..., inplace=True)`, which does not update the
        # frame under pandas Copy-on-Write.
        Amount=lambda df: df['Debit'].fillna(0.0) * -1 + df['Credit'].fillna(0.0),
    )
    .sort_values(by='Date', ascending=True)
)
capone_cleaned = capone_normalised[['Date', 'Amount', 'Description', 'Category', 'Source']]

# Written twice: once to the working directory and once next to the source data.
capone_cleaned.to_csv('cap_one_transformed.csv', index=False)
capone_cleaned.to_csv(f'{BASE}/cap_one_transformed.csv', index=False)

# info() prints its report itself and returns None, so it is not wrapped in
# print() (which only added a stray "None" line to the output).
capone_cleaned.info()
capone_cleaned

<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 148 to 0
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         149 non-null    datetime64[ns]
 1   Amount       149 non-null    float64       
 2   Description  149 non-null    object        
 3   Category     149 non-null    object        
 4   Source       149 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 7.0+ KB
None


Unnamed: 0,Date,Amount,Description,Category,Source
148,2019-01-08,-0.25,COLLEGE TRANSCRIPT,Other Services,CAPITAL ONE VISA
147,2019-01-09,-27.50,FEDERAL WAY INDOOR RANGE,Merchandise,CAPITAL ONE VISA
146,2019-01-09,-8.26,DICK'S DRIVE-IN WALLINGFO,Dining,CAPITAL ONE VISA
145,2019-01-15,-15.37,TST* SALUMI ARTISAN CURED,Dining,CAPITAL ONE VISA
144,2019-01-15,-16.01,TST* SALUMI ARTISAN CURED,Dining,CAPITAL ONE VISA
...,...,...,...,...,...
4,2019-11-28,1756.65,CAPITAL ONE AUTOPAY PYMT,Payment/Credit,CAPITAL ONE VISA
3,2019-12-07,-0.20,SDOT PAYBYPHONE PARKING,Gas/Automotive,CAPITAL ONE VISA
2,2019-12-28,9.33,CAPITAL ONE AUTOPAY PYMT,Payment/Credit,CAPITAL ONE VISA
1,2019-12-28,-2.83,SDOT PAYBYPHONE PARKING,Gas/Automotive,CAPITAL ONE VISA


### Morgan Stanley Checking 2019 transactions

In [79]:
# Load the Morgan Stanley checking export; the first four lines of the file
# are skipped before the header row is read.
ms_date_cols = ['Activity Date', 'Transaction Date']
ms_raw = pd.read_csv('MS_Checking_2019.csv', skiprows=4, parse_dates=ms_date_cols)
ms_raw.info()
ms_txn_dates = ms_raw['Transaction Date']
print(ms_txn_dates.min(), ms_txn_dates.max())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Activity Date     150 non-null    datetime64[ns]
 1   Transaction Date  150 non-null    datetime64[ns]
 2   Activity          150 non-null    object        
 3   Description       150 non-null    object        
 4   Amount($)         150 non-null    float64       
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 6.0+ KB
2019-01-02 00:00:00 2019-12-31 00:00:00


In [72]:
# Display the column layout of the cleaned Capital One frame
# (presumably as the reference layout for the other sources — confirm).
capone_cleaned.columns

Index(['Date', 'Amount', 'Description', 'Category', 'Source'], dtype='object')

In [80]:
# Normalise the Morgan Stanley checking export into the shared
# Date / Amount / Description / Category / Source layout.
#
# The result is derived from `ms_raw` WITHOUT modifying it. Renaming the
# columns of `ms_raw` in place by position made this cell fail on a second
# run: `combined_desc` was added again as a ninth column, which no longer
# matched the eight-name list.
ms_normalised = (
    ms_raw
    .assign(
        # This export has no category column; add an empty one.
        Category='',
        Source='MS CHECKING',
        # Fold the activity type into the description, e.g. "Check: ...".
        combined_desc=lambda df: df['Activity'] + ': ' + df['Description'],
    )
    # Rename by name rather than by position; the combined text becomes the
    # public `Description` and the original is kept as `orig_Description`.
    .rename(columns={
        'Transaction Date': 'Date',
        'Description': 'orig_Description',
        'Amount($)': 'Amount',
        'combined_desc': 'Description',
    })
    .sort_values(by='Date', ascending=True)
)
ms_cleaned = ms_normalised[['Date', 'Amount', 'Description', 'Category', 'Source']]

# Written twice: once to the working directory and once next to the source data.
ms_cleaned.to_csv('ms_checking_transformed.csv', index=False)
ms_cleaned.to_csv(f'{BASE}/ms_checking_transformed.csv', index=False)

# info() prints its report itself and returns None, so it is not wrapped in
# print() (which only added a stray "None" line to the output).
ms_cleaned.info()
ms_cleaned

<class 'pandas.core.frame.DataFrame'>
Int64Index: 150 entries, 149 to 0
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         150 non-null    datetime64[ns]
 1   Amount       150 non-null    float64       
 2   Description  150 non-null    object        
 3   Category     150 non-null    object        
 4   Source       150 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 7.0+ KB
None


Unnamed: 0,Date,Amount,Description,Category,Source
149,2019-01-02,-1866.03,Automated Payment: AMEX EPAYMENT ACH PMT CHK A...,,MS CHECKING
148,2019-01-09,15000.00,CASH TRANSFER: FUNDS TRANSFERRED CONFIRMATION ...,,MS CHECKING
147,2019-01-14,-3000.00,Check: BARTON SKUBI Check # 1148,,MS CHECKING
146,2019-01-16,-2669.20,Automated Payment: CHASE CREDIT CRD AUTOPAY CH...,,MS CHECKING
144,2019-01-22,-904.17,Automated Payment: CITI AUTOPAY PAYMENT CHK AC...,,MS CHECKING
...,...,...,...,...,...
4,2019-12-20,4434.64,Direct Deposit: DIRECT DEP FUNDS RECVD NEAL AN...,,MS CHECKING
3,2019-12-23,-2534.87,Automated Payment: BK OF AMER VISA ONLINE PMT ...,,MS CHECKING
2,2019-12-27,-127.82,Automated Payment: PUGET SOUND ENER ONLINE PMT...,,MS CHECKING
1,2019-12-30,-9.33,Automated Payment: CAPITAL ONE CRCARDPMT CHK A...,,MS CHECKING


### Chase United Visa 2019 transactions

In [142]:
# Load the Chase export (read from the working directory) in chronological order.
chase_raw = pd.read_csv(
    'ChaseUnitedVisa2019.csv',
    parse_dates=['Transaction Date', 'Post Date'],
).sort_values(by='Transaction Date', ascending=True)

chase_txn_dates = chase_raw['Transaction Date']
print(chase_txn_dates.min(), chase_txn_dates.max())
chase_raw.info()

# Display the rows whose description contains "AUDIT".
chase_raw.loc[chase_raw['Description'].str.contains('AUDIT')]

2018-12-31 00:00:00 2019-10-24 00:00:00
<class 'pandas.core.frame.DataFrame'>
Int64Index: 194 entries, 193 to 0
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Transaction Date  194 non-null    datetime64[ns]
 1   Post Date         194 non-null    datetime64[ns]
 2   Description       194 non-null    object        
 3   Category          183 non-null    object        
 4   Type              194 non-null    object        
 5   Amount            194 non-null    float64       
dtypes: datetime64[ns](2), float64(1), object(3)
memory usage: 10.6+ KB


Unnamed: 0,Transaction Date,Post Date,Description,Category,Type,Amount
175,2019-03-01,2019-03-04,AUDIT &amp; ADJUSTMENT COMPAN,Health & Wellness,Sale,-62.71


In [124]:
# Reshape `chase_raw` towards the shared transaction layout.
# NOTE: run once per fresh load of `chase_raw` — after this cell `Description`
# already holds the combined text, so a second run would append the type again.
chase_raw = (
    chase_raw
    .assign(
        Source='CHASE UNITED VISA',
        # Assign the filled column back instead of calling
        # `df['Category'].fillna(..., inplace=True)`: that chained in-place
        # form does not update the frame under pandas Copy-on-Write.
        Category=lambda df: df['Category'].fillna(''),
        # Append the transaction type to the description, e.g. "QFC 5849: Sale".
        combined_desc=lambda df: df['Description'] + ': ' + df['Type'],
    )
    # Rename by name rather than by position; the combined text becomes the
    # public `Description` and the original is kept as `orig_desc`.
    .rename(columns={
        'Transaction Date': 'Date',
        'Description': 'orig_desc',
        'combined_desc': 'Description',
    })
)

In [125]:
# Order chronologically, keep the shared columns, and export.
chase_raw = chase_raw.sort_values(by='Date', ascending=True)
chase_cleaned = chase_raw.loc[:, ['Date', 'Amount', 'Description', 'Category', 'Source']]

# Same file written to the working directory and next to the source data.
for chase_out in ('chase_visa_transformed.csv', f'{BASE}/chase_visa_transformed.csv'):
    chase_cleaned.to_csv(chase_out, index=False)

print(chase_cleaned.info())
chase_cleaned

<class 'pandas.core.frame.DataFrame'>
Int64Index: 194 entries, 193 to 0
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         194 non-null    datetime64[ns]
 1   Amount       194 non-null    float64       
 2   Description  194 non-null    object        
 3   Category     194 non-null    object        
 4   Source       194 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 9.1+ KB
None


Unnamed: 0,Date,Amount,Description,Category,Source
193,2018-12-31,-63.42,QFC 5849: Sale,Groceries,CHASE UNITED VISA
192,2018-12-31,-42.23,NORMS EATERY AND ALEHOUSE: Sale,Food & Drink,CHASE UNITED VISA
191,2019-01-04,-8.80,Amazon Music*M238Y32J2: Sale,Shopping,CHASE UNITED VISA
190,2019-01-05,-131.02,Amazon Prime: Sale,Bills & Utilities,CHASE UNITED VISA
189,2019-01-07,-199.00,WWW.GALVANIZE.COM: Sale,Professional Services,CHASE UNITED VISA
...,...,...,...,...,...
4,2019-10-04,-65.00,CENTURYLINK SIMPLE: Sale,Bills & Utilities,CHASE UNITED VISA
3,2019-10-15,245.31,AUTOMATIC PAYMENT - THANK: Payment,,CHASE UNITED VISA
2,2019-10-17,-22.75,TUSCAN STONE PIZZA (ME: Sale,Food & Drink,CHASE UNITED VISA
1,2019-10-21,-22.75,TUSCAN STONE PIZZA (ME: Sale,Food & Drink,CHASE UNITED VISA


### Citi Costco Visa 2019 transactions

In [134]:
# Load the Citi export (read from the working directory) in chronological order.
citi_raw = pd.read_csv(
    'citi_costco_visa_2019.csv',
    parse_dates=['Date'],
).sort_values(by='Date', ascending=True)

citi_dates = citi_raw['Date']
print(citi_dates.min(), citi_dates.max())
citi_raw.info()

2019-01-02 00:00:00 2019-12-23 00:00:00
<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 99 to 0
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Status       100 non-null    object        
 1   Date         100 non-null    datetime64[ns]
 2   Description  100 non-null    object        
 3   Debit        88 non-null     float64       
 4   Credit       12 non-null     float64       
 5   Member Name  100 non-null    object        
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 5.5+ KB


In [135]:
# Add the shared-layout columns to `citi_raw`.
citi_raw['Source'] = 'CITI COSTCO VISA'
# This export has no category column; add an empty one.
citi_raw['Category'] = ''

# Each row has either a Debit or a Credit (the other is NaN). Both sides are
# negated and summed into one signed amount; in the resulting output purchases
# are negative and AUTOPAY payments positive.
# NOTE(review): presumably the export lists credits as negative numbers —
# verify against the CSV.
#
# `Debit` and `Credit` themselves are left untouched, so re-running this cell
# cannot flip the signs a second time. The fillna results are used directly
# instead of `df[col].fillna(..., inplace=True)`, which does not update the
# frame under pandas Copy-on-Write.
citi_raw['Amount'] = (
    citi_raw['Debit'].fillna(0.0) * -1
    + citi_raw['Credit'].fillna(0.0) * -1
)

In [136]:
# Order chronologically and keep 2019 transactions only.
citi_raw = (
    citi_raw
    .sort_values(by='Date', ascending=True)
    .loc[lambda df: df['Date'].dt.year == 2019]
)
citi_cleaned = citi_raw.loc[:, ['Date', 'Amount', 'Description', 'Category', 'Source']]

# Same file written to the working directory and next to the source data.
for citi_out in ('citi_visa_transformed.csv', f'{BASE}/citi_visa_transformed.csv'):
    citi_cleaned.to_csv(citi_out, index=False)

print(citi_cleaned.info())
citi_cleaned

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 99 to 0
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Date         100 non-null    datetime64[ns]
 1   Amount       100 non-null    float64       
 2   Description  100 non-null    object        
 3   Category     100 non-null    object        
 4   Source       100 non-null    object        
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 4.7+ KB
None


Unnamed: 0,Date,Amount,Description,Category,Source
99,2019-01-02,-43.96,COSTCO GAS #0001 SEATTLE WA,,CITI COSTCO VISA
98,2019-01-02,-8.80,COSTCO GAS #0001 SEATTLE WA,,CITI COSTCO VISA
97,2019-01-06,-9.90,COMPLETE ID 855-591-0202 TX,,CITI COSTCO VISA
96,2019-01-18,904.17,AUTOPAY 999990000082565RAUTOPAY AUTO-PMT,,CITI COSTCO VISA
94,2019-01-19,-43.14,COSTCO GAS #0001 SEATTLE WA,,CITI COSTCO VISA
...,...,...,...,...,...
4,2019-12-02,-43.91,COSTCO GAS #0008 KIRKLAND WA,,CITI COSTCO VISA
3,2019-12-06,-9.89,COMPLETE ID 855-591-0202 TX,,CITI COSTCO VISA
2,2019-12-07,-391.24,COSTCO WHSE #0001 SEATTLE WA,,CITI COSTCO VISA
1,2019-12-18,1118.39,AUTOPAY 999990000082565RAUTOPAY AUTO-PMT,,CITI COSTCO VISA


### Combine into one df

In [122]:
# Cleaned per-source frames; all share the columns
# Date / Amount / Description / Category / Source.
pieces = [
    ms_cleaned,
    alaska_cleaned,
    capone_cleaned,
    amex_cleaned,
    chase_cleaned,
    citi_cleaned,
]

In [137]:
# Stack every source into one frame, then order it chronologically.
combined = pd.concat(pieces)
combined = combined.sort_values(by='Date', ascending=True)

In [140]:
# Same file written to the working directory and next to the source data.
for combined_out in ('combined_transactions.csv', f'{BASE}/combined_transactions.csv'):
    combined.to_csv(combined_out, index=False)

In [139]:
# Number of transactions per data source: count() reports the non-null
# entries of every remaining column within each `Source` group.
combined.groupby('Source').count()

Unnamed: 0_level_0,Date,Amount,Description,Category
Source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ALASKA VISA,120,120,120,120
AMEX DELTA,392,392,392,392
CAPITAL ONE VISA,149,149,149,149
CHASE UNITED VISA,194,194,194,194
CITI COSTCO VISA,100,100,100,100
MS CHECKING,150,150,150,150


# DONE.

In [87]:
# Scratch check: stack two frames, rebuild the index with reset_index(), and
# display columns A, B and C.
# NOTE(review): `df_` and `df2` are not defined in any cell visible here, so
# this cell fails on a fresh kernel unless they are created elsewhere.
pd.concat([df_, df2]).reset_index()[['A', 'B', 'C']]

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9
3,10,40,70
4,20,50,80
5,30,60,90


### Looking for `Galvanize` transactions
Tuition payments: yes (see the AMEX results below). The "Premium Prep" payments are further below, on the United `Chase` Visa.

In [45]:
# Case-insensitive, literal (non-regex) search for "galv" in the AMEX descriptions.
# NOTE(review): `amex` is not defined in any cell visible here (the frame
# loaded above is `amex2019`), so this relies on state from elsewhere.
amex[amex['Description'].str.contains('galv', case=False, regex=False)]

Unnamed: 0,datetime,Description,Amount
311,2019-03-07,GALVANIZE,8000.0
376,2019-01-16,GALVANIZE,6000.0
384,2019-01-09,GALVANIZE,2000.0


In [47]:
# Look for other Galvanize transactions by listing the rows whose `datetime`
# is on or after '02-01-2019' and before '03-01-2019'.
# NOTE(review): the date strings are month-first (MM-DD-YYYY), judging by the
# February rows in the output; ISO 'YYYY-MM-DD' literals would be unambiguous.
# NOTE(review): `amex` is not defined in any cell visible here.
amex[(amex['datetime'] >= '02-01-2019') & (amex['datetime'] < '03-01-2019')]

Unnamed: 0,datetime,Description,Amount
321,2019-02-28,DIGITALOCEAN.COM,5.0
322,2019-02-28,VINCE'S ITALIAN RESTAURANT & PIZZERIA,23.65
323,2019-02-27,MIDWAY USA,363.0
324,2019-02-26,BATON VAPOR,68.74
325,2019-02-26,LA PALMERA,31.41
326,2019-02-23,RHEIN HAUS,196.05
327,2019-02-23,VZW APOCC 800-922-0204 FL,153.64
328,2019-02-22,U-HAUL MOVING & STORAGE OF BURIEN,114.95
329,2019-02-21,MANU'S TACOS FLATSTICK,11.4
330,2019-02-19,CENTRAL PIZZA,34.18


### Galvanize Premium Prep payments

In [55]:
# Case-insensitive, literal (non-regex) search for "galvanize" in the Chase descriptions.
# NOTE(review): `chase` is not defined in any cell visible here (the frame
# loaded above is `chase_raw`), so this relies on state from elsewhere.
chase[chase['Description'].str.contains('galvanize', case=False, regex=False)]

Unnamed: 0,datetime,Description,Amount
151,2019-04-07,WWW.GALVANIZE.COM,-199.0
172,2019-03-07,WWW.GALVANIZE.COM,-199.0
181,2019-02-07,WWW.GALVANIZE.COM,-199.0
189,2019-01-07,WWW.GALVANIZE.COM,-199.0


### Diversion

In [137]:
from holidays import US
from tabulate import tabulate

# US holidays for 2020, as a mapping of date -> holiday name.
hols = US(years=2020)

# Convert each holiday date to a pandas timestamp once, then derive the
# three table columns from those pairs.
holiday_stamps = [(pd.to_datetime(day), label) for day, label in hols.items()]
hdate = [stamp.isoformat()[:10] for stamp, _ in holiday_stamps]  # YYYY-MM-DD part only
hname = [label for _, label in holiday_stamps]
hdow = [stamp.day_name() for stamp, _ in holiday_stamps]

holdf = pd.DataFrame({'Date': hdate, 'Day of Week': hdow, 'Holiday': hname})

# Plain-text table without the index column.
print(tabulate(holdf, headers='keys', showindex=False))

Date        Day of Week    Holiday
----------  -------------  ---------------------------
2020-01-01  Wednesday      New Year's Day
2020-01-20  Monday         Martin Luther King Jr. Day
2020-02-17  Monday         Washington's Birthday
2020-05-25  Monday         Memorial Day
2020-07-04  Saturday       Independence Day
2020-07-03  Friday         Independence Day (Observed)
2020-09-07  Monday         Labor Day
2020-10-12  Monday         Columbus Day
2020-11-11  Wednesday      Veterans Day
2020-11-26  Thursday       Thanksgiving
2020-12-25  Friday         Christmas Day


## Checking – Morgan Stanley

In [25]:
# Load the Morgan Stanley checking activity export; the first four lines of
# the file are skipped before the header row is read.
# NOTE(review): absolute, machine-specific path.
folder = '/Users/dennis/Documents/Google Drive/Tax/2019/MorganStanley/'
checking = pd.read_csv(
    # `folder` already ends with '/', so no extra separator is added here
    # (the previous f-string produced a doubled '//').
    f'{folder}MS Checking 2019 - CashActivity.csv',
    skiprows=4,
    # `infer_datetime_format` is no longer passed: it is deprecated since
    # pandas 2.0 and only affected parsing speed, not the parsed values.
    parse_dates=['Activity Date', 'Transaction Date']
)

In [26]:
# Print the schema of the checking frame: dtypes, non-null counts, memory use.
checking.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   Activity Date     150 non-null    datetime64[ns]
 1   Transaction Date  150 non-null    datetime64[ns]
 2   Activity          150 non-null    object        
 3   Description       150 non-null    object        
 4   Amount($)         150 non-null    float64       
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 6.0+ KB


In [27]:
# Display the checking frame (pandas truncates the middle rows of long frames).
checking

Unnamed: 0,Activity Date,Transaction Date,Activity,Description,Amount($)
0,2019-12-31,2019-12-31,Interest Income,MORGAN STANLEY BANK N.A. (Period 12/01-12/31),0.82
1,2019-12-30,2019-12-30,Automated Payment,CAPITAL ONE CRCARDPMT CHK ACCT ENDING IN 2622,-9.33
2,2019-12-27,2019-12-27,Automated Payment,PUGET SOUND ENER ONLINE PMT MORGAN STANLEY PAY...,-127.82
3,2019-12-23,2019-12-23,Automated Payment,BK OF AMER VISA ONLINE PMT MORGAN STANLEY PAYM...,-2534.87
4,2019-12-20,2019-12-20,Direct Deposit,DIRECT DEP FUNDS RECVD NEAL ANALYTICS L DIRECT...,4434.64
...,...,...,...,...,...
145,2019-01-22,2019-01-22,Automated Payment,CAPITAL ONE CRCARDPMT CHK ACCT ENDING IN 2622,-81.75
146,2019-01-16,2019-01-16,Automated Payment,CHASE CREDIT CRD AUTOPAY CHK ACCT ENDING IN 2622,-2669.20
147,2019-01-14,2019-01-14,Check,BARTON SKUBI Check # 1148,-3000.00
148,2019-01-09,2019-01-09,CASH TRANSFER,FUNDS TRANSFERRED CONFIRMATION # 101874046 FRO...,15000.00
