# Import Required Libraries
Import the necessary libraries, including pathlib, datetime, and the custom modules.

In [13]:
# Import the necessary libraries
import pandas as pd
from pathlib import Path
import re
import sys
sys.path.append('src')

from datetime import datetime, timedelta

# Import custom modules
from data.loader import BankDataLoader
import config
from analysis.reports import rolling_12_month_average, monthly_totals, list_of_categories

# Load and Filter Data
Load the bank statements data using BankDataLoader and filter out transactions before a specified date.

In [14]:
# Load the bank statements data using BankDataLoader
loader = BankDataLoader(config.DATA_DIRS)
data = loader.load_statements()

# Filter out transactions before a specified date
filter_date = '2023-02-01'
data = data[data['Date'] >= filter_date]

# Dump the intermediate data to a CSV file
loader.dump_intermediate(data, 'intermediate.csv')

# Display the first few rows of the filtered data
data.head()

Unnamed: 0,Date,Description,Amount,Income,Expense,Category,Category1,Category2,Category3,Category12,Category123,Account,Tag
2544,2023-02-01,Gallipott,-78.9,0.0,-78.9,Expense:Miscellaneous:hair cuts,Expense,Miscellaneous,hair cuts,Expense:Miscellaneous,Expense:Miscellaneous:hair cuts,Discover,
1073,2023-02-01,American Family Insurance,-257.05,0.0,-257.05,Expense:Insurance,Expense,Insurance,Unassigned,Expense:Insurance,Expense:Insurance:Unassigned,First Tech,
2545,2023-02-01,HILTON ADVPURCH800236711 MEMPHIS TN,-136.16,0.0,-136.16,Expense:Unassigned:Unassigned,Expense,Unassigned,Unassigned,Expense:Unassigned,Expense:Unassigned:Unassigned,Discover,
2546,2023-02-02,Amazon,-6.83,0.0,-6.83,Expense:Miscellaneous,Expense,Miscellaneous,Unassigned,Expense:Miscellaneous,Expense:Miscellaneous:Unassigned,Discover,
2547,2023-02-02,SHOWTIX4U* FCHS TOWER 7029898987 NV,-56.95,0.0,-56.95,Expense:Unassigned:Unassigned,Expense,Unassigned,Unassigned,Expense:Unassigned,Expense:Unassigned:Unassigned,Discover,


# Filter Data

In [15]:
data = data[~data['Tag'].fillna('').str.contains('#exclude_investment', flags=re.IGNORECASE, regex=True)]
data = data[~data['Tag'].fillna('').str.contains('#ccpayment', flags=re.IGNORECASE, regex=True)]
data = data[~data['Tag'].fillna('').str.contains('#colege529', flags=re.IGNORECASE, regex=True)]
data = data[~data['Tag'].fillna('').str.contains('#IBondRedemption', flags=re.IGNORECASE, regex=True)]
data = data[~data['Tag'].fillna('').str.contains('#exclude_car', flags=re.IGNORECASE, regex=True)]
data = data[~data['Tag'].fillna('').str.contains('#exclude_Janet529', flags=re.IGNORECASE, regex=True)]


# Dump Intermediate Data
Dump the filtered data to an intermediate CSV file.

In [16]:
# Dump the intermediate data to a CSV file
loader.dump_intermediate(data, 'intermediate.csv')

# Display the first few rows of the filtered data
# data.head()

# Generate Monthly Totals Report
Generate and display the monthly totals report using the monthly_totals function.

In [17]:
# Generate Monthly Totals Report
monthly_totals(data)

Total for 2023-02:  -1,486.56 (Income:  10,072.87, Expense: -11,559.43)
Total for 2023-03:   3,656.99 (Income:  15,155.19, Expense: -11,498.20)
Total for 2023-04:  -2,330.90 (Income:   8,811.47, Expense: -11,142.37)
Total for 2023-05:  -3,124.25 (Income:  11,020.18, Expense: -14,144.43)
Total for 2023-06:    -107.66 (Income:   8,758.51, Expense:  -8,866.17)
Total for 2023-07:    -389.69 (Income:  11,823.20, Expense: -12,212.89)
Total for 2023-08:  -2,007.63 (Income:   9,127.89, Expense: -11,135.52)
Total for 2023-09:  -1,709.58 (Income:  12,123.74, Expense: -13,833.32)
Total for 2023-10:  -1,173.63 (Income:  11,552.61, Expense: -12,726.24)
Total for 2023-11:     943.98 (Income:  11,562.29, Expense: -10,618.31)
Total for 2023-12:   9,463.89 (Income:  20,624.44, Expense: -11,160.55)
Total for 2024-01:  -3,650.14 (Income:  12,820.87, Expense: -16,471.01)
Total for 2024-02:      -5.69 (Income:   9,464.60, Expense:  -9,470.29)
Total for 2024-03:  -1,737.65 (Income:   9,465.00, Expense: -11,

In [18]:
# Pivot?
df = data

df['Date'] = pd.to_datetime(df['Date'])
df['Month'] = df['Date'].dt.to_period('M')
df['Amount'] = df['Amount'].fillna(0)

# Group by month
# df['Amount']

df_group = df.groupby([pd.Grouper(key='Month'), 'Category12'])['Amount'].sum().reset_index()
# Ensure every month/category12 pair has a valid value, which is 0 if NaN
df_group = df_group.pivot(index='Month', columns='Category12', values='Amount').fillna(0).reset_index()
df_group = df_group.melt(id_vars=['Month'], var_name='Category12', value_name='Amount')
# with pd.option_context('display.max_rows', None):
#     print(df_group) 
# df_group

# # Calculate the rolling average
df_group['Rolling_Avg'] = df_group.groupby('Category12')['Amount'].transform(lambda x: x.rolling(window=12, min_periods=1).mean())

# # Pivot to create a column for each category
df_group = df_group.pivot(index='Category12', columns='Month', values='Rolling_Avg').reset_index()
df_group = df_group.round(2)

# Filter columns to show only months after 2024-01
df_group = df_group[['Category12'] + [col for col in df_group.columns if str(col) >= '2024-01']]
df_group


Month,Category12,Category12.1,2024-01,2024-02,2024-03,2024-04,2024-05,2024-06,2024-07,2024-08,2024-09,2024-10,2024-11,2024-12
0,Expense:Auto,Expense:Auto,-251.83,-241.58,-236.41,-225.24,-213.37,-216.67,-205.79,-201.35,-94.48,-93.78,-90.8,-80.28
1,Expense:Charity,Expense:Charity,-124.51,-123.66,-121.98,-121.13,-118.63,-117.38,-118.22,-120.73,-125.33,-124.67,-123.6,-122.53
2,Expense:Chiropractic,Expense:Chiropractic,-113.74,-91.87,-113.74,-89.74,-135.37,-180.27,-225.53,-284.99,-363.65,-376.0,-387.25,-409.12
3,Expense:Clothes,Expense:Clothes,-130.81,-127.87,-152.7,-164.42,-168.32,-167.35,-156.37,-148.61,-154.48,-154.48,-139.35,-105.99
4,Expense:Credit card payment,Expense:Credit card payment,-1181.7,-1174.71,-1146.89,-1164.25,-1165.95,-1135.63,-1109.91,-1077.77,-1058.37,-989.12,-988.44,-968.54
5,Expense:Dining,Expense:Dining,-559.17,-609.32,-650.54,-692.2,-712.61,-785.61,-815.7,-868.49,-915.59,-949.33,-974.73,-980.59
6,Expense:Education,Expense:Education,-1806.54,-1794.52,-1846.04,-1865.95,-1899.37,-1904.75,-1925.42,-1947.17,-1902.28,-1722.76,-1732.35,-1734.56
7,Expense:Entertainment,Expense:Entertainment,-228.3,-203.56,-212.66,-206.87,-213.94,-205.34,-191.44,-195.84,-199.94,-198.54,-200.56,-202.36
8,Expense:Groceries,Expense:Groceries,-646.5,-645.44,-619.86,-603.81,-613.5,-645.83,-652.05,-683.34,-696.39,-702.53,-647.44,-613.94
9,Expense:Hobbies,Expense:Hobbies,-70.92,-70.92,-67.33,-47.86,-46.16,-43.91,-42.35,-38.37,-38.37,-39.04,-32.49,-21.5
