#### Notes for later

In [9]:
import polars as pl
import plotly.express as px
import pandas as pd  
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, date2num
import time
import csv

#### Load last month's data

In [22]:
last_month = time.localtime().tm_mon - 1

current_yr = time.localtime().tm_year

# handling if last month was January
if last_month == 0:
    last_month = 12
    current_yr = current_yr - 1


df = pl.read_csv(r'C:\Users\Denne\Documents\VSCodeProjects\FinancialStreamline2\BankStreamline\Data\DebitHistory.csv')

# Date work
df = df.with_columns(
                pl.col("Post Date").str.strptime(pl.Date, "%m/%d/%Y").alias("date")
                ).with_columns([
                pl.col("date").dt.month().alias("month"),
                pl.col("date").dt.day().alias("day"),
                pl.col("date").dt.year().alias("year")
                ]) \
        .filter((pl.col('month') == last_month) & 
                (pl.col('year') == current_yr)) \
        .with_columns(date = pl.concat_str([pl.col('year').cast(pl.Utf8), pl.lit('-'), pl.col('month').cast(pl.Utf8), pl.lit('-'), pl.col('day').cast(pl.Utf8)], separator="")) \
        .with_columns(monthName = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.strftime("%b")) \
        .with_columns(weekDay = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.strftime("%a")) \
        .with_columns(quarter = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.quarter())
                        
# # cost column
df = df.with_columns(pl.col('Debit').fill_null(value=0)) \
        .with_columns(pl.col('Credit').fill_null(value=0)) \
        .with_columns(cost =pl.col('Debit') + pl.col('Credit')) \
        
# # the gate/clean up
df = df.filter(~pl.col('Description').str.contains('ELECTRONIC DEPOSIT MURPHY OIL USA PPAYROLL|Funds Transfer via Mobile')) \
        .filter(~pl.col('Description').str.contains('DEPOSIT|deposit|Deposit|WITHDRAWAL-CASH')) \
        .filter(~pl.col('Description').str.contains('Withdrawal LOAN PAYMENT TRANSFER')) \
        .rename({'Description': 'description'}) \
        .with_columns(cardType = pl.lit('Debit'))

print(len(df))
df.limit(3)

66


Account Number,Post Date,Check,description,Debit,Credit,Status,date,month,day,year,monthName,weekDay,quarter,cost,cardType
str,str,str,str,f64,f64,str,str,i8,i8,i32,str,str,i8,f64,str
"""***0530002""","""11/30/2025""",,"""Card purchase THE HOME DEPOT #…",80.85,0.0,"""Posted""","""2025-11-30""",11,30,2025,"""Nov""","""Sun""",4,80.85,"""Debit"""
"""***0530002""","""11/29/2025""",,"""Card purchase TIMMINS HDWE EL…",5.99,0.0,"""Posted""","""2025-11-29""",11,29,2025,"""Nov""","""Sat""",4,5.99,"""Debit"""
"""***0530002""","""11/26/2025""",,"""Card purchase THE HOME DEPOT #…",100.0,0.0,"""Posted""","""2025-11-26""",11,26,2025,"""Nov""","""Wed""",4,100.0,"""Debit"""


### Categorize data

In [23]:
df = df.with_columns(category = 
                  # ======================= Food =======================
    pl.when(pl.col('description').str.contains('McDon|CORPORATE|CHICK-FIL-A|SLIM CHICKENS')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('MADDIES PLACE|RAISING CANES|Subway|HICKORY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('JAMBA|PIZZA|GOODCENTS|SONIC|TACO BELL|BUFFET')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('Waffle House|DAIRY QUEEN|COLDSTONE|MCGRAWS')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('LITTLE CAESARS|MCDON|WENDY|APPLEBEES|PERCY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('EL SUR|BAREFOOT BISTRO|YAMATO|SMOOTHIE|CREAMERY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DOLLAR GENERAL|DOLLAR TREE|FLYING BURGER|WWW.HOMECHEF.IL')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DOMINO.S|POPEYES|COCA COLA|LA VILLA MEXICAN|PJ\'S COFFEE')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('CRCKR BRRL|OFF THE RAIL CAFE|HOUSE-WYLIE|SWOLE FOOD')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DC FUDDRUCKERS|THE BLACK CAT CAFE|RHEA LANA|BYUI FOOD')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('WHATABURGER|CUPBOP|TASTY DONUTS|HELLA FRESH|JOHNNY B\'S GRILL')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('WILDCAT SNACK|ARBYS|SNACKS ABUELITA|MURPHY 1111')).then(pl.lit('food'))
                  # ======================= subscriptions =======================
    .when(pl.col('description').str.contains('Adobe|Spotify|Phtoshp Lightrm|Peacock')).then(pl.lit('subscriptions'))
    .when(pl.col('description').str.contains('WMT PLUS|HEALTHWORKS|APPLE.COM/BILL')).then(pl.lit('subscriptions'))
    .when(pl.col('description').str.contains('NETFLIX.COM|NETFLIX|ADOBE|ADT SECURITY')).then(pl.lit('subscriptions'))
                  # ======================= wmt =======================
    .when(pl.col('description').str.contains('WM SUPER|Wal-Mart|WAL-MART|BROOKSHIRES|BROULIM|ALBERTSONS')).then(pl.lit('wmt'))
    .when(pl.col('description').str.contains('WALGR|SMITHS')).then(pl.lit('wmt'))
    .when(pl.col('description').str.contains('Walmart|WALMART') &
          ~(pl.col('description').str.contains('MURPHY'))).then(pl.lit('wmt'))
                  # ======================= gas =======================
    .when(pl.col('description').str.contains('MURPHY') & 
          ~pl.col('description').str.contains('DEPOSIT|1111')).then(pl.lit('gas'))
    .when(pl.col('description').str.contains('SHELL|CHEVRON|CIRCLE K|LOVE\'S|EXXON EXPRESSWAY|EXXON MISSLE')).then(pl.lit('gas'))
    .when(pl.col('description').str.contains('MISSLE MART')).then(pl.lit('gas'))
                  # ======================= internet =======================
    .when(pl.col('description').str.contains('VIASAT')).then(pl.lit('internet'))
    .when(pl.col('description').str.contains('OPTIMUM')).then(pl.lit('internet'))
                  # ======================= shopping =======================
    .when(pl.col('description').str.contains('T J MAXX|OLD NAVY|SHEIN|REAL DEALS|SALLY BEAUTY')).then(pl.lit('shopping'))
    .when(pl.col('description').str.contains('LDS DIST ONLINE STORE|DC ULTA|DC H&amp|SEPHORA.COM')).then(pl.lit('shopping'))
    .when(pl.col('description').str.contains('American Eagle|SPORTSMANS WAREHOUSE|ZOE FRYE HAIR|OLDNAVY')).then(pl.lit('shopping'))
                  # ======================= amazon =======================
    .when(pl.col('description').str.contains('AMZN|AMAZON|Amazon.com|temu.com|eBay')).then(pl.lit('amazon'))
                  # ======================= fun =======================
    .when(pl.col('description').str.contains('Amazon Prime|YouTube|ZOO|MUSEUM|AQUARIUM|LION.S CLUB GOLF')).then(pl.lit('fun'))
    .when(pl.col('description').str.contains('COMFORT INN|CINEMA|HOLIDAY INN|El Dorado Golf|PlayStation')).then(pl.lit('fun'))
    .when(pl.col('description').str.contains('EXCALIBUR FAMILY FUN|AIRBNB|EXPEDIA|EL DORADO GOLF|UNITED')).then(pl.lit('fun'))
                  # ======================= power =======================
    .when(pl.col('description').str.contains('ENTERGY')).then(pl.lit('power'))
                  # ======================= car =======================
    .when(pl.col('description').str.contains('O.REILLY|MUFFLEX MUFFLER|AUTOZONE|DC TAKE 5|KARL MALONE FORD')).then(pl.lit('car'))
    .when(pl.col('description').str.contains('VAN HOOK TIRE|IRONHEART AUTOMOTIVE')).then(pl.lit('car'))
                  # ======================= progressive =======================
    .when(pl.col('description').str.contains('PROG DIRECT|STATE FARM')).then(pl.lit('progressive'))
                  # ======================= water =======================
    .when(pl.col('description').str.contains('SHARE CHECK')).then(pl.lit('water'))
    .when(pl.col('description').str.contains('EL DORADO WATER UTI|EL DORADO WATER')).then(pl.lit('water'))
                  # ======================= tithing =======================
    .when(pl.col('description').str.contains('Ch JesusChrist  DONATION|Ch JesusChrist DONATION')).then(pl.lit('tithing'))
                  # ======================= Natural Gas =======================
    .when(pl.col('description').str.contains('SUMMIT')).then(pl.lit('natural gas'))
                  # ======================= Home improvement =======================
    .when(pl.col('description').str.contains('SHERWIN-WILLIAMS|THE HOME DEPOT|MAIN STREET ANTIQUES')).then(pl.lit('home improvement'))
    .when(pl.col('description').str.contains('HOBBYLOBBY|EVERYBODYS ANTIQUE|HOBBY-LOBBY')).then(pl.lit('home improvement'))
    .when(pl.col('description').str.contains('TIMMINS|Sherwin-Williams|WAYFAIR')).then(pl.lit('home improvement'))
                  # ======================= rent =======================
    .when((pl.col('description').str.contains('PENNYMAC'))).then(pl.lit('rent'))
    .otherwise(pl.lit('misc'))
)


controllables = ['food', 'wmt', 'shopping', 'subscriptions', 'fun', 'misc', 'amazon', 'food']
uncontrollables = ['tithing', 'progressive', 'power', 'gas', 'water', 'internet', 'rent', 'car']

df = df.with_columns(controllable = pl.when(pl.col('category').is_in(controllables)).then(pl.lit(1))
                                    .when(pl.col('category').is_in(uncontrollables)).then(pl.lit(0))
                                    .otherwise(pl.lit('error')))
                                    

df = df.select(['date', 'category', 'description', 'cost', 'controllable', 'monthName', 'month', 'day', 'year', 'weekDay', 'cardType', 'quarter'])

# df.limit(10)

# list(df.filter(pl.col('Category') == 'wmt')['description'].unique())
# list(df.filter(pl.col('Category') == 'gas')['description'].unique())
# list(df.filter(pl.col('Category') == 'unknown')['description'].unique())
df.sort(pl.col('cost'), descending=True).limit(20)

date,category,description,cost,controllable,monthName,month,day,year,weekDay,cardType,quarter
str,str,str,f64,str,str,i8,i8,i32,str,str,i8
"""2025-11-24""","""home improvement""","""Credit or debit card Withdrawa…",987.74,"""error""","""Nov""",11,24,2025,"""Mon""","""Debit""",4
"""2025-11-23""","""home improvement""","""Card purchase WF* WAYFAIR45165…",987.74,"""error""","""Nov""",11,23,2025,"""Sun""","""Debit""",4
"""2025-11-18""","""home improvement""","""Card purchase WF* WAYFAIR42253…",987.74,"""error""","""Nov""",11,18,2025,"""Tue""","""Debit""",4
"""2025-11-26""","""home improvement""","""POS Withdrawal THE HOME DEPOT …",725.14,"""error""","""Nov""",11,26,2025,"""Wed""","""Debit""",4
"""2025-11-19""","""rent""","""ACH Withdrawal PENNYMAC CASH B…",639.61,"""0""","""Nov""",11,19,2025,"""Wed""","""Debit""",4
…,…,…,…,…,…,…,…,…,…,…,…
"""2025-11-24""","""amazon""","""Card purchase AMAZON MKTPL*B24…",210.71,"""1""","""Nov""",11,24,2025,"""Mon""","""Debit""",4
"""2025-11-25""","""home improvement""","""POS Withdrawal THE HOME DEPOT …",175.38,"""error""","""Nov""",11,25,2025,"""Tue""","""Debit""",4
"""2025-11-4""","""amazon""","""Card purchase AMAZON MKTPL*NK9…",163.42,"""1""","""Nov""",11,4,2025,"""Tue""","""Debit""",4
"""2025-11-26""","""home improvement""","""Card purchase THE HOME DEPOT #…",100.0,"""error""","""Nov""",11,26,2025,"""Wed""","""Debit""",4


### Look at last month's misc

In [24]:
list(df.filter(pl.col('category') == 'misc')['description'].unique())

[]

#### Make sure correct month/year is being loaded

In [25]:
print(list(df['month'].unique()))
print(list(df['monthName'].unique()))
print(list(df['weekDay'].unique()))
print(list(df['year'].unique()))
print(list(df['day'].unique()))
# print(df.select(pl.col("date").min()))
# print(df.select(pl.col("date").max())) # needs fixed, looking at str, not date

[11]
['Nov']
['Fri', 'Tue', 'Mon', 'Thu', 'Sat', 'Wed', 'Sun']
[2025]
[1, 3, 4, 5, 6, 7, 8, 10, 12, 13, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 29, 30]


### Check number of rows (40ish per month)
Apr: 38
<br>May: 53
<br>June: 23 - Vegas trip
<br>May: 65
<br>Aug: 48
<br>Sep: 75
<br>Oct: 46
<br>Nov: 66

In [27]:
len(df)

66

### Load the data (handle w/ caution)

In [None]:
# path = r'C:\Users\Denne\Documents\VSCodeProjects\FinancialStreamline2\BankStreamline\Data\BankDataProd.csv'


# with open(path, mode="a", encoding="utf-8") as f:
#    df.write_csv(f, include_header=False)