#### Notes for later

In [1]:
import polars as pl
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, date2num
import time

## Pull from file / make dates

In [2]:
last_month = time.localtime().tm_mon - 1

current_yr = time.localtime().tm_year

# handling if last month was January
if last_month == 0:
    last_month = 12
    current_yr = current_yr - 1


# current_month = 4
# current_yr = 2025

df = pl.read_csv(r'C:\Users\Blake Dennett\Downloads\StreamlineRepo\FinancialStreamline2\BankStreamline\Data\CreditHistory.csv')


# date work
df = df.with_columns(pl.col("Post Date")
                        .str.split("/")
                        .alias("split_str")
                        .list.to_struct()) \
                        .unnest('split_str') \
                        .rename({'field_0':'month', 'field_1':'day', 'field_2':'year'}) \
                        .cast({'day':pl.Int32, 'month':pl.Int32, 'year':pl.Int32}) \
        .filter((pl.col('month') == last_month) & 
                (pl.col('year') == current_yr)) \
        .with_columns(date = pl.concat_str([pl.col('year').cast(pl.Utf8), pl.lit('-'), pl.col('month').cast(pl.Utf8), pl.lit('-'), pl.col('day').cast(pl.Utf8)], separator="")) \
        .with_columns(monthName = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.strftime("%b")) \
        .with_columns(weekDay = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.strftime("%a")) \
        .with_columns(quarter = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.quarter())

        
# The gate/clean up        
df = df.filter(pl.col('Amount') < 0) \
        .drop('Category', 'Reference Number') \
        .rename({'Amount':'cost', 'Description': 'description'}) \
        .with_columns(cardType = pl.lit('Credit')) \
        .with_columns(cost = abs(pl.col('cost')))


df.limit(10)

Post Date,description,cost,month,day,year,date,monthName,weekDay,quarter,cardType
str,str,f64,i32,i32,i32,str,str,str,u32,str
"""7/30/2025""","""SHERWIN-WILLIA…",16.43,7,30,2025,"""2025-7-30""","""Jul""","""Wed""",3,"""Credit"""
"""7/30/2025""","""CORPORATE OFFI…",4.38,7,30,2025,"""2025-7-30""","""Jul""","""Wed""",3,"""Credit"""
"""7/29/2025""","""CORPORATE OFFI…",1.31,7,29,2025,"""2025-7-29""","""Jul""","""Tue""",3,"""Credit"""
"""7/23/2025""","""CORPORATE OFFI…",1.75,7,23,2025,"""2025-7-23""","""Jul""","""Wed""",3,"""Credit"""
"""7/22/2025""","""CORPORATE OFFI…",1.75,7,22,2025,"""2025-7-22""","""Jul""","""Tue""",3,"""Credit"""
"""7/21/2025""","""THE HOME DEPOT…",53.99,7,21,2025,"""2025-7-21""","""Jul""","""Mon""",3,"""Credit"""
"""7/20/2025""","""WAL-MART #0530…",35.97,7,20,2025,"""2025-7-20""","""Jul""","""Sun""",3,"""Credit"""
"""7/20/2025""","""VCN*UNIONCOCIR…",169.95,7,20,2025,"""2025-7-20""","""Jul""","""Sun""",3,"""Credit"""
"""7/16/2025""","""CORPORATE OFFI…",1.75,7,16,2025,"""2025-7-16""","""Jul""","""Wed""",3,"""Credit"""
"""7/15/2025""","""CORPORATE OFFI…",1.75,7,15,2025,"""2025-7-15""","""Jul""","""Tue""",3,"""Credit"""


# Categorize

In [3]:
df = df.with_columns(category = 
                  # ======================= Food =======================
    pl.when(pl.col('description').str.contains('McDon|CORPORATE|CHICK-FIL-A|SLIM CHICKENS')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('MADDIES PLACE|RAISING CANES|Subway|HICKORY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('JAMBA|PIZZA|GOODCENTS|SONIC|TACO BELL|BUFFET')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('Waffle House|DAIRY QUEEN|COLDSTONE|MCGRAWS')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('LITTLE CAESARS|MCDON|WENDY|APPLEBEES|PERCY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('EL SUR|BAREFOOT BISTRO|YAMATO|SMOOTHIE|CREAMERY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DOLLAR GENERAL|DOLLAR TREE|FLYING BURGER|WWW.HOMECHEF.IL')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DOMINO.S|POPEYES|COCA COLA|LA VILLA MEXICAN|PJ\'S COFFEE')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('CRCKR BRRL|OFF THE RAIL CAFE|HOUSE-WYLIE|SWOLE FOOD')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DC FUDDRUCKERS|THE BLACK CAT CAFE|RHEA LANA|BYUI FOOD')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('WHATABURGER|CUPBOP|TASTY DONUTS|HELLA FRESH|JOHNNY B\'S GRILL')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('WILDCAT SNACK')).then(pl.lit('food'))
                  # ======================= subscriptions =======================
    .when(pl.col('description').str.contains('Adobe|Spotify|Phtoshp Lightrm|Peacock')).then(pl.lit('subscriptions'))
    .when(pl.col('description').str.contains('WMT PLUS|HEALTHWORKS|APPLE.COM/BILL')).then(pl.lit('subscriptions'))
                  # ======================= wmt =======================
    .when(pl.col('description').str.contains('WM SUPER|Wal-Mart|WAL-MART|BROOKSHIRES|BROULIM|ALBERTSONS')).then(pl.lit('wmt'))
    .when(pl.col('description').str.contains('WALGR|EVERYBODYS ANTIQUE|SMITHS')).then(pl.lit('wmt'))
    .when(pl.col('description').str.contains('Walmart|WALMART') &
          ~(pl.col('description').str.contains('MURPHY'))).then(pl.lit('wmt'))
                  # ======================= gas =======================
    .when(pl.col('description').str.contains('MURPHY') & 
          ~pl.col('description').str.contains('DEPOSIT')).then(pl.lit('gas'))
    .when(pl.col('description').str.contains('SHELL|CHEVRON|CIRCLE K|LOVE\'S|EXXON EXPRESSWAY')).then(pl.lit('gas'))
                  # ======================= internet =======================
    .when(pl.col('description').str.contains('VIASAT')).then(pl.lit('internet'))
                  # ======================= shopping =======================
    .when(pl.col('description').str.contains('T J MAXX|OLD NAVY|SHEIN|REAL DEALS|SALLY BEAUTY')).then(pl.lit('shopping'))
    .when(pl.col('description').str.contains('LDS DIST ONLINE STORE|DC ULTA|DC H&amp|SEPHORA.COM')).then(pl.lit('shopping'))
    .when(pl.col('description').str.contains('American Eagle|SPORTSMANS WAREHOUSE|ZOE FRYE HAIR')).then(pl.lit('shopping'))
                  # ======================= amazon =======================
    .when(pl.col('description').str.contains('AMZN|AMAZON|Amazon.com|temu.com|eBay')).then(pl.lit('amazon'))
                  # ======================= fun =======================
    .when(pl.col('description').str.contains('Amazon Prime|YouTube|ZOO|MUSEUM|AQUARIUM|LION.S CLUB GOLF')).then(pl.lit('fun'))
    .when(pl.col('description').str.contains('COMFORT INN|CINEMA|HOLIDAY INN|El Dorado Golf|PlayStation')).then(pl.lit('fun'))
    .when(pl.col('description').str.contains('EXCALIBUR FAMILY FUN|AIRBNB|EXPEDIA|EL DORADO GOLF')).then(pl.lit('fun'))
                  # ======================= power =======================
    .when(pl.col('description').str.contains('ENTERGY')).then(pl.lit('power'))
                  # ======================= car =======================
    .when(pl.col('description').str.contains('O.REILLY|MUFFLEX MUFFLER|AUTOZONE|DC TAKE 5|KARL MALONE FORD')).then(pl.lit('car'))
    .when(pl.col('description').str.contains('VAN HOOK TIRE')).then(pl.lit('car'))
                  # ======================= progressive =======================
    .when(pl.col('description').str.contains('PROG DIRECT')).then(pl.lit('progressive'))
                  # ======================= water =======================
    .when(pl.col('description').str.contains('SHARE CHECK')).then(pl.lit('water'))
    .when(pl.col('description').str.contains('EL DORADO WATER UTI')).then(pl.lit('water'))
                  # ======================= tithing =======================
    .when(pl.col('description').str.contains('Ch JesusChrist  DONATION|Ch JesusChrist DONATION')).then(pl.lit('tithing'))
                  # ======================= rent =======================
    .when((pl.col('description').str.contains('VENMO')) & (pl.col('cost') == 775)).then(pl.lit('rent'))
    .when((pl.col('description').str.contains('ACH Withdrawal ELECTRONIC WITHDRAWAL')) & (pl.col('cost') == 775)).then(pl.lit('rent'))
    .otherwise(pl.lit('misc'))
)


controllables = ['food', 'wmt', 'shopping', 'subscriptions', 'fun', 'misc', 'amazon', 'food']
uncontrollables = ['tithing', 'progressive', 'power', 'gas', 'water', 'internet', 'rent', 'car']

df = df.with_columns(controllable = pl.when(pl.col('category').is_in(controllables)).then(pl.lit(1))
                                    .when(pl.col('category').is_in(uncontrollables)).then(pl.lit(0))
                                    .otherwise(pl.lit('error'))) \
                                    .cast({'controllable':pl.Int16})
                                    

df = df.select(['date', 'category', 'description', 'cost', 'controllable', 'monthName', 'month', 'day', 'year', 'weekDay', 'cardType', 'quarter'])

df.limit(10)

# list(df.filter(pl.col('Category') == 'wmt')['description'].unique())
# list(df.filter(pl.col('Category') == 'gas')['description'].unique())
# list(df.filter(pl.col('Category') == 'unknown')['description'].unique())

date,category,description,cost,controllable,monthName,month,day,year,weekDay,cardType,quarter
str,str,str,f64,i16,str,i32,i32,i32,str,str,u32
"""2025-7-30""","""misc""","""SHERWIN-WILLIA…",16.43,1,"""Jul""",7,30,2025,"""Wed""","""Credit""",3
"""2025-7-30""","""food""","""CORPORATE OFFI…",4.38,1,"""Jul""",7,30,2025,"""Wed""","""Credit""",3
"""2025-7-29""","""food""","""CORPORATE OFFI…",1.31,1,"""Jul""",7,29,2025,"""Tue""","""Credit""",3
"""2025-7-23""","""food""","""CORPORATE OFFI…",1.75,1,"""Jul""",7,23,2025,"""Wed""","""Credit""",3
"""2025-7-22""","""food""","""CORPORATE OFFI…",1.75,1,"""Jul""",7,22,2025,"""Tue""","""Credit""",3
"""2025-7-21""","""misc""","""THE HOME DEPOT…",53.99,1,"""Jul""",7,21,2025,"""Mon""","""Credit""",3
"""2025-7-20""","""wmt""","""WAL-MART #0530…",35.97,1,"""Jul""",7,20,2025,"""Sun""","""Credit""",3
"""2025-7-20""","""misc""","""VCN*UNIONCOCIR…",169.95,1,"""Jul""",7,20,2025,"""Sun""","""Credit""",3
"""2025-7-16""","""food""","""CORPORATE OFFI…",1.75,1,"""Jul""",7,16,2025,"""Wed""","""Credit""",3
"""2025-7-15""","""food""","""CORPORATE OFFI…",1.75,1,"""Jul""",7,15,2025,"""Tue""","""Credit""",3


### Look at last month's misc

In [4]:
list(df.filter(pl.col('category') == 'misc')['description'].unique())

['SHERWIN-WILLIAMS707335   EL DORADO    AR',
 'VCN*UNIONCOCIRCUITCLERK  866-255-1857 AR',
 "DAVE 'S FIREWORKS        DERMOTT      AR",
 'THE HOME DEPOT #8537     EL DORADO    AR']

#### Make sure correct month/year is being loaded

In [5]:
print(list(df['month'].unique()))
print(list(df['monthName'].unique()))
print(list(df['weekDay'].unique()))
print(list(df['year'].unique()))
print(df.select(pl.col("date").min()))
print(df.select(pl.col("date").max())) # broken, looks at str, not date

[7]
['Jul']
['Tue', 'Wed', 'Mon', 'Fri', 'Sun', 'Thu']
[2025]
shape: (1, 1)
┌──────────┐
│ date     │
│ ---      │
│ str      │
╞══════════╡
│ 2025-7-1 │
└──────────┘
shape: (1, 1)
┌──────────┐
│ date     │
│ ---      │
│ str      │
╞══════════╡
│ 2025-7-9 │
└──────────┘


### Check number of rows (30ish per month)
Apr: 32
<br>May: 21
<br>June: 20
<br>July: 23

In [7]:
len(df)

23

### Load the data (handle w/ caution)

In [None]:
# path = r'C:\Users\Blake Dennett\Downloads\StreamlineRepo\FinancialStreamline2\BankStreamline\Data\BankDataProd.csv'


# with open(path, mode="a") as f:
#    df.write_csv(f, has_header=False)