#### Notes for later

In [1]:
import polars as pl
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, date2num
import time

## Pull from file / make dates

In [2]:
last_month = time.localtime().tm_mon - 1

current_yr = time.localtime().tm_year

# handling if last month was January
if last_month == 0:
    last_month = 12
    current_yr = current_yr - 1


# current_month = 4
# current_yr = 2025

df = pl.read_csv(r'C:\Users\Blake Dennett\Downloads\StreamlineRepo\FinancialStreamline\BankStreamline\Data\CreditHistory.csv')


# date work
df = df.with_columns(pl.col("Post Date")
                        .str.split("/")
                        .alias("split_str")
                        .list.to_struct()) \
                        .unnest('split_str') \
                        .rename({'field_0':'month', 'field_1':'day', 'field_2':'year'}) \
                        .cast({'day':pl.Int32, 'month':pl.Int32, 'year':pl.Int32}) \
        .filter((pl.col('month') == last_month) & 
                (pl.col('year') == current_yr)) \
        .with_columns(date = pl.concat_str([pl.col('year').cast(pl.Utf8), pl.lit('-'), pl.col('month').cast(pl.Utf8), pl.lit('-'), pl.col('day').cast(pl.Utf8)], separator="")) \
        .with_columns(monthName = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.strftime("%b")) \
        .with_columns(weekDay = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.strftime("%a")) \
        .with_columns(quarter = pl.col("date").str.strptime(pl.Date, "%Y-%m-%d").dt.quarter())

        
# The gate/clean up        
df = df.filter(pl.col('Amount') < 0) \
        .drop('Category', 'Reference Number') \
        .rename({'Amount':'cost', 'Description': 'description'}) \
        .with_columns(cardType = pl.lit('Credit')) \
        .with_columns(cost = abs(pl.col('cost')))


df.limit(10)

Post Date,description,cost,month,day,year,date,monthName,weekDay,quarter,cardType
str,str,f64,i32,i32,i32,str,str,str,u32,str
"""4/30/2025""","""EL DORADO 7002…",20.32,4,30,2025,"""2025-4-30""","""Apr""","""Wed""",2,"""Credit"""
"""4/30/2025""","""4TE*SOUTH ARKA…",206.1,4,30,2025,"""2025-4-30""","""Apr""","""Wed""",2,"""Credit"""
"""4/28/2025""","""DOLLAR GENERAL…",6.78,4,28,2025,"""2025-4-28""","""Apr""","""Mon""",2,"""Credit"""
"""4/25/2025""","""CORNER POST …",12.23,4,25,2025,"""2025-4-25""","""Apr""","""Fri""",2,"""Credit"""
"""4/24/2025""","""AR.GOV/STPAYME…",22.61,4,24,2025,"""2025-4-24""","""Apr""","""Thu""",2,"""Credit"""
"""4/24/2025""","""AR.GOV/STPAYME…",22.61,4,24,2025,"""2025-4-24""","""Apr""","""Thu""",2,"""Credit"""
"""4/24/2025""","""MELVINS DISCOU…",18.89,4,24,2025,"""2025-4-24""","""Apr""","""Thu""",2,"""Credit"""
"""4/23/2025""","""MURPHY7520ATWA…",29.41,4,23,2025,"""2025-4-23""","""Apr""","""Wed""",2,"""Credit"""
"""4/22/2025""","""WAL-MART #0530…",38.99,4,22,2025,"""2025-4-22""","""Apr""","""Tue""",2,"""Credit"""
"""4/22/2025""","""AUTOZONE #0159…",10.96,4,22,2025,"""2025-4-22""","""Apr""","""Tue""",2,"""Credit"""


# Categorize

In [None]:
df = df.with_columns(category = 
                  # ======================= Food =======================
    pl.when(pl.col('description').str.contains('McDon|CORPORATE|CHICK-FIL-A|SLIM CHICKENS')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('MADDIES PLACE|RAISING CANES|Subway|HICKORY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('JAMBA|PIZZA|GOODCENTS|SONIC|TACO BELL|BUFFET')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('Waffle House|DAIRY QUEEN|COLDSTONE|MCGRAWS')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('LITTLE CAESARS|MCDON|WENDY|APPLEBEES|PERCY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('EL SUR|BAREFOOT BISTRO|YAMATO|SMOOTHIE|CREAMERY')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DOLLAR GENERAL|DOLLAR TREE|FLYING BURGER|WWW.HOMECHEF.IL')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DOMINO.S|POPEYES|COCA COLA|LA VILLA MEXICAN|PJ\'S COFFEE')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('CRCKR BRRL|OFF THE RAIL CAFE|HOUSE-WYLIE|SWOLE FOOD')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('DC FUDDRUCKERS|THE BLACK CAT CAFE|RHEA LANA|BYUI FOOD')).then(pl.lit('food'))
    .when(pl.col('description').str.contains('WHATABURGER|CUPBOP|TASTY DONUTS')).then(pl.lit('food'))
                  # ======================= subscriptions =======================
    .when(pl.col('description').str.contains('Adobe|Spotify|Phtoshp Lightrm|Peacock')).then(pl.lit('subscriptions'))
    .when(pl.col('description').str.contains('WMT PLUS|HEALTHWORKS|APPLE.COM/BILL')).then(pl.lit('subscriptions'))
                  # ======================= wmt =======================
    .when(pl.col('description').str.contains('WM SUPER|Wal-Mart|WAL-MART|BROOKSHIRES|BROULIM|ALBERTSONS')).then(pl.lit('wmt'))
    .when(pl.col('description').str.contains('WALGR|EVERYBODYS ANTIQUE')).then(pl.lit('wmt'))
    .when(pl.col('description').str.contains('Walmart|WALMART') &
          ~(pl.col('description').str.contains('MURPHY'))).then(pl.lit('wmt'))
                  # ======================= gas =======================
    .when(pl.col('description').str.contains('MURPHY') & 
          ~pl.col('description').str.contains('DEPOSIT')).then(pl.lit('gas'))
    .when(pl.col('description').str.contains('SHELL|CHEVRON|CIRCLE K|LOVE\'S')).then(pl.lit('gas'))
                  # ======================= internet =======================
    .when(pl.col('description').str.contains('VIASAT')).then(pl.lit('internet'))
                  # ======================= shopping =======================
    .when(pl.col('description').str.contains('T J MAXX|OLD NAVY|SHEIN|REAL DEALS|SALLY BEAUTY')).then(pl.lit('shopping'))
    .when(pl.col('description').str.contains('LDS DIST ONLINE STORE|DC ULTA|DC H&amp|SEPHORA.COM')).then(pl.lit('shopping'))
    .when(pl.col('description').str.contains('American Eagle|SPORTSMANS WAREHOUSE')).then(pl.lit('shopping'))
                  # ======================= amazon =======================
    .when(pl.col('description').str.contains('AMZN|AMAZON|Amazon.com|temu.com|eBay')).then(pl.lit('amazon'))
                  # ======================= fun =======================
    .when(pl.col('description').str.contains('Amazon Prime|YouTube|ZOO|MUSEUM|AQUARIUM|LION.S CLUB GOLF')).then(pl.lit('fun'))
    .when(pl.col('description').str.contains('COMFORT INN|CINEMA|HOLIDAY INN|El Dorado Golf|PlayStation')).then(pl.lit('fun'))
    .when(pl.col('description').str.contains('EXCALIBUR FAMILY FUN|AIRBNB|EXPEDIA|EL DORADO GOLF')).then(pl.lit('fun'))
                  # ======================= power =======================
    .when(pl.col('description').str.contains('ENTERGY')).then(pl.lit('power'))
                  # ======================= car =======================
    .when(pl.col('description').str.contains('O.REILLY|MUFFLEX MUFFLER|AUTOZONE|DC TAKE 5|KARL MALONE FORD')).then(pl.lit('car'))
                  # ======================= progressive =======================
    .when(pl.col('description').str.contains('PROG DIRECT')).then(pl.lit('progressive'))
                  # ======================= water =======================
    .when(pl.col('description').str.contains('SHARE CHECK')).then(pl.lit('water'))
                  # ======================= tithing =======================
    .when(pl.col('description').str.contains('Ch JesusChrist  DONATION')).then(pl.lit('tithing'))
                  # ======================= rent =======================
    .when((pl.col('description').str.contains('VENMO')) & (pl.col('cost') == 775)).then(pl.lit('rent'))
    .when((pl.col('description').str.contains('ACH Withdrawal ELECTRONIC WITHDRAWAL')) & (pl.col('cost') == 775)).then(pl.lit('rent'))
    .otherwise(pl.lit('unknown'))
)


controllables = ['food', 'wmt', 'shopping', 'subscriptions', 'fun', 'unknown', 'amazon', 'food']
uncontrollables = ['tithing', 'progressive', 'power', 'gas', 'water', 'internet', 'rent', 'car']

df = df.with_columns(controllable = pl.when(pl.col('category').is_in(controllables)).then(pl.lit(1))
                                    .when(pl.col('category').is_in(uncontrollables)).then(pl.lit(0))
                                    .otherwise(pl.lit('error'))) \
                                    .cast({'controllable':pl.Int16})
                                    

df = df.select(['date', 'category', 'description', 'cost', 'controllable', 'monthName', 'month', 'day', 'year', 'weekDay', 'cardType', 'quarter'])

df.limit(10)

# list(df.filter(pl.col('Category') == 'wmt')['description'].unique())
# list(df.filter(pl.col('Category') == 'gas')['description'].unique())
# list(df.filter(pl.col('Category') == 'unknown')['description'].unique())

date,category,description,cost,controllable,monthName,month,day,year,weekDay,cardType,quarter
str,str,str,f64,i16,str,i32,i32,i32,str,str,u32
"""2025-4-30""","""unknown""","""EL DORADO 7002…",20.32,1,"""Apr""",4,30,2025,"""Wed""","""Credit""",2
"""2025-4-30""","""unknown""","""4TE*SOUTH ARKA…",206.1,1,"""Apr""",4,30,2025,"""Wed""","""Credit""",2
"""2025-4-28""","""food""","""DOLLAR GENERAL…",6.78,1,"""Apr""",4,28,2025,"""Mon""","""Credit""",2
"""2025-4-25""","""unknown""","""CORNER POST …",12.23,1,"""Apr""",4,25,2025,"""Fri""","""Credit""",2
"""2025-4-24""","""unknown""","""AR.GOV/STPAYME…",22.61,1,"""Apr""",4,24,2025,"""Thu""","""Credit""",2
"""2025-4-24""","""unknown""","""AR.GOV/STPAYME…",22.61,1,"""Apr""",4,24,2025,"""Thu""","""Credit""",2
"""2025-4-24""","""unknown""","""MELVINS DISCOU…",18.89,1,"""Apr""",4,24,2025,"""Thu""","""Credit""",2
"""2025-4-23""","""gas""","""MURPHY7520ATWA…",29.41,0,"""Apr""",4,23,2025,"""Wed""","""Credit""",2
"""2025-4-22""","""wmt""","""WAL-MART #0530…",38.99,1,"""Apr""",4,22,2025,"""Tue""","""Credit""",2
"""2025-4-22""","""car""","""AUTOZONE #0159…",10.96,0,"""Apr""",4,22,2025,"""Tue""","""Credit""",2


### Look at last month's unknowns

In [4]:
list(df.filter(pl.col('category') == 'unknown')['description'].unique())

['CORNER POST              EL DORADO    AR',
 'APPLE.COM/BILL           866-712-7753 CA',
 '365 VEND LLC             TROY         MI',
 'AR.GOV/STPAYMENT         EGOV.COM     AR',
 'EL DORADO 7002 DSMV      EL DORADO    AR',
 'SOUTH ARKANSAS REGIONAL HEL DORADO    AR',
 '4TE*SOUTH ARKANSAS REGION870-863-2000 AR',
 'MELVINS DISCOUNT PHARMACYEL DORADO    AR',
 'MY LOCAL TAXES           479-282-1958 AR',
 'PHR*SAMAHealthCareService870-8622400  AR']

#### Make sure correct month/year is being loaded

In [None]:
print(list(df['month'].unique()))
print(list(df['monthName'].unique()))
print(list(df['weekDay'].unique()))
print(list(df['year'].unique()))
print(df.select(pl.col("date").min()))
print(df.select(pl.col("date").max())) # broken, looks at str, not date

[4]
['Apr']
['Thu', 'Sun', 'Fri', 'Wed', 'Mon', 'Tue']
[2025]
shape: (1, 1)
┌──────────┐
│ date     │
│ ---      │
│ str      │
╞══════════╡
│ 2025-4-1 │
└──────────┘
shape: (1, 1)
┌──────────┐
│ date     │
│ ---      │
│ str      │
╞══════════╡
│ 2025-4-9 │
└──────────┘


### Check number of rows (30ish per month)
Apr: 32

In [7]:
len(df)

32

### Load the data (handle w/ caution)

In [None]:
# path = r'C:\Users\Blake Dennett\Downloads\BankStreamline\Data\BankDataProd.csv'


# with open(path, mode="a") as f:
#    df.write_csv(f, has_header=False)