# Replicating YNAB 4's Reports using Python

- Demo of how pretty much any of YNAB 4's reports can be replicated and extended / tweaked using `pandas` and `matplotlib`.

- This is a work in progress!  There's loads more we can do.  I'm also deliberately keeping the code simple.

- As of now, it only consumes the transactions from YNAB (e.g. `YourBudgetName as of YYYY-MM-DD HHmm-Register.csv`), not the budget CSV.  That's on the to-do list.

- Start by launching your budget in YNAB4 and exporting to CSV (hint: `ctrl-e`).  Paste your YNAB CSV export folder path into the cell just below this one.

- I have no idea if this would work with nYNAB because I hate nYNAB. :-)

- Tested with 
    - Python 3.9.1, 
    - Pandas 1.2.0 (must update that ...) 
    - matplotlib 3.5.0 etc
    - running on Ubuntu (yes, yes, I know that's weird because YNAB is Windows software; it's just that's where I've got Python running, OK?)

## Part 1 - bringing the transaction data in

In [None]:
## PUT YOUR YNAB EXPORT DIR IN HERE:
# Folder containing the CSV files exported from YNAB 4.  Raw string so that
# Windows-style backslashes in a pasted path are not treated as escapes.
ynab_dir = r'/tmp/ynabstuff'   

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import datetime as dt
import os
import numpy as np
import shutil
import decimal
import sqlite3 as sq
dec = decimal.Decimal

# Date strings the report cells filter on ("this month", "last year onwards").
today = dt.date.today()
date_today = f"{today:%Y-%m-%d}"
month_today = f"{today:%Y-%m}"
year_today = f"{today:%Y}"
last_year = str(today.year - 1)

# Show dataframes in full: no truncation of cell text, columns or rows.
for option_name in ('display.max_colwidth', 'display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

print (f"Ready at {dt.datetime.now()}")



In [None]:
# Open newest YNAB Register CSV

paths = [os.path.join(ynab_dir, f) for f in os.listdir(ynab_dir) if "Register" in f]
if not paths:
    # max() on an empty list would only say "max() arg is an empty sequence"
    raise FileNotFoundError (f"No YNAB 'Register' export found in {ynab_dir!r}")
ynab_file = max(paths, key=os.path.getmtime)   # newest by modification time
print (f"Using {ynab_file}\n\n5 random rows from the CSV - check it's reading OK:")
csv = pd.read_csv (ynab_file, parse_dates = ['Date'],  infer_datetime_format=True)
# sample() raises if asked for more rows than exist, so cap n for tiny exports
display (csv.sample (n = min (5, len (csv))))
print (f"CSV has {len (csv)} rows.")

# make a fresh dataframe with just the useful columns and nicer column names
trans = csv[['Account','Date', 'Master Category', 'Sub Category', 'Category', 'Payee', 'Memo', 'Outflow','Inflow', 'Running Balance']].copy()
trans.columns = ['account','date', 'master_cat', 'sub_cat', 'cat', 'payee', 'memo', 'outflow','inflow', 'ynab_bal']

# get correct datatypes: strip the currency symbol and convert the amounts to float
for fl in ['inflow', 'outflow', 'ynab_bal']:
    # astype(str) keeps this working when pandas has already parsed the column as numbers
    amounts = trans[fl].astype(str)
    for sign in ("$","£"):
        # regex=False: "$" must be removed as a literal character, not read as the end-of-string anchor
        amounts = amounts.str.replace (sign, "", regex = False)
    trans[fl] = amounts.astype(float)

# calculated columns
trans['trans'] = trans.inflow - trans.outflow     # signed amount: positive = money in
tx_dates = pd.to_datetime(trans['date'])
trans['year_month'] = tx_dates.dt.to_period('M').astype('string')
trans['year'] = tx_dates.dt.to_period('Y').astype('string')

print ("10 Random rows after data cleaning and calculated columns added:")
display (trans.sample (n = min (10, len (trans))))
# trans.dtypes

print (f"Latest known state of all accounts:")
# 'last' takes the final row of each account in file order.
# NOTE(review): this is only the latest balance if the export lists transactions oldest-first - confirm.
curr_status = trans.pivot_table (
    index = 'account',
    aggfunc = {'date': 'last', 'ynab_bal' : 'last' }
)
display (curr_status.sort_values (by = 'date'))

## Part 2 - some example reports

Feel free to tweak these to your needs

In [None]:
# Analysis example #1 - mimic the YNAB budget screen (NB. don't have the monthly budget amounts yet, just spending - working on it)

# Net amount per category (rows) and month (columns) from last year onwards;
# cells with no activity are left blank and totals are added as margins.
trans.loc[trans['year'] >= last_year].pivot_table(
    values='trans',
    index=['master_cat', 'sub_cat'],
    columns='year_month',
    aggfunc='sum',
    margins=True,
    fill_value='',
)

In [None]:
# Analysis example #2
# Now focus on the categories where we're often in danger of over-spending!

## PUT YOUR FOCUS CATEGORIES HERE!:
watch_cats = [
    'Christmas and New Year',
    'Holidays',
    'Birthdays / Gifts',
    'Child amusements',
    'Supermarket',
    'Restaurants',
    'Proper wine',
]

# Rows dated last year or later that fall in one of the watched sub-categories.
mask = (trans['year'] >= last_year) & trans['sub_cat'].isin(watch_cats)

print ("Monthly summary of dangerous categories!")
# Tip: add 'payee', 'memo' or 'date' to the index list for a finer breakdown.
monthly_watch = trans.loc[mask].pivot_table(
    values='trans',
    index=['master_cat', 'sub_cat'],
    columns='year_month',
    aggfunc='sum',
    margins=True,
    fill_value='',
)
display(monthly_watch.transpose())

print ("Year-on-year summary of dangerous categories!")
# All years this time (no date filter) and no totals margin.
yearly_watch = trans.loc[trans['sub_cat'].isin(watch_cats)].pivot_table(
    values='trans',
    index=['master_cat', 'sub_cat'],
    columns='year',
    aggfunc='sum',
    fill_value='',
)
display(yearly_watch.transpose())


In [None]:
print ("Dangerous categories per year visualised, stacked and unstacked:")

# Total outflow per watched category, transposed so that years are the rows
# (x axis) and categories are the columns (one bar series each).
# fill_value must be the number 0, not the string '0': a string fill turns every
# column that has a gap into object dtype, and the plotting code then treats
# those columns as non-numeric instead of drawing them.
dang_piv =  trans[trans.sub_cat.isin (watch_cats)].pivot_table (
        index = ['cat'],#, 'payee'], #, 'payee', 'memo', 'date'],
        values = 'outflow',
        columns = 'year',
        aggfunc = 'sum',
        fill_value = 0,
    ).T

#display (dang_piv)
# Same data drawn twice: side-by-side bars first, then stacked bars.
for is_stacked in (False, True):
    display (
        dang_piv.plot (
            kind = 'bar', # or 'area' or 'line' etc
            legend = True,
            stacked = is_stacked,
            figsize = (25,10),
            rot = 45,
        )
    )


In [None]:
# because I'm better at SQL than Python, transfer the transaction stuff to SQLite, to make it easy to calculate running balances:
# start from a clean database file so to_sql never collides with a table left over from a previous run
if os.path.exists ('trans.db'):
    os.remove('trans.db')

# nb trying to use the 'index' column to replicate YNAB's transaction order and therefore its running balances
#  otherwise we're likely to be out until the last transaction of each day
# ('index' is the dataframe index, which to_sql writes out as a column by default)
balance_qry = """
    select *
    , sum (trans) over (        -- nb the first YNAB transaction on each account is a starting balance = handy!
        partition by account
        order by date,[index]
    ) as acc_bal
    , sum (trans) over (
        order by date,account,[index]
    ) as bal
    , sum (trans) over (
        partition by year_month, cat
        order by date,account,[index]
    ) as month_cat_bal
    from trans
    order by date, account, [index]
"""

conn = sq.connect('trans.db')
try:
    trans.to_sql('trans', conn)
    # acc_bal = running balance per account, bal = running balance over all accounts,
    # month_cat_bal = running total within each month/category
    sqldf = pd.read_sql (balance_qry, conn)
finally:
    # actually call close() - a bare `conn.close` does nothing and leaves the connection open
    conn.close()

display (sqldf.head(20))

#display (sqldf.dtypes)

In [None]:
#  Analytics example #3 - running balance over all accounts

# One point per date: the lowest all-accounts running balance recorded on that date.
daily_low = sqldf.pivot_table(values='bal', index=['date'], aggfunc='min', fill_value=0)
daily_low.plot(
    kind='area',  # or 'bar' or 'line' etc
    legend=True,
    stacked=False,
    figsize=(25, 10),
    rot=45,
)

print ('Balance history across all accounts:\n')
peak_bal = sqldf['bal'].max()
print (f'Highest ever balance was {peak_bal}:')
# show the transaction row(s) at which that peak was reached
display (sqldf[sqldf['bal'] == peak_bal])

In [None]:
# Analysis example #4 - minimum balance each month.
# Arguably analogous to YNAB's Net Worth plot, but easier to code!

# Rows from the current month are excluded (presumably because the month is not over yet).
other_months = sqldf.loc[sqldf['year_month'] != month_today]
baldf = other_months.groupby('year_month')['bal'].min()

print ("Minimum monthly balances:")
display (baldf.plot(kind='bar', rot=45, figsize=(25, 10)))
display (baldf.tail(12))

In [None]:
# Analysis example #5 - income vs expenditure each month.

# Net amount per master category (columns) for each month (rows), from last year onwards.
since_last_year = sqldf.loc[sqldf['year'] >= last_year]
catdf = since_last_year.pivot_table(
    values='trans',
    index='year_month',
    columns='master_cat',
    aggfunc='sum',
    fill_value=0,
)

print ("Monthly income vs Expenditure:")
display (catdf.plot(kind='bar', stacked=True, rot=45, figsize=(25, 10)))
display (catdf)

In [None]:
#  Analysis example #6 - detailed break down of a single category
## PUT YOUR OWN CATEGORY OF CHOICE HERE:
focus_cat = "Utility bills"

print (f"{focus_cat} breakdown:")

# Outflow per month (rows) split by payee (columns), drawn as stacked bars.
focus_rows = trans.loc[trans['sub_cat'] == focus_cat]
focus_rows.pivot_table(
    values='outflow',
    index='year_month',
    columns=['payee'],
    aggfunc='sum',
    fill_value=0,
).plot(kind='bar', stacked=True, rot=45, figsize=(25, 10))

In [None]:
# Analysis example #7.  This month pie chart break down by payee
print (f"Payees this month:")

month_rows = trans[trans.year_month == month_today]
if month_rows.empty:
    # Early in a month the export may hold nothing for it yet; plotting an
    # empty pivot raises, so report that instead of crashing the cell.
    print (f"No transactions dated {month_today} in the export - nothing to plot.")
else:
    # Total outflow per payee for the current month, one pie wedge per payee.
    display (
        month_rows.pivot_table (
            index = 'payee',
            values = 'outflow',
            aggfunc = 'sum',
            fill_value = 0,
        ).plot(kind='pie', y='outflow',  figsize = (25,10),rot = 45)
    )