In [64]:
import pandas as pd
import os
from datetime import datetime
import pandas as pd
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, CDSView, BooleanFilter, HoverTool, CustomJS, Select
from bokeh.layouts import column
from bokeh.transform import factor_cmap
from datetime import datetime, timedelta
from bokeh.palettes import HighContrast3, Spectral

In [2]:
# Load the categorized transactions CSV; the `date` column is parsed into datetimes.
transactions_path = "./ledgered/ledgered_app/resources/transactions/chase_categorized.csv"
df = pd.read_csv(transactions_path, parse_dates=["date"])

# Drop every row whose category is "Ignore" so it is excluded from everything below.
ignored_rows = df.index[df["category"] == "Ignore"]
df = df.drop(index=ignored_rows)

### Derived features

In [3]:
# Weekday number of each transaction (pandas convention: Monday=0 ... Sunday=6).
df["day_of_week"] = df["date"].dt.weekday

# Step back `day_of_week` days to land on the Monday that starts the transaction's
# week, then keep only the calendar date (a `datetime.date` object per row).
week_start = df["date"] - pd.to_timedelta(df["day_of_week"], unit="D")
df["first_day_of_week"] = week_start.dt.date

In [4]:
df.head()

Unnamed: 0,date,type,amount,account,original_description,pretty_description,category,subcategory,day_of_week,first_day_of_week
0,2022-12-29,Debit,30.0,Chase,AMERICAN AIR0010280316665,American Airlines,Travel,Airplane,3,2022-12-26
1,2022-12-29,Debit,3.7,Chase,DCA SAYSIBON PIER C,,Eating & Drinking Out,Bars,3,2022-12-26
2,2022-12-29,Debit,16.94,Chase,MEZEH - DCA,,Eating & Drinking Out,Restaurants,3,2022-12-26
3,2022-12-30,Debit,23.4,Chase,BIG NIGHT LIVE,Big Night Live,Eating & Drinking Out,Bars,4,2022-12-26
4,2022-12-30,Debit,20.0,Chase,FAMILY PRACTICE GROUP PC,Family Practice Group,Medical,Doctor,4,2022-12-26


# Global plot settings

In [5]:
# Shared settings for the Bokeh figures in this notebook.
tools = ["reset", "hover"]  # toolbar tools enabled on a figure
height, width = 800, 1000  # figure dimensions

# Simple Scatter Plot

In [30]:
# Scatter of transaction amount against weekday number.
date_amount_df = ColumnDataSource(df[["day_of_week", "amount"]])

# `get_plot()` is not defined anywhere in this notebook, so this cell raised
# NameError on a fresh kernel (it presumably relied on a since-deleted cell).
# Build the figure directly from the global plot settings instead.
p = figure(height=height, width=width, tools=tools, title="Amount by Day of Week")
p.xaxis.axis_label = "day_of_week"
p.yaxis.axis_label = "amount"
p.circle(x="day_of_week", y="amount", source=date_amount_df)
show(p)

# hbar category totals

In [88]:
# Total amount per category, sorted ascending by that total.
df1 = (
    df.groupby("category", as_index=False)["amount"]
    .sum()
    .sort_values("amount", ascending=True)
)

# The categorical y-range needs the factor labels in display order; groupby already
# yields one row per category, so the column can be used directly as a plain list.
cats = df1["category"].tolist()
source = ColumnDataSource(df1)

# Size the figure from the global plot settings rather than repeating the numbers.
p = figure(y_range=cats, height=height, width=width, tools=tools, title="Category Totals")
p.hbar(y="category", right="amount", source=source, height=0.9)
show(p)

# vbar weekly totals

In [96]:
# Total amount per week, in chronological order of the week's first day.
df2 = (
    df.groupby("first_day_of_week", as_index=False)["amount"]
    .sum()
    .sort_values("first_day_of_week")
)

# Week labels are used as categorical x-axis factors, so they must be strings.
df2["first_day_of_week"] = df2["first_day_of_week"].map(str)
weeks = df2["first_day_of_week"].tolist()
source = ColumnDataSource(df2)

# Size the figure from the global plot settings rather than repeating the numbers.
p = figure(x_range=weeks, height=height, width=width, tools=tools, title="Weekly Totals")
p.vbar(x="first_day_of_week", top="amount", source=source, width=0.9)
show(p)

# vbar weekly totals by category

In [68]:
# One row per (week, category) holding the summed amount. Week labels become strings
# because they are used as categorical x-axis factors.
df3 = (
    df.groupby(["first_day_of_week", "category"], as_index=False)["amount"]
    .sum()
    .sort_values("first_day_of_week")
)
df3["first_day_of_week"] = df3["first_day_of_week"].map(str)
weeks = sorted(df3["first_day_of_week"].unique())
cats = sorted(df3["category"].unique())

# Lookup of the summed amount for each (week, category) pair present in the data.
amount_by_week_cat = {
    (week, cat): amount
    for week, cat, amount in zip(df3["first_day_of_week"], df3["category"], df3["amount"])
}

# Wide layout expected by vbar_stack: one list per category, aligned with `weeks`.
# A week in which a category has no transactions contributes 0.
data = {"weeks": weeks}
for cat in cats:
    data[cat] = [amount_by_week_cat.get((week, cat), 0) for week in weeks]

# Spectral only ships palettes for 3 to 11 colors, so `Spectral[len(cats)]` raised
# KeyError for any other number of categories. Clamp the lookup to the available
# sizes, then take one color per category, cycling if there are more categories
# than colors. For 3..11 categories this yields exactly the original palette.
palette = Spectral[min(max(len(cats), 3), 11)]
colors = [palette[i % len(palette)] for i in range(len(cats))]

# Size the figure from the global plot settings rather than repeating the numbers.
p = figure(
    x_range=weeks,
    height=height,
    width=width,
    tools="hover",
    title="Weekly Totals",
    tooltips="$name @weeks: @$name",
)

p.vbar_stack(cats, x="weeks", source=data, width=0.9, legend_label=cats, color=colors)

# Clicking a legend entry toggles the visibility of that category's bars.
p.legend.click_policy = "hide"

show(p)

# Weekly average per category (pie chart)

In [85]:
from math import pi

import pandas as pd

from bokeh.palettes import Category20c
from bokeh.plotting import figure, show
from bokeh.transform import cumsum


# Average weekly amount per category: each category's total divided by the number
# of distinct week-start dates that appear in the data.
n_weeks = len(df["first_day_of_week"].unique())
group = df.groupby("category", as_index=False).amount.sum()
group["value"] = group["amount"] / n_weeks
data = group[["category", "value"]].sort_values("value")

# Each wedge spans a fraction of the full circle equal to its share of the total.
data["angle"] = data["value"] / data["value"].sum() * 2 * pi

# Category20c only ships palettes for 3 to 20 colors, so `Category20c[len(data)]`
# raised KeyError for any other number of categories. Clamp the lookup to the
# available sizes, then take one color per row, cycling if there are more rows than
# colors. For 3..20 categories this yields exactly the original palette.
palette = Category20c[min(max(len(data), 3), 20)]
data["color"] = [palette[i % len(palette)] for i in range(len(data))]

p = figure(height=350, title="Pie Chart", toolbar_location=None,
           tools="hover", tooltips="@category: @value", x_range=(-0.5, 1.0))

# cumsum('angle') accumulates the per-row angles so each wedge starts where the
# previous one ended.
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum("angle", include_zero=True), end_angle=cumsum("angle"),
        line_color="white", fill_color="color", legend_field="category", source=data)

# Hide axes and grid lines around the pie.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)