# Personal Financial Report
>version 0.5

this jupyter notebook allows me to keep track of my finances.




## Import the Libraries

In [134]:
#import the Libraries

# standard Libraries
import os
import re
import json5 as json 
from datetime import datetime, timedelta
import pandas as pd
import numpy as np

# used for displaying data and stuff
from IPython.display import display, HTML 

# used for charts and Graphs
import plotly
import plotly.express as px
import seaborn as sns


## Pandas Options/Settings

In [135]:
# set pandas options
# show all the columns and rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# turn off a warning
pd.options.mode.chained_assignment = None  # default='warn'

## Variables
variables for directories and for year and month

In [136]:
DIR = os.getcwd()
data_DIR = os.path.join(DIR,'data')
old_data_DIR = os.path.join(DIR,'old_data')

if os.path.exists(data_DIR) == False:
    os.mkdir(data_DIR)

this_year = int(datetime.now().strftime("%Y"))
this_month = int(datetime.now().strftime("%m"))

# print(this_year,this_month)

# functions

## old_data_to_new_format()
take the *legacy* data and convert it to the new format

In [137]:
def old_data_to_new_format():
    """
    takes some of my old files and converts them to the new format
    """
    old_files = []
    
    # there are two files
    old_files.append(os.path.join(old_data_DIR,'finances_2007_2017.xlsx'))
    old_files.append(os.path.join(old_data_DIR,'finances.xlsm'))

    df_old = pd.DataFrame()

    # combining the files
    for f in old_files:
        temp = pd.read_excel(f,sheet_name='HistoryDownload.csv')
        df_old = pd.concat([df_old,temp])

    # making new columns and formatting others
    df_old['Tags'] = ''
    df_old.loc[ df_old['CabinPayback'] == 1.0, 'Tags'] += 'CabinPayback,' 
    df_old.loc[ df_old['MovingExpense'] == 'x', 'Tags'] += 'MovingExpense,' 


    temp['Category'] = temp['Category'].str.upper()
    
    df_old['YYYY'] = pd.to_datetime(df_old['Date']).dt.strftime("%Y")
    df_old['YYYYMMDD'] = pd.to_datetime(df_old['Date']).dt.strftime("%Y%m%d")
    df_old['YYYYMM'] = pd.to_datetime(df_old['Date']).dt.strftime("%Y%m")
    df_old['YYYY.W'] = pd.to_datetime(df_old['Date']).dt.strftime("%Y.%W")

    df_old['Date'] = pd.to_datetime(df_old['Date']).dt.strftime("%Y-%m-%d")

    df_old['YYYY'] = pd.to_numeric(df_old['YYYY'],errors='coerce')
    df_old['YYYY'] = df_old['YYYY'].astype(int,errors='ignore')

    df_old['YYYYMMDD'] = pd.to_numeric(df_old['YYYYMMDD'],errors='coerce')
    df_old['YYYYMMDD'] = df_old['YYYYMMDD'].astype(int,errors='ignore')

    df_old['YYYYMM'] = pd.to_numeric(df_old['YYYYMM'],errors='coerce')
    df_old['YYYYMM'] = df_old['YYYYMM'].astype(int,errors='ignore')

    df_old['YYYY.W'] = pd.to_numeric(df_old['YYYY.W'],errors='coerce')
    df_old['YYYY.W'] = df_old['YYYY.W'].astype(float,errors='ignore')

    # these will be the final columns
    df_old = df_old[['Date','YYYYMMDD','YYYYMM','YYYY.W','YYYY','Location','Card','Delta','Balance','Category','Tags']]

    df_old['Delta'] = pd.to_numeric(df_old['Delta'],errors='coerce')
    df_old['Delta'] = df_old['Delta'].astype(float,errors='ignore')

    df_old['Balance'] = pd.to_numeric(df_old['Balance'],errors='coerce')
    df_old['Balance'] = df_old['Balance'].astype(float,errors='ignore')
    
    year_list = list(df_old.YYYY.drop_duplicates())

    for y in year_list:
        temp = df_old[df_old.YYYY == y]
        temp.to_csv(os.path.join(data_DIR, 'data_' + str(y) + '.csv'),index=False)

## use this for testing...
# old_data_to_new_format()

## process_new_data()
get new data and return a dataframe

In [138]:
def process_new_data(new_csv=r'C:\Users\JGarza\Downloads\stmt.csv'):
    """
    process the newest match of transactions
    """
    
    # removes the header/summary
    lines = None
    with open(new_csv, 'r') as f:
        lines = f.readlines()
    if lines[0] == 'Description,,Summary Amt.\n':
        with open(new_csv, 'w') as f:
            f.writelines(lines[6:])

    # turn into a dataframe
    temp = pd.read_csv(new_csv)

    # reformat columns and stuff
    
    temp['Location'] = temp['Description']
    temp['Delta'] = temp['Amount']
    temp['Balance'] = temp['Running Bal.']

    if 'Tags' not in temp.columns:
        temp['Tags'] = ''

    if 'Category' not in temp.columns:
        temp['Category'] = ''

    if 'Card' not in temp.columns:
        temp['Card'] = 'BankOfAmerica-Debit'
    
    temp['YYYY'] = pd.to_datetime(temp['Date']).dt.strftime("%Y")
    temp['YYYYMMDD'] = pd.to_datetime(temp['Date']).dt.strftime("%Y%m%d")
    temp['YYYYMM'] = pd.to_datetime(temp['Date']).dt.strftime("%Y%m")
    temp['YYYY.W'] = pd.to_datetime(temp['Date']).dt.strftime("%Y.%W")
    
    temp['Date'] = pd.to_datetime(temp['Date']).dt.strftime("%Y-%m-%d")

    temp['YYYY'] = pd.to_numeric(temp['YYYY'],errors='coerce')
    temp['YYYY'] = temp['YYYY'].astype(int,errors='ignore')

    temp['YYYYMMDD'] = pd.to_numeric(temp['YYYYMMDD'],errors='coerce')
    temp['YYYYMMDD'] = temp['YYYYMMDD'].astype(int,errors='ignore')

    temp['YYYYMM'] = pd.to_numeric(temp['YYYYMM'],errors='coerce')
    temp['YYYYMM'] = temp['YYYYMM'].astype(int,errors='ignore')

    temp['YYYY.W'] = pd.to_numeric(temp['YYYY.W'],errors='coerce')
    temp['YYYY.W'] = temp['YYYY.W'].astype(float,errors='ignore')

    # these will be the final columns... and their order
    temp = temp[['Date','YYYYMMDD','YYYYMM','YYYY.W','YYYY','Location','Card','Delta','Balance','Category','Tags']]

    temp['Delta'] = temp['Delta'].str.replace(',', '')
    temp['Delta'] = pd.to_numeric(temp['Delta'],errors='coerce')
    temp['Delta'] = temp['Delta'].astype(float,errors='ignore')
    temp['Delta'] = temp['Delta'].fillna(0.0)

    temp['Balance'] = temp['Balance'].str.replace(',', '')
    temp['Balance'] = pd.to_numeric(temp['Balance'],errors='coerce')
    temp['Balance'] = temp['Balance'].astype(float,errors='ignore')

    return temp 

# # makes sure it works
# nd = process_new_data()
# display(nd)
    

## load_last_x_years()

In [139]:
def load_last_x_years(x,verbose=False):
    """
    used to load multiple years of transactions
    """
    files = os.listdir(data_DIR)
    files.sort()
    files.reverse()

    if verbose:
        print(files)

    result = pd.DataFrame()

    for file in files[:x]:
        if verbose:
            print(file)
        file = os.path.join(data_DIR, file)
        result = pd.concat([result, pd.read_csv(file)])
    
    result = result.sort_values(by = 'YYYYMMDD',ascending=False)
    result = result.reset_index(drop=True)

    return result

# test 
# load_last_x_years(10,verbose=True)


## load_year()

In [140]:
def load_year(year):
    """
    loads one year of data
    """
    file = os.path.join(data_DIR,'data_' + str(year) + '.csv')
    return pd.read_csv(file)


# x = load_year(this_year)
# display(x)

## get_all_data()

In [141]:
def get_all_data(verbose = False):
    """
    gets all the years of data
    """
    files = os.listdir(data_DIR)
    files.sort()
    files.reverse()

    result = pd.DataFrame()

    # looks through all the files and combine them
    for file in files:
        if verbose:
            print(file)
        ffp = os.path.join(data_DIR, file)
        temp = pd.read_csv(ffp)

        if verbose:
            print(*temp.columns,sep=', ')

        result = pd.concat([result, temp])

    return result

## used for testing
# x = get_df_all(verbose=False)

## open_year_in_excel()

In [142]:
def open_year_in_excel(y):
    """
    loads one year of data
    """
    try:
        file = os.path.join(data_DIR, 'data_' + str(y) + '.csv')
        os.system(r' start excel ' + file )
    except Exception as ex:
        print(str(ex))
        print('that year might not be available')


# edit_year(2020)

## save_data()

In [143]:
def save_data(idf,verbose=False):
    """
    saves out the data ...in their own year file
    """
    year_list = list(idf.YYYY.drop_duplicates())

    for y in year_list:
        temp = idf[idf.YYYY == y]
        file = os.path.join(data_DIR, 'data_' + str(y) + '.csv')
        temp.to_csv(file,index=False)
        if verbose:
            print('saved: ',file)

## clean_loc()

In [144]:
def clean_loc(s):
    """
    takes in a string and clean it up...
    """
    s = s.upper()
    s = re.sub('#\d+','',s)
    s = re.sub('\d\d/\d\d','',s)
    s = re.sub('\d{2,10}','',s)
    s = re.sub('X{3,10}','',s)
    s = re.sub('\s+',' ',s)
    s = re.sub(r'(\\|/|\*)',' ',s)
    s = re.sub(r'(JUSTIN|GARZA|SQ|PAYPAL|PURCHASE)',' ',s)
    s = re.sub(r' (NC|CHARLOTTE)',' ',s)
    s = re.sub(r' (CA|CHATSWORTH)',' ',s)
    s = re.sub(r' . ',' ',s)
    s = re.sub('\s+',' ',s)
    s = re.sub(r'(-|#|:|,|\.com|\.|\'|\$|;)','',s)
    
    return s



## get_category_table

In [145]:

def get_category_table(idf):
    """
    creates a lookup table.
    where the clean_loc matches (enough) the transaction will have the same Category
    """
    temp = idf.copy()
    temp['clean_loc'] = temp['Location']
    # temp = temp[['clean_loc','Location','Category']].drop_duplicates()
    temp = temp[['clean_loc','Category']] #.drop_duplicates()
    temp = temp[temp['Category'] != '']
    # temp = temp.drop_duplicates()
    temp = temp.dropna()

    temp['clean_loc'] = temp['clean_loc'].apply(clean_loc)

    temp['count'] = 1

    temp = pd.pivot_table(
        temp,
        index=['clean_loc','Category'],
        values='count',
        aggfunc={'count':sum}
        )
    temp = temp.reset_index()
    temp = temp.sort_values(by='count',ascending=False)
    temp = temp.reset_index()


    temp = temp[temp['count'] > 1 ]
    
    return temp

# #test
# temp = get_category_table(load_last_x_years(2))
# print(len(temp))
# display(temp)

## is_over()

In [146]:
def is_over(A,B,threshold=0.85,verbose=False):
    """
    does fuzzy matching ... anything over the threshold is considered a match
    """
    from difflib import SequenceMatcher as SM
    r = SM(isjunk=None, a=A, b=B).ratio()

    if r >= threshold:
        if verbose:
            print(A,'|',B,'|',r)
        return True
    else:
        return False

# print(is_over('yustin','justin',0.5,True)) 

## fill_in_category()

In [147]:
def fill_in_category(df1,threshold=0.80,using_x_years=2):
    """
    takes in a dataframe and fills in the Category columns
    """
    # this category_table is made based on the last two years of transactions.
    # new locations and types of transactions will be filled in manually.
    catt = get_category_table(load_last_x_years(using_x_years))
    df1['clean_loc'] = df1.Location.apply(clean_loc)
    
    for i in df1.iterrows():
        if i[1]['Category'] == '':
            for c in catt.iterrows():
                # see if they match ... enough
                if is_over(i[1]['clean_loc'],c[1]['clean_loc'],0.80,False) == True:
                    df1.at[i[0],'Category'] = c[1]['Category']

    df1 = df1.drop(columns=['clean_loc'])
    return df1

## incorporated_data

In [168]:
def incorporated_data(current_data = None, new_data = None):
    """
    this will merge the new data with the current data
    """

    if current_data == None:
        try:
            current_data = load_year(this_year)
        except Exception as ex:
            current_data = load_year(this_year-1)
        except Exception as ex:
            print(str(ex))
            print('error loading new data')
            return None            

    
    if new_data == None:
        try:
            new_data = process_new_data()
        except Exception as ex:
            print(str(ex))
            print('error loading new data')
            return None
    
    result = pd.concat([current_data,new_data])
    
    # drop duplicates after the merge 
    result = result.drop_duplicates(['YYYYMMDD','Location','Delta','Balance'])
    
    # sort the values 
    result = result.sort_values(by='YYYYMMDD', ascending = False)

    # fill in the category 
    result.loc[result['Category'].isnull(),'Category'] = ''
    result = result.fillna('')
    result = fill_in_category(result,0.80)

    result['Date'] = pd.to_datetime(result['Date'])
    
    # drop duplicates again
    result = result.drop_duplicates(['YYYYMMDD','Location','Delta','Balance'])
    
    save_data(result)

    # return result

## used for testing
# x = incorporated_data()
# display(x)



Can only use .str accessor with string values!
error loading new data


## edit existsing files

In [149]:
## this can be used to edit the data the currently exists in the files
## uncomment to use ... of course

# dfall = get_all_data()
# dfall = dfall.fillna('')

# # write code here 

# save_data(dfall)


# charts
...not right using the charts too much, might get rid of them

In [150]:
# color choices for the charts below
print(*px.colors.named_colorscales())
colorscale = 'Inferno'
colorscale = 'rdylgn'
colorscale = 'spectral'

aggrnyl agsunset blackbody bluered blues blugrn bluyl brwnyl bugn bupu burg burgyl cividis darkmint electric emrld gnbu greens greys hot inferno jet magenta magma mint orrd oranges oryel peach pinkyl plasma plotly3 pubu pubugn purd purp purples purpor rainbow rdbu rdpu redor reds sunset sunsetdark teal tealgrn turbo viridis ylgn ylgnbu ylorbr ylorrd algae amp deep dense gray haline ice matter solar speed tempo thermal turbid armyrose brbg earth fall geyser prgn piyg picnic portland puor rdgy rdylbu rdylgn spectral tealrose temps tropic balance curl delta oxy edge hsv icefire phase twilight mrybm mygbm


## category_heatmap()

In [151]:
def category_heatmap(df,time_column='YYYYMM'):
    """
    a heatmap for the categories
    """
    df[time_column] = df[time_column].astype(str)
    df = pd.pivot_table(
        df,
        values = 'Delta',
        index = 'Category',
        columns=time_column,
        aggfunc= {
            'Delta':sum,
        }
        )

    

    fig = px.imshow(
        df, 
        text_auto=True,
        # color_continuous_scale='Inferno',
        color_continuous_scale=colorscale,
        width=800,
        # height=800,
        )
    fig.show()

## category_bars()

In [152]:
def category_bars(df):
    """
    shows bars for each category
    """
    df = pd.pivot_table(
    df,
    values = 'Delta',
    index = 'Category',
    aggfunc= {
        'Delta':sum,
    }
    )

    df = df.reset_index()
    df = df.sort_values(by='Delta')
    df = df.set_index('Category')

    # display(df)

    fig = px.bar(
        df.reset_index(), 
        y = 'Category',
        x='Delta',
        color='Delta',
        text_auto=True,
        color_continuous_scale=colorscale,
        )
    fig.show()

## db_lines()

In [153]:
def db_lines(df):
    """
    delta and balance in a line graph
    """
    df['YYYYMMDD'] = df['YYYYMMDD'].astype(int).astype(str)
    df = pd.pivot_table(
        df,
        values = ['Delta','Balance'],
        index = ['YYYYMMDD'],
        aggfunc= {
            'Delta':sum,
            'Balance':min,
        }
        )

    df = df.reset_index()
    df = df.sort_values(by='YYYYMMDD')

    fig = px.line(
        df, 
        x='YYYYMMDD', 
        y=['Delta','Balance'],
        # text_auto=True
        )
    fig.show()

## cat_bar_time()

In [154]:
def cat_bar_time(df):
    """
    shows the bars for each category over time
    """
    df = pd.pivot_table(
        df,
        values = ['Delta','Balance'],
        index = ['YYYYMMDD','Category'],
        aggfunc= {
            'Delta':sum,
            'Balance':min,
        }
        )

    df = df.reset_index()
    df = df.sort_values(by='YYYYMMDD')

    fig = px.bar(
        df, 
        x='YYYYMMDD', 
        y='Delta',
        color='Category',
        text_auto=True
        )
    fig.show()

## heatmapCT

In [155]:
def heatmapCT(idf,time_column='YYYYMM',color_palettes="RdYlGn"):
    """
    a heatmap for the categories over time, showing the sum of delta
    """
    df = idf.copy()
    
    df[time_column] = df[time_column].astype(int).astype(str)
    
    # pivot the table
    df = pd.pivot_table(
        df,
        values = 'Delta',
        index = 'Category',
        columns=time_column,
        aggfunc= {
            'Delta':sum,
        }
        )

    df = df.fillna(0)
    df = df.reset_index()
    df = df[df.columns.tolist()]

    #get list of Colmns
    cols = df.columns.tolist()
    cols = [c for c in cols if c != 'Category']

    # Sum of Rows
    df['Σ'] = df[cols].sum(axis=1)

    # sort by row sum
    df = df.sort_values(by='Σ',ascending=False)

    # Sum of Cols
    sums = {}
    for c in df.columns:
        if c == 'Category':
            sums['Category'] = 'GrandTotal'
        else:
            sums[c] = df[c].sum()
    # print(sums)
    sums = pd.DataFrame(sums,index=[99])

    df = pd.concat([df,sums])
    df = df.reset_index(drop=True)

    # the color is based on 1 percent the ...nevermind... just -1000,1000
    # habsmax = max([ abs(idf['Delta'].min()), abs(idf['Delta'].max())]) *0.01
    habsmax = 1000

    # more colors ... here -> https://seaborn.pydata.org/tutorial/color_palettes.html
    # cm = sns.color_palette("coolwarm_r", as_cmap=True)
    # cm = sns.color_palette("Spectral", as_cmap=True)
    # cm = sns.color_palette("magma", as_cmap=True)
    # cm = sns.color_palette("cubehelix", as_cmap=True)
    # cm = sns.color_palette("icefire", as_cmap=True)
    cm = sns.color_palette(color_palettes, as_cmap=True)
    # return df.style.background_gradient(cmap=cm, vmin=habsmax*-1, vmax=habsmax).format(precision=2)
    return df.style.background_gradient(cmap=cm, vmin=habsmax*-1, vmax=habsmax).format(precision=2,thousands=',')
    


## heatmapIT()

In [156]:
def heatmapIT(idf,time_column='YYYYMMDD',color_palettes="RdYlGn",simple_loc = False):
    """
    a heatmap for the items over time
    """
    df = idf.copy()
    
    df[time_column] = df[time_column].astype(int).astype(str)
    
    if simple_loc:
        df['Location'] = df['Location'].apply(clean_loc)
    
    # pivot the table
    df = pd.pivot_table(
        df,
        values = 'Delta',
        index = 'Location',
        columns=time_column,
        aggfunc= {
            'Delta':sum,
        }
        )

    df = df.fillna(0)
    df = df.reset_index()
    df = df[df.columns.tolist()]

    #get list of Colmns
    cols = df.columns.tolist()
    cols = [c for c in cols if c != 'Location']

    # Sum of Rows
    df['Σ'] = df[cols].sum(axis=1)

    # sort by row sum
    df = df.sort_values(by='Σ',ascending=False)

    # Sum of Cols
    sums = {}
    for c in df.columns:
        if c == 'Location':
            sums['Location'] = 'GrandTotal'
        else:
            sums[c] = df[c].sum()
    # print(sums)
    sums = pd.DataFrame(sums,index=[99])

    df = pd.concat([df,sums])
    df = df.reset_index(drop=True)

    # the color is based on 1 percent the ...nevermind... just -1000,1000
    # habsmax = max([ abs(idf['Delta'].min()), abs(idf['Delta'].max())]) *0.01
    habsmax = 1000

    # more colors ... here -> https://seaborn.pydata.org/tutorial/color_palettes.html
    # cm = sns.color_palette("coolwarm_r", as_cmap=True)
    # cm = sns.color_palette("Spectral", as_cmap=True)
    # cm = sns.color_palette("magma", as_cmap=True)
    # cm = sns.color_palette("cubehelix", as_cmap=True)
    # cm = sns.color_palette("icefire", as_cmap=True)
    cm = sns.color_palette(color_palettes, as_cmap=True)
    # return df.style.background_gradient(cmap=cm, vmin=habsmax*-1, vmax=habsmax).format(precision=2)
    return df.style.background_gradient(cmap=cm, vmin=habsmax*-1, vmax=habsmax).format(precision=2,thousands=',')

## heatmapBal

In [157]:
def heatmapBal(idf,time_column='YYYYMM',color_palettes="RdYlGn"):
    """
    shows the heatmap of the Balance (avg,min,max) over time
    """
    
    df = idf.copy()
    
    df['avg'] = df['Balance']
    df['min'] = df['Balance']
    df['max'] = df['Balance']
    
    df[time_column] = df[time_column].astype(int).astype(str)
    df = pd.pivot_table(
        df,
        values = [
            'avg',
            'min',
            'max'
            ],
        # index = 'Category',
        columns=time_column,
        aggfunc= {
            'avg':np.mean,
            'min':min,
            'max':max,
        }
        )

    df = df.fillna(0)
    df = df.reset_index()
    df = df[df.columns.tolist()]


    df = df.reset_index(drop=True)

    # the color is based on 50 percent the .
    # habsmax = max([ abs(idf['Balance'].min()), abs(idf['Balance'].max())]) 
    
    vmin = idf['Balance'].min()
    vmax = idf['Balance'].max()

    # more colors ... here -> https://seaborn.pydata.org/tutorial/color_palettes.html
    # cm = sns.color_palette("coolwarm_r", as_cmap=True)
    # cm = sns.color_palette("Spectral", as_cmap=True)
    # cm = sns.color_palette("magma", as_cmap=True)
    # cm = sns.color_palette("cubehelix", as_cmap=True)
    # cm = sns.color_palette("icefire", as_cmap=True)
    cm = sns.color_palette(color_palettes, as_cmap=True)
    return df.style.background_gradient(cmap=cm,vmin=vmin, vmax=vmax).format(precision=2,thousands=',')
    


# Main Section

## incorporated new data

In [165]:
# merges the old data with the new data...
# and opens the file so the user can fill in the spots that are not automatically filled
incorporated_data()
open_year_in_excel(this_year)

Can only use .str accessor with string values!
error loading new data


## create dataframes

dfow : 1 weeks  
dffw : last 5 weeks  
dfnd : last 90 days  
dfy : this year's data  
dfa : all data  



In [159]:
dfy = load_year(this_year)
dfy['loc'] = dfy['Location'].apply(clean_loc)

ninety_days = int( (datetime.now() - timedelta(days=90)).strftime('%Y%m%d')) 
dfnd = dfy[dfy['YYYYMMDD'] >= ninety_days]

a_bit_ago = int( (datetime.now() - timedelta(days=28)).strftime('%Y%m%d')) 
dffw = dfy[dfy['YYYYMMDD'] >= a_bit_ago]

a_bit_ago = int( (datetime.now() - timedelta(days=7)).strftime('%Y%m%d')) 
dfow = dfy[dfy['YYYYMMDD'] >= a_bit_ago]

# dfa = get_all_data()



## HeatMaps

In [160]:

display(HTML('<h3>last 4 weeks</h3>'))
heatmapCT(dffw,time_column='YYYYMMDD'
    # ,color_palettes="icefire_r")
    # ,color_palettes="magma_r")
    # ,color_palettes="turbo_r")
     )


Unnamed: 0,Category,20220829,20220830,20220831,20220901,20220902,20220906,20220907,20220908,20220909,20220912,20220913,20220914,20220915,20220916,20220919,20220920,20220921,20220922,Σ
0,debt,-231.0,0.0,0.0,0.0,0.0,677.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,829.0,0.0,0.0,0.0,1275.0
1,utility,0.0,0.0,0.0,238.0,0.0,828.0,0.0,0.0,0.0,357.0,0.0,0.0,-881.0,0.0,0.0,0.0,0.0,0.0,542.0
2,clothing,-646.0,623.0,46.0,0.0,0.0,0.0,0.0,-996.0,0.0,966.0,251.0,0.0,0.0,0.0,950.0,-818.0,0.0,0.0,376.0
3,health,0.0,0.0,0.0,0.0,214.0,11.0,519.0,0.0,0.0,901.0,0.0,0.0,-573.0,-86.0,0.0,387.0,0.0,-1146.0,227.0
4,tech,0.0,0.0,230.0,0.0,0.0,-464.0,-910.0,-56.0,0.0,765.0,-413.0,0.0,0.0,158.0,0.0,0.0,488.0,0.0,-202.0
5,misc,0.0,0.0,0.0,0.0,-38.0,-873.0,0.0,0.0,0.0,0.0,331.0,0.0,0.0,0.0,-3.0,0.0,0.0,0.0,-583.0
6,paypal,-579.0,0.0,140.0,0.0,0.0,-1606.0,832.0,0.0,-822.0,0.0,0.0,0.0,0.0,943.0,0.0,0.0,0.0,0.0,-1092.0
7,travel,455.0,-613.0,0.0,0.0,0.0,606.0,0.0,620.0,0.0,0.0,0.0,0.0,-998.0,-318.0,-1562.0,0.0,0.0,0.0,-1810.0
8,unk,308.0,0.0,0.0,0.0,0.0,-445.0,320.0,0.0,-802.0,-501.0,0.0,-318.0,0.0,0.0,20.0,0.0,0.0,-570.0,-1988.0
9,invest,-267.0,0.0,-1989.0,0.0,0.0,-654.0,0.0,430.0,84.0,-1034.0,0.0,0.0,0.0,0.0,1261.0,853.0,0.0,-904.0,-2220.0


In [161]:
display(HTML('<h3>last 1 weeks</h3>'))
heatmapIT(dfow,time_column='YYYYMMDD',simple_loc=True)

Unnamed: 0,Location,20220919,20220920,20220921,20220922,Σ
0,VISA,967.0,0.0,488.0,0.0,1455.0
1,TAPESTRY,110.0,853.0,0.0,0.0,963.0
2,QUEST DIAGNOSTICS,950.0,0.0,0.0,0.0,950.0
3,TIAA,840.0,0.0,0.0,0.0,840.0
4,UNITED CONTINENTAL HOLDINGS,829.0,0.0,0.0,0.0,829.0
5,OMNICOM GROUP,729.0,0.0,0.0,0.0,729.0
6,SYSCO,0.0,387.0,0.0,0.0,387.0
7,DELTA AIR LINES,-3.0,0.0,0.0,0.0,-3.0
8,HERSHEY,-342.0,0.0,0.0,0.0,-342.0
9,COGNIZANT TECHNOLOGY SOLUTIONS,-910.0,0.0,0.0,0.0,-910.0


In [162]:
display(HTML('<h3>This Year ... so far</h3>'))
heatmapBal(dfy,time_column='YYYYMM')

YYYYMM,index,202201,202202,202203,202204,202205,202206,202207,202208,202209
0,avg,2897.0,6540.92,9469.6,13895.94,11109.37,8185.49,8262.45,9235.38,4192.2
1,max,7419.0,7542.0,12283.0,19150.0,16416.0,12670.0,12095.0,11090.0,9417.0
2,min,-209.0,5145.0,7007.0,8812.0,7338.0,3894.0,4658.0,6892.0,390.0


In [163]:
# dfa = get_all_data()
# heatmapBal(dfa,time_column='YYYYMM')

# Experimental Stuff

## open pivot_ui


In [164]:
# used for pivot table
# from pivottablejs import pivot_ui

# file = os.path.join(DIR,'pivot.html')

# pivot_ui(
#     dfa, 
#     rows=['Category'], 
#     cols=['YYYYMM'],
#     vals=['Delta'], 
#     aggregatorName = 'Sum',
#     rendererName = 'Heatmap',
#     outfile_path = file
#     )

# os.system(file)
