In [2]:
from dashboard.logic.io import GSHEETS_URL, read_gsheet, comment_button
import pandas as pd
import numpy as np

# Load the sheet behind GSHEETS_URL into `df` via the project's read_gsheet helper.
# header=None is passed through to the helper — presumably so that no sheet row is
# consumed as column names and columns get positional integer labels (TODO confirm
# against read_gsheet); the parsing functions below index columns by position.
df = read_gsheet(
    GSHEETS_URL, 
    header=None
)

In [3]:
def findRowColRegex(df: pd.DataFrame, pat: str, case: bool=True, regex:bool=True):
    """Locate the single cell of ``df`` whose text contains ``pat``.

    Only text (object / string dtype) columns are searched; numeric columns and
    missing values never match.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to search.
    pat : str
        Pattern handed to ``Series.str.contains``.
    case : bool, default True
        Whether the match is case sensitive.
    regex : bool, default True
        Treat ``pat`` as a regular expression (True) or a literal string (False).

    Returns
    -------
    tuple[int, int]
        Zero-based positional ``(row, col)`` of the unique matching cell.

    Raises
    ------
    ValueError
        If no cell matches, or if more than one cell matches.
    """

    def _matches(column: pd.Series) -> pd.Series:
        # Every column must yield a boolean Series of the same length; returning
        # None for some columns makes DataFrame.apply build an unusable result.
        no_match = pd.Series(False, index=column.index)
        is_text = (pd.api.types.is_object_dtype(column)
                   or pd.api.types.is_string_dtype(column))
        if not is_text:
            return no_match
        try:
            text = column.str
        except AttributeError:
            # Object column that holds no strings at all (e.g. Python ints).
            return no_match
        # na=False: missing / non-string cells count as "no match" instead of NaN.
        return text.contains(pat, case=case, regex=regex, na=False).astype(bool)

    # make sure only one such pat exists in the df
    df_mask = df.apply(_matches)

    count = int(df_mask.to_numpy().sum())

    if count < 1: 
        raise ValueError(f'Given {pat} did not give any results.')
    if count > 1:
        raise ValueError(f'Given {pat} gave more than 1 results.')

    # positional row and col of the single True in the mask
    row = int(df_mask.any(axis='columns').to_numpy().argmax())
    col = int(df_mask.any(axis='index').to_numpy().argmax())

    return row, col

def getDataFrames(df: pd.DataFrame):
    """Split the raw dashboard sheet into its named sub-tables.

    The sheet is expected to contain, in this order:

    * a "main" table in the first three columns, ending at the row that holds
      the cell ``'Monthly Income'``;
    * an "ads" table whose top-left cell matches ``"My Finance Course"`` and
      which runs down until the first blank cell in its first column;
    * further tables below the main one, separated by completely blank rows and
      mapped, in order, onto ``df_names`` (entries called ``'skip'`` are dropped).

    Parameters
    ----------
    df : pd.DataFrame
        Raw sheet. The row index is reset internally; the caller's frame is
        not modified.

    Returns
    -------
    dict[str, pd.DataFrame]
        Sub-tables keyed by name (``'main'``, ``'ads'`` and the non-skip
        entries of ``df_names`` for which a segment exists).

    Raises
    ------
    ValueError
        If the ``'Monthly Income'`` marker is missing, or (from
        ``findRowColRegex``) if ``"My Finance Course"`` is missing or ambiguous.
    """

    df = df.reset_index(drop=True)
    dataframes_dict = {}

    # MAIN DF
    income_rows = (df == 'Monthly Income').any(axis='columns')
    if not income_rows.any():
        # argmax() on an all-False mask would silently return row 0.
        raise ValueError("Could not find a 'Monthly Income' cell marking the end of the main table.")
    main_df_idx2 = int(income_rows.to_numpy().argmax())
    dataframes_dict['main'] = df.iloc[:main_df_idx2 + 1, :3].dropna(how='all', axis='rows')

    # ADS DF
    r1, c1 = findRowColRegex(df, "My Finance Course")

    ads_df = df.iloc[r1:, c1:].reset_index(drop=True)
    # The first column of the slice is addressed by position, so this works
    # regardless of how the sheet's columns are labelled.
    anchor_is_blank = ads_df.iloc[:, 0].isna().to_numpy()
    if anchor_is_blank.any():
        # Keep the rows above the first blank cell; when nothing is blank the
        # table runs to the end of the sheet and is kept whole.
        ads_df = ads_df.iloc[:int(anchor_is_blank.argmax())]
    dataframes_dict['ads'] = ads_df.dropna(how='all', axis='columns')

    # OTHER DFS
    df_names = [
        'announcements',
        'advice',
        'error_warning',
        'risk',
        'skip',
        'skip',
        'skip',
        'historical',
        'cash_pos',
        'general_notes',
        'success',
    ]

    # Rows after the main df where every value is NaN.
    nan_rows = df.iloc[main_df_idx2 + 1:].isna().all(axis='columns')

    # Segment boundaries: every blank row except the very last sheet row, then
    # None so that the final segment runs to the end of the sheet.
    dfs_idxs = []
    if len(nan_rows) > 0:
        dfs_idxs = [pos for pos, is_blank in nan_rows.iloc[:-1].items() if is_blank]
        dfs_idxs.append(None)

    # raw dataframes: one slice per pair of consecutive boundaries
    dfs = [df.iloc[start:stop] for start, stop in zip(dfs_idxs, dfs_idxs[1:])]

    # strip dataframes from NaN-s and add to dictionary
    for df_, name in zip(dfs, df_names):
        if name == 'skip':
            continue

        dataframes_dict[name] = (df_
            .dropna(how='all', axis='columns')
            .dropna(how='all', axis='rows')
        )

    return dataframes_dict

# Parse a copy of the loaded sheet; `df` itself is not passed to the parser.
df1 = df.copy()

# Leftover debugging call, kept disabled:
#findRowColRegex(df1, 'My Finance Course')

# Split the sheet into named sub-tables and show which names were produced.
df_dict = getDataFrames(df1)
df_dict.keys()



In [192]:
# Google Sheets URL of the example-portfolio tab (hard-coded here).
EX_PORTF_URL = "https://docs.google.com/spreadsheets/d/1J4QvPR5mJDc44Z3_WzhWuVKB3QbxWWDzj1gLPA53ZZM/edit#gid=2145734043"

# NOTE: this rebinds the global `df`, replacing the frame loaded from GSHEETS_URL;
# every cell below works on the portfolio sheet.
# The two columns are named 'asset' and 'percent' via `names` (presumably
# forwarded to the underlying pandas reader — TODO confirm in read_gsheet).
df = read_gsheet(
    url=EX_PORTF_URL,
    header=None,
    names=['asset', 'percent']
)
df.head()

Unnamed: 0,asset,percent
0,"IDEAL PORTFOLIO 2023 (HIGH VOLATILITY, ASSET C...",
1,,
2,Stocks (Old fashioned) 'Value' Stocks - Energy...,15%
3,Real Estate (Farms are the safest during a hig...,15%
4,Precious Metals (Silver & Gold),30%


In [429]:
# 1. chop into dict of DFs using 'IDEAL PORTFOLIO' as references
# 2. extract main part with % data and extra parts for extra info

# 1
# Section titles live in the 'asset' column, so only that column is searched;
# 'percent' is left out because it need not hold text. na=False turns blank cells
# into False so the mask is purely boolean.
mask_df = df[['asset']].apply(lambda x: x.str.startswith("IDEAL PORTFOLIO", na=False))
ref_idxs = mask_df[mask_df.asset].index

# One name per title row, in sheet order.
df_names = ['2023', 'crash_risk', 'high_inflation', 'normal']
df_dict = {}
for i, (name, i1) in enumerate(zip(df_names, ref_idxs)):
    # A section runs from its title row up to (excluding) the next title row;
    # the last one runs to the end of the sheet.
    i2 = None if i == len(ref_idxs)-1 else ref_idxs[i+1]
    df_ = df.iloc[i1:i2,:]

    # dissect
    info_dict = {}
    info_dict['title'] = df_.asset.iloc[0]
    # Rows with both an asset and a percentage form the allocation table.
    info_dict['df'] = df_.dropna()
    # Everything below the table's last row is free-text extra info. Copied so the
    # in-place edit below acts on an independent Series.
    info_dict['extra'] = (
        df_.loc[info_dict['df'].index[-1]+1:,:]
        .dropna(how='all')
        .asset
        .copy()
    )

    df_dict[name] = info_dict

# Append the second extra line of the 2023 section to the first one.
df_dict['2023']['extra'].iloc[0] += ' ' + df_dict['2023']['extra'].iloc[1]

In [458]:
import re
# Kept from the original cell so the session-wide pandas option is unchanged; the
# column additions below use .assign() on a fresh frame and do not depend on it.
pd.options.mode.chained_assignment = None

# prepare the DFs
# Copy each per-portfolio dict as well: a plain df_dict.copy() is shallow, so adding
# 'df_plot' would also show up inside df_dict.
dfs = {k: dict(v) for k, v in df_dict.items()}
for k, v in dfs.items():
    v['df_plot'] = (
        v['df']
        .iloc[:-1]  # strip total
        .assign(
            # remove '%' (only a trailing percent sign, never a digit)
            percent_n=lambda d: d['percent'].str.rstrip('%').astype(int),
            # long names are shortened to the text before the first '(' or ':'
            asset_hover=lambda d: d['asset'].apply(
                lambda x: re.match(r"^([^\(:]+)", x)[0] if len(x)>35 else x
            ),
        )
    )

In [462]:
# Inspect the plot-ready frame built for the '2023' portfolio.
dfs['2023']['df_plot']

Unnamed: 0,asset,percent,percent_n,asset_hover
2,Stocks (Old fashioned) 'Value' Stocks - Energy...,15%,15,Stocks
3,Real Estate (Farms are the safest during a hig...,15%,15,Real Estate
4,Precious Metals (Silver & Gold),30%,30,Precious Metals (Silver & Gold)
5,Bonds,0%,0,Bonds
6,Cryptocurrencies (Bitcoin Only),10%,10,Cryptocurrencies (Bitcoin Only)
7,"Cash (Spread between accounts, ensure you keep...",30%,30,Cash


In [None]:
# Plot input for the 2023 portfolio. The final plotting cell reads the columns
# 'p_n' and 'asset_hover' from `a`, so they must be created here; with these lines
# commented out a fresh Restart & Run All fails with a KeyError.
a = (
    df_dict['2023']['df']
    .iloc[:-1]  # strip the 'Total' row
    .assign(
        # numeric percentage without the trailing '%'
        p_n=lambda d: d.percent.str[:-1].astype(int),
        # long names are shortened to the text before the first '('
        asset_hover=lambda d: d.asset.apply(
            lambda x: re.match(r"^([^\(]+)", x)[0] if len(x)>40 else x
        ),
    )
)
a

In [442]:
# Inspect the parsed allocation table of the '2023' portfolio (still includes the 'Total' row).
df_dict['2023']['df']

Unnamed: 0,asset,percent
2,Stocks (Old fashioned) 'Value' Stocks - Energy...,15%
3,Real Estate (Farms are the safest during a hig...,15%
4,Precious Metals (Silver & Gold),30%
5,Bonds,0%
6,Cryptocurrencies (Bitcoin Only),10%
7,"Cash (Spread between accounts, ensure you keep...",30%
8,Total,100%


In [335]:
from dashboard.logic.plots import pie_chart
from bokeh.io import show, output_notebook

In [334]:
# Configure Bokeh so that show() renders plots inline in the notebook.
output_notebook()

In [359]:
# 3rd part imports
import pandas as pd
import numpy as np

# bokeh
from bokeh.palettes import Category10
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, Legend, LabelSet, LegendItem, HoverTool
from bokeh.embed import components
from bokeh.resources import CDN

In [411]:
def pie_chart(
    df: pd.DataFrame, 
    x: str, 
    y: str,
    x_hover: str=None,
    y_hover: str=None, 
    radius: float=0.8,
    x_range: tuple[float, float]=(-1, 1.0),
    percentage_decimal: int=1,
    label_distance: float=3,
    fig_height: int=350,
    background_color: str='#212529',
    pallette: dict=Category10,
    hover_tooltip: str='default',
    legend_place: str='right',
    fig_kwargs: dict={},
    wedge_kwargs: dict=dict(line_width=1.5, alpha=0.7),
    legend_kwargs: dict=dict(location='center', click_policy="hide",
                             label_text_color='white', border_line_width=0,
                             inactive_fill_color='#9fcf2e', inactive_fill_alpha=0.15),
    label_kwargs: dict=dict(text_font_size='10pt', text_align='center')
    ):
    """Build a Bokeh pie chart with one wedge, label, hover tool and legend entry per row.

    Rows are sorted ascending by ``y``; rows whose ``y`` is not positive are
    dropped. The caller's frame is not modified (sorting returns a new frame).

    Parameters
    ----------
    df : pd.DataFrame
        Data with at least the columns ``x`` and ``y``.
    x : str
        Column holding the slice names (used for the legend).
    y : str
        Column holding the numeric slice sizes.
    x_hover, y_hover : str, optional
        Columns shown in the default hover tooltip; default to ``x`` and ``y``.
    radius : float
        Wedge radius in data units.
    x_range : tuple[float, float]
        Horizontal data range of the figure.
    percentage_decimal : int
        Decimals used when rounding and printing percentages.
    label_distance : float
        Labels sit at ``label_distance * radius / 4`` from the centre.
    fig_height : int
        Figure height in pixels.
    background_color : str
        Colour of figure background, border, wedge outlines and label text.
    pallette : dict
        Mapping ``size -> sequence of colours`` (e.g. ``bokeh.palettes.Category10``).
        Sizes missing from the mapping are handled by ``_pie_colors``.
    hover_tooltip : str
        Tooltip template for the hover tool; ``'default'`` builds one from
        ``x_hover`` / ``y_hover`` and the computed ``percentage_hover`` column.
    legend_place : str
        Side of the plot the legend is attached to.
    fig_kwargs, wedge_kwargs, legend_kwargs, label_kwargs : dict
        Extra keyword arguments forwarded to ``figure``, ``wedge``, ``Legend``
        and ``LabelSet``. They are only unpacked, never mutated.

    Returns
    -------
    The configured Bokeh figure.
    """

    # sort df by "y"
    df = df.sort_values(by=y, ignore_index=True)

    # calculate sector start and end angles
    total = df[y].sum()
    df['angle'] = df[y] / total * 2 * np.pi
    df['cumsum_end'] = df['angle'].cumsum(axis='rows')
    df['cumsum_start'] = df['cumsum_end'].shift(1).fillna(0)

    # calculate y percentages for hover & labels
    df['percentage_number'] = (df[y] / total * 100).round(percentage_decimal)
    df['percentage_hover'] = df['percentage_number'].astype(str)
    # slices below 5 % get no label (too narrow to fit the text)
    df['percentage_label'] = df['percentage_number'].apply(lambda x: "" if x < 5 else f"{x:.{percentage_decimal}f}%")

    # project label text coordinates to polar coordinates (at each wedge's mid angle)
    mid_angle = df['cumsum_end'] - df['angle'].div(2)
    df['label_x_pos'] = np.cos(mid_angle) * label_distance * radius/4
    df['label_y_pos'] = np.sin(mid_angle) * label_distance * radius/4

    # remove assets that are 0
    df = df[df[y] > 0]

    # reset dataframe index to start with 0
    df = df.reset_index(drop=True)

    # init the figure/canvas for the plot
    p = figure(height=fig_height, toolbar_location=None, x_range=x_range, **fig_kwargs)

    # Hover configuration does not depend on the slice, so resolve it once.
    x_hover = x if x_hover is None else x_hover
    y_hover = y if y_hover is None else y_hover

    # The braces around the field names ("@{name}") let Bokeh resolve column
    # names that contain spaces or other special characters.
    hover_tooltip = hover_tooltip if hover_tooltip != 'default' else \
            f"""
                <div>
                    <p style="margin:0;font-weight:bold;color:grey;">@{{{x_hover}}}</p>
                    <p style="padding:0;margin:0;font-weight:bold;">@{{{y_hover}}} (@percentage_hover%)</p>
                </div>
            """

    legend_items = []
    for idx, color in enumerate(_pie_colors(pallette, df.shape[0])):

        # one single-row data source per wedge so each wedge gets its own renderer
        source = ColumnDataSource(df.iloc[idx,:].to_frame().T)

        # create the glyphs renderers
        wedge = p.wedge(x=0, y=0, radius=radius, start_angle="cumsum_start", 
                        end_angle="cumsum_end", source=source, **wedge_kwargs,
                        fill_color=color, hover_fill_color=color,
                        line_color=background_color, hover_line_color=background_color,
                        line_alpha=1, hover_alpha=1, hover_line_alpha=1)

        label = LabelSet(x='label_x_pos', y='label_y_pos', text='percentage_label',
                         source=source, level='glyph', text_color=background_color, **label_kwargs)

        p.add_layout(label)
        p.add_tools(HoverTool(renderers=[wedge],
                              tooltips=hover_tooltip))

        legend_items.append(LegendItem(label=df[x].iloc[idx], renderers=[wedge]))

    # legend
    legend = Legend(items=legend_items, **legend_kwargs,
                    background_fill_color=background_color) 
    p.add_layout(legend, place=legend_place)

    # figure attributes
    p.toolbar.active_drag = None
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = None

    p.min_border=0
    p.outline_line_alpha=0
    p.outline_line_width=0
    p.outline_line_color = p.background_fill_color = p.border_fill_color = background_color

    return p


def _pie_colors(pallette: dict, n: int) -> list:
    """Return ``n`` colours from a size-keyed palette, even for sizes it has no entry for."""
    if n in pallette:
        return list(pallette[n])
    sizes = sorted(pallette)
    if not sizes:
        raise ValueError('pallette must contain at least one colour list.')
    # Use the smallest entry that is large enough; if none is, recycle the largest.
    base = next((pallette[size] for size in sizes if size >= n), pallette[sizes[-1]])
    return [base[i % len(base)] for i in range(n)]

In [428]:
# Custom hover template for the chart below. It is an f-string, so the doubled
# braces {{0,0}} come out as the literal text {0,0} in the template.
hover_tt = f"""
                <div>
                    <p style="margin:0;font-weight:bold;color:grey;">@asset_hover</p>
                    <p style="padding:0;margin:0;font-weight:bold;">@percentage_hover{{0,0}}%</p>
                </div>
            """

# Render the 2023 portfolio pie chart with the legend below the plot.
# NOTE(review): this reads the columns 'p_n' and 'asset_hover' from `a` — confirm
# the cell that defines `a` creates them before this cell runs.
show(pie_chart(
    df=a,
    x='asset',
    y='p_n',
    x_hover='asset_hover',
    percentage_decimal=0,
    label_distance=3.15,
    hover_tooltip=hover_tt,
    legend_place='below',
    fig_height=500,
    radius=0.5,
    label_kwargs=dict(text_font_size='12pt', text_align='center', text_font_style='bold')
))