# Plotly Visualizations

## Preparations

In [1]:
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Build the path with os.path.join so the notebook also runs on Linux/macOS,
# where a Windows-style ".\\test_data\\..." literal does not resolve.
# The CSV uses ';' as field separator and ',' as decimal mark.
test_df = pd.read_csv(os.path.join("test_data", "2024-09-28.csv"), sep=";", decimal=",")

In [3]:
# Preview the loaded data (long frames are shown truncated in the output).
test_df

Unnamed: 0,ausgabe_haushalt,bemerkung,datum,kategorie,preis,shop,monat_jahr
0,False,,2024-08-31,Drogerie/Apotheke,3.49,Feneberg,2024-08
1,False,,2024-08-31,Drogerie/Apotheke,13.38,Apotheke,2024-08
2,True,,2024-08-31,Lebensmittel,8.95,Feneberg,2024-08
3,False,Grillen,2024-08-30,Lebensmittel,13.17,Feneberg,2024-08
4,True,,2024-08-30,Lebensmittel,4.05,Feneberg,2024-08
...,...,...,...,...,...,...,...
763,True,,2023-10-03,Lebensmittel,5.70,MPreis,2023-10
764,False,,2023-10-02,Vorsorge/Sparen,50.00,Bank,2023-10
765,False,,2023-10-02,Wohnung,25.04,GEZ,2023-10
766,True,,2023-10-02,Lebensmittel,18.77,Feneberg,2023-10


In [4]:
# Sum the prices per month and category in a single, re-runnable step.
# The unstack(fill_value=0)/stack() round trip adds a 0 row for every category
# that had no expenses in a month; reset_index(name=...) then turns the result
# into a flat frame whose value column is called 'preis'.
test_grouped_df = (
    test_df.groupby(['monat_jahr', 'kategorie'])['preis']
    .sum()
    .unstack(fill_value=0)
    .stack()
    .reset_index(name='preis')
)

In [7]:
# Inspect the grouped result: one row per month/category pair.
test_grouped_df

Unnamed: 0,monat_jahr,kategorie,preis
0,2023-10,Aktivitäten,5.00
1,2023-10,Ausgehen/Alkohol,193.00
2,2023-10,Auto,200.00
3,2023-10,Drogerie/Apotheke,30.62
4,2023-10,Elektronik/Telekommunikation,20.19
...,...,...,...
204,2024-08,Urlaub,288.12
205,2024-08,Versicherungen,0.00
206,2024-08,Vorsorge/Sparen,217.00
207,2024-08,Wohnung,1131.04


## Pareto Chart

In [8]:
def create_pareto(df, x_axis, y_axis, title_y_axis, unit=None, title="Pareto-Chart", subtitle="", show_share=False):
    """Build a Pareto chart: descending bars plus a cumulated-share line.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot. It is not modified; a sorted copy is used internally.
    x_axis : str
        Column whose values are placed on the x-axis.
    y_axis : str
        Numeric column drawn as bars and used to compute the shares.
    title_y_axis : str
        Name given to the bar trace.
    unit : str, optional
        Suffix appended to the tick labels of the primary y-axis.
    title : str
        Figure title.
    subtitle : str
        Text rendered in a ``<sub>`` element below the title.
    show_share : bool
        If True, also draw each row's individual share as a second line.

    Returns
    -------
    The figure created by ``make_subplots`` with bars on the primary y-axis
    and the percentage lines on the secondary y-axis.
    """
    # Sort descending and derive share / cumulated share in percent.
    ordered = df.sort_values(by=[y_axis], ascending=False)
    total = ordered[y_axis].sum()
    if total == 0:
        # Without this guard 0 / 0 would turn both percentage lines into NaN.
        share = ordered[y_axis] * 0.0
    else:
        share = ordered[y_axis] / total * 100
    ordered = ordered.assign(**{"share": share, "cumulated share": share.cumsum()})

    # Bars use the primary y-axis, percentage lines the secondary one.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    fig.add_trace(
        go.Bar(x=ordered[x_axis],
               y=ordered[y_axis],
               name=title_y_axis,
               text=ordered[y_axis].round(0),
               textposition="auto"),
        secondary_y=False
    )

    fig.add_trace(
        go.Scatter(x=ordered[x_axis],
                   y=ordered["cumulated share"],
                   name="Cumulated Share"),
        secondary_y=True
    )

    if show_share:
        fig.add_trace(
            go.Scatter(x=ordered[x_axis],
                       y=ordered["share"],
                       name="Share"),
            secondary_y=True
        )

    fig.update_yaxes(secondary_y=False, showgrid=True)

    if unit is not None:
        fig.update_yaxes(secondary_y=False, ticksuffix=f" {unit}")

    fig.update_yaxes(secondary_y=True, ticksuffix=" %", range=[0, 100], showgrid=True)

    fig.update_layout(title=f"{title}<br><sub>{subtitle}</sub>")

    return fig

In [10]:
# Pareto chart of the per-category totals for the month 2024-08.
test_pareto = create_pareto(
    df=test_grouped_df.loc[test_grouped_df["monat_jahr"].eq("2024-08")],
    x_axis="kategorie",
    y_axis="preis",
    title_y_axis="Expenses",
    unit="€",
    show_share=True,
)
test_pareto.show()

## Stacked bar chart with total sum on each bar

In [11]:
# Show the grouped frame again; it is the input of the stacked bar chart below.
test_grouped_df

Unnamed: 0,monat_jahr,kategorie,preis
0,2023-10,Aktivitäten,5.00
1,2023-10,Ausgehen/Alkohol,193.00
2,2023-10,Auto,200.00
3,2023-10,Drogerie/Apotheke,30.62
4,2023-10,Elektronik/Telekommunikation,20.19
...,...,...,...
204,2024-08,Urlaub,288.12
205,2024-08,Versicherungen,0.00
206,2024-08,Vorsorge/Sparen,217.00
207,2024-08,Wohnung,1131.04


In [12]:
def stacked_bar(df, x_axis, y_axis, color, unit=None, title="Stacked Bar Chart", subtitle="", show_sum=False, x_axis_title=None, y_axis_title=None, show_avg_expense=False):
    """Build a stacked bar chart, optionally with per-bar totals and a mean line.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    x_axis : str
        Column placed on the x-axis; totals are computed per value of it.
    y_axis : str
        Numeric column that is stacked.
    color : str
        Column passed to ``px.bar`` as ``color``.
    unit : str, optional
        Suffix appended to the y-axis tick labels.
    title : str
        Figure title.
    subtitle : str
        Text rendered in a ``<sub>`` element below the title.
    show_sum : bool
        If True, write the rounded total of each bar above it.
    x_axis_title, y_axis_title : str, optional
        Axis titles; the axis title is left untouched when None.
    show_avg_expense : bool
        If True, draw a dashed horizontal line at the mean of the bar totals.

    Returns
    -------
    The figure created by ``px.bar`` with the optional additions applied.
    """
    fig = px.bar(df,
                 x=x_axis,
                 y=y_axis,
                 color=color,
                 title=f"{title}<br><sub>{subtitle}</sub>")

    if show_sum or show_avg_expense:
        # One total per bar, shared by the labels and the average line.
        totals = df.groupby([x_axis])[y_axis].sum()

    if show_sum:
        # Anchor each label at the exact bar top; only the text is rounded.
        fig.add_trace(
            go.Scatter(
                x=totals.index,
                y=totals,
                text=totals.round(0),
                mode='text',
                textposition='top center',
                showlegend=False
            )
        )

    if show_avg_expense:
        fig.add_hline(y=totals.mean(), line_width=1, line_dash="dash", line_color="black")

    if x_axis_title is not None:
        fig.update_xaxes(title=x_axis_title)

    if y_axis_title is not None:
        fig.update_yaxes(title=y_axis_title)

    if unit is not None:
        fig.update_yaxes(ticksuffix=f" {unit}")

    return fig

In [13]:
# Stacked bars per month, coloured by category, with both optional overlays enabled.
test_bar = stacked_bar(
    df=test_grouped_df,
    x_axis="monat_jahr",
    y_axis="preis",
    color="kategorie",
    show_sum=True,
    show_avg_expense=True,
)
test_bar.show()

## Write Plotly figures to an HTML file

In [16]:
def write_to_html(filename, figure, path=None, overwrite=False):
    """Write a figure as an HTML fragment to ``<path>/<filename>.html``.

    By default the fragment is appended, so repeated calls with the same
    target accumulate figures in one file.

    Parameters
    ----------
    filename : str
        File name without the ``.html`` extension.
    figure : object
        Object providing ``to_html(full_html=..., include_plotlyjs=...)``.
    path : str, optional
        Target directory; the current working directory is used when None.
    overwrite : bool
        If True, replace an existing file instead of appending to it.

    Returns
    -------
    None
        A confirmation message with the target path is printed.
    """
    directory = "." if path is None else path
    html_path = f"{directory}/{filename}.html"

    # 'w' truncates an existing file, 'a' keeps its content and appends.
    mode = 'w' if overwrite else 'a'

    # Explicit UTF-8 keeps non-ASCII labels independent of the platform's
    # default encoding.
    with open(html_path, mode, encoding="utf-8") as f:
        f.write(figure.to_html(full_html=False, include_plotlyjs='cdn'))

    print(f"Figure saved in {html_path}")

In [18]:
# overwrite=True keeps this cell idempotent: with the default (append) every
# re-run would add another copy of the figure to the existing Test.html.
write_to_html(filename="Test",
              figure=test_bar,
              overwrite=True)

Figure saved in ./Test.html
