# Plotly Visualizations

## 1 Preparations

### 1.1 Import modules

In [153]:
import os
import math
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

### 1.2 Specify plotly theme

In [2]:
import plotly.io as pio

Show all available themes

In [4]:
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']

Add custom themes:

- draft → Adds a grey "DRAFT" watermark to the figure
- confidental → Adds a red confidentiality watermark to the figure

In [5]:
# Register a "draft" theme: a large, faint, rotated "DRAFT" label placed
# in the middle of the figure.
pio.templates["draft"] = go.layout.Template(
    layout_annotations=[
        {
            "name": "draft watermark",
            "text": "DRAFT",
            "textangle": -30,
            "opacity": 0.1,
            "font": {"color": "black", "size": 100},
            # x/y are given in "paper" coordinates, so 0.5/0.5 is the centre
            "xref": "paper",
            "yref": "paper",
            "x": 0.5,
            "y": 0.5,
            "showarrow": False,
        }
    ]
)

In [6]:
# Register a theme that stamps a red confidentiality watermark on the figure.
# The registry key keeps its existing spelling ("confidental") so that
# references to the theme elsewhere keep working; only the annotation name
# (previously a copy of "draft watermark") and the visible text are corrected.
pio.templates["confidental"] = go.layout.Template(
    layout_annotations=[
        dict(
            name="confidential watermark",
            text="CONFIDENTIAL",
            textangle=-30,
            opacity=0.1,
            font=dict(color="red", size=100),
            # x/y are given in "paper" coordinates, so 0.5/0.5 is the centre
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
        )
    ]
)

The `draft` and `confidental` themes are now available:

In [7]:
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none', 'draft', 'confidental']

Set default layout

In [8]:
# Alternative: join theme names with "+" to also apply the watermark theme, e.g.
# pio.templates.default = "seaborn+draft"
pio.templates.default = "seaborn"

### 1.3 Get plotly default colors

#### 1.3.1 Get line colors of default-theme

In [9]:
# Name of the theme that is currently configured as the default
active_plotly_theme = pio.templates.default

# Colorway (the default trace colors) defined by that theme
default_theme_colors = pio.templates[active_plotly_theme].layout.colorway
default_theme_colors

('rgb(76,114,176)',
 'rgb(221,132,82)',
 'rgb(85,168,104)',
 'rgb(196,78,82)',
 'rgb(129,114,179)',
 'rgb(147,120,96)',
 'rgb(218,139,195)',
 'rgb(140,140,140)',
 'rgb(204,185,116)',
 'rgb(100,181,205)')

#### 1.3.2 Discrete Colors in Python

[Plotly discrete colors documentation](https://plotly.com/python/discrete-color/)

In [10]:
discrete_colors_fig = px.colors.qualitative.swatches()
discrete_colors_fig.show()

In [11]:
px.colors.qualitative.Plotly

['#636EFA',
 '#EF553B',
 '#00CC96',
 '#AB63FA',
 '#FFA15A',
 '#19D3F3',
 '#FF6692',
 '#B6E880',
 '#FF97FF',
 '#FECB52']

#### 1.3.3 Continuous Colors in Python

[Plotly colorscales documentation](https://plotly.com/python/builtin-colorscales/)

In [12]:
sequential_colors_fig = px.colors.sequential.swatches_continuous()
sequential_colors_fig.show()

In [13]:
px.colors.sequential.Plasma

['#0d0887',
 '#46039f',
 '#7201a8',
 '#9c179e',
 '#bd3786',
 '#d8576b',
 '#ed7953',
 '#fb9f3a',
 '#fdca26',
 '#f0f921']

### 1.4 Load and manipulate test data

In [14]:
test_df = pd.read_csv("./test_data_private/2024-09-28.csv", sep=";", decimal=",")

In [15]:
test_df

Unnamed: 0,ausgabe_haushalt,bemerkung,datum,kategorie,preis,shop,monat_jahr
0,False,,2024-08-31,Drogerie/Apotheke,3.49,Feneberg,2024-08
1,False,,2024-08-31,Drogerie/Apotheke,13.38,Apotheke,2024-08
2,True,,2024-08-31,Lebensmittel,8.95,Feneberg,2024-08
3,False,Grillen,2024-08-30,Lebensmittel,13.17,Feneberg,2024-08
4,True,,2024-08-30,Lebensmittel,4.05,Feneberg,2024-08
...,...,...,...,...,...,...,...
763,True,,2023-10-03,Lebensmittel,5.70,MPreis,2023-10
764,False,,2023-10-02,Vorsorge/Sparen,50.00,Bank,2023-10
765,False,,2023-10-02,Wohnung,25.04,GEZ,2023-10
766,True,,2023-10-02,Lebensmittel,18.77,Feneberg,2023-10


In [16]:
# Sum the prices per month and category
test_grouped_df = test_df.groupby(['monat_jahr', 'kategorie'])['preis'].sum().unstack(fill_value=0).stack() # adds 0 for categories that had no expenses in a month

In [17]:
# Move the group keys (monat_jahr, kategorie) from the index back into regular columns.
# NOTE: this cell is not idempotent - running it a second time on the already
# flattened frame adds an extra "index" column.
test_grouped_df = test_grouped_df.reset_index()

In [18]:
# The summed values sit in a column labelled 0; name it 'preis' again.
# Assign the renamed frame instead of mutating in place.
test_grouped_df = test_grouped_df.rename(columns={0: 'preis'})

In [19]:
test_grouped_df

Unnamed: 0,monat_jahr,kategorie,preis
0,2023-10,Aktivitäten,5.00
1,2023-10,Ausgehen/Alkohol,193.00
2,2023-10,Auto,200.00
3,2023-10,Drogerie/Apotheke,30.62
4,2023-10,Elektronik/Telekommunikation,20.19
...,...,...,...
204,2024-08,Urlaub,288.12
205,2024-08,Versicherungen,0.00
206,2024-08,Vorsorge/Sparen,217.00
207,2024-08,Wohnung,1131.04


In [165]:
whr = pd.read_csv("./test_data_public/WHR_data.csv")

## 2 Visualization

### 2.1 Pareto Chart

In [20]:
def create_pareto(df, x_axis, y_axis, title_y_axis, unit=None, title="Pareto-Chart", subtitle="", show_share=False):
    """Create a Pareto chart: descending bars plus a cumulated-share line.

    Args:
        df: DataFrame holding the data; it is not modified.
        x_axis: Name of the column with the categories (x values).
        y_axis: Name of the numeric column that is sorted, plotted as bars
            and used to compute the shares.
        title_y_axis: Legend name of the bar trace.
        unit: Optional unit appended to the tick labels of the primary y axis.
        title: Figure title.
        subtitle: Text rendered in a smaller font below the title.
        show_share: If True, additionally plot the share of every single
            category on the secondary (percent) axis.

    Returns:
        The figure with the bars on the primary y axis and the percentage
        line(s) on the secondary y axis (fixed to the range 0-100 %).
    """
    # Sort descending; sort_values returns a new frame, the input stays untouched
    sorted_df = df.sort_values(by=[y_axis], ascending=False)

    # Keep the shares in separate Series instead of writing helper columns
    # into the frame (which could clobber existing 'share' columns).
    total = sorted_df[y_axis].sum()
    share = sorted_df[y_axis] / total * 100
    cumulated_share = share.cumsum()

    # Create pareto
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    fig.add_trace(
        go.Bar(x=sorted_df[x_axis],
               y=sorted_df[y_axis],
               name=title_y_axis,
               text=sorted_df[y_axis].round(0),
               textposition="auto"),
        secondary_y=False
    )

    fig.add_trace(
        go.Scatter(x=sorted_df[x_axis],
                   y=cumulated_share,
                   name="Cumulated Share"),
        secondary_y=True
    )

    if show_share:
        fig.add_trace(
            go.Scatter(x=sorted_df[x_axis],
                       y=share,
                       name='Share'),
            secondary_y=True
        )

    fig.update_yaxes(secondary_y=False, showgrid=True)

    if unit is not None:
        fig.update_yaxes(secondary_y=False, ticksuffix=f" {unit}")

    fig.update_yaxes(secondary_y=True, ticksuffix=" %", range=[0, 100], showgrid=True)

    fig.update_layout(title=f"{title}<br><sub>{subtitle}</sub>")

    return fig

In [21]:
# Pareto chart of the expenses per category for August 2024
test_pareto = create_pareto(
    df=test_grouped_df.loc[test_grouped_df["monat_jahr"] == "2024-08"],
    x_axis="kategorie",
    y_axis="preis",
    title_y_axis="Expenses",
    unit="€",
    show_share=True,
)
test_pareto.show()

### 2.2 Stacked bar chart with total sum on each bar

In [23]:
def stacked_bar(df, x_axis, y_axis, color, unit=None, title="Stacked Bar Chart", subtitle="", show_sum=False, x_axis_title=None, y_axis_title=None, show_avg_expense=False):
    """Create a stacked bar chart, optionally annotated with totals and their mean.

    Args:
        df: DataFrame holding the data.
        x_axis: Name of the column used for the x axis (one bar per value).
        y_axis: Name of the numeric column that is stacked.
        color: Name of the column that defines the stacked segments.
        unit: Optional unit appended to the y axis tick labels.
        title: Figure title.
        subtitle: Text rendered in a smaller font below the title.
        show_sum: If True, print the rounded total of every bar above it.
        x_axis_title: Optional title of the x axis.
        y_axis_title: Optional title of the y axis.
        show_avg_expense: If True, draw a dashed horizontal line at the mean
            of the per-bar totals.

    Returns:
        The plotly figure.
    """
    fig = px.bar(df,
                 x=x_axis,
                 y=y_axis,
                 color=color,
                 title=f"{title}<br><sub>{subtitle}</sub>")

    # Both optional decorations need the total per bar; compute it only once
    if show_sum or show_avg_expense:
        totals = df.groupby([x_axis])[y_axis].sum()

    if show_sum:
        rounded_totals = totals.round(0)

        # A text-only trace positioned at the top of every bar
        fig.add_trace(
            go.Scatter(
                x=rounded_totals.index,
                y=rounded_totals,
                text=rounded_totals,
                mode='text',
                textposition='top center',
                showlegend=False
            )
        )

    if show_avg_expense:
        # The mean is taken over the unrounded totals
        fig.add_hline(y=totals.mean(), line_width=1, line_dash="dash", line_color="black")

    if x_axis_title is not None:
        fig.update_xaxes(title=x_axis_title)

    if y_axis_title is not None:
        fig.update_yaxes(title=y_axis_title)

    if unit is not None:
        fig.update_yaxes(ticksuffix=f" {unit}")

    return fig

In [24]:
# Monthly expenses stacked by category, with the total above every bar
# and the average-expense line enabled
test_bar = stacked_bar(
    df=test_grouped_df,
    x_axis="monat_jahr",
    y_axis="preis",
    color="kategorie",
    show_sum=True,
    show_avg_expense=True,
)
test_bar.show()

### 2.3 Subplots

In [34]:
whr

Unnamed: 0,Year,country,region,Ranking,happiness_score,gdp_per_capita,social_support,healthy_life_expectancy,freedom_to_make_life_choices,generosity,perceptions_of_corruption
0,2015,Switzerland,Western Europe,1.0,7.587,1.39651,1.34951,0.94143,0.66557,0.29678,0.41978
1,2015,Iceland,Western Europe,2.0,7.561,1.30232,1.40223,0.94784,0.62877,0.43630,0.14145
2,2015,Denmark,Western Europe,3.0,7.527,1.32548,1.36058,0.87464,0.64938,0.34139,0.48357
3,2015,Norway,Western Europe,4.0,7.522,1.45900,1.33095,0.88521,0.66973,0.34699,0.36503
4,2015,Canada,North America and ANZ,5.0,7.427,1.32629,1.32261,0.90563,0.63297,0.45811,0.32957
...,...,...,...,...,...,...,...,...,...,...,...
1362,2023,Congo (Kinshasa),Sub-Saharan Africa,133.0,3.207,0.53100,0.78400,0.10500,0.37500,0.18300,0.06800
1363,2023,Zimbabwe,Sub-Saharan Africa,134.0,3.204,0.75800,0.88100,0.06900,0.36300,0.11200,0.11700
1364,2023,Sierra Leone,Sub-Saharan Africa,135.0,3.138,0.67000,0.54000,0.09200,0.37100,0.19300,0.05100
1365,2023,Lebanon,Middle East and North Africa,136.0,2.392,1.41700,0.47600,0.39800,0.12300,0.06100,0.02700


In [136]:
def get_color_sequenze(color_sequenze):
    """Resolve the name of a color sequence to the sequence of colors.

    Args:
        color_sequenze: Either
            - "default": the colorway of the active default plotly theme
              (falls back to the "Plotly" palette if the theme defines none),
            - the name of any palette in ``px.colors.qualitative``
              (e.g. "Plotly", "D3", "G10", "Set2", "Vivid"), or
            - an already resolved sequence of colors (anything that is not a
              string), which is returned unchanged.

    Returns:
        The sequence of color strings.

    Raises:
        ValueError: If a string is passed that is neither "default" nor the
            name of a qualitative palette. Returning such a string unchanged
            would make callers treat its characters as colors.
    """
    # Custom color lists/tuples are passed through untouched
    if not isinstance(color_sequenze, str):
        return color_sequenze

    if color_sequenze == "default":
        # Get the default line colors of the active plotly theme
        colorway = pio.templates[pio.templates.default].layout['colorway']
        # A theme that does not define a colorway yields None
        if colorway is None:
            return px.colors.qualitative.Plotly
        return colorway

    # Look the palette up by name; private names and non-list attributes
    # (e.g. the swatches() helper) are not palettes.
    palette = None
    if not color_sequenze.startswith("_"):
        palette = getattr(px.colors.qualitative, color_sequenze, None)

    if not isinstance(palette, list):
        raise ValueError(
            f"Unknown color sequence '{color_sequenze}'. Use 'default' or the "
            "name of a palette in px.colors.qualitative."
        )

    return palette

In [154]:
math.ceil(11/10)

2

In [157]:
def create_subplots(
    df,
    x,
    y,
    rows,
    cols,
    hue,
    title,
    color_sequenze_name="default",
    fig_height=None,
    fig_width=None
):
    """Create a grid of line plots with one consistently colored line per group.

    The (x, y) column pairs are placed into the grid row by row. Every
    distinct value of ``hue`` gets one line per subplot; all lines of a group
    share a color and a legend group, and only the line in the first subplot
    creates a legend entry.

    Args:
        df: DataFrame holding the data.
        x: List of column names for the x axes (one per subplot), or a single
            column name that is used for every subplot.
        y: List of column names for the y axes (one per subplot), or a single
            column name. The names are also used as subplot titles.
        rows: Number of rows of the grid.
        cols: Number of columns of the grid.
        hue: Name of the column that defines the groups (lines).
        title: Figure title.
        color_sequenze_name: Name (or sequence of colors) that is passed to
            ``get_color_sequenze``.
        fig_height: Optional figure height.
        fig_width: Optional figure width.

    Returns:
        The plotly figure.

    Raises:
        ValueError: If more plots are requested than the grid has cells.
    """
    # Accept plain column names as a convenience
    if isinstance(y, str):
        y = [y]
    if isinstance(x, str):
        x = [x] * len(y)

    axis_pairs = list(zip(x, y))
    if len(axis_pairs) > rows * cols:
        raise ValueError(
            f"{len(axis_pairs)} plots do not fit into a grid of {rows} x {cols} subplots."
        )

    legends = df[hue].unique()

    color_sequenze = get_color_sequenze(color_sequenze=color_sequenze_name)

    # Repeat the colors if there are more groups than colors
    if len(color_sequenze) < len(legends):
        color_sequenze = color_sequenze * math.ceil(len(legends) / len(color_sequenze))

    colors = dict(zip(legends, color_sequenze))

    fig = make_subplots(rows=rows, cols=cols, subplot_titles=y)

    for legend in legends:

        temp_df = df[df[hue] == legend]

        for plot_index, (x_item, y_item) in enumerate(axis_pairs):

            # Fill the grid row by row (plotly counts rows/cols from 1)
            row, col = divmod(plot_index, cols)

            fig.add_trace(
                go.Scatter(
                    x=temp_df[x_item],
                    y=temp_df[y_item],
                    name=legend,
                    legendgroup=legend,
                    line=dict(color=colors[legend]),
                    # One legend entry per group is enough: the traces of the
                    # other subplots are toggled through the legend group.
                    showlegend=(plot_index == 0)
                ), row=row + 1, col=col + 1
            )

    fig.update_layout(title_text=title)

    # Height and width are independent of each other
    if fig_height is not None:
        fig.update_layout(height=fig_height)

    if fig_width is not None:
        fig.update_layout(width=fig_width)

    return fig

In [163]:
# Development of six WHR indicators over the years for selected European countries
create_subplots(
    df=whr.loc[
        whr['country'].isin([
            'Germany', 'Austria', 'Switzerland', 'Italy', 'Iceland', 'France',
            'Norway', 'Sweden', 'Denmark', 'Belgium', 'Netherlands',
        ])
    ],
    x=['Year'] * 6,
    y=[
        'Ranking',
        'happiness_score',
        'gdp_per_capita',
        'social_support',
        'healthy_life_expectancy',
        'freedom_to_make_life_choices',
    ],
    rows=2,
    cols=3,
    hue='country',
    title='World Happiness Report',
    color_sequenze_name="Plotly",
    fig_height=600,
    fig_width=1400,
).show()

## 3 Write Plotly figures to HTML-file

In [20]:
def write_to_html(filename, figure, path=None, overwrite=False):
    """Write a figure as an HTML snippet into ``<path>/<filename>.html``.

    The figure is rendered as a ``<div>`` (``full_html=False``) that loads
    plotly.js from the CDN. By default the snippet is appended, so several
    figures can be collected in one file.

    Args:
        filename: Name of the HTML file without the ".html" extension.
        figure: Object providing ``to_html(full_html=..., include_plotlyjs=...)``,
            e.g. a plotly figure.
        path: Target directory; the current working directory if None.
        overwrite: If True, existing file content is replaced instead of
            being appended to.

    Returns:
        None. A confirmation message with the file path is printed.
    """
    if path is None:
        html_path = f"./{filename}.html"
    else:
        html_path = f"{path}/{filename}.html"

    # Render first, so a failing export does not touch an existing file
    html = figure.to_html(full_html=False, include_plotlyjs='cdn')

    # "w" replaces an existing file, "a" appends to it (both create a missing one)
    mode = 'w' if overwrite else 'a'

    # Explicit encoding: the platform default may not be able to encode
    # non-ASCII characters contained in the figure.
    with open(html_path, mode, encoding='utf-8') as f:
        f.write(html)

    print(f"Figure saved in {html_path}")

In [21]:
# Export the stacked bar chart to ./Test.html
write_to_html(filename="Test", figure=test_bar)

Figure saved in ./Test.html
